[RFC,2/4] media: videobuf2: Replace bufs array by a list

Message ID 20230313135916.862852-3-benjamin.gaignard@collabora.com
State New
Headers
Series Allow more than 32 vb2 buffers per queue |

Commit Message

Benjamin Gaignard March 13, 2023, 1:59 p.m. UTC
  Replacing bufs array by a list allows to remove the 32 buffers
limit per queue.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
---
 .../media/common/videobuf2/videobuf2-core.c   | 14 ++------------
 include/media/videobuf2-core.h                | 19 +++++++++++++------
 2 files changed, 15 insertions(+), 18 deletions(-)
  

Comments

Laurent Pinchart March 13, 2023, 6:11 p.m. UTC | #1
Hi Benjamin,

Thank you for the patch.

On Mon, Mar 13, 2023 at 02:59:14PM +0100, Benjamin Gaignard wrote:
> Replacing bufs array by a list allows to remove the 32 buffers
> limit per queue.
> 
> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
> ---
>  .../media/common/videobuf2/videobuf2-core.c   | 14 ++------------
>  include/media/videobuf2-core.h                | 19 +++++++++++++------
>  2 files changed, 15 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
> index b51152ace763..96597d339a07 100644
> --- a/drivers/media/common/videobuf2/videobuf2-core.c
> +++ b/drivers/media/common/videobuf2/videobuf2-core.c
> @@ -412,10 +412,6 @@ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory,
>  	struct vb2_buffer *vb;
>  	int ret;
>  
> -	/* Ensure that q->num_buffers+num_buffers is below VB2_MAX_FRAME */
> -	num_buffers = min_t(unsigned int, num_buffers,
> -			    VB2_MAX_FRAME - q->num_buffers);
> -

We can indeed drop this check now, but shouldn't we introduce some kind
of resource accounting and limitation ? Otherwise any unpriviledged
userspace will be able to starve system memory. This could be
implemented on top, as the problem largely exists today already, but I'd
like to at least record this in a TODO comment.

I also wonder if we should still limit the number of allocated buffers.
The limit could be large, for instance 1024 buffers, and it would be an
in-kernel limit that could be increased later if needed. I'm concerned
that dropping the limit completely will allow userspace to request
UINT_MAX buffers, which may cause integer overflows somewhere. Limiting
the number of buffers would avoid extensive review of all the code that
deals with counting buffers.

>  	for (buffer = 0; buffer < num_buffers; ++buffer) {
>  		/* Allocate vb2 buffer structures */
>  		vb = kzalloc(q->buf_struct_size, GFP_KERNEL);
> @@ -797,9 +793,7 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
>  	/*
>  	 * Make sure the requested values and current defaults are sane.
>  	 */
> -	WARN_ON(q->min_buffers_needed > VB2_MAX_FRAME);
>  	num_buffers = max_t(unsigned int, *count, q->min_buffers_needed);
> -	num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
>  	memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
>  	/*
>  	 * Set this now to ensure that drivers see the correct q->memory value
> @@ -915,11 +909,6 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
>  	bool no_previous_buffers = !q->num_buffers;
>  	int ret;
>  
> -	if (q->num_buffers == VB2_MAX_FRAME) {
> -		dprintk(q, 1, "maximum number of buffers already allocated\n");
> -		return -ENOBUFS;
> -	}
> -
>  	if (no_previous_buffers) {
>  		if (q->waiting_in_dqbuf && *count) {
>  			dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n");
> @@ -944,7 +933,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
>  			return -EINVAL;
>  	}
>  
> -	num_buffers = min(*count, VB2_MAX_FRAME - q->num_buffers);
> +	num_buffers = *count;
>  
>  	if (requested_planes && requested_sizes) {
>  		num_planes = requested_planes;
> @@ -2444,6 +2433,7 @@ int vb2_core_queue_init(struct vb2_queue *q)
>  
>  	INIT_LIST_HEAD(&q->queued_list);
>  	INIT_LIST_HEAD(&q->done_list);
> +	INIT_LIST_HEAD(&q->allocated_bufs);
>  	spin_lock_init(&q->done_lock);
>  	mutex_init(&q->mmap_lock);
>  	init_waitqueue_head(&q->done_wq);
> diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
> index d18c57e7aef0..47f1f35eb9cb 100644
> --- a/include/media/videobuf2-core.h
> +++ b/include/media/videobuf2-core.h
> @@ -276,6 +276,8 @@ struct vb2_buffer {
>  	 * done_entry:		entry on the list that stores all buffers ready
>  	 *			to be dequeued to userspace
>  	 * vb2_plane:		per-plane information; do not change
> +	 * allocated_entry:	entry on the list that stores all buffers allocated
> +	 *			for the queue.
>  	 */
>  	enum vb2_buffer_state	state;
>  	unsigned int		synced:1;
> @@ -287,6 +289,7 @@ struct vb2_buffer {
>  	struct vb2_plane	planes[VB2_MAX_PLANES];
>  	struct list_head	queued_entry;
>  	struct list_head	done_entry;
> +	struct list_head	allocated_entry;
>  #ifdef CONFIG_VIDEO_ADV_DEBUG
>  	/*
>  	 * Counters for how often these buffer-related ops are
> @@ -556,7 +559,7 @@ struct vb2_buf_ops {
>   * @mmap_lock:	private mutex used when buffers are allocated/freed/mmapped
>   * @memory:	current memory type used
>   * @dma_dir:	DMA mapping direction.
> - * @bufs:	videobuf2 buffer structures
> + * @allocated_bufs: list of buffer allocated for the queue.
>   * @num_buffers: number of allocated/used buffers
>   * @queued_list: list of buffers currently queued from userspace
>   * @queued_count: number of buffers queued and ready for streaming.
> @@ -619,7 +622,7 @@ struct vb2_queue {
>  	struct mutex			mmap_lock;
>  	unsigned int			memory;
>  	enum dma_data_direction		dma_dir;
> -	struct vb2_buffer		*bufs[VB2_MAX_FRAME];
> +	struct list_head		allocated_bufs;
>  	unsigned int			num_buffers;
>  
>  	struct list_head		queued_list;
> @@ -1239,8 +1242,12 @@ static inline void vb2_clear_last_buffer_dequeued(struct vb2_queue *q)
>  static inline struct vb2_buffer *vb2_get_buffer(struct vb2_queue *q,
>  						unsigned int index)
>  {
> -	if (index < q->num_buffers)
> -		return q->bufs[index];
> +	struct vb2_buffer *vb;
> +
> +	list_for_each_entry(vb, &q->allocated_bufs, allocated_entry)
> +		if (vb->index == index)
> +			return vb;
> +
>  	return NULL;
>  }
>  
> @@ -1251,7 +1258,7 @@ static inline struct vb2_buffer *vb2_get_buffer(struct vb2_queue *q,
>   */
>  static inline void vb2_set_buffer(struct vb2_queue *q, struct vb2_buffer *vb)
>  {
> -	q->bufs[vb->index] = vb;
> +	list_add_tail(&vb->allocated_entry, &q->allocated_bufs);
>  }
>  
>  /**
> @@ -1261,7 +1268,7 @@ static inline void vb2_set_buffer(struct vb2_queue *q, struct vb2_buffer *vb)
>   */
>  static inline void vb2_del_buffer(struct vb2_queue *q, struct vb2_buffer *vb)
>  {
> -	q->bufs[vb->index] = NULL;
> +	list_del(&vb->allocated_entry);
>  }
>  
>  /*
  
David Laight March 13, 2023, 11:16 p.m. UTC | #2
From: Laurent Pinchart
> Sent: 13 March 2023 18:12
> 
> Hi Benjamin,
> 
> Thank you for the patch.
> 
> On Mon, Mar 13, 2023 at 02:59:14PM +0100, Benjamin Gaignard wrote:
> > Replacing bufs array by a list allows to remove the 32 buffers
> > limit per queue.

Is the limit actually a problem?
Arrays of pointers have locking and caching advantages over
linked lists.

...
> > @@ -1239,8 +1242,12 @@ static inline void vb2_clear_last_buffer_dequeued(struct vb2_queue *q)
> >  static inline struct vb2_buffer *vb2_get_buffer(struct vb2_queue *q,
> >  						unsigned int index)
> >  {
> > -	if (index < q->num_buffers)
> > -		return q->bufs[index];
> > +	struct vb2_buffer *vb;
> > +
> > +	list_for_each_entry(vb, &q->allocated_bufs, allocated_entry)
> > +		if (vb->index == index)
> > +			return vb;
> > +
> >  	return NULL;

You really don't want to be doing that....

There are schemes for unbounded arrays.
Scanning a linked list isn't a very good one.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
Hans Verkuil March 14, 2023, 8:55 a.m. UTC | #3
On 14/03/2023 00:16, David Laight wrote:
> From: Laurent Pinchart
>> Sent: 13 March 2023 18:12
>>
>> Hi Benjamin,
>>
>> Thank you for the patch.
>>
>> On Mon, Mar 13, 2023 at 02:59:14PM +0100, Benjamin Gaignard wrote:
>>> Replacing bufs array by a list allows to remove the 32 buffers
>>> limit per queue.
> 
> Is the limit actually a problem?
> Arrays of pointers have locking and caching advantages over
> linked lists.

I'm not so keen on using a list either. Adding or deleting buffers will
be an infrequency operation, so using an array of pointers and reallocing
it if needed would be perfectly fine. Buffer lookup based on the index
should be really fast, though.

Why not start with a dynamically allocated array of 32 vb2_buffer pointers?
And keep doubling the size (reallocing) whenever more buffers are needed,
up to some maximum (1024 would be a good initial value for that, I think).
This max could be even a module option.

A simple spinlock is sufficient, I think, to regulate access to the
struct vb2_buffer **bufs pointer in vb2_queue. From what I can see it is
not needed in interrupt context (i.e. vb2_buffer_done).

Regards,

	Hans

> 
> ...
>>> @@ -1239,8 +1242,12 @@ static inline void vb2_clear_last_buffer_dequeued(struct vb2_queue *q)
>>>  static inline struct vb2_buffer *vb2_get_buffer(struct vb2_queue *q,
>>>  						unsigned int index)
>>>  {
>>> -	if (index < q->num_buffers)
>>> -		return q->bufs[index];
>>> +	struct vb2_buffer *vb;
>>> +
>>> +	list_for_each_entry(vb, &q->allocated_bufs, allocated_entry)
>>> +		if (vb->index == index)
>>> +			return vb;
>>> +
>>>  	return NULL;
> 
> You really don't want to be doing that....
> 
> There are schemes for unbounded arrays.
> Scanning a linked list isn't a very good one.
> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
  
David Laight March 14, 2023, 10:11 a.m. UTC | #4
From: Hans Verkuil
> Sent: 14 March 2023 08:55
...
> Why not start with a dynamically allocated array of 32 vb2_buffer pointers?
> And keep doubling the size (reallocing) whenever more buffers are needed,
> up to some maximum (1024 would be a good initial value for that, I think).
> This max could be even a module option.

I don't know the typical uses (or the code at all).
But it might be worth having a small array in the structure itself.
Useful if there are typically always (say) less than 8 buffers.
For larger sizes use the (IIRC) kmalloc_size() to find the actual
size of the structure that will be allocate and set the array
size appropriately.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  
Hans Verkuil March 14, 2023, 10:42 a.m. UTC | #5
Hi David,

On 3/14/23 11:11, David Laight wrote:
> From: Hans Verkuil
>> Sent: 14 March 2023 08:55
> ...
>> Why not start with a dynamically allocated array of 32 vb2_buffer pointers?
>> And keep doubling the size (reallocing) whenever more buffers are needed,
>> up to some maximum (1024 would be a good initial value for that, I think).
>> This max could be even a module option.
> 
> I don't know the typical uses (or the code at all).
> But it might be worth having a small array in the structure itself.
> Useful if there are typically always (say) less than 8 buffers.
> For larger sizes use the (IIRC) kmalloc_size() to find the actual
> size of the structure that will be allocate and set the array
> size appropriately.

The typical usage is that applications allocate N buffers with the
VIDIOC_REQBUFS ioctl, and in most cases that's all they use. The
current max is 32 buffers, so allocating that initially (usually
during probe()) will cover all current cases with a single one-time
kzalloc.

Only if the application wants to allocate more than 32 buffers will
there be a slight overhead.

Regards,

	Hans

> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
  
Nicolas Dufresne March 15, 2023, 1:57 p.m. UTC | #6
Le lundi 13 mars 2023 à 20:11 +0200, Laurent Pinchart a écrit :
> > -	/* Ensure that q->num_buffers+num_buffers is below VB2_MAX_FRAME */
> > -	num_buffers = min_t(unsigned int, num_buffers,
> > -			    VB2_MAX_FRAME - q->num_buffers);
> > -
> 
> We can indeed drop this check now, but shouldn't we introduce some kind
> of resource accounting and limitation ? Otherwise any unpriviledged
> userspace will be able to starve system memory. This could be
> implemented on top, as the problem largely exists today already, but I'd
> like to at least record this in a TODO comment.

The current limit already isn't work for resource accounting and limitation for
m2m drivers. You can open a device, allocate 32 buffers, and close that device
keeping the memory around. And redo this process as many times as you want.

A TODO is most appropriate, but I would prefer to see this done at a memory
layer level (rather then v4l2 specific), so that limits and accounting works
with containers and other sandboxes.

> 
> I also wonder if we should still limit the number of allocated buffers.
> The limit could be large, for instance 1024 buffers, and it would be an
> in-kernel limit that could be increased later if needed. I'm concerned
> that dropping the limit completely will allow userspace to request
> UINT_MAX buffers, which may cause integer overflows somewhere. Limiting
> the number of buffers would avoid extensive review of all the code that
> deals with counting buffers.
  
Laurent Pinchart March 19, 2023, 11:33 p.m. UTC | #7
On Wed, Mar 15, 2023 at 09:57:51AM -0400, Nicolas Dufresne wrote:
> Le lundi 13 mars 2023 à 20:11 +0200, Laurent Pinchart a écrit :
> > > -	/* Ensure that q->num_buffers+num_buffers is below VB2_MAX_FRAME */
> > > -	num_buffers = min_t(unsigned int, num_buffers,
> > > -			    VB2_MAX_FRAME - q->num_buffers);
> > > -
> > 
> > We can indeed drop this check now, but shouldn't we introduce some kind
> > of resource accounting and limitation ? Otherwise any unpriviledged
> > userspace will be able to starve system memory. This could be
> > implemented on top, as the problem largely exists today already, but I'd
> > like to at least record this in a TODO comment.
> 
> The current limit already isn't work for resource accounting and limitation for
> m2m drivers. You can open a device, allocate 32 buffers, and close that device
> keeping the memory around. And redo this process as many times as you want.

I know, that's why I mentioned that the problem largely exists today
already.

> A TODO is most appropriate, but I would prefer to see this done at a memory
> layer level (rather then v4l2 specific), so that limits and accounting works
> with containers and other sandboxes.

I haven't thought about how this could be implemented, all I know is
that it's about time to tackle this issue, so I would like to at least
record it.

> > I also wonder if we should still limit the number of allocated buffers.
> > The limit could be large, for instance 1024 buffers, and it would be an
> > in-kernel limit that could be increased later if needed. I'm concerned
> > that dropping the limit completely will allow userspace to request
> > UINT_MAX buffers, which may cause integer overflows somewhere. Limiting
> > the number of buffers would avoid extensive review of all the code that
> > deals with counting buffers.
>
  
Laurent Pinchart March 19, 2023, 11:33 p.m. UTC | #8
Hi Hans,

On Tue, Mar 14, 2023 at 11:42:46AM +0100, Hans Verkuil wrote:
> On 3/14/23 11:11, David Laight wrote:
> > From: Hans Verkuil
> >> Sent: 14 March 2023 08:55
> > ...
> >> Why not start with a dynamically allocated array of 32 vb2_buffer pointers?
> >> And keep doubling the size (reallocing) whenever more buffers are needed,
> >> up to some maximum (1024 would be a good initial value for that, I think).
> >> This max could be even a module option.

The kernel has IDR and IDA APIs, why not use them instead of reinventing
the wheel ?

> > I don't know the typical uses (or the code at all).
> > But it might be worth having a small array in the structure itself.
> > Useful if there are typically always (say) less than 8 buffers.
> > For larger sizes use the (IIRC) kmalloc_size() to find the actual
> > size of the structure that will be allocate and set the array
> > size appropriately.
> 
> The typical usage is that applications allocate N buffers with the
> VIDIOC_REQBUFS ioctl, and in most cases that's all they use.

Note that once we get DELETE_BUF (or DELETE_BUFS) support I'd like to
encourage applications to use the new API, and deprecate REQBUFS
(dropping it isn't on my radar, as it would take forever before no
userspace uses it anymore).

> The
> current max is 32 buffers, so allocating that initially (usually
> during probe()) will cover all current cases with a single one-time
> kzalloc.

Pre-allocating for the most common usage patterns is fine with me.

> Only if the application wants to allocate more than 32 buffers will
> there be a slight overhead.
  
Nicolas Dufresne March 22, 2023, 2:50 p.m. UTC | #9
Hi Laurent,

Le lundi 20 mars 2023 à 01:33 +0200, Laurent Pinchart a écrit :
> > The typical usage is that applications allocate N buffers with the
> > VIDIOC_REQBUFS ioctl, and in most cases that's all they use.
> 
> Note that once we get DELETE_BUF (or DELETE_BUFS) support I'd like to
> encourage applications to use the new API, and deprecate REQBUFS
> (dropping it isn't on my radar, as it would take forever before no
> userspace uses it anymore).

I was wondering if you can extend on this. I'm worried the count semantic might
prevent emulating it over create_bufs() ops, but if that works, did you meant to
emulate it so driver no longer have to implement reqbufs() if they have
create_bufs() ?

Nicolas
  
Laurent Pinchart March 22, 2023, 3:01 p.m. UTC | #10
On Wed, Mar 22, 2023 at 10:50:52AM -0400, Nicolas Dufresne wrote:
> Hi Laurent,
> 
> Le lundi 20 mars 2023 à 01:33 +0200, Laurent Pinchart a écrit :
> > > The typical usage is that applications allocate N buffers with the
> > > VIDIOC_REQBUFS ioctl, and in most cases that's all they use.
> > 
> > Note that once we get DELETE_BUF (or DELETE_BUFS) support I'd like to
> > encourage applications to use the new API, and deprecate REQBUFS
> > (dropping it isn't on my radar, as it would take forever before no
> > userspace uses it anymore).
> 
> I was wondering if you can extend on this. I'm worried the count semantic might
> prevent emulating it over create_bufs() ops, but if that works, did you meant to
> emulate it so driver no longer have to implement reqbufs() if they have
> create_bufs() ?

For drivers it should be fairly simply, as the reqbufs and create_bufs
ioctl handlers should just point to the corresponding videobuf2 helpers.

What I meant is that I'd like to encourage userspace to use the
VIDIOC_CREATE_BUFS ioctl instead of VIDIOC_REQBUFS.
  
Nicolas Dufresne March 24, 2023, 3:14 p.m. UTC | #11
Le mercredi 22 mars 2023 à 17:01 +0200, Laurent Pinchart a écrit :
> On Wed, Mar 22, 2023 at 10:50:52AM -0400, Nicolas Dufresne wrote:
> > Hi Laurent,
> > 
> > Le lundi 20 mars 2023 à 01:33 +0200, Laurent Pinchart a écrit :
> > > > The typical usage is that applications allocate N buffers with the
> > > > VIDIOC_REQBUFS ioctl, and in most cases that's all they use.
> > > 
> > > Note that once we get DELETE_BUF (or DELETE_BUFS) support I'd like to
> > > encourage applications to use the new API, and deprecate REQBUFS
> > > (dropping it isn't on my radar, as it would take forever before no
> > > userspace uses it anymore).
> > 
> > I was wondering if you can extend on this. I'm worried the count semantic might
> > prevent emulating it over create_bufs() ops, but if that works, did you meant to
> > emulate it so driver no longer have to implement reqbufs() if they have
> > create_bufs() ?
> 
> For drivers it should be fairly simply, as the reqbufs and create_bufs
> ioctl handlers should just point to the corresponding videobuf2 helpers.
> 
> What I meant is that I'd like to encourage userspace to use the
> VIDIOC_CREATE_BUFS ioctl instead of VIDIOC_REQBUFS.
> 

I'm not sure what rationale I can give implementer to "encourage" them to use a
more complex API that needs to copy over the FMT (which has just been set),
specially in the initial pre-allocation case. For most, CREATE_BUFS after SMT
will look like a very redundant and counter intuitive thing. Maybe you have a
more optimistic view on the matter ? Or you have a better idea how we could give
a meaning to having a fmt there on the initial case where the allocation matches
the queue FMT ?

Nicolas
  
Hans Verkuil March 24, 2023, 3:18 p.m. UTC | #12
On 24/03/2023 16:14, Nicolas Dufresne wrote:
> Le mercredi 22 mars 2023 à 17:01 +0200, Laurent Pinchart a écrit :
>> On Wed, Mar 22, 2023 at 10:50:52AM -0400, Nicolas Dufresne wrote:
>>> Hi Laurent,
>>>
>>> Le lundi 20 mars 2023 à 01:33 +0200, Laurent Pinchart a écrit :
>>>>> The typical usage is that applications allocate N buffers with the
>>>>> VIDIOC_REQBUFS ioctl, and in most cases that's all they use.
>>>>
>>>> Note that once we get DELETE_BUF (or DELETE_BUFS) support I'd like to
>>>> encourage applications to use the new API, and deprecate REQBUFS
>>>> (dropping it isn't on my radar, as it would take forever before no
>>>> userspace uses it anymore).
>>>
>>> I was wondering if you can extend on this. I'm worried the count semantic might
>>> prevent emulating it over create_bufs() ops, but if that works, did you meant to
>>> emulate it so driver no longer have to implement reqbufs() if they have
>>> create_bufs() ?
>>
>> For drivers it should be fairly simply, as the reqbufs and create_bufs
>> ioctl handlers should just point to the corresponding videobuf2 helpers.
>>
>> What I meant is that I'd like to encourage userspace to use the
>> VIDIOC_CREATE_BUFS ioctl instead of VIDIOC_REQBUFS.
>>
> 
> I'm not sure what rationale I can give implementer to "encourage" them to use a
> more complex API that needs to copy over the FMT (which has just been set),
> specially in the initial pre-allocation case. For most, CREATE_BUFS after SMT
> will look like a very redundant and counter intuitive thing. Maybe you have a
> more optimistic view on the matter ? Or you have a better idea how we could give
> a meaning to having a fmt there on the initial case where the allocation matches
> the queue FMT ?

I wouldn't mind if we can make a much nicer CREATE_BUFS variant with just the
size instead of a format. That was in hindsight a really bad idea, terrible
over-engineering.

Regards,

	Hans
  
Nicolas Dufresne March 24, 2023, 3:34 p.m. UTC | #13
Le vendredi 24 mars 2023 à 16:18 +0100, Hans Verkuil a écrit :
> On 24/03/2023 16:14, Nicolas Dufresne wrote:
> > Le mercredi 22 mars 2023 à 17:01 +0200, Laurent Pinchart a écrit :
> > > On Wed, Mar 22, 2023 at 10:50:52AM -0400, Nicolas Dufresne wrote:
> > > > Hi Laurent,
> > > > 
> > > > Le lundi 20 mars 2023 à 01:33 +0200, Laurent Pinchart a écrit :
> > > > > > The typical usage is that applications allocate N buffers with the
> > > > > > VIDIOC_REQBUFS ioctl, and in most cases that's all they use.
> > > > > 
> > > > > Note that once we get DELETE_BUF (or DELETE_BUFS) support I'd like to
> > > > > encourage applications to use the new API, and deprecate REQBUFS
> > > > > (dropping it isn't on my radar, as it would take forever before no
> > > > > userspace uses it anymore).
> > > > 
> > > > I was wondering if you can extend on this. I'm worried the count semantic might
> > > > prevent emulating it over create_bufs() ops, but if that works, did you meant to
> > > > emulate it so driver no longer have to implement reqbufs() if they have
> > > > create_bufs() ?
> > > 
> > > For drivers it should be fairly simply, as the reqbufs and create_bufs
> > > ioctl handlers should just point to the corresponding videobuf2 helpers.
> > > 
> > > What I meant is that I'd like to encourage userspace to use the
> > > VIDIOC_CREATE_BUFS ioctl instead of VIDIOC_REQBUFS.
> > > 
> > 
> > I'm not sure what rationale I can give implementer to "encourage" them to use a
> > more complex API that needs to copy over the FMT (which has just been set),
> > specially in the initial pre-allocation case. For most, CREATE_BUFS after SMT
> > will look like a very redundant and counter intuitive thing. Maybe you have a
> > more optimistic view on the matter ? Or you have a better idea how we could give
> > a meaning to having a fmt there on the initial case where the allocation matches
> > the queue FMT ?
> 
> I wouldn't mind if we can make a much nicer CREATE_BUFS variant with just the
> size instead of a format. That was in hindsight a really bad idea, terrible
> over-engineering.

Note that all DRM allocators also includes width/height and some format related
info (or the full info). This is because the driver deals with the alignment
requirements. In some use cases (I have inter frame dynamic control in mind
here) the fmt could be a mean to feedback the alignment (like bytesperline) back
to the application where the stream is no longer homogeneous on the FMT.

That being said, If we move toward a size base allocator API, we could also just
point back to an existing HEAP (or export an new heap if none are valid). And
define the sizeimage(s) is now that information you need from the FMT to
allocate anything + which heap needs to be used for the current setup.

Nicolas

> 
> Regards,
> 
> 	Hans
  
Laurent Pinchart March 24, 2023, 8:21 p.m. UTC | #14
On Fri, Mar 24, 2023 at 11:34:48AM -0400, Nicolas Dufresne wrote:
> Le vendredi 24 mars 2023 à 16:18 +0100, Hans Verkuil a écrit :
> > On 24/03/2023 16:14, Nicolas Dufresne wrote:
> > > Le mercredi 22 mars 2023 à 17:01 +0200, Laurent Pinchart a écrit :
> > > > On Wed, Mar 22, 2023 at 10:50:52AM -0400, Nicolas Dufresne wrote:
> > > > > Le lundi 20 mars 2023 à 01:33 +0200, Laurent Pinchart a écrit :
> > > > > > > The typical usage is that applications allocate N buffers with the
> > > > > > > VIDIOC_REQBUFS ioctl, and in most cases that's all they use.
> > > > > > 
> > > > > > Note that once we get DELETE_BUF (or DELETE_BUFS) support I'd like to
> > > > > > encourage applications to use the new API, and deprecate REQBUFS
> > > > > > (dropping it isn't on my radar, as it would take forever before no
> > > > > > userspace uses it anymore).
> > > > > 
> > > > > I was wondering if you can extend on this. I'm worried the count semantic might
> > > > > prevent emulating it over create_bufs() ops, but if that works, did you meant to
> > > > > emulate it so driver no longer have to implement reqbufs() if they have
> > > > > create_bufs() ?
> > > > 
> > > > For drivers it should be fairly simply, as the reqbufs and create_bufs
> > > > ioctl handlers should just point to the corresponding videobuf2 helpers.
> > > > 
> > > > What I meant is that I'd like to encourage userspace to use the
> > > > VIDIOC_CREATE_BUFS ioctl instead of VIDIOC_REQBUFS.
> > > > 
> > > 
> > > I'm not sure what rationale I can give implementer to "encourage" them to use a
> > > more complex API that needs to copy over the FMT (which has just been set),
> > > specially in the initial pre-allocation case. For most, CREATE_BUFS after SMT
> > > will look like a very redundant and counter intuitive thing. Maybe you have a
> > > more optimistic view on the matter ? Or you have a better idea how we could give
> > > a meaning to having a fmt there on the initial case where the allocation matches
> > > the queue FMT ?
> > 
> > I wouldn't mind if we can make a much nicer CREATE_BUFS variant with just the
> > size instead of a format. That was in hindsight a really bad idea, terrible
> > over-engineering.
> 
> Note that all DRM allocators also includes width/height and some format related
> info (or the full info). This is because the driver deals with the alignment
> requirements. In some use cases (I have inter frame dynamic control in mind
> here) the fmt could be a mean to feedback the alignment (like bytesperline) back
> to the application where the stream is no longer homogeneous on the FMT.
> 
> That being said, If we move toward a size base allocator API, we could also just
> point back to an existing HEAP (or export an new heap if none are valid). And
> define the sizeimage(s) is now that information you need from the FMT to
> allocate anything + which heap needs to be used for the current setup.

If we could move away from allocating buffers within V4L2 to only
importing buffers allocated through the DMA heaps API, I'd be very
happy. That won't be simple though. Maybe a good candidate for
discussions during the media summit in Prague this year ?
  

Patch

diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index b51152ace763..96597d339a07 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -412,10 +412,6 @@  static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory,
 	struct vb2_buffer *vb;
 	int ret;
 
-	/* Ensure that q->num_buffers+num_buffers is below VB2_MAX_FRAME */
-	num_buffers = min_t(unsigned int, num_buffers,
-			    VB2_MAX_FRAME - q->num_buffers);
-
 	for (buffer = 0; buffer < num_buffers; ++buffer) {
 		/* Allocate vb2 buffer structures */
 		vb = kzalloc(q->buf_struct_size, GFP_KERNEL);
@@ -797,9 +793,7 @@  int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
 	/*
 	 * Make sure the requested values and current defaults are sane.
 	 */
-	WARN_ON(q->min_buffers_needed > VB2_MAX_FRAME);
 	num_buffers = max_t(unsigned int, *count, q->min_buffers_needed);
-	num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
 	memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
 	/*
 	 * Set this now to ensure that drivers see the correct q->memory value
@@ -915,11 +909,6 @@  int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
 	bool no_previous_buffers = !q->num_buffers;
 	int ret;
 
-	if (q->num_buffers == VB2_MAX_FRAME) {
-		dprintk(q, 1, "maximum number of buffers already allocated\n");
-		return -ENOBUFS;
-	}
-
 	if (no_previous_buffers) {
 		if (q->waiting_in_dqbuf && *count) {
 			dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n");
@@ -944,7 +933,7 @@  int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
 			return -EINVAL;
 	}
 
-	num_buffers = min(*count, VB2_MAX_FRAME - q->num_buffers);
+	num_buffers = *count;
 
 	if (requested_planes && requested_sizes) {
 		num_planes = requested_planes;
@@ -2444,6 +2433,7 @@  int vb2_core_queue_init(struct vb2_queue *q)
 
 	INIT_LIST_HEAD(&q->queued_list);
 	INIT_LIST_HEAD(&q->done_list);
+	INIT_LIST_HEAD(&q->allocated_bufs);
 	spin_lock_init(&q->done_lock);
 	mutex_init(&q->mmap_lock);
 	init_waitqueue_head(&q->done_wq);
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index d18c57e7aef0..47f1f35eb9cb 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -276,6 +276,8 @@  struct vb2_buffer {
 	 * done_entry:		entry on the list that stores all buffers ready
 	 *			to be dequeued to userspace
 	 * vb2_plane:		per-plane information; do not change
+	 * allocated_entry:	entry on the list that stores all buffers allocated
+	 *			for the queue.
 	 */
 	enum vb2_buffer_state	state;
 	unsigned int		synced:1;
@@ -287,6 +289,7 @@  struct vb2_buffer {
 	struct vb2_plane	planes[VB2_MAX_PLANES];
 	struct list_head	queued_entry;
 	struct list_head	done_entry;
+	struct list_head	allocated_entry;
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	/*
 	 * Counters for how often these buffer-related ops are
@@ -556,7 +559,7 @@  struct vb2_buf_ops {
  * @mmap_lock:	private mutex used when buffers are allocated/freed/mmapped
  * @memory:	current memory type used
  * @dma_dir:	DMA mapping direction.
- * @bufs:	videobuf2 buffer structures
+ * @allocated_bufs: list of buffer allocated for the queue.
  * @num_buffers: number of allocated/used buffers
  * @queued_list: list of buffers currently queued from userspace
  * @queued_count: number of buffers queued and ready for streaming.
@@ -619,7 +622,7 @@  struct vb2_queue {
 	struct mutex			mmap_lock;
 	unsigned int			memory;
 	enum dma_data_direction		dma_dir;
-	struct vb2_buffer		*bufs[VB2_MAX_FRAME];
+	struct list_head		allocated_bufs;
 	unsigned int			num_buffers;
 
 	struct list_head		queued_list;
@@ -1239,8 +1242,12 @@  static inline void vb2_clear_last_buffer_dequeued(struct vb2_queue *q)
 static inline struct vb2_buffer *vb2_get_buffer(struct vb2_queue *q,
 						unsigned int index)
 {
-	if (index < q->num_buffers)
-		return q->bufs[index];
+	struct vb2_buffer *vb;
+
+	list_for_each_entry(vb, &q->allocated_bufs, allocated_entry)
+		if (vb->index == index)
+			return vb;
+
 	return NULL;
 }
 
@@ -1251,7 +1258,7 @@  static inline struct vb2_buffer *vb2_get_buffer(struct vb2_queue *q,
  */
 static inline void vb2_set_buffer(struct vb2_queue *q, struct vb2_buffer *vb)
 {
-	q->bufs[vb->index] = vb;
+	list_add_tail(&vb->allocated_entry, &q->allocated_bufs);
 }
 
 /**
@@ -1261,7 +1268,7 @@  static inline void vb2_set_buffer(struct vb2_queue *q, struct vb2_buffer *vb)
  */
 static inline void vb2_del_buffer(struct vb2_queue *q, struct vb2_buffer *vb)
 {
-	q->bufs[vb->index] = NULL;
+	list_del(&vb->allocated_entry);
 }
 
 /*