[v1,5/5] gpiolib: cdev: Utilize more bitmap APIs

Message ID 20230926052007.3917389-6-andriy.shevchenko@linux.intel.com
State New
Headers
Series bitmap: get rid of bitmap_remap() and bitmap_biremap() uses |

Commit Message

Andy Shevchenko Sept. 26, 2023, 5:20 a.m. UTC
  Currently we have a few bitmap calls that are open coded in the library
module. Let's convert them to use generic bitmap APIs instead.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpiolib-cdev.c | 79 +++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 43 deletions(-)
  

Comments

Yury Norov Sept. 27, 2023, 12:46 a.m. UTC | #1
On Tue, Sep 26, 2023 at 08:20:07AM +0300, Andy Shevchenko wrote:
> Currently we have a few bitmap calls that are open coded in the library
> module. Let's convert them to use generic bitmap APIs instead.
> 
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
>  drivers/gpio/gpiolib-cdev.c | 79 +++++++++++++++++--------------------
>  1 file changed, 36 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
> index e39d344feb28..a5bbbd44531f 100644
> --- a/drivers/gpio/gpiolib-cdev.c
> +++ b/drivers/gpio/gpiolib-cdev.c
> @@ -1263,35 +1263,32 @@ static long linereq_get_values(struct linereq *lr, void __user *ip)
>  {
>  	struct gpio_v2_line_values lv;
>  	DECLARE_BITMAP(vals, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(mask, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(bits, GPIO_V2_LINES_MAX);
>  	struct gpio_desc **descs;
>  	unsigned int i, didx, num_get;
> -	bool val;
>  	int ret;
>  
>  	/* NOTE: It's ok to read values of output lines. */
>  	if (copy_from_user(&lv, ip, sizeof(lv)))
>  		return -EFAULT;
>  
> -	for (num_get = 0, i = 0; i < lr->num_lines; i++) {
> -		if (lv.mask & BIT_ULL(i)) {
> -			num_get++;
> -			descs = &lr->lines[i].desc;
> -		}
> -	}
> +	bitmap_from_arr64(mask, &lv.mask, GPIO_V2_LINES_MAX);
>  
> +	num_get = bitmap_weight(mask, lr->num_lines);
>  	if (num_get == 0)
>  		return -EINVAL;
>  
> -	if (num_get != 1) {
> +	if (num_get == 1) {
> +		descs = &lr->lines[find_first_bit(mask, lr->num_lines)].desc;
> +	} else {
>  		descs = kmalloc_array(num_get, sizeof(*descs), GFP_KERNEL);
>  		if (!descs)
>  			return -ENOMEM;
> -		for (didx = 0, i = 0; i < lr->num_lines; i++) {
> -			if (lv.mask & BIT_ULL(i)) {
> -				descs[didx] = lr->lines[i].desc;
> -				didx++;
> -			}
> -		}
> +
> +		didx = 0;
> +		for_each_set_bit(i, mask, lr->num_lines)
> +			descs[didx++] = lr->lines[i].desc;
>  	}
>  	ret = gpiod_get_array_value_complex(false, true, num_get,
>  					    descs, NULL, vals);
> @@ -1301,19 +1298,15 @@ static long linereq_get_values(struct linereq *lr, void __user *ip)
>  	if (ret)
>  		return ret;
>  
> -	lv.bits = 0;
> -	for (didx = 0, i = 0; i < lr->num_lines; i++) {
> -		if (lv.mask & BIT_ULL(i)) {
> -			if (lr->lines[i].sw_debounced)
> -				val = debounced_value(&lr->lines[i]);
> -			else
> -				val = test_bit(didx, vals);
> -			if (val)
> -				lv.bits |= BIT_ULL(i);
> -			didx++;
> -		}
> +	bitmap_scatter(bits, vals, mask, lr->num_lines);
> +
> +	for_each_set_bit(i, mask, lr->num_lines) {
> +		if (lr->lines[i].sw_debounced)
> +			__assign_bit(i, bits, debounced_value(&lr->lines[i]));
>  	}
>  
> +	bitmap_to_arr64(&lv.bits, bits, GPIO_V2_LINES_MAX);
> +
>  	if (copy_to_user(ip, &lv, sizeof(lv)))
>  		return -EFAULT;
>  
> @@ -1324,35 +1317,35 @@ static long linereq_set_values_unlocked(struct linereq *lr,
>  					struct gpio_v2_line_values *lv)
>  {
>  	DECLARE_BITMAP(vals, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(mask, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(bits, GPIO_V2_LINES_MAX);
>  	struct gpio_desc **descs;
>  	unsigned int i, didx, num_set;
>  	int ret;
>  
> -	bitmap_zero(vals, GPIO_V2_LINES_MAX);
> -	for (num_set = 0, i = 0; i < lr->num_lines; i++) {
> -		if (lv->mask & BIT_ULL(i)) {
> -			if (!test_bit(FLAG_IS_OUT, &lr->lines[i].desc->flags))
> -				return -EPERM;
> -			if (lv->bits & BIT_ULL(i))
> -				__set_bit(num_set, vals);
> -			num_set++;
> -			descs = &lr->lines[i].desc;
> -		}
> -	}
> +	bitmap_from_arr64(mask, &lv->mask, GPIO_V2_LINES_MAX);
> +	bitmap_from_arr64(bits, &lv->bits, GPIO_V2_LINES_MAX);
> +
> +	num_set = bitmap_gather(vals, bits, mask, lr->num_lines);

It looks like GPIO_V2_LINES_MAX is always 64, and so I wonder: is
my understanding correct that all bits in ->mask and ->bits beyond
lr->num_lines are clear?

If so, you can seemingly pass the GPIO_V2_LINES_MAX instead of
lr->num_lines, and that way it will be small_cons_nbits()-optimized.

>  	if (num_set == 0)
>  		return -EINVAL;
>  
> -	if (num_set != 1) {
> +	for_each_set_bit(i, mask, lr->num_lines) {
> +		if (!test_bit(FLAG_IS_OUT, &lr->lines[i].desc->flags))
> +			return -EPERM;
> +	}
> +
> +	if (num_set == 1) {
> +		descs = &lr->lines[find_first_bit(mask, lr->num_lines)].desc;
> +	} else {
>  		/* build compacted desc array and values */
>  		descs = kmalloc_array(num_set, sizeof(*descs), GFP_KERNEL);
>  		if (!descs)
>  			return -ENOMEM;
> -		for (didx = 0, i = 0; i < lr->num_lines; i++) {
> -			if (lv->mask & BIT_ULL(i)) {
> -				descs[didx] = lr->lines[i].desc;
> -				didx++;
> -			}
> -		}
> +
> +		didx = 0;
> +		for_each_set_bit(i, mask, lr->num_lines)
> +			descs[didx++] = lr->lines[i].desc;
>  	}
>  	ret = gpiod_set_array_value_complex(false, true, num_set,
>  					    descs, NULL, vals);
> -- 
> 2.40.0.1.gaa8946217a0b
  
Kent Gibson Sept. 27, 2023, 1:32 a.m. UTC | #2
On Tue, Sep 26, 2023 at 08:20:07AM +0300, Andy Shevchenko wrote:
> Currently we have a few bitmap calls that are open coded in the library
> module. Let's convert them to use generic bitmap APIs instead.
> 

Firstly, I didn't consider using the bitmap module here as, in my mind at
least, that is intended for bitmaps wider than 64 bits, or with variable
width. In this case the bitmap is fixed at 64 bits, so bitops seemed more
appropriate.

And I would argue that they aren't "open coded" - they are parallelized
to reduce the number of passes over the bitmap.
This change serialises them, e.g. the get used to require 2 passes over
the bitmap, it now requires 3 or 4.  The set used to require 1 and now
requires 2.
And there are additional copies that the original doesn't require.
So your change looks less efficient to me - unless there is direct
hardware support for bitmap ops??

Wrt the argument that the serialized form is clearer and more
maintainable, optimised code is frequently more cryptic - as noted in
bitmap.c itself, and this code has remained unchanged since it was merged
3 years ago, so the only maintenance it has required is to be more
maintainable??  Ok then.

Your patch is functionally equivalent and pass my uAPI tests, so 

Tested-by: Kent Gibson <warthog618@gmail.com>

but my preference is to leave it as is.

Cheers,
Kent.

> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
>  drivers/gpio/gpiolib-cdev.c | 79 +++++++++++++++++--------------------
>  1 file changed, 36 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
> index e39d344feb28..a5bbbd44531f 100644
> --- a/drivers/gpio/gpiolib-cdev.c
> +++ b/drivers/gpio/gpiolib-cdev.c
> @@ -1263,35 +1263,32 @@ static long linereq_get_values(struct linereq *lr, void __user *ip)
>  {
>  	struct gpio_v2_line_values lv;
>  	DECLARE_BITMAP(vals, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(mask, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(bits, GPIO_V2_LINES_MAX);
>  	struct gpio_desc **descs;
>  	unsigned int i, didx, num_get;
> -	bool val;
>  	int ret;
>  
>  	/* NOTE: It's ok to read values of output lines. */
>  	if (copy_from_user(&lv, ip, sizeof(lv)))
>  		return -EFAULT;
>  
> -	for (num_get = 0, i = 0; i < lr->num_lines; i++) {
> -		if (lv.mask & BIT_ULL(i)) {
> -			num_get++;
> -			descs = &lr->lines[i].desc;
> -		}
> -	}
> +	bitmap_from_arr64(mask, &lv.mask, GPIO_V2_LINES_MAX);
>  
> +	num_get = bitmap_weight(mask, lr->num_lines);
>  	if (num_get == 0)
>  		return -EINVAL;
>  
> -	if (num_get != 1) {
> +	if (num_get == 1) {
> +		descs = &lr->lines[find_first_bit(mask, lr->num_lines)].desc;
> +	} else {
>  		descs = kmalloc_array(num_get, sizeof(*descs), GFP_KERNEL);
>  		if (!descs)
>  			return -ENOMEM;
> -		for (didx = 0, i = 0; i < lr->num_lines; i++) {
> -			if (lv.mask & BIT_ULL(i)) {
> -				descs[didx] = lr->lines[i].desc;
> -				didx++;
> -			}
> -		}
> +
> +		didx = 0;
> +		for_each_set_bit(i, mask, lr->num_lines)
> +			descs[didx++] = lr->lines[i].desc;
>  	}
>  	ret = gpiod_get_array_value_complex(false, true, num_get,
>  					    descs, NULL, vals);
> @@ -1301,19 +1298,15 @@ static long linereq_get_values(struct linereq *lr, void __user *ip)
>  	if (ret)
>  		return ret;
>  
> -	lv.bits = 0;
> -	for (didx = 0, i = 0; i < lr->num_lines; i++) {
> -		if (lv.mask & BIT_ULL(i)) {
> -			if (lr->lines[i].sw_debounced)
> -				val = debounced_value(&lr->lines[i]);
> -			else
> -				val = test_bit(didx, vals);
> -			if (val)
> -				lv.bits |= BIT_ULL(i);
> -			didx++;
> -		}
> +	bitmap_scatter(bits, vals, mask, lr->num_lines);
> +
> +	for_each_set_bit(i, mask, lr->num_lines) {
> +		if (lr->lines[i].sw_debounced)
> +			__assign_bit(i, bits, debounced_value(&lr->lines[i]));
>  	}
>  
> +	bitmap_to_arr64(&lv.bits, bits, GPIO_V2_LINES_MAX);
> +
>  	if (copy_to_user(ip, &lv, sizeof(lv)))
>  		return -EFAULT;
>  
> @@ -1324,35 +1317,35 @@ static long linereq_set_values_unlocked(struct linereq *lr,
>  					struct gpio_v2_line_values *lv)
>  {
>  	DECLARE_BITMAP(vals, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(mask, GPIO_V2_LINES_MAX);
> +	DECLARE_BITMAP(bits, GPIO_V2_LINES_MAX);
>  	struct gpio_desc **descs;
>  	unsigned int i, didx, num_set;
>  	int ret;
>  
> -	bitmap_zero(vals, GPIO_V2_LINES_MAX);
> -	for (num_set = 0, i = 0; i < lr->num_lines; i++) {
> -		if (lv->mask & BIT_ULL(i)) {
> -			if (!test_bit(FLAG_IS_OUT, &lr->lines[i].desc->flags))
> -				return -EPERM;
> -			if (lv->bits & BIT_ULL(i))
> -				__set_bit(num_set, vals);
> -			num_set++;
> -			descs = &lr->lines[i].desc;
> -		}
> -	}
> +	bitmap_from_arr64(mask, &lv->mask, GPIO_V2_LINES_MAX);
> +	bitmap_from_arr64(bits, &lv->bits, GPIO_V2_LINES_MAX);
> +
> +	num_set = bitmap_gather(vals, bits, mask, lr->num_lines);
>  	if (num_set == 0)
>  		return -EINVAL;
>  
> -	if (num_set != 1) {
> +	for_each_set_bit(i, mask, lr->num_lines) {
> +		if (!test_bit(FLAG_IS_OUT, &lr->lines[i].desc->flags))
> +			return -EPERM;
> +	}
> +
> +	if (num_set == 1) {
> +		descs = &lr->lines[find_first_bit(mask, lr->num_lines)].desc;
> +	} else {
>  		/* build compacted desc array and values */
>  		descs = kmalloc_array(num_set, sizeof(*descs), GFP_KERNEL);
>  		if (!descs)
>  			return -ENOMEM;
> -		for (didx = 0, i = 0; i < lr->num_lines; i++) {
> -			if (lv->mask & BIT_ULL(i)) {
> -				descs[didx] = lr->lines[i].desc;
> -				didx++;
> -			}
> -		}
> +
> +		didx = 0;
> +		for_each_set_bit(i, mask, lr->num_lines)
> +			descs[didx++] = lr->lines[i].desc;
>  	}
>  	ret = gpiod_set_array_value_complex(false, true, num_set,
>  					    descs, NULL, vals);
> -- 
> 2.40.0.1.gaa8946217a0b
>
  
Kent Gibson Sept. 27, 2023, 6:48 a.m. UTC | #3
On Tue, Sep 26, 2023 at 05:46:07PM -0700, Yury Norov wrote:
> On Tue, Sep 26, 2023 at 08:20:07AM +0300, Andy Shevchenko wrote:
> > Currently we have a few bitmap calls that are open coded in the library
> > module. Let's convert them to use generic bitmap APIs instead.
> > 
> > Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>

> > +	bitmap_from_arr64(mask, &lv->mask, GPIO_V2_LINES_MAX);
> > +	bitmap_from_arr64(bits, &lv->bits, GPIO_V2_LINES_MAX);
> > +
> > +	num_set = bitmap_gather(vals, bits, mask, lr->num_lines);
> 
> It looks like GPIO_V2_LINES_MAX is always 64, and so I wonder: is
> my understanding correct that all bits in ->mask and ->bits beyond
> lr->num_lines are clear?
> 

The lv fields come from userspace and so cannot be guaranteed to be
zeroed beyond lr->num_lines.  Any set bits beyond that must be ignored,
one way or another.

> If so, you can seemingly pass the GPIO_V2_LINES_MAX instead of
> lr->num_lines, and that way it will be small_cons_nbits()-optimized.
> 

But that would be decidedly non-optimal for the most common case where
lr->num_lines == 1.

Cheers,
Kent.
  
Andy Shevchenko Sept. 27, 2023, 12:17 p.m. UTC | #4
On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:
> On Tue, Sep 26, 2023 at 08:20:07AM +0300, Andy Shevchenko wrote:
> > Currently we have a few bitmap calls that are open coded in the library
> > module. Let's convert them to use generic bitmap APIs instead.
> 
> Firstly, I didn't consider using the bitmap module here as, in my mind at
> least, that is intended for bitmaps wider than 64 bits, or with variable
> width. In this case the bitmap is fixed at 64 bits, so bitops seemed more
> appropriate.
> 
> And I would argue that they aren't "open coded" - they are parallelized
> to reduce the number of passes over the bitmap.
> This change serialises them, e.g. the get used to require 2 passes over
> the bitmap, it now requires 3 or 4.  The set used to require 1 and now
> requires 2.
> And there are additional copies that the original doesn't require.
> So your change looks less efficient to me - unless there is direct
> hardware support for bitmap ops??
> 
> Wrt the argument that the serialized form is clearer and more
> maintainable, optimised code is frequently more cryptic - as noted in
> bitmap.c itself, and this code has remained unchanged since it was merged
> 3 years ago, so the only maintenance it has required is to be more
> maintainable??  Ok then.
> 
> Your patch is functionally equivalent and pass my uAPI tests, so 
> 
> Tested-by: Kent Gibson <warthog618@gmail.com>

Thanks for testing!

> but my preference is to leave it as is.

As Yury mentioned we need to look at bitmap APIs and make them possible to have
a compile-time optimizations. With that in mind, I would prefer bitmap APIs
over open-coded stuff which is hardly to be understood (yes, I still point
out that it takes a few hours to me, maybe because I'm stupid enough, to
get what's the heck is going one there, esp. for the == 1 case).

Yet, it opens a way to scale this in case we might have v3 ABI that let's say
allows to work with 512 GPIOs at a time. With your code it will be much harder
to achieve and see what you wrote about maintenance (in that case).
  
Kent Gibson Sept. 27, 2023, 1:49 p.m. UTC | #5
On Wed, Sep 27, 2023 at 03:17:06PM +0300, Andy Shevchenko wrote:
> On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:
> > On Tue, Sep 26, 2023 at 08:20:07AM +0300, Andy Shevchenko wrote:
> > > Currently we have a few bitmap calls that are open coded in the library
> > > module. Let's convert them to use generic bitmap APIs instead.
> > 
> > Firstly, I didn't consider using the bitmap module here as, in my mind at
> > least, that is intended for bitmaps wider than 64 bits, or with variable
> > width. In this case the bitmap is fixed at 64 bits, so bitops seemed more
> > appropriate.
> > 
> > And I would argue that they aren't "open coded" - they are parallelized
> > to reduce the number of passes over the bitmap.
> > This change serialises them, e.g. the get used to require 2 passes over
> > the bitmap, it now requires 3 or 4.  The set used to require 1 and now
> > requires 2.
> > And there are additional copies that the original doesn't require.
> > So your change looks less efficient to me - unless there is direct
> > hardware support for bitmap ops??
> > 
> > Wrt the argument that the serialized form is clearer and more
> > maintainable, optimised code is frequently more cryptic - as noted in
> > bitmap.c itself, and this code has remained unchanged since it was merged
> > 3 years ago, so the only maintenance it has required is to be more
> > maintainable??  Ok then.
> > 
> > Your patch is functionally equivalent and pass my uAPI tests, so 
> > 
> > Tested-by: Kent Gibson <warthog618@gmail.com>
> 
> Thanks for testing!
> 

Not a problem - that is what test suites are for.

> > but my preference is to leave it as is.
> 
> As Yury mentioned we need to look at bitmap APIs and make them possible to have
> a compile-time optimizations. With that in mind, I would prefer bitmap APIs
> over open-coded stuff which is hardly to be understood (yes, I still point
> out that it takes a few hours to me, maybe because I'm stupid enough, to
> get what's the heck is going one there, esp. for the == 1 case).
> 

Really?  With all the bits out in the open it seems pretty clear to me.
Clearer than scatter/gather in fact.

Sure, if there is suitable hardware support then bitmaps COULD be faster
than bitops.  But without that, and that is the general case, it will be
slower.  Do you have ANY cases where your implementation is currently
faster?  Then you would have a stronger case.

And if you find the existing implementation unclear then the appropriate
solution is to better document it, as bitmaps itself does, not replace it
with something simpler and slower.

> Yet, it opens a way to scale this in case we might have v3 ABI that let's say
> allows to work with 512 GPIOs at a time. With your code it will be much harder
> to achieve and see what you wrote about maintenance (in that case).
> 

v3 ABI?? libgpiod v2 is barely out the door!
Do you have any cases where 64 lines per request is limiting?
If that sort of speculation isn't premature optimisation then I don't know
what is.

Cheers,
Kent.
  
Andy Shevchenko Sept. 27, 2023, 1:59 p.m. UTC | #6
On Wed, Sep 27, 2023 at 09:49:35PM +0800, Kent Gibson wrote:
> On Wed, Sep 27, 2023 at 03:17:06PM +0300, Andy Shevchenko wrote:
> > On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:
> > > On Tue, Sep 26, 2023 at 08:20:07AM +0300, Andy Shevchenko wrote:
> > > > Currently we have a few bitmap calls that are open coded in the library
> > > > module. Let's convert them to use generic bitmap APIs instead.
> > > 
> > > Firstly, I didn't consider using the bitmap module here as, in my mind at
> > > least, that is intended for bitmaps wider than 64 bits, or with variable
> > > width. In this case the bitmap is fixed at 64 bits, so bitops seemed more
> > > appropriate.
> > > 
> > > And I would argue that they aren't "open coded" - they are parallelized
> > > to reduce the number of passes over the bitmap.
> > > This change serialises them, e.g. the get used to require 2 passes over
> > > the bitmap, it now requires 3 or 4.  The set used to require 1 and now
> > > requires 2.
> > > And there are additional copies that the original doesn't require.
> > > So your change looks less efficient to me - unless there is direct
> > > hardware support for bitmap ops??
> > > 
> > > Wrt the argument that the serialized form is clearer and more
> > > maintainable, optimised code is frequently more cryptic - as noted in
> > > bitmap.c itself, and this code has remained unchanged since it was merged
> > > 3 years ago, so the only maintenance it has required is to be more
> > > maintainable??  Ok then.
> > > 
> > > Your patch is functionally equivalent and pass my uAPI tests, so 
> > > 
> > > Tested-by: Kent Gibson <warthog618@gmail.com>
> > 
> > Thanks for testing!
> 
> Not a problem - that is what test suites are for.
> 
> > > but my preference is to leave it as is.
> > 
> > As Yury mentioned we need to look at bitmap APIs and make them possible to have
> > a compile-time optimizations. With that in mind, I would prefer bitmap APIs
> > over open-coded stuff which is hardly to be understood (yes, I still point
> > out that it takes a few hours to me, maybe because I'm stupid enough, to
> > get what's the heck is going one there, esp. for the == 1 case).
> 
> Really?  With all the bits out in the open it seems pretty clear to me.
> Clearer than scatter/gather in fact.

Yes, you are biased. :-) Ask some stranger about this code and I am pretty sure
there will be double-figures percentage of people who can tell that the current
code is a bit voodoo.

> Sure, if there is suitable hardware support then bitmaps COULD be faster
> than bitops.  But without that, and that is the general case, it will be
> slower.  Do you have ANY cases where your implementation is currently
> faster?  Then you would have a stronger case.

Why do we care here about performance? But if we do, I would check this on
the 32-bit platform where 64-bit operations somewhat problematic / slow.

If Yury gives an idea about performance tests I can consider to add this
piece to compare with and we might see the difference.

> And if you find the existing implementation unclear then the appropriate
> solution is to better document it, as bitmaps itself does, not replace it
> with something simpler and slower.

Documentation will be needed either way. In general statistics it will be 50/50
who (mis)understands this or new code. Pity that the original author of the code
hadn't though about documenting this...

> > Yet, it opens a way to scale this in case we might have v3 ABI that let's say
> > allows to work with 512 GPIOs at a time. With your code it will be much harder
> > to achieve and see what you wrote about maintenance (in that case).
> 
> v3 ABI?? libgpiod v2 is barely out the door!
> Do you have any cases where 64 lines per request is limiting?

IIRC it was SO question where the OP asks exactly about breaking the 64 lines
limitation in the current ABI.

> If that sort of speculation isn't premature optimisation then I don't know
> what is.

No, based on the real question / discussion, just have no link at hand.
But it's quite a niche, I can agree.
  
Kent Gibson Sept. 27, 2023, 2:23 p.m. UTC | #7
On Wed, Sep 27, 2023 at 04:59:34PM +0300, Andy Shevchenko wrote:
> On Wed, Sep 27, 2023 at 09:49:35PM +0800, Kent Gibson wrote:
> > On Wed, Sep 27, 2023 at 03:17:06PM +0300, Andy Shevchenko wrote:
> > > On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:
> > > > On Tue, Sep 26, 2023 at 08:20:07AM +0300, Andy Shevchenko wrote:
> > > > > Currently we have a few bitmap calls that are open coded in the library
> > > > > module. Let's convert them to use generic bitmap APIs instead.
> > > > 
> > > > Firstly, I didn't consider using the bitmap module here as, in my mind at
> > > > least, that is intended for bitmaps wider than 64 bits, or with variable
> > > > width. In this case the bitmap is fixed at 64 bits, so bitops seemed more
> > > > appropriate.
> > > > 
> > > > And I would argue that they aren't "open coded" - they are parallelized
> > > > to reduce the number of passes over the bitmap.
> > > > This change serialises them, e.g. the get used to require 2 passes over
> > > > the bitmap, it now requires 3 or 4.  The set used to require 1 and now
> > > > requires 2.
> > > > And there are additional copies that the original doesn't require.
> > > > So your change looks less efficient to me - unless there is direct
> > > > hardware support for bitmap ops??
> > > > 
> > > > Wrt the argument that the serialized form is clearer and more
> > > > maintainable, optimised code is frequently more cryptic - as noted in
> > > > bitmap.c itself, and this code has remained unchanged since it was merged
> > > > 3 years ago, so the only maintenance it has required is to be more
> > > > maintainable??  Ok then.
> > > > 
> > > > Your patch is functionally equivalent and pass my uAPI tests, so 
> > > > 
> > > > Tested-by: Kent Gibson <warthog618@gmail.com>
> > > 
> > > Thanks for testing!
> > 
> > Not a problem - that is what test suites are for.
> > 
> > > > but my preference is to leave it as is.
> > > 
> > > As Yury mentioned we need to look at bitmap APIs and make them possible to have
> > > a compile-time optimizations. With that in mind, I would prefer bitmap APIs
> > > over open-coded stuff which is hardly to be understood (yes, I still point
> > > out that it takes a few hours to me, maybe because I'm stupid enough, to
> > > get what's the heck is going one there, esp. for the == 1 case).
> > 
> > Really?  With all the bits out in the open it seems pretty clear to me.
> > Clearer than scatter/gather in fact.
> 
> Yes, you are biased. :-) Ask some stranger about this code and I am pretty sure
> there will be double-figures percentage of people who can tell that the current
> code is a bit voodoo.
> 

It is the same as yours - just inside out.  i.e. it performs the ops per
selected line, not each op on the whole bitmap of lines.

> > Sure, if there is suitable hardware support then bitmaps COULD be faster
> > than bitops.  But without that, and that is the general case, it will be
> > slower.  Do you have ANY cases where your implementation is currently
> > faster?  Then you would have a stronger case.
> 
> Why do we care here about performance? But if we do, I would check this on
> the 32-bit platform where 64-bit operations somewhat problematic / slow.
> 

Yet you argue that bitmaps could be more performant??  Pick a side!

> If Yury gives an idea about performance tests I can consider to add this
> piece to compare with and we might see the difference.
> 
> > And if you find the existing implementation unclear then the appropriate
> > solution is to better document it, as bitmaps itself does, not replace it
> > with something simpler and slower.
> 
> Documentation will be needed either way. In general statistics it will be 50/50
> who (mis)understands this or new code. Pity that the original author of the code
> hadn't though about documenting this...
> 

And who was the original author?  I forget.

What you mean to say is it is a pity the reviewers at the time were
satisfied with the code as it stands, right?
Cos there is a process here.
As I recall reviewers were more often than not complaining about
pointless comments, not the lack of comments, so the natural bias as the
author is towards under-documenting...

> > > Yet, it opens a way to scale this in case we might have v3 ABI that let's say
> > > allows to work with 512 GPIOs at a time. With your code it will be much harder
> > > to achieve and see what you wrote about maintenance (in that case).
> > 
> > v3 ABI?? libgpiod v2 is barely out the door!
> > Do you have any cases where 64 lines per request is limiting?
> 
> IIRC it was SO question where the OP asks exactly about breaking the 64 lines
> limitation in the current ABI.
> 
> > If that sort of speculation isn't premature optimisation then I don't know
> > what is.
> 
> No, based on the real question / discussion, just have no link at hand.
> But it's quite a niche, I can agree.
> 

Let me know if you find a ref to that discussion - I'm curious.

Cheers,
Kent.
  
Andy Shevchenko Oct. 2, 2023, 9:05 a.m. UTC | #8
On Wed, Sep 27, 2023 at 10:23:12PM +0800, Kent Gibson wrote:
> On Wed, Sep 27, 2023 at 04:59:34PM +0300, Andy Shevchenko wrote:
> > On Wed, Sep 27, 2023 at 09:49:35PM +0800, Kent Gibson wrote:
> > > On Wed, Sep 27, 2023 at 03:17:06PM +0300, Andy Shevchenko wrote:
> > > > On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:

...

> > > > Yet, it opens a way to scale this in case we might have v3 ABI that let's say
> > > > allows to work with 512 GPIOs at a time. With your code it will be much harder
> > > > to achieve and see what you wrote about maintenance (in that case).
> > > 
> > > v3 ABI?? libgpiod v2 is barely out the door!
> > > Do you have any cases where 64 lines per request is limiting?
> > 
> > IIRC it was SO question where the OP asks exactly about breaking the 64 lines
> > limitation in the current ABI.
> > 
> > > If that sort of speculation isn't premature optimisation then I don't know
> > > what is.
> > 
> > No, based on the real question / discussion, just have no link at hand.
> > But it's quite a niche, I can agree.
> 
> Let me know if you find a ref to that discussion - I'm curious.

Here it is (read comments as well):
https://stackoverflow.com/questions/76307370/control-gpio-from-linux-userspace-with-linux-gpio-h
  
Kent Gibson Oct. 2, 2023, 9:25 a.m. UTC | #9
On Mon, Oct 02, 2023 at 12:05:11PM +0300, Andy Shevchenko wrote:
> On Wed, Sep 27, 2023 at 10:23:12PM +0800, Kent Gibson wrote:
> > On Wed, Sep 27, 2023 at 04:59:34PM +0300, Andy Shevchenko wrote:
> > > On Wed, Sep 27, 2023 at 09:49:35PM +0800, Kent Gibson wrote:
> > > > On Wed, Sep 27, 2023 at 03:17:06PM +0300, Andy Shevchenko wrote:
> > > > > On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:
> 
> ...
> 
> > > > > Yet, it opens a way to scale this in case we might have v3 ABI that let's say
> > > > > allows to work with 512 GPIOs at a time. With your code it will be much harder
> > > > > to achieve and see what you wrote about maintenance (in that case).
> > > > 
> > > > v3 ABI?? libgpiod v2 is barely out the door!
> > > > Do you have any cases where 64 lines per request is limiting?
> > > 
> > > IIRC it was SO question where the OP asks exactly about breaking the 64 lines
> > > limitation in the current ABI.
> > > 
> > > > If that sort of speculation isn't premature optimisation then I don't know
> > > > what is.
> > > 
> > > No, based on the real question / discussion, just have no link at hand.
> > > But it's quite a niche, I can agree.
> > 
> > Let me know if you find a ref to that discussion - I'm curious.
> 
> Here it is (read comments as well):
> https://stackoverflow.com/questions/76307370/control-gpio-from-linux-userspace-with-linux-gpio-h
> 

That question looks to me to be confusing how many GPIOs can be
requested per request (64) and in total (effectively unlimited) - thinking
they are the same.
That could be due to their desire to use the gpiod_chip_get_all_lines()
convenience function with a chip with more than 64 lines, rather than
because they have an actual need for the lines to be managed in a single
request.

So that doesn't look like a genuine use case to me - just a "what if I
want to do X" question.  Certainly not something that would warrant a v3
ABI.

Cheers,
Kent.
  
Andy Shevchenko Oct. 2, 2023, 9:32 a.m. UTC | #10
On Mon, Oct 02, 2023 at 05:25:05PM +0800, Kent Gibson wrote:
> On Mon, Oct 02, 2023 at 12:05:11PM +0300, Andy Shevchenko wrote:
> > On Wed, Sep 27, 2023 at 10:23:12PM +0800, Kent Gibson wrote:
> > > On Wed, Sep 27, 2023 at 04:59:34PM +0300, Andy Shevchenko wrote:
> > > > On Wed, Sep 27, 2023 at 09:49:35PM +0800, Kent Gibson wrote:
> > > > > On Wed, Sep 27, 2023 at 03:17:06PM +0300, Andy Shevchenko wrote:
> > > > > > On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:

...

> > > > > > Yet, it opens a way to scale this in case we might have v3 ABI that let's say
> > > > > > allows to work with 512 GPIOs at a time. With your code it will be much harder
> > > > > > to achieve and see what you wrote about maintenance (in that case).
> > > > > 
> > > > > v3 ABI?? libgpiod v2 is barely out the door!
> > > > > Do you have any cases where 64 lines per request is limiting?
> > > > 
> > > > IIRC it was SO question where the OP asks exactly about breaking the 64 lines
> > > > limitation in the current ABI.
> > > > 
> > > > > If that sort of speculation isn't premature optimisation then I don't know
> > > > > what is.
> > > > 
> > > > No, based on the real question / discussion, just have no link at hand.
> > > > But it's quite a niche, I can agree.
> > > 
> > > Let me know if you find a ref to that discussion - I'm curious.
> > 
> > Here it is (read comments as well):
> > https://stackoverflow.com/questions/76307370/control-gpio-from-linux-userspace-with-linux-gpio-h
> > 
> 
> That question looks to me to be confusing how many GPIOs can be
> requested per request (64) and in total (effectively unlimited) - thinking
> they are the same.
> That could be due to their desire to use the gpiod_chip_get_all_lines()
> convenience function with a chip with more than 64 lines, rather than
> because they have an actual need for the lines to be managed in a single
> request.
> 
> So that doesn't look like a genuine use case to me - just a "what if I
> want to do X" question.  Certainly not something that would warrant a v3
> ABI.

Sure, and I'm not talking about v3 ABI to go for, see the word "might" in my
reply in the first paragraph of this message.
  
Kent Gibson Oct. 2, 2023, 9:42 a.m. UTC | #11
On Mon, Oct 02, 2023 at 12:32:22PM +0300, Andy Shevchenko wrote:
> On Mon, Oct 02, 2023 at 05:25:05PM +0800, Kent Gibson wrote:
> > On Mon, Oct 02, 2023 at 12:05:11PM +0300, Andy Shevchenko wrote:
> > > On Wed, Sep 27, 2023 at 10:23:12PM +0800, Kent Gibson wrote:
> > > > On Wed, Sep 27, 2023 at 04:59:34PM +0300, Andy Shevchenko wrote:
> > > > > On Wed, Sep 27, 2023 at 09:49:35PM +0800, Kent Gibson wrote:
> > > > > > On Wed, Sep 27, 2023 at 03:17:06PM +0300, Andy Shevchenko wrote:
> > > > > > > On Wed, Sep 27, 2023 at 09:32:11AM +0800, Kent Gibson wrote:
> 
> ...
> 
> > > > > > > Yet, it opens a way to scale this in case we might have v3 ABI that let's say
> > > > > > > allows to work with 512 GPIOs at a time. With your code it will be much harder
> > > > > > > to achieve and see what you wrote about maintenance (in that case).
> > > > > > 
> > > > > > v3 ABI?? libgpiod v2 is barely out the door!
> > > > > > Do you have any cases where 64 lines per request is limiting?
> > > > > 
> > > > > IIRC it was SO question where the OP asks exactly about breaking the 64 lines
> > > > > limitation in the current ABI.
> > > > > 
> > > > > > If that sort of speculation isn't premature optimisation then I don't know
> > > > > > what is.
> > > > > 
> > > > > No, based on the real question / discussion, just have no link at hand.
> > > > > But it's quite a niche, I can agree.
> > > > 
> > > > Let me know if you find a ref to that discussion - I'm curious.
> > > 
> > > Here it is (read comments as well):
> > > https://stackoverflow.com/questions/76307370/control-gpio-from-linux-userspace-with-linux-gpio-h
> > > 
> > 
> > That question looks to me to be confusing how many GPIOs can be
> > requested per request (64) and in total (effectively unlimited) - thinking
> > they are the same.
> > That could be due to their desire to use the gpiod_chip_get_all_lines()
> > convenience function with a chip with more than 64 lines, rather than
> > because they have an actual need for the lines to be managed in a single
> > request.
> > 
> > So that doesn't look like a genuine use case to me - just a "what if I
> > want to do X" question.  Certainly not something that would warrant a v3
> > ABI.
> 
> Sure, and I'm not talking about v3 ABI to go for, see the word "might" in my
> reply in the first paragraph of this message.
> 

Ok, so your original point was pure speculation.

Cheers,
Kent.
  

Patch

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index e39d344feb28..a5bbbd44531f 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -1263,35 +1263,32 @@  static long linereq_get_values(struct linereq *lr, void __user *ip)
 {
 	struct gpio_v2_line_values lv;
 	DECLARE_BITMAP(vals, GPIO_V2_LINES_MAX);
+	DECLARE_BITMAP(mask, GPIO_V2_LINES_MAX);
+	DECLARE_BITMAP(bits, GPIO_V2_LINES_MAX);
 	struct gpio_desc **descs;
 	unsigned int i, didx, num_get;
-	bool val;
 	int ret;
 
 	/* NOTE: It's ok to read values of output lines. */
 	if (copy_from_user(&lv, ip, sizeof(lv)))
 		return -EFAULT;
 
-	for (num_get = 0, i = 0; i < lr->num_lines; i++) {
-		if (lv.mask & BIT_ULL(i)) {
-			num_get++;
-			descs = &lr->lines[i].desc;
-		}
-	}
+	bitmap_from_arr64(mask, &lv.mask, GPIO_V2_LINES_MAX);
 
+	num_get = bitmap_weight(mask, lr->num_lines);
 	if (num_get == 0)
 		return -EINVAL;
 
-	if (num_get != 1) {
+	if (num_get == 1) {
+		descs = &lr->lines[find_first_bit(mask, lr->num_lines)].desc;
+	} else {
 		descs = kmalloc_array(num_get, sizeof(*descs), GFP_KERNEL);
 		if (!descs)
 			return -ENOMEM;
-		for (didx = 0, i = 0; i < lr->num_lines; i++) {
-			if (lv.mask & BIT_ULL(i)) {
-				descs[didx] = lr->lines[i].desc;
-				didx++;
-			}
-		}
+
+		didx = 0;
+		for_each_set_bit(i, mask, lr->num_lines)
+			descs[didx++] = lr->lines[i].desc;
 	}
 	ret = gpiod_get_array_value_complex(false, true, num_get,
 					    descs, NULL, vals);
@@ -1301,19 +1298,15 @@  static long linereq_get_values(struct linereq *lr, void __user *ip)
 	if (ret)
 		return ret;
 
-	lv.bits = 0;
-	for (didx = 0, i = 0; i < lr->num_lines; i++) {
-		if (lv.mask & BIT_ULL(i)) {
-			if (lr->lines[i].sw_debounced)
-				val = debounced_value(&lr->lines[i]);
-			else
-				val = test_bit(didx, vals);
-			if (val)
-				lv.bits |= BIT_ULL(i);
-			didx++;
-		}
+	bitmap_scatter(bits, vals, mask, lr->num_lines);
+
+	for_each_set_bit(i, mask, lr->num_lines) {
+		if (lr->lines[i].sw_debounced)
+			__assign_bit(i, bits, debounced_value(&lr->lines[i]));
 	}
 
+	bitmap_to_arr64(&lv.bits, bits, GPIO_V2_LINES_MAX);
+
 	if (copy_to_user(ip, &lv, sizeof(lv)))
 		return -EFAULT;
 
@@ -1324,35 +1317,35 @@  static long linereq_set_values_unlocked(struct linereq *lr,
 					struct gpio_v2_line_values *lv)
 {
 	DECLARE_BITMAP(vals, GPIO_V2_LINES_MAX);
+	DECLARE_BITMAP(mask, GPIO_V2_LINES_MAX);
+	DECLARE_BITMAP(bits, GPIO_V2_LINES_MAX);
 	struct gpio_desc **descs;
 	unsigned int i, didx, num_set;
 	int ret;
 
-	bitmap_zero(vals, GPIO_V2_LINES_MAX);
-	for (num_set = 0, i = 0; i < lr->num_lines; i++) {
-		if (lv->mask & BIT_ULL(i)) {
-			if (!test_bit(FLAG_IS_OUT, &lr->lines[i].desc->flags))
-				return -EPERM;
-			if (lv->bits & BIT_ULL(i))
-				__set_bit(num_set, vals);
-			num_set++;
-			descs = &lr->lines[i].desc;
-		}
-	}
+	bitmap_from_arr64(mask, &lv->mask, GPIO_V2_LINES_MAX);
+	bitmap_from_arr64(bits, &lv->bits, GPIO_V2_LINES_MAX);
+
+	num_set = bitmap_gather(vals, bits, mask, lr->num_lines);
 	if (num_set == 0)
 		return -EINVAL;
 
-	if (num_set != 1) {
+	for_each_set_bit(i, mask, lr->num_lines) {
+		if (!test_bit(FLAG_IS_OUT, &lr->lines[i].desc->flags))
+			return -EPERM;
+	}
+
+	if (num_set == 1) {
+		descs = &lr->lines[find_first_bit(mask, lr->num_lines)].desc;
+	} else {
 		/* build compacted desc array and values */
 		descs = kmalloc_array(num_set, sizeof(*descs), GFP_KERNEL);
 		if (!descs)
 			return -ENOMEM;
-		for (didx = 0, i = 0; i < lr->num_lines; i++) {
-			if (lv->mask & BIT_ULL(i)) {
-				descs[didx] = lr->lines[i].desc;
-				didx++;
-			}
-		}
+
+		didx = 0;
+		for_each_set_bit(i, mask, lr->num_lines)
+			descs[didx++] = lr->lines[i].desc;
 	}
 	ret = gpiod_set_array_value_complex(false, true, num_set,
 					    descs, NULL, vals);