[v4,3/7] mm/vmalloc.c: allow vread() to read out vm_map_ram areas

Message ID 20230201091339.61761-4-bhe@redhat.com
State New
Headers
Series mm/vmalloc.c: allow vread() to read out vm_map_ram areas |

Commit Message

Baoquan He Feb. 1, 2023, 9:13 a.m. UTC
  Currently, vread can read out vmalloc areas which is associated with
a vm_struct. While this doesn't work for areas created by vm_map_ram()
interface because it doesn't have an associated vm_struct. Then in vread(),
these areas are all skipped.

Here, add a new function vmap_ram_vread() to read out vm_map_ram areas.
The area created with vmap_ram_vread() interface directly can be handled
like the other normal vmap areas with aligned_vread(). While areas
which will be further subdivided and managed with vmap_block need
carefully read out page-aligned small regions and zero fill holes.

Signed-off-by: Baoquan He <bhe@redhat.com>
---
 mm/vmalloc.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 80 insertions(+), 7 deletions(-)
  

Comments

Lorenzo Stoakes Feb. 1, 2023, 8:16 p.m. UTC | #1
On Wed, Feb 01, 2023 at 05:13:35PM +0800, Baoquan He wrote:
> Currently, vread can read out vmalloc areas which is associated with
> a vm_struct. While this doesn't work for areas created by vm_map_ram()
> interface because it doesn't have an associated vm_struct. Then in vread(),
> these areas are all skipped.
>
> Here, add a new function vmap_ram_vread() to read out vm_map_ram areas.
> The area created with vmap_ram_vread() interface directly can be handled
> like the other normal vmap areas with aligned_vread(). While areas
> which will be further subdivided and managed with vmap_block need
> carefully read out page-aligned small regions and zero fill holes.
>
> Signed-off-by: Baoquan He <bhe@redhat.com>
> ---
>  mm/vmalloc.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 80 insertions(+), 7 deletions(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index ab4825050b5c..5a3ea6cb7ec2 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -3544,6 +3544,67 @@ static int aligned_vread(char *buf, char *addr, unsigned long count)
>  	return copied;
>  }
>
> +static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
> +{
> +	char *start;
> +	struct vmap_block *vb;
> +	unsigned long offset;
> +	unsigned int rs, re, n;
> +
> +	/*
> +	 * If it's area created by vm_map_ram() interface directly, but
> +	 * not further subdividing and delegating management to vmap_block,
> +	 * handle it here.
> +	 */
> +	if (!(flags & VMAP_BLOCK)) {
> +		aligned_vread(buf, addr, count);
> +		return;
> +	}
> +
> +	/*
> +	 * Area is split into regions and tracked with vmap_block, read out
> +	 * each region and zero fill the hole between regions.
> +	 */
> +	vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
> +	if (!vb)
> +		goto finished;
> +
> +	spin_lock(&vb->lock);
> +	if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
> +		spin_unlock(&vb->lock);
> +		goto finished;
> +	}
> +	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> +		if (!count)
> +			break;
> +		start = vmap_block_vaddr(vb->va->va_start, rs);
> +		while (addr < start) {
> +			if (count == 0)
> +				break;

Bit pedantic, but you're using the `if (!count)` form of checking whether it's
zero above, but here you explicitly check it, would be good to keep both consistent.

Given you're checking here, perhaps you could simply drop the previous check?

> +			*buf = '\0';
> +			buf++;
> +			addr++;
> +			count--;
> +		}
> +		/*it could start reading from the middle of used region*/
> +		offset = offset_in_page(addr);
> +		n = ((re - rs + 1) << PAGE_SHIFT) - offset;
> +		if (n > count)
> +			n = count;
> +		aligned_vread(buf, start+offset, n);
> +
> +		buf += n;
> +		addr += n;
> +		count -= n;
> +	}
> +	spin_unlock(&vb->lock);
> +
> +finished:
> +	/* zero-fill the left dirty or free regions */
> +	if (count)
> +		memset(buf, 0, count);
> +}
> +
>  /**
>   * vread() - read vmalloc area in a safe way.
>   * @buf:     buffer for reading data
> @@ -3574,7 +3635,7 @@ long vread(char *buf, char *addr, unsigned long count)
>  	struct vm_struct *vm;
>  	char *vaddr, *buf_start = buf;
>  	unsigned long buflen = count;
> -	unsigned long n;
> +	unsigned long n, size, flags;
>
>  	addr = kasan_reset_tag(addr);
>
> @@ -3595,12 +3656,21 @@ long vread(char *buf, char *addr, unsigned long count)
>  		if (!count)
>  			break;
>
> -		if (!va->vm)
> +		vm = va->vm;
> +		flags = va->flags & VMAP_FLAGS_MASK;
> +		/*
> +		 * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need
> +		 * be set together with VMAP_RAM.
> +		 */
> +		WARN_ON(flags == VMAP_BLOCK);
> +
> +		if (!vm && !flags)
>  			continue;
>
> -		vm = va->vm;
> -		vaddr = (char *) vm->addr;
> -		if (addr >= vaddr + get_vm_area_size(vm))
> +		vaddr = (char *) va->va_start;
> +		size = vm ? get_vm_area_size(vm) : va_size(va);
> +
> +		if (addr >= vaddr + size)
>  			continue;
>  		while (addr < vaddr) {
>  			if (count == 0)
> @@ -3610,10 +3680,13 @@ long vread(char *buf, char *addr, unsigned long count)
>  			addr++;
>  			count--;
>  		}
> -		n = vaddr + get_vm_area_size(vm) - addr;
> +		n = vaddr + size - addr;
>  		if (n > count)
>  			n = count;
> -		if (!(vm->flags & VM_IOREMAP))
> +
> +		if (flags & VMAP_RAM)
> +			vmap_ram_vread(buf, addr, n, flags);
> +		else if (!(vm->flags & VM_IOREMAP))
>  			aligned_vread(buf, addr, n);
>  		else /* IOREMAP area is treated as memory hole */
>  			memset(buf, 0, n);
> --
> 2.34.1
>

Other than the nit, feel free to add:-

Reviewed-by: Lorenzo Stoakes <lstoakes@gmail.com>
  
Baoquan He Feb. 2, 2023, 3:20 a.m. UTC | #2
On 02/01/23 at 08:16pm, Lorenzo Stoakes wrote:
> On Wed, Feb 01, 2023 at 05:13:35PM +0800, Baoquan He wrote:
> > Currently, vread can read out vmalloc areas which is associated with
> > a vm_struct. While this doesn't work for areas created by vm_map_ram()
> > interface because it doesn't have an associated vm_struct. Then in vread(),
> > these areas are all skipped.
> >
> > Here, add a new function vmap_ram_vread() to read out vm_map_ram areas.
> > The area created with vmap_ram_vread() interface directly can be handled
> > like the other normal vmap areas with aligned_vread(). While areas
> > which will be further subdivided and managed with vmap_block need
> > carefully read out page-aligned small regions and zero fill holes.
> >
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> > ---
> >  mm/vmalloc.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 80 insertions(+), 7 deletions(-)
> >
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index ab4825050b5c..5a3ea6cb7ec2 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -3544,6 +3544,67 @@ static int aligned_vread(char *buf, char *addr, unsigned long count)
> >  	return copied;
> >  }
> >
> > +static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
> > +{
> > +	char *start;
> > +	struct vmap_block *vb;
> > +	unsigned long offset;
> > +	unsigned int rs, re, n;
> > +
> > +	/*
> > +	 * If it's area created by vm_map_ram() interface directly, but
> > +	 * not further subdividing and delegating management to vmap_block,
> > +	 * handle it here.
> > +	 */
> > +	if (!(flags & VMAP_BLOCK)) {
> > +		aligned_vread(buf, addr, count);
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * Area is split into regions and tracked with vmap_block, read out
> > +	 * each region and zero fill the hole between regions.
> > +	 */
> > +	vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
> > +	if (!vb)
> > +		goto finished;
> > +
> > +	spin_lock(&vb->lock);
> > +	if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
> > +		spin_unlock(&vb->lock);
> > +		goto finished;
> > +	}
> > +	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> > +		if (!count)
> > +			break;
> > +		start = vmap_block_vaddr(vb->va->va_start, rs);
> > +		while (addr < start) {
> > +			if (count == 0)
> > +				break;
> 
> Bit pedantic, but you're using the `if (!count)` form of checking whether it's
> zero above, but here you explicitly check it, would be good to keep both consistent.

Yeah, sounds good. Will change.

> 
> Given you're checking here, perhaps you could simply drop the previous check?

Well, maybe no. The previous "if (!count)" is checking if count is 0
after the 'count -=n;' line at the end of the for_each loop. While this
"if (count == 0)" is checking if count is 0 after 'count--;' at the end
of while loop. Not sure if I got your point.

> 
> > +			*buf = '\0';
> > +			buf++;
> > +			addr++;
> > +			count--;
> > +		}
> > +		/*it could start reading from the middle of used region*/
> > +		offset = offset_in_page(addr);
> > +		n = ((re - rs + 1) << PAGE_SHIFT) - offset;
> > +		if (n > count)
> > +			n = count;
> > +		aligned_vread(buf, start+offset, n);
> > +
> > +		buf += n;
> > +		addr += n;
> > +		count -= n;
> > +	}
> > +	spin_unlock(&vb->lock);
> > +
> > +finished:
> > +	/* zero-fill the left dirty or free regions */
> > +	if (count)
> > +		memset(buf, 0, count);
> > +}
> > +
> >  /**
> >   * vread() - read vmalloc area in a safe way.
> >   * @buf:     buffer for reading data
> > @@ -3574,7 +3635,7 @@ long vread(char *buf, char *addr, unsigned long count)
> >  	struct vm_struct *vm;
> >  	char *vaddr, *buf_start = buf;
> >  	unsigned long buflen = count;
> > -	unsigned long n;
> > +	unsigned long n, size, flags;
> >
> >  	addr = kasan_reset_tag(addr);
> >
> > @@ -3595,12 +3656,21 @@ long vread(char *buf, char *addr, unsigned long count)
> >  		if (!count)
> >  			break;
> >
> > -		if (!va->vm)
> > +		vm = va->vm;
> > +		flags = va->flags & VMAP_FLAGS_MASK;
> > +		/*
> > +		 * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need
> > +		 * be set together with VMAP_RAM.
> > +		 */
> > +		WARN_ON(flags == VMAP_BLOCK);
> > +
> > +		if (!vm && !flags)
> >  			continue;
> >
> > -		vm = va->vm;
> > -		vaddr = (char *) vm->addr;
> > -		if (addr >= vaddr + get_vm_area_size(vm))
> > +		vaddr = (char *) va->va_start;
> > +		size = vm ? get_vm_area_size(vm) : va_size(va);
> > +
> > +		if (addr >= vaddr + size)
> >  			continue;
> >  		while (addr < vaddr) {
> >  			if (count == 0)
> > @@ -3610,10 +3680,13 @@ long vread(char *buf, char *addr, unsigned long count)
> >  			addr++;
> >  			count--;
> >  		}
> > -		n = vaddr + get_vm_area_size(vm) - addr;
> > +		n = vaddr + size - addr;
> >  		if (n > count)
> >  			n = count;
> > -		if (!(vm->flags & VM_IOREMAP))
> > +
> > +		if (flags & VMAP_RAM)
> > +			vmap_ram_vread(buf, addr, n, flags);
> > +		else if (!(vm->flags & VM_IOREMAP))
> >  			aligned_vread(buf, addr, n);
> >  		else /* IOREMAP area is treated as memory hole */
> >  			memset(buf, 0, n);
> > --
> > 2.34.1
> >
> 
> Other than the nit, feel free to add:-
> 
> Reviewed-by: Lorenzo Stoakes <lstoakes@gmail.com>
>
  
Lorenzo Stoakes Feb. 2, 2023, 7:17 a.m. UTC | #3
On Thu, Feb 02, 2023 at 11:20:07AM +0800, Baoquan He wrote:

[snip]

> > > +	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> > > +		if (!count)
> > > +			break;
> > > +		start = vmap_block_vaddr(vb->va->va_start, rs);
> > > +		while (addr < start) {
> > > +			if (count == 0)
> > > +				break;
> >
> > Bit pedantic, but you're using the `if (!count)` form of checking whether it's
> > zero above, but here you explicitly check it, would be good to keep both consistent.
>
> Yeah, sounds good. Will change.
>
> >
> > Given you're checking here, perhaps you could simply drop the previous check?
>
> Well, maybe no. The previous "if (!count)" is checking if count is 0
> after the 'count -=n;' line at the end of the for_each loop. While this
> "if (count == 0)" is checking if count is 0 after 'count--;' at the end
> of while loop. Not sure if I got your point.

You're right, sorry each break is for a different loop :) and I guess the inner
check is feeding the outer one so we're all good.
  
Baoquan He Feb. 2, 2023, 1:18 p.m. UTC | #4
On 02/02/23 at 07:17am, Lorenzo Stoakes wrote:
> On Thu, Feb 02, 2023 at 11:20:07AM +0800, Baoquan He wrote:
> 
> [snip]
> 
> > > > +	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> > > > +		if (!count)
> > > > +			break;
> > > > +		start = vmap_block_vaddr(vb->va->va_start, rs);
> > > > +		while (addr < start) {
> > > > +			if (count == 0)
> > > > +				break;
> > >
> > > Bit pedantic, but you're using the `if (!count)` form of checking whether it's
> > > zero above, but here you explicitly check it, would be good to keep both consistent.
> >
> > Yeah, sounds good. Will change.
> >
> > >
> > > Given you're checking here, perhaps you could simply drop the previous check?
> >
> > Well, maybe no. The previous "if (!count)" is checking if count is 0
> > after the 'count -=n;' line at the end of the for_each loop. While this
> > "if (count == 0)" is checking if count is 0 after 'count--;' at the end
> > of while loop. Not sure if I got your point.
> 
> You're right, sorry each break is for a different loop :) and I guess the inner
> check is feeding the outer one so we're all good.

Oh, the inner check and break only terminates the while loop, but it
should jump to the 'spin_unlock(&vb->lock);' line too as the outer
break does. I will fix this.
  

Patch

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ab4825050b5c..5a3ea6cb7ec2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3544,6 +3544,67 @@  static int aligned_vread(char *buf, char *addr, unsigned long count)
 	return copied;
 }
 
+static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
+{
+	char *start;
+	struct vmap_block *vb;
+	unsigned long offset;
+	unsigned int rs, re, n;
+
+	/*
+	 * If it's area created by vm_map_ram() interface directly, but
+	 * not further subdividing and delegating management to vmap_block,
+	 * handle it here.
+	 */
+	if (!(flags & VMAP_BLOCK)) {
+		aligned_vread(buf, addr, count);
+		return;
+	}
+
+	/*
+	 * Area is split into regions and tracked with vmap_block, read out
+	 * each region and zero fill the hole between regions.
+	 */
+	vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
+	if (!vb)
+		goto finished;
+
+	spin_lock(&vb->lock);
+	if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
+		spin_unlock(&vb->lock);
+		goto finished;
+	}
+	for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
+		if (!count)
+			break;
+		start = vmap_block_vaddr(vb->va->va_start, rs);
+		while (addr < start) {
+			if (count == 0)
+				break;
+			*buf = '\0';
+			buf++;
+			addr++;
+			count--;
+		}
+		/*it could start reading from the middle of used region*/
+		offset = offset_in_page(addr);
+		n = ((re - rs + 1) << PAGE_SHIFT) - offset;
+		if (n > count)
+			n = count;
+		aligned_vread(buf, start+offset, n);
+
+		buf += n;
+		addr += n;
+		count -= n;
+	}
+	spin_unlock(&vb->lock);
+
+finished:
+	/* zero-fill the left dirty or free regions */
+	if (count)
+		memset(buf, 0, count);
+}
+
 /**
  * vread() - read vmalloc area in a safe way.
  * @buf:     buffer for reading data
@@ -3574,7 +3635,7 @@  long vread(char *buf, char *addr, unsigned long count)
 	struct vm_struct *vm;
 	char *vaddr, *buf_start = buf;
 	unsigned long buflen = count;
-	unsigned long n;
+	unsigned long n, size, flags;
 
 	addr = kasan_reset_tag(addr);
 
@@ -3595,12 +3656,21 @@  long vread(char *buf, char *addr, unsigned long count)
 		if (!count)
 			break;
 
-		if (!va->vm)
+		vm = va->vm;
+		flags = va->flags & VMAP_FLAGS_MASK;
+		/*
+		 * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need
+		 * be set together with VMAP_RAM.
+		 */
+		WARN_ON(flags == VMAP_BLOCK);
+
+		if (!vm && !flags)
 			continue;
 
-		vm = va->vm;
-		vaddr = (char *) vm->addr;
-		if (addr >= vaddr + get_vm_area_size(vm))
+		vaddr = (char *) va->va_start;
+		size = vm ? get_vm_area_size(vm) : va_size(va);
+
+		if (addr >= vaddr + size)
 			continue;
 		while (addr < vaddr) {
 			if (count == 0)
@@ -3610,10 +3680,13 @@  long vread(char *buf, char *addr, unsigned long count)
 			addr++;
 			count--;
 		}
-		n = vaddr + get_vm_area_size(vm) - addr;
+		n = vaddr + size - addr;
 		if (n > count)
 			n = count;
-		if (!(vm->flags & VM_IOREMAP))
+
+		if (flags & VMAP_RAM)
+			vmap_ram_vread(buf, addr, n, flags);
+		else if (!(vm->flags & VM_IOREMAP))
 			aligned_vread(buf, addr, n);
 		else /* IOREMAP area is treated as memory hole */
 			memset(buf, 0, n);