[bpf-next] bpf: dup xlated insns with kvmalloc+memcpy

Message ID 20221216055436.4698-1-sunhao.th@gmail.com
State New
Headers
Series [bpf-next] bpf: dup xlated insns with kvmalloc+memcpy |

Commit Message

Hao Sun Dec. 16, 2022, 5:54 a.m. UTC
  Currently, kmemdup() is used for allocating and copying xlated insns
in bpf_insn_prepare_dump(). The following warning can be triggered
when dup large amount of insns (roughly BPF_COMPLEXITY_LIMIT_INSNS/2)
because kmemdup() uses kmalloc() which would fail when allocing size
is too big, leading to failure in dump xlated insns:

WARNING: CPU: 2 PID: 7060 at mm/page_alloc.c:5534
Call Trace:
 <TASK>
 __alloc_pages_node include/linux/gfp.h:237 [inline]
 alloc_pages_node include/linux/gfp.h:260 [inline]
 __kmalloc_large_node+0x81/0x160 mm/slab_common.c:1096
 __do_kmalloc_node mm/slab_common.c:943 [inline]
 __kmalloc_node_track_caller.cold+0x5/0x5d mm/slab_common.c:975
 kmemdup+0x29/0x60 mm/util.c:129
 kmemdup include/linux/fortify-string.h:585 [inline]
 bpf_insn_prepare_dump kernel/bpf/syscall.c:3820 [inline]
 bpf_prog_get_info_by_fd+0x9a3/0x2cb0 kernel/bpf/syscall.c:3975
 bpf_obj_get_info_by_fd kernel/bpf/syscall.c:4297 [inline]
 __sys_bpf+0x3928/0x56f0 kernel/bpf/syscall.c:5004
 __do_sys_bpf kernel/bpf/syscall.c:5069 [inline]
 __se_sys_bpf kernel/bpf/syscall.c:5067 [inline]
 ...

So use kvmalloc()+memcpy() to fix this, for small size of insns,
this is same as kmemdup(), but this also support dup large amount
of xlated insns.

Signed-off-by: Hao Sun <sunhao.th@gmail.com>
---
 kernel/bpf/syscall.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)


base-commit: 0e43662e61f2569500ab83b8188c065603530785
  

Comments

Yonghong Song Dec. 16, 2022, 7:03 a.m. UTC | #1
On 12/15/22 9:54 PM, Hao Sun wrote:
> Currently, kmemdup() is used for allocating and copying xlated insns
> in bpf_insn_prepare_dump(). The following warning can be triggered
> when dup large amount of insns (roughly BPF_COMPLEXITY_LIMIT_INSNS/2)
> because kmemdup() uses kmalloc() which would fail when allocing size
> is too big, leading to failure in dump xlated insns:
> 
> WARNING: CPU: 2 PID: 7060 at mm/page_alloc.c:5534
> Call Trace:
>   <TASK>
>   __alloc_pages_node include/linux/gfp.h:237 [inline]
>   alloc_pages_node include/linux/gfp.h:260 [inline]
>   __kmalloc_large_node+0x81/0x160 mm/slab_common.c:1096
>   __do_kmalloc_node mm/slab_common.c:943 [inline]
>   __kmalloc_node_track_caller.cold+0x5/0x5d mm/slab_common.c:975
>   kmemdup+0x29/0x60 mm/util.c:129
>   kmemdup include/linux/fortify-string.h:585 [inline]
>   bpf_insn_prepare_dump kernel/bpf/syscall.c:3820 [inline]
>   bpf_prog_get_info_by_fd+0x9a3/0x2cb0 kernel/bpf/syscall.c:3975
>   bpf_obj_get_info_by_fd kernel/bpf/syscall.c:4297 [inline]
>   __sys_bpf+0x3928/0x56f0 kernel/bpf/syscall.c:5004
>   __do_sys_bpf kernel/bpf/syscall.c:5069 [inline]
>   __se_sys_bpf kernel/bpf/syscall.c:5067 [inline]
>   ...
> 
> So use kvmalloc()+memcpy() to fix this, for small size of insns,
> this is same as kmemdup(), but this also support dup large amount
> of xlated insns.
> 
> Signed-off-by: Hao Sun <sunhao.th@gmail.com>
> ---
>   kernel/bpf/syscall.c | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 35972afb6850..06229fddac0d 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3831,10 +3831,10 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
>   	u8 code;
>   	int i;
>   
> -	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
> -			GFP_USER);

Does kmemdup(prog->insnsi, bpf_prog_insn_size(prog), GFP_USER | 
__GFP_NOWARN) work?

> -	if (!insns)
> +	insns = kvmalloc(bpf_prog_insn_size(prog), GFP_USER | __GFP_NOWARN);
> +	if (unlikely(!insns))
>   		return insns;
> +	memcpy(insns, prog->insnsi, bpf_prog_insn_size(prog));
>   
>   	for (i = 0; i < prog->len; i++) {
>   		code = insns[i].code;
> @@ -3992,7 +3992,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
>   		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
>   		ulen = min_t(u32, info.xlated_prog_len, ulen);
>   		fault = copy_to_user(uinsns, insns_sanitized, ulen);
> -		kfree(insns_sanitized);
> +		kvfree(insns_sanitized);
>   		if (fault)
>   			return -EFAULT;
>   	}
> 
> base-commit: 0e43662e61f2569500ab83b8188c065603530785
  
Hao Sun Dec. 16, 2022, 7:18 a.m. UTC | #2
> On 16 Dec 2022, at 3:03 PM, Yonghong Song <yhs@meta.com> wrote:
> 
> 
> 
> On 12/15/22 9:54 PM, Hao Sun wrote:
>> Currently, kmemdup() is used for allocating and copying xlated insns
>> in bpf_insn_prepare_dump(). The following warning can be triggered
>> when dup large amount of insns (roughly BPF_COMPLEXITY_LIMIT_INSNS/2)
>> because kmemdup() uses kmalloc() which would fail when allocing size
>> is too big, leading to failure in dump xlated insns:
>> WARNING: CPU: 2 PID: 7060 at mm/page_alloc.c:5534
>> Call Trace:
>>  <TASK>
>>  __alloc_pages_node include/linux/gfp.h:237 [inline]
>>  alloc_pages_node include/linux/gfp.h:260 [inline]
>>  __kmalloc_large_node+0x81/0x160 mm/slab_common.c:1096
>>  __do_kmalloc_node mm/slab_common.c:943 [inline]
>>  __kmalloc_node_track_caller.cold+0x5/0x5d mm/slab_common.c:975
>>  kmemdup+0x29/0x60 mm/util.c:129
>>  kmemdup include/linux/fortify-string.h:585 [inline]
>>  bpf_insn_prepare_dump kernel/bpf/syscall.c:3820 [inline]
>>  bpf_prog_get_info_by_fd+0x9a3/0x2cb0 kernel/bpf/syscall.c:3975
>>  bpf_obj_get_info_by_fd kernel/bpf/syscall.c:4297 [inline]
>>  __sys_bpf+0x3928/0x56f0 kernel/bpf/syscall.c:5004
>>  __do_sys_bpf kernel/bpf/syscall.c:5069 [inline]
>>  __se_sys_bpf kernel/bpf/syscall.c:5067 [inline]
>>  ...
>> So use kvmalloc()+memcpy() to fix this, for small size of insns,
>> this is same as kmemdup(), but this also support dup large amount
>> of xlated insns.
>> Signed-off-by: Hao Sun <sunhao.th@gmail.com>
>> ---
>>  kernel/bpf/syscall.c | 8 ++++----
>>  1 file changed, 4 insertions(+), 4 deletions(-)
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 35972afb6850..06229fddac0d 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -3831,10 +3831,10 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
>>   u8 code;
>>   int i;
>>  - insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
>> - GFP_USER);
> 
> Does kmemdup(prog->insnsi, bpf_prog_insn_size(prog), GFP_USER | __GFP_NOWARN) work?

This only suppress the warning, bpf_insn_prepare_dump() still fails because of
the failure of kmalloc() invoked by kmemdup(). 

> 
>> - if (!insns)
>> + insns = kvmalloc(bpf_prog_insn_size(prog), GFP_USER | __GFP_NOWARN);
>> + if (unlikely(!insns))
>>   return insns;
>> + memcpy(insns, prog->insnsi, bpf_prog_insn_size(prog));
>>     for (i = 0; i < prog->len; i++) {
>>   code = insns[i].code;
>> @@ -3992,7 +3992,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
>>   uinsns = u64_to_user_ptr(info.xlated_prog_insns);
>>   ulen = min_t(u32, info.xlated_prog_len, ulen);
>>   fault = copy_to_user(uinsns, insns_sanitized, ulen);
>> - kfree(insns_sanitized);
>> + kvfree(insns_sanitized);
>>   if (fault)
>>   return -EFAULT;
>>   }
>> base-commit: 0e43662e61f2569500ab83b8188c065603530785
  
Daniel Borkmann Dec. 16, 2022, 3:24 p.m. UTC | #3
On 12/16/22 8:18 AM, Hao Sun wrote:
> 
> 
>> On 16 Dec 2022, at 3:03 PM, Yonghong Song <yhs@meta.com> wrote:
>>
>>
>>
>> On 12/15/22 9:54 PM, Hao Sun wrote:
>>> Currently, kmemdup() is used for allocating and copying xlated insns
>>> in bpf_insn_prepare_dump(). The following warning can be triggered
>>> when dup large amount of insns (roughly BPF_COMPLEXITY_LIMIT_INSNS/2)
>>> because kmemdup() uses kmalloc() which would fail when allocing size
>>> is too big, leading to failure in dump xlated insns:
>>> WARNING: CPU: 2 PID: 7060 at mm/page_alloc.c:5534
>>> Call Trace:
>>>   <TASK>
>>>   __alloc_pages_node include/linux/gfp.h:237 [inline]
>>>   alloc_pages_node include/linux/gfp.h:260 [inline]
>>>   __kmalloc_large_node+0x81/0x160 mm/slab_common.c:1096
>>>   __do_kmalloc_node mm/slab_common.c:943 [inline]
>>>   __kmalloc_node_track_caller.cold+0x5/0x5d mm/slab_common.c:975
>>>   kmemdup+0x29/0x60 mm/util.c:129
>>>   kmemdup include/linux/fortify-string.h:585 [inline]
>>>   bpf_insn_prepare_dump kernel/bpf/syscall.c:3820 [inline]
>>>   bpf_prog_get_info_by_fd+0x9a3/0x2cb0 kernel/bpf/syscall.c:3975
>>>   bpf_obj_get_info_by_fd kernel/bpf/syscall.c:4297 [inline]
>>>   __sys_bpf+0x3928/0x56f0 kernel/bpf/syscall.c:5004
>>>   __do_sys_bpf kernel/bpf/syscall.c:5069 [inline]
>>>   __se_sys_bpf kernel/bpf/syscall.c:5067 [inline]
>>>   ...
>>> So use kvmalloc()+memcpy() to fix this, for small size of insns,
>>> this is same as kmemdup(), but this also support dup large amount
>>> of xlated insns.
>>> Signed-off-by: Hao Sun <sunhao.th@gmail.com>
>>> ---
>>>   kernel/bpf/syscall.c | 8 ++++----
>>>   1 file changed, 4 insertions(+), 4 deletions(-)
>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>> index 35972afb6850..06229fddac0d 100644
>>> --- a/kernel/bpf/syscall.c
>>> +++ b/kernel/bpf/syscall.c
>>> @@ -3831,10 +3831,10 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
>>>    u8 code;
>>>    int i;
>>>   - insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
>>> - GFP_USER);
>>
>> Does kmemdup(prog->insnsi, bpf_prog_insn_size(prog), GFP_USER | __GFP_NOWARN) work?
> 
> This only suppress the warning, bpf_insn_prepare_dump() still fails because of
> the failure of kmalloc() invoked by kmemdup().

Ok, instead of open coding, would be nice if we add a helper to mm/util.c :

void *kvmemdup(const void *src, size_t len, gfp_t gfp)
{
         void *p;

         p = kvmalloc(len, gfp);
         if (p)
                 memcpy(p, src, len);
         return p;
}
EXPORT_SYMBOL(kvmemdup);

And then bpf and in future others could make use of it.

Thanks,
Daniel
  

Patch

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35972afb6850..06229fddac0d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3831,10 +3831,10 @@  static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
 	u8 code;
 	int i;
 
-	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
-			GFP_USER);
-	if (!insns)
+	insns = kvmalloc(bpf_prog_insn_size(prog), GFP_USER | __GFP_NOWARN);
+	if (unlikely(!insns))
 		return insns;
+	memcpy(insns, prog->insnsi, bpf_prog_insn_size(prog));
 
 	for (i = 0; i < prog->len; i++) {
 		code = insns[i].code;
@@ -3992,7 +3992,7 @@  static int bpf_prog_get_info_by_fd(struct file *file,
 		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
 		ulen = min_t(u32, info.xlated_prog_len, ulen);
 		fault = copy_to_user(uinsns, insns_sanitized, ulen);
-		kfree(insns_sanitized);
+		kvfree(insns_sanitized);
 		if (fault)
 			return -EFAULT;
 	}