[RFC,bpf-next,v3,05/16] bpf/verifier: add bpf_timer as a kfunc capable type

Message ID 20240221-hid-bpf-sleepable-v3-5-1fb378ca6301@kernel.org
State New
Headers
Series sleepable bpf_timer (was: allow HID-BPF to do device IOs) |

Commit Message

Benjamin Tissoires Feb. 21, 2024, 4:25 p.m. UTC
  We need to extend the bpf_timer API, but the way forward relies on kfuncs.
So make bpf_timer known to kfuncs from the verifier's point of view.

Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>

---

new in v3 (split from v2 02/10)
---
 kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
  

Comments

Eduard Zingerman Feb. 23, 2024, 12:22 a.m. UTC | #1
On Wed, 2024-02-21 at 17:25 +0100, Benjamin Tissoires wrote:

[...]

> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f81c799b2c80..2b11687063ff 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -5444,6 +5444,26 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
>  					return -EACCES;
>  				}
>  				break;
> +			case BPF_TIMER:
> +				/* FIXME: kptr does the above, should we use the same? */

I don't think so.
Basically this allows double-word reads / writes at the timer address,
which probably should not be allowed.

ACCESS_DIRECT is passed to check_map_access() from
check_mem_access(), and I don't see any point where a check_mem_access()
call would be triggered for a pointer parameter of a kfunc
(unless it is accompanied by a size parameter).

I tried the following simple program and it verifies fine:

    struct elem {
    	struct bpf_timer t;
    };

    struct {
    	__uint(type, BPF_MAP_TYPE_ARRAY);
    	__uint(max_entries, 2);
    	__type(key, int);
    	__type(value, struct elem);
    } array SEC(".maps");

    int bpf_timer_set_sleepable_cb
      (struct bpf_timer *timer,
       int (callback_fn)(void *map, int *key, struct bpf_timer *timer))
      __ksym __weak;

    static int cb_sleepable(void *map, int *key, struct bpf_timer *timer)
    {
    	return 0;
    }

    SEC("fentry/bpf_fentry_test5")
    int BPF_PROG2(test_sleepable, int, a)
    {
    	struct bpf_timer *arr_timer;
    	int array_key = ARRAY;

    	arr_timer = bpf_map_lookup_elem(&array, &array_key);
    	if (!arr_timer)
    		return 0;
    	bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);

    	bpf_timer_set_sleepable_cb(arr_timer, cb_sleepable);
    	bpf_timer_start(arr_timer, 0, 0);

    	return 0;
    }

(in general, it would be easier to review if there were some test
 cases to play with).

> +				if (src != ACCESS_DIRECT) {
> +					verbose(env, "bpf_timer cannot be accessed indirectly by helper\n");
> +					return -EACCES;
> +				}
> +				if (!tnum_is_const(reg->var_off)) {
> +					verbose(env, "bpf_timer access cannot have variable offset\n");
> +					return -EACCES;
> +				}
> +				if (p != off + reg->var_off.value) {
> +					verbose(env, "bpf_timer access misaligned expected=%u off=%llu\n",
> +						p, off + reg->var_off.value);
> +					return -EACCES;
> +				}
> +				if (size != bpf_size_to_bytes(BPF_DW)) {
> +					verbose(env, "bpf_timer access size must be BPF_DW\n");
> +					return -EACCES;
> +				}
> +				break;
>  			default:
>  				verbose(env, "%s cannot be accessed directly by load/store\n",
>  					btf_field_type_name(field->type));

[...]
  
Eduard Zingerman Feb. 23, 2024, 12:26 a.m. UTC | #2
On Fri, 2024-02-23 at 02:22 +0200, Eduard Zingerman wrote:
[...]

> > +			case BPF_TIMER:
> > +				/* FIXME: kptr does the above, should we use the same? */

[...]

> I tried the following simple program and it verifies fine: 

Sorry, I meant that I tried it with the above check removed.
  
Eduard Zingerman Feb. 23, 2024, 2:54 p.m. UTC | #3
On Wed, 2024-02-21 at 17:25 +0100, Benjamin Tissoires wrote:
[...]

> @@ -11973,6 +12006,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>  			if (ret)
>  				return ret;
>  			break;
> +		case KF_ARG_PTR_TO_TIMER:
> +			/* FIXME: should we do anything here? */
> +			break;

I think that here it is necessary to enforce that R1
is PTR_TO_MAP_VALUE and that it points to the timer field of the map value.

As is, the following program leads to an in-kernel page fault when
printing the verifier log:

--- 8< ----------------------------

struct elem {
	struct bpf_timer t;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 2);
	__type(key, int);
	__type(value, struct elem);
} array SEC(".maps");

int bpf_timer_set_sleepable_cb
  (struct bpf_timer *timer,
   int (callback_fn)(void *map, int *key, struct bpf_timer *timer))
  __ksym __weak;

static int cb_sleepable(void *map, int *key, struct bpf_timer *timer)
{
	return 0;
}

SEC("fentry/bpf_fentry_test5")
int BPF_PROG2(test_sleepable, int, a)
{
	struct bpf_timer *arr_timer;
	int array_key = 1;

	arr_timer = bpf_map_lookup_elem(&array, &array_key);
	if (!arr_timer)
		return 0;
	bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
	bpf_timer_set_sleepable_cb((void *)&arr_timer, // note incorrect pointer type!
				   cb_sleepable);
	bpf_timer_start(arr_timer, 0, 0);
	return 0;
}

---------------------------- >8 ---

I get the page fault when doing:

    $ ./veristat -l7 -vvv -f test_sleepable timer.bpf.o

[   21.014886] BUG: kernel NULL pointer dereference, address: 0000000000000060
..
[   21.015780] RIP: 0010:print_reg_state (kernel/bpf/log.c:715)

And here is a relevant fragment of print_reg_state():

713	if (type_is_map_ptr(t)) {
714		if (reg->map_ptr->name[0])
715			verbose_a("map=%s", reg->map_ptr->name);
716		verbose_a("ks=%d,vs=%d",
717			  reg->map_ptr->key_size,
718			  reg->map_ptr->value_size);
719	}

The error is caused by reg->map_ptr being NULL.
The code in check_kfunc_args() allows anything in R1,
including registers whose type is not a pointer to a map and whose reg->map_ptr is NULL.
When check_kfunc_call() later runs, it calls push_callback_call():

12152		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12153					 set_timer_callback_state);

This calls set_timer_callback_state(), which sets bogus state for R{1,2,3}:

9683 static int set_timer_callback_state(...)
9684 {
9685	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9687
9688	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9689	 * callback_fn(struct bpf_map *map, void *key, void *value);
9690	 */
9691	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9692	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9693	callee->regs[BPF_REG_1].map_ptr = map_ptr;
                                         ^^^^^^^^^
                                         This is NULL!
  

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f81c799b2c80..2b11687063ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5444,6 +5444,26 @@  static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 					return -EACCES;
 				}
 				break;
+			case BPF_TIMER:
+				/* FIXME: kptr does the above, should we use the same? */
+				if (src != ACCESS_DIRECT) {
+					verbose(env, "bpf_timer cannot be accessed indirectly by helper\n");
+					return -EACCES;
+				}
+				if (!tnum_is_const(reg->var_off)) {
+					verbose(env, "bpf_timer access cannot have variable offset\n");
+					return -EACCES;
+				}
+				if (p != off + reg->var_off.value) {
+					verbose(env, "bpf_timer access misaligned expected=%u off=%llu\n",
+						p, off + reg->var_off.value);
+					return -EACCES;
+				}
+				if (size != bpf_size_to_bytes(BPF_DW)) {
+					verbose(env, "bpf_timer access size must be BPF_DW\n");
+					return -EACCES;
+				}
+				break;
 			default:
 				verbose(env, "%s cannot be accessed directly by load/store\n",
 					btf_field_type_name(field->type));
@@ -10789,6 +10809,7 @@  enum {
 	KF_ARG_LIST_NODE_ID,
 	KF_ARG_RB_ROOT_ID,
 	KF_ARG_RB_NODE_ID,
+	KF_ARG_TIMER_ID,
 };
 
 BTF_ID_LIST(kf_arg_btf_ids)
@@ -10797,6 +10818,7 @@  BTF_ID(struct, bpf_list_head)
 BTF_ID(struct, bpf_list_node)
 BTF_ID(struct, bpf_rb_root)
 BTF_ID(struct, bpf_rb_node)
+BTF_ID(struct, bpf_timer_kern)
 
 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
 				    const struct btf_param *arg, int type)
@@ -10840,6 +10862,12 @@  static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_par
 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
 }
 
+static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
+{
+	bool ret = __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
+	return ret;
+}
+
 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
 				  const struct btf_param *arg)
 {
@@ -10908,6 +10936,7 @@  enum kfunc_ptr_arg_type {
 	KF_ARG_PTR_TO_RB_NODE,
 	KF_ARG_PTR_TO_NULL,
 	KF_ARG_PTR_TO_CONST_STR,
+	KF_ARG_PTR_TO_TIMER,
 };
 
 enum special_kfunc_type {
@@ -11061,6 +11090,9 @@  get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 	if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
 		return KF_ARG_PTR_TO_CONST_STR;
 
+	if (is_kfunc_arg_timer(meta->btf, &args[argno]))
+		return KF_ARG_PTR_TO_TIMER;
+
 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
 		if (!btf_type_is_struct(ref_t)) {
 			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -11693,6 +11725,7 @@  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 		case KF_ARG_PTR_TO_CALLBACK:
 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
 		case KF_ARG_PTR_TO_CONST_STR:
+		case KF_ARG_PTR_TO_TIMER:
 			/* Trusted by default */
 			break;
 		default:
@@ -11973,6 +12006,9 @@  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			if (ret)
 				return ret;
 			break;
+		case KF_ARG_PTR_TO_TIMER:
+			/* FIXME: should we do anything here? */
+			break;
 		}
 	}