[RFC,bpf-next,v3,05/16] bpf/verifier: add bpf_timer as a kfunc capable type
Commit Message
We need to extend the bpf_timer API, but the way forward relies on kfuncs.
So make bpf_timer known to the verifier as a kfunc-capable argument type.
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
new in v3 (split from v2 02/10)
---
kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
Comments
On Wed, 2024-02-21 at 17:25 +0100, Benjamin Tissoires wrote:
[...]
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f81c799b2c80..2b11687063ff 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -5444,6 +5444,26 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
> return -EACCES;
> }
> break;
> + case BPF_TIMER:
> + /* FIXME: kptr does the above, should we use the same? */
I don't think so.
Basically, this allows double-word reads/writes at the timer address,
which probably should not be allowed.
ACCESS_DIRECT is passed to check_map_access() from
check_mem_access(), and I don't see any place where a check_mem_access()
call would be triggered for a pointer parameter of a kfunc
(unless it is accompanied by a size parameter).
I tried the following simple program and it verifies fine:
struct elem {
struct bpf_timer t;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 2);
__type(key, int);
__type(value, struct elem);
} array SEC(".maps");
int bpf_timer_set_sleepable_cb
(struct bpf_timer *timer,
int (callback_fn)(void *map, int *key, struct bpf_timer *timer))
__ksym __weak;
static int cb_sleepable(void *map, int *key, struct bpf_timer *timer)
{
return 0;
}
SEC("fentry/bpf_fentry_test5")
int BPF_PROG2(test_sleepable, int, a)
{
struct bpf_timer *arr_timer;
int array_key = ARRAY;
arr_timer = bpf_map_lookup_elem(&array, &array_key);
if (!arr_timer)
return 0;
bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
bpf_timer_set_sleepable_cb(arr_timer, cb_sleepable);
bpf_timer_start(arr_timer, 0, 0);
return 0;
}
(in general, it would be easier to review if there were some test
cases to play with).
> + if (src != ACCESS_DIRECT) {
> + verbose(env, "bpf_timer cannot be accessed indirectly by helper\n");
> + return -EACCES;
> + }
> + if (!tnum_is_const(reg->var_off)) {
> + verbose(env, "bpf_timer access cannot have variable offset\n");
> + return -EACCES;
> + }
> + if (p != off + reg->var_off.value) {
> + verbose(env, "bpf_timer access misaligned expected=%u off=%llu\n",
> + p, off + reg->var_off.value);
> + return -EACCES;
> + }
> + if (size != bpf_size_to_bytes(BPF_DW)) {
> + verbose(env, "bpf_timer access size must be BPF_DW\n");
> + return -EACCES;
> + }
> + break;
> default:
> verbose(env, "%s cannot be accessed directly by load/store\n",
> btf_field_type_name(field->type));
[...]
On Fri, 2024-02-23 at 02:22 +0200, Eduard Zingerman wrote:
[...]
> > + case BPF_TIMER:
> > + /* FIXME: kptr does the above, should we use the same? */
[...]
> I tried the following simple program and it verifies fine:
Sorry, I meant that I tried it with the above check removed.
On Wed, 2024-02-21 at 17:25 +0100, Benjamin Tissoires wrote:
[...]
> @@ -11973,6 +12006,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
> if (ret)
> return ret;
> break;
> + case KF_ARG_PTR_TO_TIMER:
> + /* FIXME: should we do anything here? */
> + break;
I think that here it is necessary to enforce that R1
is PTR_TO_MAP_VALUE and that it points to the timer field of the map value.
As is, the following program leads to an in-kernel page fault when
printing the verifier log:
--- 8< ----------------------------
struct elem {
struct bpf_timer t;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 2);
__type(key, int);
__type(value, struct elem);
} array SEC(".maps");
int bpf_timer_set_sleepable_cb
(struct bpf_timer *timer,
int (callback_fn)(void *map, int *key, struct bpf_timer *timer))
__ksym __weak;
static int cb_sleepable(void *map, int *key, struct bpf_timer *timer)
{
return 0;
}
SEC("fentry/bpf_fentry_test5")
int BPF_PROG2(test_sleepable, int, a)
{
struct bpf_timer *arr_timer;
int array_key = 1;
arr_timer = bpf_map_lookup_elem(&array, &array_key);
if (!arr_timer)
return 0;
bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
bpf_timer_set_sleepable_cb((void *)&arr_timer, // note incorrect pointer type!
cb_sleepable);
bpf_timer_start(arr_timer, 0, 0);
return 0;
}
---------------------------- >8 ---
I get the page fault when doing:
$ ./veristat -l7 -vvv -f test_sleepable timer.bpf.o
[ 21.014886] BUG: kernel NULL pointer dereference, address: 0000000000000060
..
[ 21.015780] RIP: 0010:print_reg_state (kernel/bpf/log.c:715)
And here is a relevant fragment of print_reg_state():
713 if (type_is_map_ptr(t)) {
714 if (reg->map_ptr->name[0])
715 verbose_a("map=%s", reg->map_ptr->name);
716 verbose_a("ks=%d,vs=%d",
717 reg->map_ptr->key_size,
718 reg->map_ptr->value_size);
719 }
The error is caused by reg->map_ptr being NULL.
The code in check_kfunc_args() allows anything in R1,
including registers whose type is not a pointer to a map and whose reg->map_ptr is NULL.
Later, when check_kfunc_call() runs, it calls push_callback_call():
12152 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12153 set_timer_callback_state);
This calls set_timer_callback_state(), which sets bogus state for R{1,2,3}:
9683 static int set_timer_callback_state(...)
9684 {
9685 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9687
9688 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9689 * callback_fn(struct bpf_map *map, void *key, void *value);
9690 */
9691 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9692 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9693 callee->regs[BPF_REG_1].map_ptr = map_ptr;
^^^^^^^^^
This is NULL!
@@ -5444,6 +5444,26 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
break;
+ case BPF_TIMER:
+ /* FIXME: kptr does the above, should we use the same? */
+ if (src != ACCESS_DIRECT) {
+ verbose(env, "bpf_timer cannot be accessed indirectly by helper\n");
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "bpf_timer access cannot have variable offset\n");
+ return -EACCES;
+ }
+ if (p != off + reg->var_off.value) {
+ verbose(env, "bpf_timer access misaligned expected=%u off=%llu\n",
+ p, off + reg->var_off.value);
+ return -EACCES;
+ }
+ if (size != bpf_size_to_bytes(BPF_DW)) {
+ verbose(env, "bpf_timer access size must be BPF_DW\n");
+ return -EACCES;
+ }
+ break;
default:
verbose(env, "%s cannot be accessed directly by load/store\n",
btf_field_type_name(field->type));
@@ -10789,6 +10809,7 @@ enum {
KF_ARG_LIST_NODE_ID,
KF_ARG_RB_ROOT_ID,
KF_ARG_RB_NODE_ID,
+ KF_ARG_TIMER_ID,
};
BTF_ID_LIST(kf_arg_btf_ids)
@@ -10797,6 +10818,7 @@ BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
BTF_ID(struct, bpf_rb_root)
BTF_ID(struct, bpf_rb_node)
+BTF_ID(struct, bpf_timer_kern)
static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
const struct btf_param *arg, int type)
@@ -10840,6 +10862,12 @@ static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_par
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
}
+static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
+{
+ bool ret = __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
+ return ret;
+}
+
static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
const struct btf_param *arg)
{
@@ -10908,6 +10936,7 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_RB_NODE,
KF_ARG_PTR_TO_NULL,
KF_ARG_PTR_TO_CONST_STR,
+ KF_ARG_PTR_TO_TIMER,
};
enum special_kfunc_type {
@@ -11061,6 +11090,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_CONST_STR;
+ if (is_kfunc_arg_timer(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_TIMER;
+
if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
if (!btf_type_is_struct(ref_t)) {
verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -11693,6 +11725,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_CALLBACK:
case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
case KF_ARG_PTR_TO_CONST_STR:
+ case KF_ARG_PTR_TO_TIMER:
/* Trusted by default */
break;
default:
@@ -11973,6 +12006,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret)
return ret;
break;
+ case KF_ARG_PTR_TO_TIMER:
+ /* FIXME: should we do anything here? */
+ break;
}
}