[net-next,v6,4/4] eventpoll: Add epoll ioctl for epoll_params
Commit Message
Add an ioctl for getting and setting epoll_params. User programs can use
this ioctl to get and set the busy poll usec time, packet budget, and
prefer busy poll params for a specific epoll context.
Parameters are limited:
- busy_poll_usecs is limited to <= u32_max
- busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
users (!capable(CAP_NET_ADMIN))
- prefer_busy_poll must be 0 or 1
- __pad must be 0
Signed-off-by: Joe Damato <jdamato@fastly.com>
---
.../userspace-api/ioctl/ioctl-number.rst | 1 +
fs/eventpoll.c | 73 +++++++++++++++++++
include/uapi/linux/eventpoll.h | 13 ++++
3 files changed, 87 insertions(+)
Comments
On 05. 02. 24, 22:04, Joe Damato wrote:
> Add an ioctl for getting and setting epoll_params. User programs can use
> this ioctl to get and set the busy poll usec time, packet budget, and
> prefer busy poll params for a specific epoll context.
>
> Parameters are limited:
> - busy_poll_usecs is limited to <= u32_max
> - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
> users (!capable(CAP_NET_ADMIN))
> - prefer_busy_poll must be 0 or 1
> - __pad must be 0
>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
..
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
..
> @@ -497,6 +498,50 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
> ep->napi_id = napi_id;
> }
>
> +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct eventpoll *ep;
> + struct epoll_params epoll_params;
> + void __user *uarg = (void __user *) arg;
> +
> + ep = file->private_data;
This might have been on the ep declaration line.
> + switch (cmd) {
> + case EPIOCSPARAMS:
> + if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> + return -EFAULT;
> +
> + if (memchr_inv(epoll_params.__pad, 0, sizeof(epoll_params.__pad)))
> + return -EINVAL;
> +
> + if (epoll_params.busy_poll_usecs > U32_MAX)
> + return -EINVAL;
> +
> + if (epoll_params.prefer_busy_poll > 1)
> + return -EINVAL;
> +
> + if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> + !capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
> + ep->busy_poll_budget = epoll_params.busy_poll_budget;
> + ep->prefer_busy_poll = !!epoll_params.prefer_busy_poll;
This !! is unnecessary. Nonzero values shall be "converted" to true.
But FWIW, the above is nothing which should be blocking, so:
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
> + return 0;
> + case EPIOCGPARAMS:
> + memset(&epoll_params, 0, sizeof(epoll_params));
> + epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
> + epoll_params.busy_poll_budget = ep->busy_poll_budget;
> + epoll_params.prefer_busy_poll = ep->prefer_busy_poll;
> + if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
> + return -EFAULT;
> + return 0;
> + default:
> + return -ENOIOCTLCMD;
> + }
> +}
..
thanks,
On Wed, Feb 07, 2024 at 09:37:14AM +0100, Jiri Slaby wrote:
> On 05. 02. 24, 22:04, Joe Damato wrote:
> >Add an ioctl for getting and setting epoll_params. User programs can use
> >this ioctl to get and set the busy poll usec time, packet budget, and
> >prefer busy poll params for a specific epoll context.
> >
> >Parameters are limited:
> > - busy_poll_usecs is limited to <= u32_max
> > - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
> > users (!capable(CAP_NET_ADMIN))
> > - prefer_busy_poll must be 0 or 1
> > - __pad must be 0
> >
> >Signed-off-by: Joe Damato <jdamato@fastly.com>
> ...
> >--- a/fs/eventpoll.c
> >+++ b/fs/eventpoll.c
> ...
> >@@ -497,6 +498,50 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
> > ep->napi_id = napi_id;
> > }
> >+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> >+ unsigned long arg)
> >+{
> >+ struct eventpoll *ep;
> >+ struct epoll_params epoll_params;
> >+ void __user *uarg = (void __user *) arg;
> >+
> >+ ep = file->private_data;
>
> This might have been on the ep declaration line.
>
> >+ switch (cmd) {
> >+ case EPIOCSPARAMS:
> >+ if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> >+ return -EFAULT;
> >+
> >+ if (memchr_inv(epoll_params.__pad, 0, sizeof(epoll_params.__pad)))
> >+ return -EINVAL;
> >+
> >+ if (epoll_params.busy_poll_usecs > U32_MAX)
> >+ return -EINVAL;
> >+
> >+ if (epoll_params.prefer_busy_poll > 1)
> >+ return -EINVAL;
> >+
> >+ if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> >+ !capable(CAP_NET_ADMIN))
> >+ return -EPERM;
> >+
> >+ ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
> >+ ep->busy_poll_budget = epoll_params.busy_poll_budget;
> >+ ep->prefer_busy_poll = !!epoll_params.prefer_busy_poll;
>
> This !! is unnecessary. Nonzero values shall be "converted" to true.
>
> But FWIW, the above is nothing which should be blocking, so:
">
> Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
netdev maintainers: Jiri marked this with Reviewed-by, but was this review
what caused "Changes Requested" to be the status set for this patch set in
patchwork?
If needed, I'll send a v7 with the changes Jiri suggested and add the
"Reviewed-by" since the changes are cosmetic, but I wanted to make sure
this was the reason.
Thanks.
On Wed, 7 Feb 2024 10:50:15 -0800 Joe Damato wrote:
> > This !! is unnecessary. Nonzero values shall be "converted" to true.
> >
> > But FWIW, the above is nothing which should be blocking, so:
> ">
> > Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
>
> netdev maintainers: Jiri marked this with Reviewed-by, but was this review
> what caused "Changes Requested" to be the status set for this patch set in
> patchwork?
>
> If needed, I'll send a v7 with the changes Jiri suggested and add the
> "Reviewed-by" since the changes are cosmetic, but I wanted to make sure
> this was the reason.
Yes, I think that's it.
On Wed, Feb 07, 2024 at 11:07:26AM -0800, Jakub Kicinski wrote:
> On Wed, 7 Feb 2024 10:50:15 -0800 Joe Damato wrote:
> > > This !! is unnecessary. Nonzero values shall be "converted" to true.
> > >
> > > But FWIW, the above is nothing which should be blocking, so:
> > ">
> > > Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
> >
> > netdev maintainers: Jiri marked this with Reviewed-by, but was this review
> > what caused "Changes Requested" to be the status set for this patch set in
> > patchwork?
> >
> > If needed, I'll send a v7 with the changes Jiri suggested and add the
> > "Reviewed-by" since the changes are cosmetic, but I wanted to make sure
> > this was the reason.
>
> Yes, I think that's it.
OK, thanks for letting me know. I wasn't sure if it was because of the
netdev/source_inline which marked 1/4 as "fail" because of the inlines
added.
Does that need to be changed, as well?
On Wed, 7 Feb 2024 11:16:03 -0800 Joe Damato wrote:
> > > netdev maintainers: Jiri marked this with Reviewed-by, but was this review
> > > what caused "Changes Requested" to be the status set for this patch set in
> > > patchwork?
> > >
> > > If needed, I'll send a v7 with the changes Jiri suggested and add the
> > > "Reviewed-by" since the changes are cosmetic, but I wanted to make sure
> > > this was the reason.
> >
> > Yes, I think that's it.
>
> OK, thanks for letting me know. I wasn't sure if it was because of the
> netdev/source_inline which marked 1/4 as "fail" because of the inlines
> added.
>
> Does that need to be changed, as well?
For background our preference is to avoid using static inline in C
sources, unless the author compiled the code and actually confirmed
the code doesn't get inlined correctly. But it's not a hard
requirement, and technically the code is under fs/.
In general the patchwork checks are a bit noisy, see here the top left
graph of how many of the patches we merge are "all green":
https://netdev.bots.linux.dev/checks.html
Some of the checks are also largely outside of our control (checkpatch)
so consider the patchwork checks as automation for maintainers.
The maintainers should respond on the list if any of the failures
are indeed legit.
@@ -309,6 +309,7 @@ Code Seq# Include File Comments
0x89 0B-DF linux/sockios.h
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
+0x8A 00-1F linux/eventpoll.h
0x8B all linux/wireless.h
0x8C 00-3F WiNRADiO driver
<http://www.winradio.com.au/>
@@ -37,6 +37,7 @@
#include <linux/seq_file.h>
#include <linux/compat.h>
#include <linux/rculist.h>
+#include <linux/capability.h>
#include <net/busy_poll.h>
/*
@@ -497,6 +498,50 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
ep->napi_id = napi_id;
}
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct eventpoll *ep;
+ struct epoll_params epoll_params;
+ void __user *uarg = (void __user *) arg;
+
+ ep = file->private_data;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
+ return -EFAULT;
+
+ if (memchr_inv(epoll_params.__pad, 0, sizeof(epoll_params.__pad)))
+ return -EINVAL;
+
+ if (epoll_params.busy_poll_usecs > U32_MAX)
+ return -EINVAL;
+
+ if (epoll_params.prefer_busy_poll > 1)
+ return -EINVAL;
+
+ if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
+ !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
+ ep->busy_poll_budget = epoll_params.busy_poll_budget;
+ ep->prefer_busy_poll = !!epoll_params.prefer_busy_poll;
+ return 0;
+ case EPIOCGPARAMS:
+ memset(&epoll_params, 0, sizeof(epoll_params));
+ epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
+ epoll_params.busy_poll_budget = ep->busy_poll_budget;
+ epoll_params.prefer_busy_poll = ep->prefer_busy_poll;
+ if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
#else
static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
@@ -512,6 +557,12 @@ static inline bool ep_busy_loop_on(struct eventpoll *ep)
{
return false;
}
+
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_NET_RX_BUSY_POLL */
/*
@@ -871,6 +922,26 @@ static void ep_clear_and_put(struct eventpoll *ep)
ep_free(ep);
}
+static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ if (!is_file_epoll(file))
+ return -EINVAL;
+
+ switch (cmd) {
+ case EPIOCSPARAMS:
+ case EPIOCGPARAMS:
+ ret = ep_eventpoll_bp_ioctl(file, cmd, arg);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
static int ep_eventpoll_release(struct inode *inode, struct file *file)
{
struct eventpoll *ep = file->private_data;
@@ -977,6 +1048,8 @@ static const struct file_operations eventpoll_fops = {
.release = ep_eventpoll_release,
.poll = ep_eventpoll_poll,
.llseek = noop_llseek,
+ .unlocked_ioctl = ep_eventpoll_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
/*
@@ -85,4 +85,17 @@ struct epoll_event {
__u64 data;
} EPOLL_PACKED;
+struct epoll_params {
+ __aligned_u64 busy_poll_usecs;
+ __u16 busy_poll_budget;
+ __u8 prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits for alignment on all arches */
+ __u8 __pad[5];
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+
#endif /* _UAPI_LINUX_EVENTPOLL_H */