[net-next,v6,4/4] eventpoll: Add epoll ioctl for epoll_params

Message ID 20240205210453.11301-5-jdamato@fastly.com
State New
Headers
Series Per epoll context busy poll support |

Commit Message

Joe Damato Feb. 5, 2024, 9:04 p.m. UTC
  Add an ioctl for getting and setting epoll_params. User programs can use
this ioctl to get and set the busy poll usec time, packet budget, and
prefer busy poll params for a specific epoll context.

Parameters are limited:
  - busy_poll_usecs is limited to <= u32_max
  - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
    users (!capable(CAP_NET_ADMIN))
  - prefer_busy_poll must be 0 or 1
  - __pad must be 0

Signed-off-by: Joe Damato <jdamato@fastly.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |  1 +
 fs/eventpoll.c                                | 73 +++++++++++++++++++
 include/uapi/linux/eventpoll.h                | 13 ++++
 3 files changed, 87 insertions(+)
  

Comments

Jiri Slaby Feb. 7, 2024, 8:37 a.m. UTC | #1
On 05. 02. 24, 22:04, Joe Damato wrote:
> Add an ioctl for getting and setting epoll_params. User programs can use
> this ioctl to get and set the busy poll usec time, packet budget, and
> prefer busy poll params for a specific epoll context.
> 
> Parameters are limited:
>    - busy_poll_usecs is limited to <= u32_max
>    - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
>      users (!capable(CAP_NET_ADMIN))
>    - prefer_busy_poll must be 0 or 1
>    - __pad must be 0
> 
> Signed-off-by: Joe Damato <jdamato@fastly.com>
..
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
..
> @@ -497,6 +498,50 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
>   	ep->napi_id = napi_id;
>   }
>   
> +static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> +				  unsigned long arg)
> +{
> +	struct eventpoll *ep;
> +	struct epoll_params epoll_params;
> +	void __user *uarg = (void __user *) arg;
> +
> +	ep = file->private_data;

This might have been on the ep declaration line.

> +	switch (cmd) {
> +	case EPIOCSPARAMS:
> +		if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> +			return -EFAULT;
> +
> +		if (memchr_inv(epoll_params.__pad, 0, sizeof(epoll_params.__pad)))
> +			return -EINVAL;
> +
> +		if (epoll_params.busy_poll_usecs > U32_MAX)
> +			return -EINVAL;
> +
> +		if (epoll_params.prefer_busy_poll > 1)
> +			return -EINVAL;
> +
> +		if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> +		    !capable(CAP_NET_ADMIN))
> +			return -EPERM;
> +
> +		ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
> +		ep->busy_poll_budget = epoll_params.busy_poll_budget;
> +		ep->prefer_busy_poll = !!epoll_params.prefer_busy_poll;

This !! is unnecessary. Nonzero values shall be "converted" to true.

But FWIW, the above is nothing which should be blocking, so:

Reviewed-by: Jiri Slaby <jirislaby@kernel.org>

> +		return 0;
> +	case EPIOCGPARAMS:
> +		memset(&epoll_params, 0, sizeof(epoll_params));
> +		epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
> +		epoll_params.busy_poll_budget = ep->busy_poll_budget;
> +		epoll_params.prefer_busy_poll = ep->prefer_busy_poll;
> +		if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
> +			return -EFAULT;
> +		return 0;
> +	default:
> +		return -ENOIOCTLCMD;
> +	}
> +}
..
thanks,
  
Joe Damato Feb. 7, 2024, 6:50 p.m. UTC | #2
On Wed, Feb 07, 2024 at 09:37:14AM +0100, Jiri Slaby wrote:
> On 05. 02. 24, 22:04, Joe Damato wrote:
> >Add an ioctl for getting and setting epoll_params. User programs can use
> >this ioctl to get and set the busy poll usec time, packet budget, and
> >prefer busy poll params for a specific epoll context.
> >
> >Parameters are limited:
> >   - busy_poll_usecs is limited to <= u32_max
> >   - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged
> >     users (!capable(CAP_NET_ADMIN))
> >   - prefer_busy_poll must be 0 or 1
> >   - __pad must be 0
> >
> >Signed-off-by: Joe Damato <jdamato@fastly.com>
> ...
> >--- a/fs/eventpoll.c
> >+++ b/fs/eventpoll.c
> ...
> >@@ -497,6 +498,50 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
> >  	ep->napi_id = napi_id;
> >  }
> >+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
> >+				  unsigned long arg)
> >+{
> >+	struct eventpoll *ep;
> >+	struct epoll_params epoll_params;
> >+	void __user *uarg = (void __user *) arg;
> >+
> >+	ep = file->private_data;
> 
> This might have been on the ep declaration line.
> 
> >+	switch (cmd) {
> >+	case EPIOCSPARAMS:
> >+		if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
> >+			return -EFAULT;
> >+
> >+		if (memchr_inv(epoll_params.__pad, 0, sizeof(epoll_params.__pad)))
> >+			return -EINVAL;
> >+
> >+		if (epoll_params.busy_poll_usecs > U32_MAX)
> >+			return -EINVAL;
> >+
> >+		if (epoll_params.prefer_busy_poll > 1)
> >+			return -EINVAL;
> >+
> >+		if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
> >+		    !capable(CAP_NET_ADMIN))
> >+			return -EPERM;
> >+
> >+		ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
> >+		ep->busy_poll_budget = epoll_params.busy_poll_budget;
> >+		ep->prefer_busy_poll = !!epoll_params.prefer_busy_poll;
> 
> This !! is unnecessary. Nonzero values shall be "converted" to true.
> 
> But FWIW, the above is nothing which should be blocking, so:
"> 
> Reviewed-by: Jiri Slaby <jirislaby@kernel.org>

netdev maintainers: Jiri marked this with Reviewed-by, but was this review
what caused "Changes Requested" to be the status set for this patch set in
patchwork?

If needed, I'll send a v7 with the changes Jiri suggested and add the
"Reviewed-by" since the changes are cosmetic, but I wanted to make sure
this was the reason.

Thanks.
  
Jakub Kicinski Feb. 7, 2024, 7:07 p.m. UTC | #3
On Wed, 7 Feb 2024 10:50:15 -0800 Joe Damato wrote:
> > This !! is unnecessary. Nonzero values shall be "converted" to true.
> > 
> > But FWIW, the above is nothing which should be blocking, so:
> "> 
> > Reviewed-by: Jiri Slaby <jirislaby@kernel.org>  
> 
> netdev maintainers: Jiri marked this with Reviewed-by, but was this review
> what caused "Changes Requested" to be the status set for this patch set in
> patchwork?
> 
> If needed, I'll send a v7 with the changes Jiri suggested and add the
> "Reviewed-by" since the changes are cosmetic, but I wanted to make sure
> this was the reason.

Yes, I think that's it.
  
Joe Damato Feb. 7, 2024, 7:16 p.m. UTC | #4
On Wed, Feb 07, 2024 at 11:07:26AM -0800, Jakub Kicinski wrote:
> On Wed, 7 Feb 2024 10:50:15 -0800 Joe Damato wrote:
> > > This !! is unnecessary. Nonzero values shall be "converted" to true.
> > > 
> > > But FWIW, the above is nothing which should be blocking, so:
> > "> 
> > > Reviewed-by: Jiri Slaby <jirislaby@kernel.org>  
> > 
> > netdev maintainers: Jiri marked this with Reviewed-by, but was this review
> > what caused "Changes Requested" to be the status set for this patch set in
> > patchwork?
> > 
> > If needed, I'll send a v7 with the changes Jiri suggested and add the
> > "Reviewed-by" since the changes are cosmetic, but I wanted to make sure
> > this was the reason.
> 
> Yes, I think that's it.

OK, thanks for letting me know. I wasn't sure if it was because of the
netdev/source_inline which marked 1/4 as "fail" because of the inlines
added.

Does that need to be changed, as well?
  
Jakub Kicinski Feb. 7, 2024, 8:18 p.m. UTC | #5
On Wed, 7 Feb 2024 11:16:03 -0800 Joe Damato wrote:
> > > netdev maintainers: Jiri marked this with Reviewed-by, but was this review
> > > what caused "Changes Requested" to be the status set for this patch set in
> > > patchwork?
> > > 
> > > If needed, I'll send a v7 with the changes Jiri suggested and add the
> > > "Reviewed-by" since the changes are cosmetic, but I wanted to make sure
> > > this was the reason.  
> > 
> > Yes, I think that's it.  
> 
> OK, thanks for letting me know. I wasn't sure if it was because of the
> netdev/source_inline which marked 1/4 as "fail" because of the inlines
> added.
> 
> Does that need to be changed, as well?

For background our preference is to avoid using static inline in C
sources, unless the author compiled the code and actually confirmed
the code doesn't get inlined correctly. But it's not a hard
requirement, and technically the code is under fs/.

In general the patchwork checks are a bit noisy, see here the top left
graph of how many of the patches we merge are "all green":
https://netdev.bots.linux.dev/checks.html
Some of the checks are also largely outside of our control (checkpatch)
so consider the patchwork checks as automation for maintainers. 
The maintainers should respond on the list if any of the failures 
are indeed legit.
  

Patch

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 457e16f06e04..b33918232f78 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -309,6 +309,7 @@  Code  Seq#    Include File                                           Comments
 0x89  0B-DF  linux/sockios.h
 0x89  E0-EF  linux/sockios.h                                         SIOCPROTOPRIVATE range
 0x89  F0-FF  linux/sockios.h                                         SIOCDEVPRIVATE range
+0x8A  00-1F  linux/eventpoll.h
 0x8B  all    linux/wireless.h
 0x8C  00-3F                                                          WiNRADiO driver
                                                                      <http://www.winradio.com.au/>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a69ee11682b9..8eb4ea2557af 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -37,6 +37,7 @@ 
 #include <linux/seq_file.h>
 #include <linux/compat.h>
 #include <linux/rculist.h>
+#include <linux/capability.h>
 #include <net/busy_poll.h>
 
 /*
@@ -497,6 +498,50 @@  static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
 	ep->napi_id = napi_id;
 }
 
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+				  unsigned long arg)
+{
+	struct eventpoll *ep;
+	struct epoll_params epoll_params;
+	void __user *uarg = (void __user *) arg;
+
+	ep = file->private_data;
+
+	switch (cmd) {
+	case EPIOCSPARAMS:
+		if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params)))
+			return -EFAULT;
+
+		if (memchr_inv(epoll_params.__pad, 0, sizeof(epoll_params.__pad)))
+			return -EINVAL;
+
+		if (epoll_params.busy_poll_usecs > U32_MAX)
+			return -EINVAL;
+
+		if (epoll_params.prefer_busy_poll > 1)
+			return -EINVAL;
+
+		if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT &&
+		    !capable(CAP_NET_ADMIN))
+			return -EPERM;
+
+		ep->busy_poll_usecs = epoll_params.busy_poll_usecs;
+		ep->busy_poll_budget = epoll_params.busy_poll_budget;
+		ep->prefer_busy_poll = !!epoll_params.prefer_busy_poll;
+		return 0;
+	case EPIOCGPARAMS:
+		memset(&epoll_params, 0, sizeof(epoll_params));
+		epoll_params.busy_poll_usecs = ep->busy_poll_usecs;
+		epoll_params.busy_poll_budget = ep->busy_poll_budget;
+		epoll_params.prefer_busy_poll = ep->prefer_busy_poll;
+		if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params)))
+			return -EFAULT;
+		return 0;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
 #else
 
 static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
@@ -512,6 +557,12 @@  static inline bool ep_busy_loop_on(struct eventpoll *ep)
 {
 	return false;
 }
+
+static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd,
+				  unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
 /*
@@ -871,6 +922,26 @@  static void ep_clear_and_put(struct eventpoll *ep)
 		ep_free(ep);
 }
 
+static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	if (!is_file_epoll(file))
+		return -EINVAL;
+
+	switch (cmd) {
+	case EPIOCSPARAMS:
+	case EPIOCGPARAMS:
+		ret = ep_eventpoll_bp_ioctl(file, cmd, arg);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
 static int ep_eventpoll_release(struct inode *inode, struct file *file)
 {
 	struct eventpoll *ep = file->private_data;
@@ -977,6 +1048,8 @@  static const struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_release,
 	.poll		= ep_eventpoll_poll,
 	.llseek		= noop_llseek,
+	.unlocked_ioctl	= ep_eventpoll_ioctl,
+	.compat_ioctl   = compat_ptr_ioctl,
 };
 
 /*
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index cfbcc4cc49ac..36a002660955 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -85,4 +85,17 @@  struct epoll_event {
 	__u64 data;
 } EPOLL_PACKED;
 
+struct epoll_params {
+	__aligned_u64 busy_poll_usecs;
+	__u16 busy_poll_budget;
+	__u8 prefer_busy_poll;
+
+	/* pad the struct to a multiple of 64bits for alignment on all arches */
+	__u8 __pad[5];
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+
 #endif /* _UAPI_LINUX_EVENTPOLL_H */