[V3,3/5] misc: mlx5ctl: Add info ioctl

Message ID 20231121070619.9836-4-saeed@kernel.org
State New
Headers
Series mlx5 ConnectX control misc driver |

Commit Message

Saeed Mahameed Nov. 21, 2023, 7:06 a.m. UTC
  From: Saeed Mahameed <saeedm@nvidia.com>

Implement INFO ioctl to return the allocated UID and the capability flags
and some other useful device information such as the underlying ConnectX
device.

Example:
$ sudo ./mlx5ctlu mlx5_core.ctl.0
mlx5dev: 0000:00:04.0
UCTX UID: 1
UCTX CAP: 0x3
DEV UCTX CAP: 0x3
USER CAP: 0x1d

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |  1 +
 drivers/misc/mlx5ctl/main.c                   | 71 +++++++++++++++++++
 include/uapi/misc/mlx5ctl.h                   | 24 +++++++
 3 files changed, 96 insertions(+)
 create mode 100644 include/uapi/misc/mlx5ctl.h
  

Comments

Greg KH Nov. 27, 2023, 7:09 p.m. UTC | #1
On Mon, Nov 20, 2023 at 11:06:17PM -0800, Saeed Mahameed wrote:
> +static int mlx5ctl_info_ioctl(struct file *file,
> +			      struct mlx5ctl_info __user *arg,
> +			      size_t usize)
> +{
> +	struct mlx5ctl_fd *mfd = file->private_data;
> +	struct mlx5ctl_dev *mcdev = mfd->mcdev;
> +	struct mlx5_core_dev *mdev = mcdev->mdev;
> +	struct mlx5ctl_info *info;
> +	size_t ksize = 0;
> +	int err = 0;
> +
> +	ksize = max(sizeof(struct mlx5ctl_info), usize);

Why / How can usize be larger than the structure size and you still want
to allocate a memory chunk that big?  Shouldn't the size always match?

And what if it's too small?

> +	info = kzalloc(ksize, GFP_KERNEL_ACCOUNT);

Why account as it will go away almost instantly?

> +	if (!info)
> +		return -ENOMEM;
> +
> +	info->size = sizeof(struct mlx5ctl_info);
> +
> +	info->dev_uctx_cap = MLX5_CAP_GEN(mdev, uctx_cap);
> +	info->uctx_cap = mfd->uctx_cap;
> +	info->uctx_uid = mfd->uctx_uid;
> +	info->ucap = mfd->ucap;
> +
> +	strscpy(info->devname, dev_name(&mdev->pdev->dev),
> +		sizeof(info->devname));
> +
> +	if (copy_to_user(arg, info, usize))
> +		err = -EFAULT;

So if usize is smaller than the structure you don't copy it all?

What am I missing here?

> +
> +	kfree(info);
> +	return err;
> +}
> +
> +static long mlx5ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> +{
> +	struct mlx5ctl_fd *mfd = file->private_data;
> +	struct mlx5ctl_dev *mcdev = mfd->mcdev;
> +	void __user *argp = (void __user *)arg;
> +	size_t size = _IOC_SIZE(cmd);
> +	int err = 0;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	mlx5ctl_dbg(mcdev, "ioctl 0x%x type/nr: %d/%d size: %d DIR:%d\n", cmd,
> +		    _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd), _IOC_DIR(cmd));
> +
> +	down_read(&mcdev->rw_lock);
> +	if (!mcdev->mdev) {
> +		err = -ENODEV;
> +		goto unlock;
> +	}
> +
> +	switch (cmd) {
> +	case MLX5CTL_IOCTL_INFO:
> +		err = mlx5ctl_info_ioctl(file, argp, size);
> +		break;
> +
> +	default:
> +		mlx5ctl_dbg(mcdev, "Unknown ioctl %x\n", cmd);
> +		err = -ENOIOCTLCMD;

-ENOTTY is the correct error.

> --- /dev/null
> +++ b/include/uapi/misc/mlx5ctl.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 WITH Linux-syscall-note */
> +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
> +
> +#ifndef __MLX5CTL_IOCTL_H__
> +#define __MLX5CTL_IOCTL_H__
> +
> +struct mlx5ctl_info {
> +	__aligned_u64 flags;

Is this used?

> +	__u32 size;
> +	__u8 devname[64]; /* underlaying ConnectX device */

64 should be a define somewhere, right?  And why 64?

> +	__u16 uctx_uid; /* current process allocated UCTX UID */
> +	__u16 reserved1;

Where is this checked to be always 0?  Well it's a read so I guess where
is the documentation saying it will always be set to 0?

> +	__u32 uctx_cap; /* current process effective UCTX cap */
> +	__u32 dev_uctx_cap; /* device's UCTX capabilities */
> +	__u32 ucap; /* process user capability */
> +	__u32 reserved2;

Same here.

And why reserve anything?  What does that help with?

thanks,

greg k-h
  
Saeed Mahameed Nov. 27, 2023, 8:39 p.m. UTC | #2
On 27 Nov 19:09, Greg Kroah-Hartman wrote:
>On Mon, Nov 20, 2023 at 11:06:17PM -0800, Saeed Mahameed wrote:
>> +static int mlx5ctl_info_ioctl(struct file *file,
>> +			      struct mlx5ctl_info __user *arg,
>> +			      size_t usize)
>> +{
>> +	struct mlx5ctl_fd *mfd = file->private_data;
>> +	struct mlx5ctl_dev *mcdev = mfd->mcdev;
>> +	struct mlx5_core_dev *mdev = mcdev->mdev;
>> +	struct mlx5ctl_info *info;
>> +	size_t ksize = 0;
>> +	int err = 0;
>> +
>> +	ksize = max(sizeof(struct mlx5ctl_info), usize);
>
>Why / How can usize be larger than the structure size and you still want
>to allocate a memory chunk that big?  Shouldn't the size always match?
>

new user-space old kernel, the driver would allocate the usiae and make
sure to clear all the buffer with 0's, then fill in what the kernel
understands and send the whole buffer back to user with trailer always
zeroed out.

>And what if it's too small?
>

Too small means old user new kernel, the kernel will honor that and only
fill in what the user understands.

>> +	info = kzalloc(ksize, GFP_KERNEL_ACCOUNT);
>
>Why account as it will go away almost instantly?
>

Will change that back to GFP_KERNEL.

>> +	if (!info)
>> +		return -ENOMEM;
>> +
>> +	info->size = sizeof(struct mlx5ctl_info);
>> +
>> +	info->dev_uctx_cap = MLX5_CAP_GEN(mdev, uctx_cap);
>> +	info->uctx_cap = mfd->uctx_cap;
>> +	info->uctx_uid = mfd->uctx_uid;
>> +	info->ucap = mfd->ucap;
>> +
>> +	strscpy(info->devname, dev_name(&mdev->pdev->dev),
>> +		sizeof(info->devname));
>> +
>> +	if (copy_to_user(arg, info, usize))
>> +		err = -EFAULT;
>
>So if usize is smaller than the structure you don't copy it all?
>
>What am I missing here?
>

We copy back to user as much data as they expect, newer kernel can have
extended functionality and we want to allow expanding the ioctl structs in
the future while keeping support for old user, so new kernel would honor
old user and copy only the size the user expects.

>> +
>> +	kfree(info);
>> +	return err;
>> +}
>> +
>> +static long mlx5ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>> +{
>> +	struct mlx5ctl_fd *mfd = file->private_data;
>> +	struct mlx5ctl_dev *mcdev = mfd->mcdev;
>> +	void __user *argp = (void __user *)arg;
>> +	size_t size = _IOC_SIZE(cmd);
>> +	int err = 0;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EPERM;
>> +
>> +	mlx5ctl_dbg(mcdev, "ioctl 0x%x type/nr: %d/%d size: %d DIR:%d\n", cmd,
>> +		    _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd), _IOC_DIR(cmd));
>> +
>> +	down_read(&mcdev->rw_lock);
>> +	if (!mcdev->mdev) {
>> +		err = -ENODEV;
>> +		goto unlock;
>> +	}
>> +
>> +	switch (cmd) {
>> +	case MLX5CTL_IOCTL_INFO:
>> +		err = mlx5ctl_info_ioctl(file, argp, size);
>> +		break;
>> +
>> +	default:
>> +		mlx5ctl_dbg(mcdev, "Unknown ioctl %x\n", cmd);
>> +		err = -ENOIOCTLCMD;
>
>-ENOTTY is the correct error.
>
>> --- /dev/null
>> +++ b/include/uapi/misc/mlx5ctl.h
>> @@ -0,0 +1,24 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 WITH Linux-syscall-note */
>> +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
>> +
>> +#ifndef __MLX5CTL_IOCTL_H__
>> +#define __MLX5CTL_IOCTL_H__
>> +
>> +struct mlx5ctl_info {
>> +	__aligned_u64 flags;
>
>Is this used?
>

no, not yet, but it is good for future extendibility and compatibility
checking.

>> +	__u32 size;
>> +	__u8 devname[64]; /* underlaying ConnectX device */
>
>64 should be a define somewhere, right?  And why 64?
>

It is usually the kobj->name of the underlying device, I will have to
define this in the uAPI. 64 seemed large enough, any other suggestion ? 

This field is informational only for the user to have an idea which is the
underlying physical device, it's ok if in odd situation the name has to be
truncated to fit into the uAPI buffer.

>> +	__u16 uctx_uid; /* current process allocated UCTX UID */
>> +	__u16 reserved1;
>
>Where is this checked to be always 0?  Well it's a read so I guess where
>is the documentation saying it will always be set to 0?
>

I forgot to add the checks in the info ioctl path, will add that.
Isn't it an unwritten rule that reserved fields has to be always 0 ?
Do I really need to document this ?

>> +	__u32 uctx_cap; /* current process effective UCTX cap */
>> +	__u32 dev_uctx_cap; /* device's UCTX capabilities */
>> +	__u32 ucap; /* process user capability */
>> +	__u32 reserved2;
>
>Same here.
>
>And why reserve anything?  What does that help with?
>

struct alignment and explicit padding.

>thanks,
>
>greg k-h
  
Greg KH Nov. 28, 2023, 9:13 a.m. UTC | #3
On Mon, Nov 27, 2023 at 12:39:22PM -0800, Saeed Mahameed wrote:
> On 27 Nov 19:09, Greg Kroah-Hartman wrote:
> > On Mon, Nov 20, 2023 at 11:06:17PM -0800, Saeed Mahameed wrote:
> > > +static int mlx5ctl_info_ioctl(struct file *file,
> > > +			      struct mlx5ctl_info __user *arg,
> > > +			      size_t usize)
> > > +{
> > > +	struct mlx5ctl_fd *mfd = file->private_data;
> > > +	struct mlx5ctl_dev *mcdev = mfd->mcdev;
> > > +	struct mlx5_core_dev *mdev = mcdev->mdev;
> > > +	struct mlx5ctl_info *info;
> > > +	size_t ksize = 0;
> > > +	int err = 0;
> > > +
> > > +	ksize = max(sizeof(struct mlx5ctl_info), usize);
> > 
> > Why / How can usize be larger than the structure size and you still want
> > to allocate a memory chunk that big?  Shouldn't the size always match?
> > 
> 
> new user-space old kernel, the driver would allocate the usiae and make
> sure to clear all the buffer with 0's, then fill in what the kernel
> understands and send the whole buffer back to user with trailer always
> zeroed out.

No, at that point you know something is wrong and you need to just abort
and return -EINVAL as the structure sizes do not match.

If you need to "extend" the structure to include more information, do so
in a new ioctl.

> > > --- /dev/null
> > > +++ b/include/uapi/misc/mlx5ctl.h
> > > @@ -0,0 +1,24 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 WITH Linux-syscall-note */
> > > +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
> > > +
> > > +#ifndef __MLX5CTL_IOCTL_H__
> > > +#define __MLX5CTL_IOCTL_H__
> > > +
> > > +struct mlx5ctl_info {
> > > +	__aligned_u64 flags;
> > 
> > Is this used?
> > 
> 
> no, not yet, but it is good for future extendibility and compatibility
> checking.

But you are not checking anything now, so please don't include something
that will not work in the future.

> > > +	__u32 size;
> > > +	__u8 devname[64]; /* underlaying ConnectX device */
> > 
> > 64 should be a define somewhere, right?  And why 64?
> > 
> 
> It is usually the kobj->name of the underlying device, I will have to
> define this in the uAPI. 64 seemed large enough, any other suggestion ?

What happens if the names get bigger?

> This field is informational only for the user to have an idea which is the
> underlying physical device, it's ok if in odd situation the name has to be
> truncated to fit into the uAPI buffer.

As the truncation will happen on the right side of the string, usually
the actual device id or unique identifier, that's not going to help out
much to drop that portion :(

> > > +	__u16 uctx_uid; /* current process allocated UCTX UID */
> > > +	__u16 reserved1;
> > 
> > Where is this checked to be always 0?  Well it's a read so I guess where
> > is the documentation saying it will always be set to 0?
> > 
> 
> I forgot to add the checks in the info ioctl path, will add that.
> Isn't it an unwritten rule that reserved fields has to be always 0 ?
> Do I really need to document this ?

It is a written rule that reserved fields must be 0, please see the
documentation for how to write an ioctl.

thanks,

greg k-h
  
Saeed Mahameed Nov. 29, 2023, 8:53 a.m. UTC | #4
On 28 Nov 09:13, Greg Kroah-Hartman wrote:
>On Mon, Nov 27, 2023 at 12:39:22PM -0800, Saeed Mahameed wrote:
>> On 27 Nov 19:09, Greg Kroah-Hartman wrote:
>> > On Mon, Nov 20, 2023 at 11:06:17PM -0800, Saeed Mahameed wrote:
>> > > +static int mlx5ctl_info_ioctl(struct file *file,
>> > > +			      struct mlx5ctl_info __user *arg,
>> > > +			      size_t usize)
>> > > +{
>> > > +	struct mlx5ctl_fd *mfd = file->private_data;
>> > > +	struct mlx5ctl_dev *mcdev = mfd->mcdev;
>> > > +	struct mlx5_core_dev *mdev = mcdev->mdev;
>> > > +	struct mlx5ctl_info *info;
>> > > +	size_t ksize = 0;
>> > > +	int err = 0;
>> > > +
>> > > +	ksize = max(sizeof(struct mlx5ctl_info), usize);
>> >
>> > Why / How can usize be larger than the structure size and you still want
>> > to allocate a memory chunk that big?  Shouldn't the size always match?
>> >
>>
>> new user-space old kernel, the driver would allocate the usiae and make
>> sure to clear all the buffer with 0's, then fill in what the kernel
>> understands and send the whole buffer back to user with trailer always
>> zeroed out.
>
>No, at that point you know something is wrong and you need to just abort
>and return -EINVAL as the structure sizes do not match.
>
>If you need to "extend" the structure to include more information, do so
>in a new ioctl.
>

Ack, will remove these fields.

>> > > --- /dev/null
>> > > +++ b/include/uapi/misc/mlx5ctl.h
>> > > @@ -0,0 +1,24 @@
>> > > +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 WITH Linux-syscall-note */
>> > > +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
>> > > +
>> > > +#ifndef __MLX5CTL_IOCTL_H__
>> > > +#define __MLX5CTL_IOCTL_H__
>> > > +
>> > > +struct mlx5ctl_info {
>> > > +	__aligned_u64 flags;
>> >
>> > Is this used?
>> >
>>
>> no, not yet, but it is good for future extendibility and compatibility
>> checking.
>
>But you are not checking anything now, so please don't include something
>that will not work in the future.
>

Ack, will remove.

>> > > +	__u32 size;
>> > > +	__u8 devname[64]; /* underlaying ConnectX device */
>> >
>> > 64 should be a define somewhere, right?  And why 64?
>> >
>>
>> It is usually the kobj->name of the underlying device, I will have to
>> define this in the uAPI. 64 seemed large enough, any other suggestion ?
>
>What happens if the names get bigger?
>
>> This field is informational only for the user to have an idea which is the
>> underlying physical device, it's ok if in odd situation the name has to be
>> truncated to fit into the uAPI buffer.
>
>As the truncation will happen on the right side of the string, usually
>the actual device id or unique identifier, that's not going to help out
>much to drop that portion :(
>

Right :/, it's an assumption that mlx5 devices can either be a pci device
or an auxiliary device in case of a mlx5-subfunction, so i don't expect the
names to get larger and can easily fit in 64B string, but you are right, I
shouldn't make such assumption in an IOCTL, I will figure out something or
completely drop this field in V4.

>> > > +	__u16 uctx_uid; /* current process allocated UCTX UID */
>> > > +	__u16 reserved1;
>> >
>> > Where is this checked to be always 0?  Well it's a read so I guess where
>> > is the documentation saying it will always be set to 0?
>> >
>>
>> I forgot to add the checks in the info ioctl path, will add that.
>> Isn't it an unwritten rule that reserved fields has to be always 0 ?
>> Do I really need to document this ?
>
>It is a written rule that reserved fields must be 0, please see the
>documentation for how to write an ioctl.
>

Ack, will document.

>thanks,
>
>greg k-h
  

Patch

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 4ea5b837399a..9faf91ffefff 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -89,6 +89,7 @@  Code  Seq#    Include File                                           Comments
 0x20  all    drivers/cdrom/cm206.h
 0x22  all    scsi/sg.h
 0x3E  00-0F  linux/counter.h                                         <mailto:linux-iio@vger.kernel.org>
+0x5c  all    uapi/misc/mlx5ctl.h                                     Nvidia ConnectX control
 '!'   00-1F  uapi/linux/seccomp.h
 '#'   00-3F                                                          IEEE 1394 Subsystem
                                                                      Block for the entire subsystem
diff --git a/drivers/misc/mlx5ctl/main.c b/drivers/misc/mlx5ctl/main.c
index 8eb150461b80..6a98b40e4300 100644
--- a/drivers/misc/mlx5ctl/main.c
+++ b/drivers/misc/mlx5ctl/main.c
@@ -8,6 +8,7 @@ 
 #include <linux/auxiliary_bus.h>
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/driver.h>
+#include <uapi/misc/mlx5ctl.h>
 #include <linux/atomic.h>
 #include <linux/refcount.h>
 
@@ -198,10 +199,80 @@  static int mlx5ctl_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int mlx5ctl_info_ioctl(struct file *file,
+			      struct mlx5ctl_info __user *arg,
+			      size_t usize)
+{
+	struct mlx5ctl_fd *mfd = file->private_data;
+	struct mlx5ctl_dev *mcdev = mfd->mcdev;
+	struct mlx5_core_dev *mdev = mcdev->mdev;
+	struct mlx5ctl_info *info;
+	size_t ksize = 0;
+	int err = 0;
+
+	ksize = max(sizeof(struct mlx5ctl_info), usize);
+	info = kzalloc(ksize, GFP_KERNEL_ACCOUNT);
+	if (!info)
+		return -ENOMEM;
+
+	info->size = sizeof(struct mlx5ctl_info);
+
+	info->dev_uctx_cap = MLX5_CAP_GEN(mdev, uctx_cap);
+	info->uctx_cap = mfd->uctx_cap;
+	info->uctx_uid = mfd->uctx_uid;
+	info->ucap = mfd->ucap;
+
+	strscpy(info->devname, dev_name(&mdev->pdev->dev),
+		sizeof(info->devname));
+
+	if (copy_to_user(arg, info, usize))
+		err = -EFAULT;
+
+	kfree(info);
+	return err;
+}
+
+static long mlx5ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct mlx5ctl_fd *mfd = file->private_data;
+	struct mlx5ctl_dev *mcdev = mfd->mcdev;
+	void __user *argp = (void __user *)arg;
+	size_t size = _IOC_SIZE(cmd);
+	int err = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mlx5ctl_dbg(mcdev, "ioctl 0x%x type/nr: %d/%d size: %d DIR:%d\n", cmd,
+		    _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd), _IOC_DIR(cmd));
+
+	down_read(&mcdev->rw_lock);
+	if (!mcdev->mdev) {
+		err = -ENODEV;
+		goto unlock;
+	}
+
+	switch (cmd) {
+	case MLX5CTL_IOCTL_INFO:
+		err = mlx5ctl_info_ioctl(file, argp, size);
+		break;
+
+	default:
+		mlx5ctl_dbg(mcdev, "Unknown ioctl %x\n", cmd);
+		err = -ENOIOCTLCMD;
+		break;
+	}
+unlock:
+	up_read(&mcdev->rw_lock);
+	return err;
+}
+
 static const struct file_operations mlx5ctl_fops = {
 	.owner = THIS_MODULE,
 	.open = mlx5ctl_open,
 	.release = mlx5ctl_release,
+	.unlocked_ioctl = mlx5ctl_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 };
 
 static int mlx5ctl_probe(struct auxiliary_device *adev,
diff --git a/include/uapi/misc/mlx5ctl.h b/include/uapi/misc/mlx5ctl.h
new file mode 100644
index 000000000000..37153cc0fc6e
--- /dev/null
+++ b/include/uapi/misc/mlx5ctl.h
@@ -0,0 +1,24 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5CTL_IOCTL_H__
+#define __MLX5CTL_IOCTL_H__
+
+struct mlx5ctl_info {
+	__aligned_u64 flags;
+	__u32 size;
+	__u8 devname[64]; /* underlaying ConnectX device */
+	__u16 uctx_uid; /* current process allocated UCTX UID */
+	__u16 reserved1;
+	__u32 uctx_cap; /* current process effective UCTX cap */
+	__u32 dev_uctx_cap; /* device's UCTX capabilities */
+	__u32 ucap; /* process user capability */
+	__u32 reserved2;
+};
+
+#define MLX5CTL_IOCTL_MAGIC 0x5c
+
+#define MLX5CTL_IOCTL_INFO \
+	_IOR(MLX5CTL_IOCTL_MAGIC, 0x0, struct mlx5ctl_info)
+
+#endif /* __MLX5CTL_IOCTL_H__ */