[v5] posix-timers: add multi_clock_gettime system call
Commit Message
Some user space applications need to read some clocks.
Each read requires moving from user space to kernel space.
The syscall overhead causes unpredictable delay between N clocks reads
Removing this delay causes better synchronization between N clocks.
Introduce a new system call multi_clock_gettime, which can be used to measure
the offset between multiple clocks, from variety of types: PHC, virtual PHC
and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
The offset includes the total time that the driver needs to read the clock
timestamp.
New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
Supported clocks IDs: PHC, virtual PHC and various system clocks.
Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
The system call returns n_clocks timestamps for each measurement:
- clock 0 timestamp
- ...
- clock n timestamp
Signed-off-by: Sagi Maimon <maimon.sagi@gmail.com>
---
Changes since version 4:
- fix error : 'struct __ptp_multi_clock_get' declared inside parameter list
will not be visible outside of this definition or declaration
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
include/linux/syscalls.h | 3 +-
include/uapi/asm-generic/unistd.h | 4 +-
include/uapi/linux/multi_clock.h | 21 +++++++++
kernel/time/posix-timers.c | 59 ++++++++++++++++++++++++++
5 files changed, 86 insertions(+), 2 deletions(-)
create mode 100644 include/uapi/linux/multi_clock.h
Comments
On Tue, Jan 2, 2024, at 10:18, Sagi Maimon wrote:
> Some user space applications need to read some clocks.
> Each read requires moving from user space to kernel space.
> The syscall overhead causes unpredictable delay between N clocks reads
> Removing this delay causes better synchronization between N clocks.
>
> Introduce a new system call multi_clock_gettime, which can be used to measure
> the offset between multiple clocks, from variety of types: PHC, virtual PHC
> and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> The offset includes the total time that the driver needs to read the clock
> timestamp.
>
> New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
> Supported clocks IDs: PHC, virtual PHC and various system clocks.
> Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
> The system call returns n_clocks timestamps for each measurement:
> - clock 0 timestamp
> - ...
> - clock n timestamp
>
> Signed-off-by: Sagi Maimon <maimon.sagi@gmail.com>
> ---
> Changes since version 4:
> - fix error : 'struct __ptp_multi_clock_get' declared inside parameter list
> will not be visible outside of this definition or declaration
I usually put all the changes for previous versions in a
list here, it helps reviewers.
The changes you made for previous versions all look good
to me, but I think there is still a few things worth
considering. I'll also follow up on the earlier threads.
> +#define MULTI_PTP_MAX_CLOCKS 32 /* Max number of clocks */
> +#define MULTI_PTP_MAX_SAMPLES 32 /* Max allowed offset measurement samples. */
> +
> +struct __ptp_multi_clock_get {
> + unsigned int n_clocks; /* Desired number of clocks. */
> + unsigned int n_samples; /* Desired number of measurements per clock. */
> + clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> + /*
> + * Array of list of n_clocks clocks time samples n_samples times.
> + */
> + struct __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
> +};
Since you now access each member individually, I think it
makes more sense here to just pass these as four
register arguments. It helps with argument introspection,
avoids a couple of get_user(), and lets you remove the fixed
array dimensions.
> +SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get
> __user *, ptp_multi_clk_get)
> +{
> + const struct k_clock *kc;
> + struct timespec64 *kernel_tp;
> + struct timespec64 *kernel_tp_base;
> + unsigned int n_clocks; /* Desired number of clocks. */
> + unsigned int n_samples; /* Desired number of measurements per clock.
> */
> + unsigned int i, j;
> + clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> + int error = 0;
> +
> + if (copy_from_user(&n_clocks, &ptp_multi_clk_get->n_clocks,
> sizeof(n_clocks)))
> + return -EFAULT;
> + if (copy_from_user(&n_samples, &ptp_multi_clk_get->n_samples,
> sizeof(n_samples)))
If these remain as struct members rather than register arguments,
you should use get_user() instead of copy_from_user().
> + kernel_tp_base = kmalloc_array(n_clocks * n_samples,
> + sizeof(struct timespec64), GFP_KERNEL);
> + if (!kernel_tp_base)
> + return -ENOMEM;
To be on the safe side regarding possible data leak, maybe use
kcalloc() instead of kmalloc_array() here.
> + kernel_tp = kernel_tp_base;
> + for (j = 0; j < n_samples; j++) {
> + for (i = 0; i < n_clocks; i++) {
> + if (put_timespec64(kernel_tp++, (struct __kernel_timespec __user *)
> + &ptp_multi_clk_get->ts[j][i])) {
I think the typecast here can be removed.
Arnd
On Tue, Jan 2, 2024 at 12:22 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Tue, Jan 2, 2024, at 10:18, Sagi Maimon wrote:
> > Some user space applications need to read some clocks.
> > Each read requires moving from user space to kernel space.
> > The syscall overhead causes unpredictable delay between N clocks reads
> > Removing this delay causes better synchronization between N clocks.
> >
> > Introduce a new system call multi_clock_gettime, which can be used to measure
> > the offset between multiple clocks, from variety of types: PHC, virtual PHC
> > and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> > The offset includes the total time that the driver needs to read the clock
> > timestamp.
> >
> > New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
> > Supported clocks IDs: PHC, virtual PHC and various system clocks.
> > Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
> > The system call returns n_clocks timestamps for each measurement:
> > - clock 0 timestamp
> > - ...
> > - clock n timestamp
> >
> > Signed-off-by: Sagi Maimon <maimon.sagi@gmail.com>
> > ---
> > Changes since version 4:
> > - fix error : 'struct __ptp_multi_clock_get' declared inside parameter list
> > will not be visible outside of this definition or declaration
>
> I usually put all the changes for previous versions in a
> list here, it helps reviewers.
>
Will be done on patch V6.
> The changes you made for previous versions all look good
> to me, but I think there is still a few things worth
> considering. I'll also follow up on the earlier threads.
>
> > +#define MULTI_PTP_MAX_CLOCKS 32 /* Max number of clocks */
> > +#define MULTI_PTP_MAX_SAMPLES 32 /* Max allowed offset measurement samples. */
> > +
> > +struct __ptp_multi_clock_get {
> > + unsigned int n_clocks; /* Desired number of clocks. */
> > + unsigned int n_samples; /* Desired number of measurements per clock. */
> > + clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> > + /*
> > + * Array of list of n_clocks clocks time samples n_samples times.
> > + */
> > + struct __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
> > +};
>
> Since you now access each member individually, I think it
> makes more sense here to just pass these as four
> register arguments. It helps with argument introspection,
> avoids a couple of get_user(), and lets you remove the fixed
> array dimensions.
>
I prefer the use of get_user(), I will use it to remove the fixed
array dimensions.
which will be done on patch V6.
> > +SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get
> > __user *, ptp_multi_clk_get)
> > +{
> > + const struct k_clock *kc;
> > + struct timespec64 *kernel_tp;
> > + struct timespec64 *kernel_tp_base;
> > + unsigned int n_clocks; /* Desired number of clocks. */
> > + unsigned int n_samples; /* Desired number of measurements per clock.
> > */
> > + unsigned int i, j;
> > + clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> > + int error = 0;
> > +
> > + if (copy_from_user(&n_clocks, &ptp_multi_clk_get->n_clocks,
> > sizeof(n_clocks)))
> > + return -EFAULT;
> > + if (copy_from_user(&n_samples, &ptp_multi_clk_get->n_samples,
> > sizeof(n_samples)))
>
> If these remain as struct members rather than register arguments,
> you should use get_user() instead of copy_from_user().
>
Will be done on patch V6
> > + kernel_tp_base = kmalloc_array(n_clocks * n_samples,
> > + sizeof(struct timespec64), GFP_KERNEL);
> > + if (!kernel_tp_base)
> > + return -ENOMEM;
>
> To be on the safe side regarding possible data leak, maybe use
> kcalloc() instead of kmalloc_array() here.
>
Will be done on patch V6.
> > + kernel_tp = kernel_tp_base;
> > + for (j = 0; j < n_samples; j++) {
> > + for (i = 0; i < n_clocks; i++) {
> > + if (put_timespec64(kernel_tp++, (struct __kernel_timespec __user *)
> > + &ptp_multi_clk_get->ts[j][i])) {
>
> I think the typecast here can be removed.
>
You are right, will be fixed on patch V6.
> Arnd
Thanks for your Notes.
Hi Sagi,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/x86/asm]
[also build test ERROR on arnd-asm-generic/master tip/timers/core linus/master v6.7-rc8]
[cannot apply to next-20240103]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sagi-Maimon/posix-timers-add-multi_clock_gettime-system-call/20240102-172105
base: tip/x86/asm
patch link: https://lore.kernel.org/r/20240102091855.70418-1-maimon.sagi%40gmail.com
patch subject: [PATCH v5] posix-timers: add multi_clock_gettime system call
config: csky-randconfig-002-20240104 (https://download.01.org/0day-ci/archive/20240104/202401041000.T0GQPIBW-lkp@intel.com/config)
compiler: csky-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240104/202401041000.T0GQPIBW-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401041000.T0GQPIBW-lkp@intel.com/
All errors (new ones prefixed by >>):
>> csky-linux-ld: arch/csky/kernel/syscall_table.o:(.data..page_aligned+0x724): undefined reference to `sys_multi_clock_gettime'
On Tue, Jan 02 2024 at 11:18, Sagi Maimon wrote:
> Some user space applications need to read some clocks.
> Each read requires moving from user space to kernel space.
> The syscall overhead causes unpredictable delay between N clocks reads
> Removing this delay causes better synchronization between N clocks.
As I explained to you before: This is wishful thinking.
There is absolutely no guarantee that the syscall will yield better
results. It might on average, but that's a useless measure.
You also still fail to explain what this is going to solve and how it's
used.
> Some user space applications need to read some clocks.
Is just not an explanation at all.
Thanks,
tglx
@@ -378,6 +378,7 @@
454 common futex_wake sys_futex_wake
455 common futex_wait sys_futex_wait
456 common futex_requeue sys_futex_requeue
+457 common multi_clock_gettime sys_multi_clock_gettime
#
# Due to a historical design error, certain syscalls are numbered differently
@@ -74,6 +74,7 @@ struct landlock_ruleset_attr;
enum landlock_rule_type;
struct cachestat_range;
struct cachestat;
+struct __ptp_multi_clock_get;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -1161,7 +1162,7 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff);
asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
-
+asmlinkage long sys_multi_clock_gettime(struct __ptp_multi_clock_get __user * ptp_multi_clk_get);
/*
* Not a real system call, but a placeholder for syscalls which are
@@ -828,9 +828,11 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake)
__SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+#define __NR_multi_clock_gettime 457
+__SYSCALL(__NR_multi_clock_gettime, sys_multi_clock_gettime)
#undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 458
/*
* 32 bit systems traditionally used different
new file mode 100644
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_MULTI_CLOCK_H
+#define _UAPI_MULTI_CLOCK_H
+
+#include <linux/types.h>
+#include <linux/time_types.h>
+
+#define MULTI_PTP_MAX_CLOCKS 32 /* Max number of clocks */
+#define MULTI_PTP_MAX_SAMPLES 32 /* Max allowed offset measurement samples. */
+
+struct __ptp_multi_clock_get {
+ unsigned int n_clocks; /* Desired number of clocks. */
+ unsigned int n_samples; /* Desired number of measurements per clock. */
+ clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
+ /*
+ * Array of list of n_clocks clocks time samples n_samples times.
+ */
+ struct __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
+};
+
+#endif /* _UAPI_MULTI_CLOCK_H */
@@ -31,6 +31,8 @@
#include <linux/compat.h>
#include <linux/nospec.h>
#include <linux/time_namespace.h>
+#include <linux/multi_clock.h>
+#include <linux/slab.h>
#include "timekeeping.h"
#include "posix-timers.h"
@@ -1426,6 +1428,63 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
#endif
+SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
+{
+ const struct k_clock *kc;
+ struct timespec64 *kernel_tp;
+ struct timespec64 *kernel_tp_base;
+ unsigned int n_clocks; /* Desired number of clocks. */
+ unsigned int n_samples; /* Desired number of measurements per clock. */
+ unsigned int i, j;
+ clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
+ int error = 0;
+
+ if (copy_from_user(&n_clocks, &ptp_multi_clk_get->n_clocks, sizeof(n_clocks)))
+ return -EFAULT;
+ if (copy_from_user(&n_samples, &ptp_multi_clk_get->n_samples, sizeof(n_samples)))
+ return -EFAULT;
+ if (n_samples > MULTI_PTP_MAX_SAMPLES)
+ return -EINVAL;
+ if (n_clocks > MULTI_PTP_MAX_CLOCKS)
+ return -EINVAL;
+ if (copy_from_user(clkid_arr, &ptp_multi_clk_get->clkid_arr,
+ sizeof(clockid_t) * n_clocks))
+ return -EFAULT;
+
+ kernel_tp_base = kmalloc_array(n_clocks * n_samples,
+ sizeof(struct timespec64), GFP_KERNEL);
+ if (!kernel_tp_base)
+ return -ENOMEM;
+
+ kernel_tp = kernel_tp_base;
+ for (j = 0; j < n_samples; j++) {
+ for (i = 0; i < n_clocks; i++) {
+ kc = clockid_to_kclock(clkid_arr[i]);
+ if (!kc) {
+ error = -EINVAL;
+ goto out;
+ }
+ error = kc->clock_get_timespec(clkid_arr[i], kernel_tp++);
+ if (error)
+ goto out;
+ }
+ }
+
+ kernel_tp = kernel_tp_base;
+ for (j = 0; j < n_samples; j++) {
+ for (i = 0; i < n_clocks; i++) {
+ if (put_timespec64(kernel_tp++, (struct __kernel_timespec __user *)
+ &ptp_multi_clk_get->ts[j][i])) {
+ error = -EFAULT;
+ goto out;
+ }
+ }
+ }
+out:
+ kfree(kernel_tp_base);
+ return error;
+}
+
static const struct k_clock clock_realtime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_timespec = posix_get_realtime_timespec,