[v4] tee: Use iov_iter to better support shared buffer registration

Message ID 20231129164439.1130903-1-arnaud.pouliquen@foss.st.com
State New
Headers
Series [v4] tee: Use iov_iter to better support shared buffer registration |

Commit Message

Arnaud POULIQUEN Nov. 29, 2023, 4:44 p.m. UTC
  Currently it's not possible to register kernel buffers with TEE
which are allocated via vmalloc.

Use iov_iter and associated helper functions to manage the page
registration for all type of memories.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
---
Update from V3 to V4:
- improve commit message,
- use import_ubuf() instead of iov_iter_init(),
- move shm_get_kernel_pages in register_shm_helper,
- put back untagged_addr in register_shm_helper(),
- move the comment related to pin pages from shm_get_kernel_pages()
  to register_shm_helper().

Update from V2 to V3:
- break lines longer than 80 columns.

Update from V1 to V2:
- replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
- replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().

V1:
The support of buffer registration allocated with vmalloc is no more
available since c83900393aa1 ("tee: Remove vmalloc page support").

This patch is an alternative to a revert and resulted from a discussion
with Christopher Hellwig [1].

This patch has been tested using xtest tool in optee qemu environment [2]
and using the series related to the remoteproc tee that should be
proposed soon [3].

References:
[1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#m8ec683c44fcd9b69c2aee42eaed0793afac9dd18in
[2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-instructions
[3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#maca0a1fc897aadd54c7deac432e11473fe970d1d
---
 drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 37 deletions(-)
  

Comments

Sumit Garg Nov. 30, 2023, 7:54 a.m. UTC | #1
On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen
<arnaud.pouliquen@foss.st.com> wrote:
>
> Currently it's not possible to register kernel buffers with TEE
> which are allocated via vmalloc.
>
> Use iov_iter and associated helper functions to manage the page
> registration for all type of memories.
>
> Suggested-by: Christoph Hellwig <hch@infradead.org>
> Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
> ---
> Update from V3 to V4:
> - improve commit message,
> - use import_ubuf() instead of iov_iter_init(),
> - move shm_get_kernel_pages in register_shm_helper,
> - put back untagged_addr in register_shm_helper(),
> - move the comment related to pin pages from shm_get_kernel_pages()
>   to register_shm_helper().
>
> Update from V2 to V3:
> - break lines longer than 80 columns.
>
> Update from V1 to V2:
> - replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
> - replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
>
> V1:
> The support of buffer registration allocated with vmalloc is no more
> available since c83900393aa1 ("tee: Remove vmalloc page support").
>
> This patch is an alternative to a revert and resulted from a discussion
> with Christopher Hellwig [1].
>
> This patch has been tested using xtest tool in optee qemu environment [2]
> and using the series related to the remoteproc tee that should be
> proposed soon [3].
>
> References:
> [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#m8ec683c44fcd9b69c2aee42eaed0793afac9dd18in
> [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-instructions
> [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#maca0a1fc897aadd54c7deac432e11473fe970d1d
> ---
>  drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++-------------------
>  1 file changed, 46 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
> index 673cf0359494..ac73e8143233 100644
> --- a/drivers/tee/tee_shm.c
> +++ b/drivers/tee/tee_shm.c
> @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count)
>                 put_page(pages[n]);
>  }
>
> -static int shm_get_kernel_pages(unsigned long start, size_t page_count,
> -                               struct page **pages)
> +static void shm_get_kernel_pages(struct page **pages, size_t page_count)
>  {
> -       struct page *page;
>         size_t n;
>
> -       if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
> -                        is_kmap_addr((void *)start)))
> -               return -EINVAL;
> -
> -       page = virt_to_page((void *)start);
> -       for (n = 0; n < page_count; n++) {
> -               pages[n] = page + n;
> +       for (n = 0; n < page_count; n++)
>                 get_page(pages[n]);
> -       }
> -
> -       return page_count;
>  }
>
>  static void release_registered_pages(struct tee_shm *shm)
> @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size)
>  EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
>
>  static struct tee_shm *
> -register_shm_helper(struct tee_context *ctx, unsigned long addr,
> -                   size_t length, u32 flags, int id)
> +register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
> +                   int id)
>  {
>         struct tee_device *teedev = ctx->teedev;
>         struct tee_shm *shm;
> -       unsigned long start;
> -       size_t num_pages;
> +       unsigned long start, addr;
> +       size_t num_pages, off;
> +       ssize_t len;
>         void *ret;
>         int rc;
>
> @@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
>         shm->flags = flags;
>         shm->ctx = ctx;
>         shm->id = id;
> -       addr = untagged_addr(addr);
> +       addr = untagged_addr((unsigned long)iter_iov_addr(iter));
>         start = rounddown(addr, PAGE_SIZE);
> -       shm->offset = addr - start;
> -       shm->size = length;
> -       num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
> +       num_pages = iov_iter_npages(iter, INT_MAX);
> +       if (!num_pages) {
> +               ret = ERR_PTR(-ENOMEM);
> +               goto err_ctx_put;
> +       }
> +
>         shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);
>         if (!shm->pages) {
>                 ret = ERR_PTR(-ENOMEM);
>                 goto err_free_shm;
>         }
>
> -       if (flags & TEE_SHM_USER_MAPPED)
> -               rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
> -                                        shm->pages);
> -       else
> -               rc = shm_get_kernel_pages(start, num_pages, shm->pages);
> -       if (rc > 0)
> -               shm->num_pages = rc;
> -       if (rc != num_pages) {
> -               if (rc >= 0)
> -                       rc = -ENOMEM;
> -               ret = ERR_PTR(rc);
> -               goto err_put_shm_pages;
> +       len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
> +                                    &off);
> +       if (unlikely(len <= 0)) {
> +               ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
> +               goto err_free_shm_pages;
>         }
>
> +       /*
> +        * iov_iter_extract_kvec_pages does not get reference on the pages,
> +        * get a pin on them.

I think you meant: "get a reference on them". But I don't see the
value of this comment since iov_iter_extract_kvec_pages() already has
been commented properly as follows:

/*
 * Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
 * This does not get references on the pages, nor does it get a pin on them.
 */

> +        */
> +       if (iov_iter_is_kvec(iter))
> +               shm_get_kernel_pages(shm->pages, num_pages);
> +
> +       shm->offset = off;
> +       shm->size = len;
> +       shm->num_pages = num_pages;
> +
>         rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
>                                              shm->num_pages, start);
>         if (rc) {
> @@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
>
>         return shm;
>  err_put_shm_pages:
> -       if (flags & TEE_SHM_USER_MAPPED)
> +       if (!iov_iter_is_kvec(iter))
>                 unpin_user_pages(shm->pages, shm->num_pages);
>         else
>                 shm_put_kernel_pages(shm->pages, shm->num_pages);
> +err_free_shm_pages:
>         kfree(shm->pages);
>  err_free_shm:
>         kfree(shm);
> @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
>         u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
>         struct tee_device *teedev = ctx->teedev;
>         struct tee_shm *shm;
> +       struct iov_iter iter;
>         void *ret;
> -       int id;
> +       int id, err;
>
>         if (!access_ok((void __user *)addr, length))
>                 return ERR_PTR(-EFAULT);
> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
>         if (id < 0)
>                 return ERR_PTR(id);
>
> -       shm = register_shm_helper(ctx, addr, length, flags, id);
> +       err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);

As I mentioned in a previous review, import_ubuf() already does the
access_ok() check, so we don't need the extra access_ok() check above.
Also, you should move import_ubuf() to be the first invocation within
this API.

-Sumit

> +       if (err)
> +               return ERR_PTR(err);
> +
> +       shm = register_shm_helper(ctx, &iter, flags, id);
>         if (IS_ERR(shm)) {
>                 mutex_lock(&teedev->mutex);
>                 idr_remove(&teedev->idr, id);
> @@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx,
>                                             void *addr, size_t length)
>  {
>         u32 flags = TEE_SHM_DYNAMIC;
> +       struct kvec kvec;
> +       struct iov_iter iter;
> +
> +       kvec.iov_base = addr;
> +       kvec.iov_len = length;
> +       iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
>
> -       return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
> +       return register_shm_helper(ctx, &iter, flags, -1);
>  }
>  EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
>
> --
> 2.25.1
>
  
Arnaud POULIQUEN Nov. 30, 2023, 9:08 a.m. UTC | #2
On 11/30/23 08:54, Sumit Garg wrote:
> On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen
> <arnaud.pouliquen@foss.st.com> wrote:
>>
>> Currently it's not possible to register kernel buffers with TEE
>> which are allocated via vmalloc.
>>
>> Use iov_iter and associated helper functions to manage the page
>> registration for all type of memories.
>>
>> Suggested-by: Christoph Hellwig <hch@infradead.org>
>> Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
>> ---
>> Update from V3 to V4:
>> - improve commit message,
>> - use import_ubuf() instead of iov_iter_init(),
>> - move shm_get_kernel_pages in register_shm_helper,
>> - put back untagged_addr in register_shm_helper(),
>> - move the comment related to pin pages from shm_get_kernel_pages()
>>   to register_shm_helper().
>>
>> Update from V2 to V3:
>> - break lines longer than 80 columns.
>>
>> Update from V1 to V2:
>> - replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
>> - replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
>>
>> V1:
>> The support of buffer registration allocated with vmalloc is no more
>> available since c83900393aa1 ("tee: Remove vmalloc page support").
>>
>> This patch is an alternative to a revert and resulted from a discussion
>> with Christopher Hellwig [1].
>>
>> This patch has been tested using xtest tool in optee qemu environment [2]
>> and using the series related to the remoteproc tee that should be
>> proposed soon [3].
>>
>> References:
>> [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#m8ec683c44fcd9b69c2aee42eaed0793afac9dd18in
>> [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-instructions
>> [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#maca0a1fc897aadd54c7deac432e11473fe970d1d
>> ---
>>  drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++-------------------
>>  1 file changed, 46 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
>> index 673cf0359494..ac73e8143233 100644
>> --- a/drivers/tee/tee_shm.c
>> +++ b/drivers/tee/tee_shm.c
>> @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count)
>>                 put_page(pages[n]);
>>  }
>>
>> -static int shm_get_kernel_pages(unsigned long start, size_t page_count,
>> -                               struct page **pages)
>> +static void shm_get_kernel_pages(struct page **pages, size_t page_count)
>>  {
>> -       struct page *page;
>>         size_t n;
>>
>> -       if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
>> -                        is_kmap_addr((void *)start)))
>> -               return -EINVAL;
>> -
>> -       page = virt_to_page((void *)start);
>> -       for (n = 0; n < page_count; n++) {
>> -               pages[n] = page + n;
>> +       for (n = 0; n < page_count; n++)
>>                 get_page(pages[n]);
>> -       }
>> -
>> -       return page_count;
>>  }
>>
>>  static void release_registered_pages(struct tee_shm *shm)
>> @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size)
>>  EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
>>
>>  static struct tee_shm *
>> -register_shm_helper(struct tee_context *ctx, unsigned long addr,
>> -                   size_t length, u32 flags, int id)
>> +register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
>> +                   int id)
>>  {
>>         struct tee_device *teedev = ctx->teedev;
>>         struct tee_shm *shm;
>> -       unsigned long start;
>> -       size_t num_pages;
>> +       unsigned long start, addr;
>> +       size_t num_pages, off;
>> +       ssize_t len;
>>         void *ret;
>>         int rc;
>>
>> @@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
>>         shm->flags = flags;
>>         shm->ctx = ctx;
>>         shm->id = id;
>> -       addr = untagged_addr(addr);
>> +       addr = untagged_addr((unsigned long)iter_iov_addr(iter));
>>         start = rounddown(addr, PAGE_SIZE);
>> -       shm->offset = addr - start;
>> -       shm->size = length;
>> -       num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
>> +       num_pages = iov_iter_npages(iter, INT_MAX);
>> +       if (!num_pages) {
>> +               ret = ERR_PTR(-ENOMEM);
>> +               goto err_ctx_put;
>> +       }
>> +
>>         shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);
>>         if (!shm->pages) {
>>                 ret = ERR_PTR(-ENOMEM);
>>                 goto err_free_shm;
>>         }
>>
>> -       if (flags & TEE_SHM_USER_MAPPED)
>> -               rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
>> -                                        shm->pages);
>> -       else
>> -               rc = shm_get_kernel_pages(start, num_pages, shm->pages);
>> -       if (rc > 0)
>> -               shm->num_pages = rc;
>> -       if (rc != num_pages) {
>> -               if (rc >= 0)
>> -                       rc = -ENOMEM;
>> -               ret = ERR_PTR(rc);
>> -               goto err_put_shm_pages;
>> +       len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
>> +                                    &off);
>> +       if (unlikely(len <= 0)) {
>> +               ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
>> +               goto err_free_shm_pages;
>>         }
>>
>> +       /*
>> +        * iov_iter_extract_kvec_pages does not get reference on the pages,
>> +        * get a pin on them.
> 
> I think you meant: "get a reference on them". But I don't see the
> value of this comment since iov_iter_extract_kvec_pages() already has
> been commented properly as follows:
> 
> /*
>  * Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
>  * This does not get references on the pages, nor does it get a pin on them.
>  */
> 

I spent some time debugging this part. Since we use the same API for both user
and kernel buffers, we wouldn't expect to have any specific actions to take.
Therefore, I thought it would be helpful to add a comment explaining the reason
for this specific code, rather than go deeper into iov_iter to understand it.

But if you don't see the value, I can remove the comment.

>> +        */
>> +       if (iov_iter_is_kvec(iter))
>> +               shm_get_kernel_pages(shm->pages, num_pages);
>> +
>> +       shm->offset = off;
>> +       shm->size = len;
>> +       shm->num_pages = num_pages;
>> +
>>         rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
>>                                              shm->num_pages, start);
>>         if (rc) {
>> @@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
>>
>>         return shm;
>>  err_put_shm_pages:
>> -       if (flags & TEE_SHM_USER_MAPPED)
>> +       if (!iov_iter_is_kvec(iter))
>>                 unpin_user_pages(shm->pages, shm->num_pages);
>>         else
>>                 shm_put_kernel_pages(shm->pages, shm->num_pages);
>> +err_free_shm_pages:
>>         kfree(shm->pages);
>>  err_free_shm:
>>         kfree(shm);
>> @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
>>         u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
>>         struct tee_device *teedev = ctx->teedev;
>>         struct tee_shm *shm;
>> +       struct iov_iter iter;
>>         void *ret;
>> -       int id;
>> +       int id, err;
>>
>>         if (!access_ok((void __user *)addr, length))
>>                 return ERR_PTR(-EFAULT);
>> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
>>         if (id < 0)
>>                 return ERR_PTR(id);
>>
>> -       shm = register_shm_helper(ctx, addr, length, flags, id);
>> +       err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
> 
> As I mentioned in a previous review, import_ubuf() already does the
> access_ok() check, so we don't need the extra access_ok() check above.
> Also, you should move import_ubuf() to be the first invocation within
> this API.

My apologies, I re-added import_ubuf() during testing to debug an issue and
forgot to
remove it afterwards.

Thanks and regards,
Arnaud

> 
> -Sumit
> 
>> +       if (err)
>> +               return ERR_PTR(err);
>> +
>> +       shm = register_shm_helper(ctx, &iter, flags, id);
>>         if (IS_ERR(shm)) {
>>                 mutex_lock(&teedev->mutex);
>>                 idr_remove(&teedev->idr, id);
>> @@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx,
>>                                             void *addr, size_t length)
>>  {
>>         u32 flags = TEE_SHM_DYNAMIC;
>> +       struct kvec kvec;
>> +       struct iov_iter iter;
>> +
>> +       kvec.iov_base = addr;
>> +       kvec.iov_len = length;
>> +       iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
>>
>> -       return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
>> +       return register_shm_helper(ctx, &iter, flags, -1);
>>  }
>>  EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
>>
>> --
>> 2.25.1
>>
  
Sumit Garg Nov. 30, 2023, noon UTC | #3
On Thu, 30 Nov 2023 at 14:38, Arnaud POULIQUEN
<arnaud.pouliquen@foss.st.com> wrote:
>
>
>
> On 11/30/23 08:54, Sumit Garg wrote:
> > On Wed, 29 Nov 2023 at 22:15, Arnaud Pouliquen
> > <arnaud.pouliquen@foss.st.com> wrote:
> >>
> >> Currently it's not possible to register kernel buffers with TEE
> >> which are allocated via vmalloc.
> >>
> >> Use iov_iter and associated helper functions to manage the page
> >> registration for all type of memories.
> >>
> >> Suggested-by: Christoph Hellwig <hch@infradead.org>
> >> Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
> >> ---
> >> Update from V3 to V4:
> >> - improve commit message,
> >> - use import_ubuf() instead of iov_iter_init(),
> >> - move shm_get_kernel_pages in register_shm_helper,
> >> - put back untagged_addr in register_shm_helper(),
> >> - move the comment related to pin pages from shm_get_kernel_pages()
> >>   to register_shm_helper().
> >>
> >> Update from V2 to V3:
> >> - break lines longer than 80 columns.
> >>
> >> Update from V1 to V2:
> >> - replace ITER_SOURCE by ITER_DEST flag in tee_shm_register_user_buf(),
> >> - replace IS_ERR_OR NULL(shm) by IS_ERR(shm) in tee_shm_register_user_buf().
> >>
> >> V1:
> >> The support of buffer registration allocated with vmalloc is no more
> >> available since c83900393aa1 ("tee: Remove vmalloc page support").
> >>
> >> This patch is an alternative to a revert and resulted from a discussion
> >> with Christopher Hellwig [1].
> >>
> >> This patch has been tested using xtest tool in optee qemu environment [2]
> >> and using the series related to the remoteproc tee that should be
> >> proposed soon [3].
> >>
> >> References:
> >> [1] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#m8ec683c44fcd9b69c2aee42eaed0793afac9dd18in
> >> [2] https://optee.readthedocs.io/en/latest/building/devices/qemu.html#build-instructions
> >> [3] https://lore.kernel.org/linux-arm-kernel/18a8528d-7d9d-6ed0-0045-5ee47dd39fb2@foss.st.com/T/#maca0a1fc897aadd54c7deac432e11473fe970d1d
> >> ---
> >>  drivers/tee/tee_shm.c | 83 ++++++++++++++++++++++++-------------------
> >>  1 file changed, 46 insertions(+), 37 deletions(-)
> >>
> >> diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
> >> index 673cf0359494..ac73e8143233 100644
> >> --- a/drivers/tee/tee_shm.c
> >> +++ b/drivers/tee/tee_shm.c
> >> @@ -22,23 +22,12 @@ static void shm_put_kernel_pages(struct page **pages, size_t page_count)
> >>                 put_page(pages[n]);
> >>  }
> >>
> >> -static int shm_get_kernel_pages(unsigned long start, size_t page_count,
> >> -                               struct page **pages)
> >> +static void shm_get_kernel_pages(struct page **pages, size_t page_count)
> >>  {
> >> -       struct page *page;
> >>         size_t n;
> >>
> >> -       if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
> >> -                        is_kmap_addr((void *)start)))
> >> -               return -EINVAL;
> >> -
> >> -       page = virt_to_page((void *)start);
> >> -       for (n = 0; n < page_count; n++) {
> >> -               pages[n] = page + n;
> >> +       for (n = 0; n < page_count; n++)
> >>                 get_page(pages[n]);
> >> -       }
> >> -
> >> -       return page_count;
> >>  }
> >>
> >>  static void release_registered_pages(struct tee_shm *shm)
> >> @@ -214,13 +203,14 @@ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size)
> >>  EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
> >>
> >>  static struct tee_shm *
> >> -register_shm_helper(struct tee_context *ctx, unsigned long addr,
> >> -                   size_t length, u32 flags, int id)
> >> +register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
> >> +                   int id)
> >>  {
> >>         struct tee_device *teedev = ctx->teedev;
> >>         struct tee_shm *shm;
> >> -       unsigned long start;
> >> -       size_t num_pages;
> >> +       unsigned long start, addr;
> >> +       size_t num_pages, off;
> >> +       ssize_t len;
> >>         void *ret;
> >>         int rc;
> >>
> >> @@ -245,31 +235,38 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
> >>         shm->flags = flags;
> >>         shm->ctx = ctx;
> >>         shm->id = id;
> >> -       addr = untagged_addr(addr);
> >> +       addr = untagged_addr((unsigned long)iter_iov_addr(iter));
> >>         start = rounddown(addr, PAGE_SIZE);
> >> -       shm->offset = addr - start;
> >> -       shm->size = length;
> >> -       num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
> >> +       num_pages = iov_iter_npages(iter, INT_MAX);
> >> +       if (!num_pages) {
> >> +               ret = ERR_PTR(-ENOMEM);
> >> +               goto err_ctx_put;
> >> +       }
> >> +
> >>         shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);
> >>         if (!shm->pages) {
> >>                 ret = ERR_PTR(-ENOMEM);
> >>                 goto err_free_shm;
> >>         }
> >>
> >> -       if (flags & TEE_SHM_USER_MAPPED)
> >> -               rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
> >> -                                        shm->pages);
> >> -       else
> >> -               rc = shm_get_kernel_pages(start, num_pages, shm->pages);
> >> -       if (rc > 0)
> >> -               shm->num_pages = rc;
> >> -       if (rc != num_pages) {
> >> -               if (rc >= 0)
> >> -                       rc = -ENOMEM;
> >> -               ret = ERR_PTR(rc);
> >> -               goto err_put_shm_pages;
> >> +       len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
> >> +                                    &off);
> >> +       if (unlikely(len <= 0)) {
> >> +               ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
> >> +               goto err_free_shm_pages;
> >>         }
> >>
> >> +       /*
> >> +        * iov_iter_extract_kvec_pages does not get reference on the pages,
> >> +        * get a pin on them.
> >
> > I think you meant: "get a reference on them". But I don't see the
> > value of this comment since iov_iter_extract_kvec_pages() already has
> > been commented properly as follows:
> >
> > /*
> >  * Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
> >  * This does not get references on the pages, nor does it get a pin on them.
> >  */
> >
>
> I spent some time debugging this part. Since we use the same API for both user
> and kernel buffers, we wouldn't expect to have any specific actions to take.
> Therefore, I thought it would be helpful to add a comment explaining the reason
> for this specific code, rather than go deeper into iov_iter to understand it.
>

Fair enough, let's keep it with s/pin/reference/.

> But if you don't see the value, I can remove the comment.
>
> >> +        */
> >> +       if (iov_iter_is_kvec(iter))
> >> +               shm_get_kernel_pages(shm->pages, num_pages);
> >> +
> >> +       shm->offset = off;
> >> +       shm->size = len;
> >> +       shm->num_pages = num_pages;
> >> +
> >>         rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
> >>                                              shm->num_pages, start);
> >>         if (rc) {
> >> @@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
> >>
> >>         return shm;
> >>  err_put_shm_pages:
> >> -       if (flags & TEE_SHM_USER_MAPPED)
> >> +       if (!iov_iter_is_kvec(iter))
> >>                 unpin_user_pages(shm->pages, shm->num_pages);
> >>         else
> >>                 shm_put_kernel_pages(shm->pages, shm->num_pages);
> >> +err_free_shm_pages:
> >>         kfree(shm->pages);
> >>  err_free_shm:
> >>         kfree(shm);
> >> @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> >>         u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
> >>         struct tee_device *teedev = ctx->teedev;
> >>         struct tee_shm *shm;
> >> +       struct iov_iter iter;
> >>         void *ret;
> >> -       int id;
> >> +       int id, err;
> >>
> >>         if (!access_ok((void __user *)addr, length))
> >>                 return ERR_PTR(-EFAULT);
> >> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> >>         if (id < 0)
> >>                 return ERR_PTR(id);
> >>
> >> -       shm = register_shm_helper(ctx, addr, length, flags, id);
> >> +       err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
> >
> > As I mentioned in a previous review, import_ubuf() already does the
> > access_ok() check, so we don't need the extra access_ok() check above.
> > Also, you should move import_ubuf() to be the first invocation within
> > this API.
>
> My apologies, I re-added import_ubuf() during testing to debug an issue and

I suppose you intended to mention access_ok() here, BTW, no worries :).

-Sumit

> forgot to
> remove it afterwards.
>
> Thanks and regards,
> Arnaud
>
> >
> > -Sumit
> >
> >> +       if (err)
> >> +               return ERR_PTR(err);
> >> +
> >> +       shm = register_shm_helper(ctx, &iter, flags, id);
> >>         if (IS_ERR(shm)) {
> >>                 mutex_lock(&teedev->mutex);
> >>                 idr_remove(&teedev->idr, id);
> >> @@ -352,8 +355,14 @@ struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx,
> >>                                             void *addr, size_t length)
> >>  {
> >>         u32 flags = TEE_SHM_DYNAMIC;
> >> +       struct kvec kvec;
> >> +       struct iov_iter iter;
> >> +
> >> +       kvec.iov_base = addr;
> >> +       kvec.iov_len = length;
> >> +       iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
> >>
> >> -       return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
> >> +       return register_shm_helper(ctx, &iter, flags, -1);
> >>  }
> >>  EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);
> >>
> >> --
> >> 2.25.1
> >>
  
Sumit Garg Dec. 4, 2023, 12:42 p.m. UTC | #4
+ Jens A., Al Viro

<snip>

> >>>> +        */
> >>>> +       if (iov_iter_is_kvec(iter))
> >>>> +               shm_get_kernel_pages(shm->pages, num_pages);
> >>>> +
> >>>> +       shm->offset = off;
> >>>> +       shm->size = len;
> >>>> +       shm->num_pages = num_pages;
> >>>> +
> >>>>         rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
> >>>>                                              shm->num_pages, start);
> >>>>         if (rc) {
> >>>> @@ -279,10 +276,11 @@ register_shm_helper(struct tee_context *ctx, unsigned long addr,
> >>>>
> >>>>         return shm;
> >>>>  err_put_shm_pages:
> >>>> -       if (flags & TEE_SHM_USER_MAPPED)
> >>>> +       if (!iov_iter_is_kvec(iter))
> >>>>                 unpin_user_pages(shm->pages, shm->num_pages);
> >>>>         else
> >>>>                 shm_put_kernel_pages(shm->pages, shm->num_pages);
> >>>> +err_free_shm_pages:
> >>>>         kfree(shm->pages);
> >>>>  err_free_shm:
> >>>>         kfree(shm);
> >>>> @@ -307,8 +305,9 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> >>>>         u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
> >>>>         struct tee_device *teedev = ctx->teedev;
> >>>>         struct tee_shm *shm;
> >>>> +       struct iov_iter iter;
> >>>>         void *ret;
> >>>> -       int id;
> >>>> +       int id, err;
> >>>>
> >>>>         if (!access_ok((void __user *)addr, length))
> >>>>                 return ERR_PTR(-EFAULT);
> >>>> @@ -319,7 +318,11 @@ struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
> >>>>         if (id < 0)
> >>>>                 return ERR_PTR(id);
> >>>>
> >>>> -       shm = register_shm_helper(ctx, addr, length, flags, id);
> >>>> +       err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
> >>>
> >>> As I mentioned in a previous review, import_ubuf() already does the
> >>> access_ok() check, so we don't need the extra access_ok() check above.
> >>> Also, you should move import_ubuf() to be the first invocation within
> >>> this API.
> >>
> >> My apologies, I re-added import_ubuf() during testing to debug an issue and
> >
> > I suppose you intended to mention access_ok() here, BTW, no worries :).
>
> Re-running xtest after removing the access_ok() I have a crash in
> regression_5006.3 Allocate_out_of_memory
>
> [   89.258100] ------------[ cut here ]------------
> [   89.258377] WARNING: CPU: 1 PID: 134 at mm/page_alloc.c:4402
> __alloc_pages+0x674/0xd14
> [   89.258988] Modules linked in:
> [   89.259554] CPU: 1 PID: 134 Comm: xtest Not tainted 6.6.0-g1ebcc18a80d7-dirty #69
> [   89.259763] Hardware name: linux,dummy-virt (DT)
> [   89.259977] pstate: 21400005 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
> [   89.260143] pc : __alloc_pages+0x674/0xd14
> [   89.260252] lr : alloc_pages+0xac/0x160
> [   89.260364] sp : ffff8000803f3a30
> [   89.260437] x29: ffff8000803f3a30 x28: ffff8000803f3d70 x27: 0000000000000000
> [   89.260705] x26: 000000000000000b x25: 0000000000000000 x24: ffff000000178000
> [   89.260847] x23: ffff00003f52b720 x22: ffff000001442720 x21: 0000000000000000
> [   89.261022] x20: 000000000000000b x19: 0000000000000000 x18: ffff8000803f3b78
> [   89.261162] x17: 0000000000000000 x16: ffffa7a428b67660 x15: 0000ffff63ffffff
> [   89.261348] x14: 0000000000000008 x13: 1fffe00000a4c421 x12: ffff8000803f3b78
> [   89.261528] x11: ffff000005262100 x10: ffff000005262108 x9 : ffff00000526210c
> [   89.261739] x8 : 0000aaab0a97c000 x7 : 0000aaab0a90e000 x6 : ffff000005262150
> [   89.261920] x5 : 0000000000000000 x4 : ffff000000976740 x3 : 0000000000000000
> [   89.262098] x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffa7a429daf000
> [   89.262340] Call trace:
> [   89.262921]  __alloc_pages+0x674/0xd14
> [   89.263262]  alloc_pages+0xac/0x160
> [   89.263373]  alloc_pages_exact+0x48/0x94
> [   89.263464]  optee_shm_register+0xa8/0x1f4
> [   89.263591]  register_shm_helper+0x1bc/0x28c
> [   89.263687]  tee_shm_register_user_buf+0xb8/0x128
> [   89.263816]  tee_ioctl+0xbc/0xfa0
> [   89.263915]  __arm64_sys_ioctl+0xa8/0xec
> [   89.264053]  invoke_syscall+0x48/0x114
> [   89.264173]  el0_svc_common.constprop.0+0x40/0xe8
> [   89.264321]  do_el0_svc+0x20/0x2c
> [   89.264488]  el0_svc+0x40/0xf4
> [   89.264578]  el0t_64_sync_handler+0x13c/0x158
> [   89.264714]  el0t_64_sync+0x190/0x194
> [   89.265003] ---[ end trace 0000000000000000 ]---
>
>
> The issue is that, in import_ubuf(), it updates the length [1], making the
>
> access_ok() succeed and leading to an issue later in the page allocation process.
>
> To fix, I propose to add a test in tee_shm_register_user_buf() after calling
>
> import_ubuf()
>
>         if (length != iter_iov_len(&iter))
>                 return ERR_PTR(-EFAULT);
>
>
> Would it be ok for you ? I'm afraid of side effects if I update import_ubuf()...

IMO, access_ok() should be the first thing that import_ubuf() or
import_single_range() should do, something as follows:

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8ff6824a1005..4aee0371824c 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);

 int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
 {
-       if (len > MAX_RW_COUNT)
-               len = MAX_RW_COUNT;
        if (unlikely(!access_ok(buf, len)))
                return -EFAULT;
+       if (len > MAX_RW_COUNT)
+               len = MAX_RW_COUNT;

        iov_iter_ubuf(i, rw, buf, len);
        return 0;

Jens A., Al Viro,

Was there any particular reason which I am unaware of to perform
access_ok() check on modified input length?

-Sumit

>
> [1] https://elixir.bootlin.com/linux/latest/source/lib/iov_iter.c#L1553
>
  
Jens Axboe Dec. 4, 2023, 4:36 p.m. UTC | #5
On 12/4/23 5:42 AM, Sumit Garg wrote:
> IMO, access_ok() should be the first thing that import_ubuf() or
> import_single_range() should do, something as follows:
> 
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 8ff6824a1005..4aee0371824c 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
> 
>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>  {
> -       if (len > MAX_RW_COUNT)
> -               len = MAX_RW_COUNT;
>         if (unlikely(!access_ok(buf, len)))
>                 return -EFAULT;
> +       if (len > MAX_RW_COUNT)
> +               len = MAX_RW_COUNT;
> 
>         iov_iter_ubuf(i, rw, buf, len);
>         return 0;
> 
> Jens A., Al Viro,
> 
> Was there any particular reason which I am unaware of to perform
> access_ok() check on modified input length?

This change makes sense to me, and seems consistent with what is done
elsewhere too.
  
Jens Axboe Dec. 4, 2023, 4:40 p.m. UTC | #6
On 12/4/23 9:36 AM, Jens Axboe wrote:
> On 12/4/23 5:42 AM, Sumit Garg wrote:
>> IMO, access_ok() should be the first thing that import_ubuf() or
>> import_single_range() should do, something as follows:
>>
>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>> index 8ff6824a1005..4aee0371824c 100644
>> --- a/lib/iov_iter.c
>> +++ b/lib/iov_iter.c
>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>
>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>  {
>> -       if (len > MAX_RW_COUNT)
>> -               len = MAX_RW_COUNT;
>>         if (unlikely(!access_ok(buf, len)))
>>                 return -EFAULT;
>> +       if (len > MAX_RW_COUNT)
>> +               len = MAX_RW_COUNT;
>>
>>         iov_iter_ubuf(i, rw, buf, len);
>>         return 0;
>>
>> Jens A., Al Viro,
>>
>> Was there any particular reason which I am unaware of to perform
>> access_ok() check on modified input length?
> 
> This change makes sense to me, and seems consistent with what is done
> elsewhere too.

For some reason I missed import_single_range(), which does it the same
way as import_ubuf() currently does - cap the range before the
access_ok() check. The vec variants sum as they go, but access_ok()
before the range.

I think part of the issue here is that the single range imports return 0
for success and -ERROR otherwise. This means that the caller does not
know if the full range was imported or not. OTOH, we always cap any data
transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
here.
  
Arnaud POULIQUEN Dec. 4, 2023, 5:02 p.m. UTC | #7
Hi,

On 12/4/23 17:40, Jens Axboe wrote:
> On 12/4/23 9:36 AM, Jens Axboe wrote:
>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>> import_single_range() should do, something as follows:
>>>
>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>> index 8ff6824a1005..4aee0371824c 100644
>>> --- a/lib/iov_iter.c
>>> +++ b/lib/iov_iter.c
>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>
>>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>  {
>>> -       if (len > MAX_RW_COUNT)
>>> -               len = MAX_RW_COUNT;
>>>         if (unlikely(!access_ok(buf, len)))
>>>                 return -EFAULT;
>>> +       if (len > MAX_RW_COUNT)
>>> +               len = MAX_RW_COUNT;
>>>
>>>         iov_iter_ubuf(i, rw, buf, len);
>>>         return 0;
>>>
>>> Jens A., Al Viro,
>>>
>>> Was there any particular reason which I am unaware of to perform
>>> access_ok() check on modified input length?
>>
>> This change makes sense to me, and seems consistent with what is done
>> elsewhere too.
> 
> For some reason I missed import_single_range(), which does it the same
> way as import_ubuf() currently does - cap the range before the
> access_ok() check. The vec variants sum as they go, but access_ok()
> before the range.
> 
> I think part of the issue here is that the single range imports return 0
> for success and -ERROR otherwise. This means that the caller does not
> know if the full range was imported or not. OTOH, we always cap any data
> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
> here.
> 

Should we limit to MAX_RW_COUNT or return an error?
Seems to me that limiting could generate side effect later that could be not
simple to debug.


>>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>  {
>>> -       if (len > MAX_RW_COUNT)
>>> +               return -EFAULT;
>>>         if (unlikely(!access_ok(buf, len)))
>>>                 return -EFAULT;
>>>
>>>         iov_iter_ubuf(i, rw, buf, len);
>>>         return 0;

or perhaps just remove the test as __access_ok() already tests that the
size < TASK_SIZE

https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_ok.h#L31


Thanks,
Arnaud
  
Jens Axboe Dec. 4, 2023, 5:13 p.m. UTC | #8
On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
> Hi,
> 
> On 12/4/23 17:40, Jens Axboe wrote:
>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>> import_single_range() should do, something as follows:
>>>>
>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>> index 8ff6824a1005..4aee0371824c 100644
>>>> --- a/lib/iov_iter.c
>>>> +++ b/lib/iov_iter.c
>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>
>>>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>  {
>>>> -       if (len > MAX_RW_COUNT)
>>>> -               len = MAX_RW_COUNT;
>>>>         if (unlikely(!access_ok(buf, len)))
>>>>                 return -EFAULT;
>>>> +       if (len > MAX_RW_COUNT)
>>>> +               len = MAX_RW_COUNT;
>>>>
>>>>         iov_iter_ubuf(i, rw, buf, len);
>>>>         return 0;
>>>>
>>>> Jens A., Al Viro,
>>>>
>>>> Was there any particular reason which I am unaware of to perform
>>>> access_ok() check on modified input length?
>>>
>>> This change makes sense to me, and seems consistent with what is done
>>> elsewhere too.
>>
>> For some reason I missed import_single_range(), which does it the same
>> way as import_ubuf() currently does - cap the range before the
>> access_ok() check. The vec variants sum as they go, but access_ok()
>> before the range.
>>
>> I think part of the issue here is that the single range imports return 0
>> for success and -ERROR otherwise. This means that the caller does not
>> know if the full range was imported or not. OTOH, we always cap any data
>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>> here.
>>
> 
> Should we limit to MAX_RW_COUNT or return an error? Seems to me that
> limiting could generate side effect later that could be not simple to
> debug.

We've traditionally just truncated the length, so principle of least
surprise says we should continue doing that.
  
Sumit Garg Dec. 5, 2023, 12:07 p.m. UTC | #9
Hi Arnaud,

On Mon, 4 Dec 2023 at 22:32, Arnaud POULIQUEN
<arnaud.pouliquen@foss.st.com> wrote:
>
> Hi,
>
> On 12/4/23 17:40, Jens Axboe wrote:
> > On 12/4/23 9:36 AM, Jens Axboe wrote:
> >> On 12/4/23 5:42 AM, Sumit Garg wrote:
> >>> IMO, access_ok() should be the first thing that import_ubuf() or
> >>> import_single_range() should do, something as follows:
> >>>
> >>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> >>> index 8ff6824a1005..4aee0371824c 100644
> >>> --- a/lib/iov_iter.c
> >>> +++ b/lib/iov_iter.c
> >>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
> >>>
> >>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
> >>>  {
> >>> -       if (len > MAX_RW_COUNT)
> >>> -               len = MAX_RW_COUNT;
> >>>         if (unlikely(!access_ok(buf, len)))
> >>>                 return -EFAULT;
> >>> +       if (len > MAX_RW_COUNT)
> >>> +               len = MAX_RW_COUNT;
> >>>
> >>>         iov_iter_ubuf(i, rw, buf, len);
> >>>         return 0;
> >>>
> >>> Jens A., Al Viro,
> >>>
> >>> Was there any particular reason which I am unaware of to perform
> >>> access_ok() check on modified input length?
> >>
> >> This change makes sense to me, and seems consistent with what is done
> >> elsewhere too.
> >
> > For some reason I missed import_single_range(), which does it the same
> > way as import_ubuf() currently does - cap the range before the
> > access_ok() check. The vec variants sum as they go, but access_ok()
> > before the range.
> >
> > I think part of the issue here is that the single range imports return 0
> > for success and -ERROR otherwise. This means that the caller does not
> > know if the full range was imported or not. OTOH, we always cap any data
> > transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
> > here.
> >
>
> Should we limit to MAX_RW_COUNT or return an error?
> Seems to me that limiting could generate side effect later that could be not
> simple to debug.
>
>
> >>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
> >>>  {
> >>> -       if (len > MAX_RW_COUNT)
> >>> +               return -EFAULT;
> >>>         if (unlikely(!access_ok(buf, len)))
> >>>                 return -EFAULT;
> >>>
> >>>         iov_iter_ubuf(i, rw, buf, len);
> >>>         return 0;
>
> or perhaps just remove the test as __access_ok() already tests that the
> size < TASK_SIZE
>
> https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_ok.h#L31
>

It looks like there are predefined constraints for using import_ubuf()
which doesn't properly match our needs. So let's directly use:
iov_iter_ubuf() instead.

-Sumit

>
> Thanks,
> Arnaud
>
  
Arnaud POULIQUEN Dec. 5, 2023, 1:45 p.m. UTC | #10
Hi Sumit,

On 12/5/23 13:07, Sumit Garg wrote:
> Hi Arnaud,
> 
> On Mon, 4 Dec 2023 at 22:32, Arnaud POULIQUEN
> <arnaud.pouliquen@foss.st.com> wrote:
>>
>> Hi,
>>
>> On 12/4/23 17:40, Jens Axboe wrote:
>>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>>> import_single_range() should do, something as follows:
>>>>>
>>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>>> index 8ff6824a1005..4aee0371824c 100644
>>>>> --- a/lib/iov_iter.c
>>>>> +++ b/lib/iov_iter.c
>>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>>
>>>>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>>  {
>>>>> -       if (len > MAX_RW_COUNT)
>>>>> -               len = MAX_RW_COUNT;
>>>>>         if (unlikely(!access_ok(buf, len)))
>>>>>                 return -EFAULT;
>>>>> +       if (len > MAX_RW_COUNT)
>>>>> +               len = MAX_RW_COUNT;
>>>>>
>>>>>         iov_iter_ubuf(i, rw, buf, len);
>>>>>         return 0;
>>>>>
>>>>> Jens A., Al Viro,
>>>>>
>>>>> Was there any particular reason which I am unaware of to perform
>>>>> access_ok() check on modified input length?
>>>>
>>>> This change makes sense to me, and seems consistent with what is done
>>>> elsewhere too.
>>>
>>> For some reason I missed import_single_range(), which does it the same
>>> way as import_ubuf() currently does - cap the range before the
>>> access_ok() check. The vec variants sum as they go, but access_ok()
>>> before the range.
>>>
>>> I think part of the issue here is that the single range imports return 0
>>> for success and -ERROR otherwise. This means that the caller does not
>>> know if the full range was imported or not. OTOH, we always cap any data
>>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>>> here.
>>>
>>
>> Should we limit to MAX_RW_COUNT or return an error?
>> Seems to me that limiting could generate side effect later that could be not
>> simple to debug.
>>
>>
>>>>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>>  {
>>>>> -       if (len > MAX_RW_COUNT)
>>>>> +               return -EFAULT;
>>>>>         if (unlikely(!access_ok(buf, len)))
>>>>>                 return -EFAULT;
>>>>>
>>>>>         iov_iter_ubuf(i, rw, buf, len);
>>>>>         return 0;
>>
>> or perhaps just remove the test as __access_ok() already tests that the
>> size < TASK_SIZE
>>
>> https://elixir.bootlin.com/linux/v6.7-rc4/source/include/asm-generic/access_ok.h#L31
>>
> 
> It looks like there are predefined constraints for using import_ubuf()
> which doesn't properly match our needs. So let's directly use:
> iov_iter_ubuf() instead.

Yes, this seems a safer alternative. I will send a new version based on it.

Thanks,
Arnaud

> 
> -Sumit
> 
>>
>> Thanks,
>> Arnaud
>>
  
Arnaud POULIQUEN Dec. 5, 2023, 4:55 p.m. UTC | #11
hi Jens Axboe, Al Viro,

On 12/4/23 18:13, Jens Axboe wrote:
> On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
>> Hi,
>>
>> On 12/4/23 17:40, Jens Axboe wrote:
>>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>>> import_single_range() should do, something as follows:
>>>>>
>>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>>> index 8ff6824a1005..4aee0371824c 100644
>>>>> --- a/lib/iov_iter.c
>>>>> +++ b/lib/iov_iter.c
>>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>>
>>>>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>>  {
>>>>> -       if (len > MAX_RW_COUNT)
>>>>> -               len = MAX_RW_COUNT;
>>>>>         if (unlikely(!access_ok(buf, len)))
>>>>>                 return -EFAULT;
>>>>> +       if (len > MAX_RW_COUNT)
>>>>> +               len = MAX_RW_COUNT;
>>>>>
>>>>>         iov_iter_ubuf(i, rw, buf, len);
>>>>>         return 0;
>>>>>
>>>>> Jens A., Al Viro,
>>>>>
>>>>> Was there any particular reason which I am unaware of to perform
>>>>> access_ok() check on modified input length?
>>>>
>>>> This change makes sense to me, and seems consistent with what is done
>>>> elsewhere too.
>>>
>>> For some reason I missed import_single_range(), which does it the same
>>> way as import_ubuf() currently does - cap the range before the
>>> access_ok() check. The vec variants sum as they go, but access_ok()
>>> before the range.
>>>
>>> I think part of the issue here is that the single range imports return 0
>>> for success and -ERROR otherwise. This means that the caller does not
>>> know if the full range was imported or not. OTOH, we always cap any data
>>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>>> here.
>>>
>>
>> Should we limit to MAX_RW_COUNT or return an error? Seems to me that
>> limiting could generate side effect later that could be not simple to
>> debug.
> 
> We've traditionally just truncated the length, so principle of least
> surprise says we should continue doing that.
> 


As Jens Wiklander has proposed using iov_iter_ubuf() instead of import_ubuf(),
should I propose a patch updating import_ubuf() and import_single_range()?
Or would you prefer that we keep the functions unchanged for the time being?

Regards,
Arnaud
  
Jens Axboe Dec. 5, 2023, 5:50 p.m. UTC | #12
On 12/5/23 9:55 AM, Arnaud POULIQUEN wrote:
> hi Jens Axboe, Al Viro,
> 
> On 12/4/23 18:13, Jens Axboe wrote:
>> On 12/4/23 10:02 AM, Arnaud POULIQUEN wrote:
>>> Hi,
>>>
>>> On 12/4/23 17:40, Jens Axboe wrote:
>>>> On 12/4/23 9:36 AM, Jens Axboe wrote:
>>>>> On 12/4/23 5:42 AM, Sumit Garg wrote:
>>>>>> IMO, access_ok() should be the first thing that import_ubuf() or
>>>>>> import_single_range() should do, something as follows:
>>>>>>
>>>>>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>>>>>> index 8ff6824a1005..4aee0371824c 100644
>>>>>> --- a/lib/iov_iter.c
>>>>>> +++ b/lib/iov_iter.c
>>>>>> @@ -1384,10 +1384,10 @@ EXPORT_SYMBOL(import_single_range);
>>>>>>
>>>>>>  int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
>>>>>>  {
>>>>>> -       if (len > MAX_RW_COUNT)
>>>>>> -               len = MAX_RW_COUNT;
>>>>>>         if (unlikely(!access_ok(buf, len)))
>>>>>>                 return -EFAULT;
>>>>>> +       if (len > MAX_RW_COUNT)
>>>>>> +               len = MAX_RW_COUNT;
>>>>>>
>>>>>>         iov_iter_ubuf(i, rw, buf, len);
>>>>>>         return 0;
>>>>>>
>>>>>> Jens A., Al Viro,
>>>>>>
>>>>>> Was there any particular reason which I am unaware of to perform
>>>>>> access_ok() check on modified input length?
>>>>>
>>>>> This change makes sense to me, and seems consistent with what is done
>>>>> elsewhere too.
>>>>
>>>> For some reason I missed import_single_range(), which does it the same
>>>> way as import_ubuf() currently does - cap the range before the
>>>> access_ok() check. The vec variants sum as they go, but access_ok()
>>>> before the range.
>>>>
>>>> I think part of the issue here is that the single range imports return 0
>>>> for success and -ERROR otherwise. This means that the caller does not
>>>> know if the full range was imported or not. OTOH, we always cap any data
>>>> transfer at MAX_RW_COUNT, so may make more sense to fix up the caller
>>>> here.
>>>>
>>>
>>> Should we limit to MAX_RW_COUNT or return an error? Seems to me that
>>> limiting could generate side effect later that could be not simple to
>>> debug.
>>
>> We've traditionally just truncated the length, so principle of least
>> surprise says we should continue doing that.
>>
> 
> 
> As Jens Wiklander has proposed using iov_iter_ubuf() instead of
> import_ubuf(), should I propose a patch updating import_ubuf() and
> import_single_range()? Or would you prefer that we keep the functions
> unchanged for the time being?

Arguably it should be consistent with iovec imports, which return the
length (or error). But it might be safer to just check access_ok()
first and then truncate at least, vs what is there now.

Note that for 6.8 import_single_range() is gone as it was really just
doing the same thing that import_ubuf() is. Any further changes in this
area should CC Christian Brauner as well, as he has that staged in his
tree.
  
David Laight Dec. 6, 2023, 11:38 a.m. UTC | #13
> > As Jens Wiklander has proposed using iov_iter_ubuf() instead of
> > import_ubuf(), should I propose a patch updating import_ubuf() and
> > import_single_range()? Or would you prefer that we keep the functions
> > unchanged for the time being?
> 
> Arguably it should be consistent with iovec imports, which return the
> length (or error). But it might be safer to just check access_ok()
> first and then truncate at least, vs what is there now.

Is the access_ok() check even needed when setting up an iov_iter?
It is always checked again when the actual copy is done.

I looked at this a while back and couldn't see any code paths that
relied on the early access_ok() check.
Maybe it is historic?

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
  

Patch

diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 673cf0359494..ac73e8143233 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -22,23 +22,12 @@  static void shm_put_kernel_pages(struct page **pages, size_t page_count)
 		put_page(pages[n]);
 }
 
-static int shm_get_kernel_pages(unsigned long start, size_t page_count,
-				struct page **pages)
+static void shm_get_kernel_pages(struct page **pages, size_t page_count)
 {
-	struct page *page;
 	size_t n;
 
-	if (WARN_ON_ONCE(is_vmalloc_addr((void *)start) ||
-			 is_kmap_addr((void *)start)))
-		return -EINVAL;
-
-	page = virt_to_page((void *)start);
-	for (n = 0; n < page_count; n++) {
-		pages[n] = page + n;
+	for (n = 0; n < page_count; n++)
 		get_page(pages[n]);
-	}
-
-	return page_count;
 }
 
 static void release_registered_pages(struct tee_shm *shm)
@@ -214,13 +203,14 @@  struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size)
 EXPORT_SYMBOL_GPL(tee_shm_alloc_priv_buf);
 
 static struct tee_shm *
-register_shm_helper(struct tee_context *ctx, unsigned long addr,
-		    size_t length, u32 flags, int id)
+register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
+		    int id)
 {
 	struct tee_device *teedev = ctx->teedev;
 	struct tee_shm *shm;
-	unsigned long start;
-	size_t num_pages;
+	unsigned long start, addr;
+	size_t num_pages, off;
+	ssize_t len;
 	void *ret;
 	int rc;
 
@@ -245,31 +235,38 @@  register_shm_helper(struct tee_context *ctx, unsigned long addr,
 	shm->flags = flags;
 	shm->ctx = ctx;
 	shm->id = id;
-	addr = untagged_addr(addr);
+	addr = untagged_addr((unsigned long)iter_iov_addr(iter));
 	start = rounddown(addr, PAGE_SIZE);
-	shm->offset = addr - start;
-	shm->size = length;
-	num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
+	num_pages = iov_iter_npages(iter, INT_MAX);
+	if (!num_pages) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err_ctx_put;
+	}
+
 	shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);
 	if (!shm->pages) {
 		ret = ERR_PTR(-ENOMEM);
 		goto err_free_shm;
 	}
 
-	if (flags & TEE_SHM_USER_MAPPED)
-		rc = pin_user_pages_fast(start, num_pages, FOLL_WRITE,
-					 shm->pages);
-	else
-		rc = shm_get_kernel_pages(start, num_pages, shm->pages);
-	if (rc > 0)
-		shm->num_pages = rc;
-	if (rc != num_pages) {
-		if (rc >= 0)
-			rc = -ENOMEM;
-		ret = ERR_PTR(rc);
-		goto err_put_shm_pages;
+	len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
+				     &off);
+	if (unlikely(len <= 0)) {
+		ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
+		goto err_free_shm_pages;
 	}
 
+	/*
+	 * iov_iter_extract_kvec_pages does not get reference on the pages,
+	 * get a pin on them.
+	 */
+	if (iov_iter_is_kvec(iter))
+		shm_get_kernel_pages(shm->pages, num_pages);
+
+	shm->offset = off;
+	shm->size = len;
+	shm->num_pages = num_pages;
+
 	rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
 					     shm->num_pages, start);
 	if (rc) {
@@ -279,10 +276,11 @@  register_shm_helper(struct tee_context *ctx, unsigned long addr,
 
 	return shm;
 err_put_shm_pages:
-	if (flags & TEE_SHM_USER_MAPPED)
+	if (!iov_iter_is_kvec(iter))
 		unpin_user_pages(shm->pages, shm->num_pages);
 	else
 		shm_put_kernel_pages(shm->pages, shm->num_pages);
+err_free_shm_pages:
 	kfree(shm->pages);
 err_free_shm:
 	kfree(shm);
@@ -307,8 +305,9 @@  struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
 	u32 flags = TEE_SHM_USER_MAPPED | TEE_SHM_DYNAMIC;
 	struct tee_device *teedev = ctx->teedev;
 	struct tee_shm *shm;
+	struct iov_iter iter;
 	void *ret;
-	int id;
+	int id, err;
 
 	if (!access_ok((void __user *)addr, length))
 		return ERR_PTR(-EFAULT);
@@ -319,7 +318,11 @@  struct tee_shm *tee_shm_register_user_buf(struct tee_context *ctx,
 	if (id < 0)
 		return ERR_PTR(id);
 
-	shm = register_shm_helper(ctx, addr, length, flags, id);
+	err = import_ubuf(ITER_DEST, (void __user *)addr, length, &iter);
+	if (err)
+		return ERR_PTR(err);
+
+	shm = register_shm_helper(ctx, &iter, flags, id);
 	if (IS_ERR(shm)) {
 		mutex_lock(&teedev->mutex);
 		idr_remove(&teedev->idr, id);
@@ -352,8 +355,14 @@  struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx,
 					    void *addr, size_t length)
 {
 	u32 flags = TEE_SHM_DYNAMIC;
+	struct kvec kvec;
+	struct iov_iter iter;
+
+	kvec.iov_base = addr;
+	kvec.iov_len = length;
+	iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, length);
 
-	return register_shm_helper(ctx, (unsigned long)addr, length, flags, -1);
+	return register_shm_helper(ctx, &iter, flags, -1);
 }
 EXPORT_SYMBOL_GPL(tee_shm_register_kernel_buf);