[v2,07/18] x86/sgx: Use a list to track to-be-reclaimed pages during reclaim
Commit Message
From: Sean Christopherson <sean.j.christopherson@intel.com>
Change sgx_reclaim_pages() to use a list rather than an array for
storing the epc_pages which will be reclaimed. This change is needed
to transition to the LRU implementation for EPC cgroup support.
This change requires keeping track of whether newly recorded
EPC pages are pages for VA Arrays, or for Enclave data. In addition,
helper functions are added to move pages from one list to another and
enforce a consistent queue like behavior for the LRU lists.
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Sean Christopherson <seanjc@google.com>
---
arch/x86/kernel/cpu/sgx/encl.c | 7 ++--
arch/x86/kernel/cpu/sgx/ioctl.c | 5 ++-
arch/x86/kernel/cpu/sgx/main.c | 69 +++++++++++++++++----------------
arch/x86/kernel/cpu/sgx/sgx.h | 42 ++++++++++++++++++++
4 files changed, 85 insertions(+), 38 deletions(-)
Comments
On 12/2/22 10:36, Kristen Carlson Accardi wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> Change sgx_reclaim_pages() to use a list rather than an array for
> storing the epc_pages which will be reclaimed. This change is needed
> to transition to the LRU implementation for EPC cgroup support.
>
> This change requires keeping track of whether newly recorded
> EPC pages are pages for VA Arrays, or for Enclave data. In addition,
> helper functions are added to move pages from one list to another and
> enforce a consistent queue like behavior for the LRU lists.
More changelog nit: Please use imperative voice, not passive voice.
Move from:
In addition, helper functions are added
to:
In addition, add helper functions
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 4683da9ef4f1..9ee306ac2a8e 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -252,7 +252,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> epc_page = sgx_encl_eldu(&encl->secs, NULL);
> if (IS_ERR(epc_page))
> return ERR_CAST(epc_page);
> - sgx_record_epc_page(epc_page, 0);
> + sgx_record_epc_page(epc_page, SGX_EPC_PAGE_ENCLAVE);
> }
This is one of those patches where the first hunk seems like it is
entirely disconnected from what the changelog made me expect I would see.
I don't see sgx_reclaim_pages(), or lists or arrays.
If you need to pass additional data down into a function, then do *that*
in a separate patch.
I'm glad it eventually got fixed up, but I don't really ever like to see
bare integers that don't have obvious meaning:
sgx_record_epc_page(epc_page, 0);
Even if you had:
#define SGX_EPC_PAGE_RECLAIMER_UNTRACKED 0
sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMER_UNTRACKED);
makes a *LOT* of sense compared to other callers that do
sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> @@ -260,7 +260,8 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> return ERR_CAST(epc_page);
>
> encl->secs_child_cnt++;
> - sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + sgx_record_epc_page(entry->epc_page,
> + (SGX_EPC_PAGE_ENCLAVE | SGX_EPC_PAGE_RECLAIMER_TRACKED));
>
> return entry;
> }
> @@ -1221,7 +1222,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct sgx_encl *encl, bool reclaim)
> sgx_encl_free_epc_page(epc_page);
> return ERR_PTR(-EFAULT);
> }
> - sgx_record_epc_page(epc_page, 0);
> + sgx_record_epc_page(epc_page, SGX_EPC_PAGE_VERSION_ARRAY);
>
> return epc_page;
> }
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index aca80a3f38a1..c3a9bffbc37e 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -114,7 +114,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
> encl->attributes = secs->attributes;
> encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
>
> - sgx_record_epc_page(encl->secs.epc_page, 0);
> + sgx_record_epc_page(encl->secs.epc_page, SGX_EPC_PAGE_ENCLAVE);
>
> /* Set only after completion, as encl->lock has not been taken. */
> set_bit(SGX_ENCL_CREATED, &encl->flags);
> @@ -325,7 +325,8 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
> goto err_out;
> }
>
> - sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + sgx_record_epc_page(encl_page->epc_page,
> + (SGX_EPC_PAGE_ENCLAVE | SGX_EPC_PAGE_RECLAIMER_TRACKED));
> mutex_unlock(&encl->lock);
> mmap_read_unlock(current->mm);
> return ret;
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index bad72498b0a7..83aaf5cea7b9 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -288,37 +288,43 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
> */
> static void __sgx_reclaim_pages(void)
> {
> - struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
> struct sgx_backing backing[SGX_NR_TO_SCAN];
> + struct sgx_epc_page *epc_page, *tmp;
> struct sgx_encl_page *encl_page;
> - struct sgx_epc_page *epc_page;
> pgoff_t page_index;
> - int cnt = 0;
> + LIST_HEAD(iso);
> int ret;
> int i;
>
> spin_lock(&sgx_global_lru.lock);
> for (i = 0; i < SGX_NR_TO_SCAN; i++) {
> - epc_page = sgx_epc_pop_reclaimable(&sgx_global_lru);
> + epc_page = sgx_epc_peek_reclaimable(&sgx_global_lru);
> if (!epc_page)
> break;
>
> encl_page = epc_page->encl_owner;
>
> + if (WARN_ON_ONCE(!(epc_page->flags & SGX_EPC_PAGE_ENCLAVE)))
> + continue;
> +
> if (kref_get_unless_zero(&encl_page->encl->refcount) != 0) {
> epc_page->flags |= SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> - chunk[cnt++] = epc_page;
> + list_move_tail(&epc_page->list, &iso);
> } else {
> - /* The owner is freeing the page. No need to add the
> - * page back to the list of reclaimable pages.
> + /* The owner is freeing the page, remove it from the
> + * LRU list
> */
> epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> + list_del_init(&epc_page->list);
> }
> }
> spin_unlock(&sgx_global_lru.lock);
>
> - for (i = 0; i < cnt; i++) {
> - epc_page = chunk[i];
> + if (list_empty(&iso))
> + return;
> +
> + i = 0;
> + list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> encl_page = epc_page->encl_owner;
>
> if (!sgx_reclaimer_age(epc_page))
> @@ -333,6 +339,7 @@ static void __sgx_reclaim_pages(void)
> goto skip;
> }
>
> + i++;
> encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
> mutex_unlock(&encl_page->encl->lock);
> continue;
> @@ -340,31 +347,25 @@ static void __sgx_reclaim_pages(void)
> skip:
> spin_lock(&sgx_global_lru.lock);
> epc_page->flags &= ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> - sgx_epc_push_reclaimable(&sgx_global_lru, epc_page);
> + sgx_epc_move_reclaimable(&sgx_global_lru, epc_page);
> spin_unlock(&sgx_global_lru.lock);
>
> kref_put(&encl_page->encl->refcount, sgx_encl_release);
> -
> - chunk[i] = NULL;
> }
>
> - for (i = 0; i < cnt; i++) {
> - epc_page = chunk[i];
> - if (epc_page)
> - sgx_reclaimer_block(epc_page);
> - }
> -
> - for (i = 0; i < cnt; i++) {
> - epc_page = chunk[i];
> - if (!epc_page)
> - continue;
> -
> + list_for_each_entry(epc_page, &iso, list)
> + sgx_reclaimer_block(epc_page);
> +
> + i = 0;
> + list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> encl_page = epc_page->encl_owner;
> - sgx_reclaimer_write(epc_page, &backing[i]);
> + sgx_reclaimer_write(epc_page, &backing[i++]);
>
> kref_put(&encl_page->encl->refcount, sgx_encl_release);
> epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> - SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
> + SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> + SGX_EPC_PAGE_ENCLAVE |
> + SGX_EPC_PAGE_VERSION_ARRAY);
>
> sgx_free_epc_page(epc_page);
> }
> @@ -505,6 +506,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
> /**
> * sgx_record_epc_page() - Add a page to the LRU tracking
> * @page: EPC page
> + * @flags: Reclaim flags for the page.
> *
> * Mark a page with the specified flags and add it to the appropriate
> * (un)reclaimable list.
> @@ -535,18 +537,19 @@ void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
> int sgx_drop_epc_page(struct sgx_epc_page *page)
> {
> spin_lock(&sgx_global_lru.lock);
> - if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
> - /* The page is being reclaimed. */
> - if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS) {
> - spin_unlock(&sgx_global_lru.lock);
> - return -EBUSY;
> - }
> -
> - page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> + if ((page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) &&
> + (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)) {
> + spin_unlock(&sgx_global_lru.lock);
> + return -EBUSY;
> }
> list_del(&page->list);
> spin_unlock(&sgx_global_lru.lock);
>
> + page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> + SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> + SGX_EPC_PAGE_ENCLAVE |
> + SGX_EPC_PAGE_VERSION_ARRAY);
> +
> return 0;
> }
>
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index 37d66bc6ca27..ec8d567cd975 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -32,6 +32,8 @@
> #define SGX_EPC_PAGE_KVM_GUEST BIT(2)
> /* page flag to indicate reclaim is in progress */
> #define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
> +#define SGX_EPC_PAGE_ENCLAVE BIT(4)
> +#define SGX_EPC_PAGE_VERSION_ARRAY BIT(5)
Could you please spend some time to clearly document what each bit means?
> +static inline void __sgx_epc_page_list_move(struct list_head *list, struct sgx_epc_page *page)
> +{
> + list_move_tail(&page->list, list);
> +}
I'm not sure I get the point of a helper like this. Why not just have
the caller call list_move() directly?
> /*
> * Must be called with queue lock acquired
> */
> @@ -157,6 +167,38 @@ static inline void sgx_epc_push_unreclaimable(struct sgx_epc_lru_lists *lrus,
> __sgx_epc_page_list_push(&(lrus)->unreclaimable, page);
> }
>
> +/*
> + * Must be called with queue lock acquired
> + */
> +static inline struct sgx_epc_page * __sgx_epc_page_list_peek(struct list_head *list)
> +{
> + struct sgx_epc_page *epc_page;
> +
> + if (list_empty(list))
> + return NULL;
> +
> + epc_page = list_first_entry(list, struct sgx_epc_page, list);
> + return epc_page;
> +}
list_first_entry_or_null() perhaps?
> +static inline struct sgx_epc_page *
> +sgx_epc_peek_reclaimable(struct sgx_epc_lru_lists *lrus)
> +{
> + return __sgx_epc_page_list_peek(&(lrus)->reclaimable);
> +}
> +
> +static inline void sgx_epc_move_reclaimable(struct sgx_epc_lru_lists *lru,
> + struct sgx_epc_page *page)
> +{
> + __sgx_epc_page_list_move(&(lru)->reclaimable, page);
> +}
> +
> +static inline struct sgx_epc_page *
> +sgx_epc_peek_unreclaimable(struct sgx_epc_lru_lists *lrus)
> +{
> + return __sgx_epc_page_list_peek(&(lrus)->unreclaimable);
> +}
In general, I'm not becoming more fond of these helpers as the series
goes along. My worry is that they're an abstraction where we don't
*really* need one. I don't seem them growing much functionality as the
series goes along.
I'll reserve judgement until the end though.
On Fri, 2022-12-02 at 14:33 -0800, Dave Hansen wrote:
> On 12/2/22 10:36, Kristen Carlson Accardi wrote:
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> >
> > Change sgx_reclaim_pages() to use a list rather than an array for
> > storing the epc_pages which will be reclaimed. This change is
> > needed
> > to transition to the LRU implementation for EPC cgroup support.
> >
> > This change requires keeping track of whether newly recorded
> > EPC pages are pages for VA Arrays, or for Enclave data. In
> > addition,
> > helper functions are added to move pages from one list to another
> > and
> > enforce a consistent queue like behavior for the LRU lists.
>
> More changelog nit: Please use imperative voice, not passive voice.
> Move from:
>
> In addition, helper functions are added
>
> to:
>
> In addition, add helper functions
>
> > diff --git a/arch/x86/kernel/cpu/sgx/encl.c
> > b/arch/x86/kernel/cpu/sgx/encl.c
> > index 4683da9ef4f1..9ee306ac2a8e 100644
> > --- a/arch/x86/kernel/cpu/sgx/encl.c
> > +++ b/arch/x86/kernel/cpu/sgx/encl.c
> > @@ -252,7 +252,7 @@ static struct sgx_encl_page
> > *__sgx_encl_load_page(struct sgx_encl *encl,
> > epc_page = sgx_encl_eldu(&encl->secs, NULL);
> > if (IS_ERR(epc_page))
> > return ERR_CAST(epc_page);
> > - sgx_record_epc_page(epc_page, 0);
> > + sgx_record_epc_page(epc_page,
> > SGX_EPC_PAGE_ENCLAVE);
> > }
>
> This is one of those patches where the first hunk seems like it is
> entirely disconnected from what the changelog made me expect I would
> see.
>
> I don't see sgx_reclaim_pages(), or lists or arrays.
>
> If you need to pass additional data down into a function, then do
> *that*
> in a separate patch.
>
> I'm glad it eventually got fixed up, but I don't really ever like to
> see
> bare integers that don't have obvious meaning:
>
> sgx_record_epc_page(epc_page, 0);
>
> Even if you had:
>
> #define SGX_EPC_PAGE_RECLAIMER_UNTRACKED 0
>
> sgx_record_epc_page(epc_page,
> SGX_EPC_PAGE_RECLAIMER_UNTRACKED);
>
> makes a *LOT* of sense compared to other callers that do
>
> sgx_record_epc_page(epc_page,
> SGX_EPC_PAGE_RECLAIMER_TRACKED);
>
> > epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> > @@ -260,7 +260,8 @@ static struct sgx_encl_page
> > *__sgx_encl_load_page(struct sgx_encl *encl,
> > return ERR_CAST(epc_page);
> >
> > encl->secs_child_cnt++;
> > - sgx_record_epc_page(entry->epc_page,
> > SGX_EPC_PAGE_RECLAIMER_TRACKED);
> > + sgx_record_epc_page(entry->epc_page,
> > + (SGX_EPC_PAGE_ENCLAVE |
> > SGX_EPC_PAGE_RECLAIMER_TRACKED));
> >
> > return entry;
> > }
> > @@ -1221,7 +1222,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct
> > sgx_encl *encl, bool reclaim)
> > sgx_encl_free_epc_page(epc_page);
> > return ERR_PTR(-EFAULT);
> > }
> > - sgx_record_epc_page(epc_page, 0);
> > + sgx_record_epc_page(epc_page, SGX_EPC_PAGE_VERSION_ARRAY);
> >
> > return epc_page;
> > }
> > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c
> > b/arch/x86/kernel/cpu/sgx/ioctl.c
> > index aca80a3f38a1..c3a9bffbc37e 100644
> > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > @@ -114,7 +114,7 @@ static int sgx_encl_create(struct sgx_encl
> > *encl, struct sgx_secs *secs)
> > encl->attributes = secs->attributes;
> > encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT
> > | SGX_ATTR_KSS;
> >
> > - sgx_record_epc_page(encl->secs.epc_page, 0);
> > + sgx_record_epc_page(encl->secs.epc_page,
> > SGX_EPC_PAGE_ENCLAVE);
> >
> > /* Set only after completion, as encl->lock has not been
> > taken. */
> > set_bit(SGX_ENCL_CREATED, &encl->flags);
> > @@ -325,7 +325,8 @@ static int sgx_encl_add_page(struct sgx_encl
> > *encl, unsigned long src,
> > goto err_out;
> > }
> >
> > - sgx_record_epc_page(encl_page->epc_page,
> > SGX_EPC_PAGE_RECLAIMER_TRACKED);
> > + sgx_record_epc_page(encl_page->epc_page,
> > + (SGX_EPC_PAGE_ENCLAVE |
> > SGX_EPC_PAGE_RECLAIMER_TRACKED));
> > mutex_unlock(&encl->lock);
> > mmap_read_unlock(current->mm);
> > return ret;
> > diff --git a/arch/x86/kernel/cpu/sgx/main.c
> > b/arch/x86/kernel/cpu/sgx/main.c
> > index bad72498b0a7..83aaf5cea7b9 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -288,37 +288,43 @@ static void sgx_reclaimer_write(struct
> > sgx_epc_page *epc_page,
> > */
> > static void __sgx_reclaim_pages(void)
> > {
> > - struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
> > struct sgx_backing backing[SGX_NR_TO_SCAN];
> > + struct sgx_epc_page *epc_page, *tmp;
> > struct sgx_encl_page *encl_page;
> > - struct sgx_epc_page *epc_page;
> > pgoff_t page_index;
> > - int cnt = 0;
> > + LIST_HEAD(iso);
> > int ret;
> > int i;
> >
> > spin_lock(&sgx_global_lru.lock);
> > for (i = 0; i < SGX_NR_TO_SCAN; i++) {
> > - epc_page =
> > sgx_epc_pop_reclaimable(&sgx_global_lru);
> > + epc_page =
> > sgx_epc_peek_reclaimable(&sgx_global_lru);
> > if (!epc_page)
> > break;
> >
> > encl_page = epc_page->encl_owner;
> >
> > + if (WARN_ON_ONCE(!(epc_page->flags &
> > SGX_EPC_PAGE_ENCLAVE)))
> > + continue;
> > +
> > if (kref_get_unless_zero(&encl_page->encl-
> > >refcount) != 0) {
> > epc_page->flags |=
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> > - chunk[cnt++] = epc_page;
> > + list_move_tail(&epc_page->list, &iso);
> > } else {
> > - /* The owner is freeing the page. No need
> > to add the
> > - * page back to the list of reclaimable
> > pages.
> > + /* The owner is freeing the page, remove it
> > from the
> > + * LRU list
> > */
> > epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > + list_del_init(&epc_page->list);
> > }
> > }
> > spin_unlock(&sgx_global_lru.lock);
> >
> > - for (i = 0; i < cnt; i++) {
> > - epc_page = chunk[i];
> > + if (list_empty(&iso))
> > + return;
> > +
> > + i = 0;
> > + list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> > encl_page = epc_page->encl_owner;
> >
> > if (!sgx_reclaimer_age(epc_page))
> > @@ -333,6 +339,7 @@ static void __sgx_reclaim_pages(void)
> > goto skip;
> > }
> >
> > + i++;
> > encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
> > mutex_unlock(&encl_page->encl->lock);
> > continue;
> > @@ -340,31 +347,25 @@ static void __sgx_reclaim_pages(void)
> > skip:
> > spin_lock(&sgx_global_lru.lock);
> > epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> > - sgx_epc_push_reclaimable(&sgx_global_lru,
> > epc_page);
> > + sgx_epc_move_reclaimable(&sgx_global_lru,
> > epc_page);
> > spin_unlock(&sgx_global_lru.lock);
> >
> > kref_put(&encl_page->encl->refcount,
> > sgx_encl_release);
> > -
> > - chunk[i] = NULL;
> > }
> >
> > - for (i = 0; i < cnt; i++) {
> > - epc_page = chunk[i];
> > - if (epc_page)
> > - sgx_reclaimer_block(epc_page);
> > - }
> > -
> > - for (i = 0; i < cnt; i++) {
> > - epc_page = chunk[i];
> > - if (!epc_page)
> > - continue;
> > -
> > + list_for_each_entry(epc_page, &iso, list)
> > + sgx_reclaimer_block(epc_page);
> > +
> > + i = 0;
> > + list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> > encl_page = epc_page->encl_owner;
> > - sgx_reclaimer_write(epc_page, &backing[i]);
> > + sgx_reclaimer_write(epc_page, &backing[i++]);
> >
> > kref_put(&encl_page->encl->refcount,
> > sgx_encl_release);
> > epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED
> > |
> > -
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
> > +
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> > + SGX_EPC_PAGE_ENCLAVE |
> > + SGX_EPC_PAGE_VERSION_ARRAY);
> >
> > sgx_free_epc_page(epc_page);
> > }
> > @@ -505,6 +506,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
> > /**
> > * sgx_record_epc_page() - Add a page to the LRU tracking
> > * @page: EPC page
> > + * @flags: Reclaim flags for the page.
> > *
> > * Mark a page with the specified flags and add it to the
> > appropriate
> > * (un)reclaimable list.
> > @@ -535,18 +537,19 @@ void sgx_record_epc_page(struct sgx_epc_page
> > *page, unsigned long flags)
> > int sgx_drop_epc_page(struct sgx_epc_page *page)
> > {
> > spin_lock(&sgx_global_lru.lock);
> > - if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
> > - /* The page is being reclaimed. */
> > - if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)
> > {
> > - spin_unlock(&sgx_global_lru.lock);
> > - return -EBUSY;
> > - }
> > -
> > - page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > + if ((page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) &&
> > + (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)) {
> > + spin_unlock(&sgx_global_lru.lock);
> > + return -EBUSY;
> > }
> > list_del(&page->list);
> > spin_unlock(&sgx_global_lru.lock);
> >
> > + page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> > + SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> > + SGX_EPC_PAGE_ENCLAVE |
> > + SGX_EPC_PAGE_VERSION_ARRAY);
> > +
> > return 0;
> > }
> >
> > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h
> > b/arch/x86/kernel/cpu/sgx/sgx.h
> > index 37d66bc6ca27..ec8d567cd975 100644
> > --- a/arch/x86/kernel/cpu/sgx/sgx.h
> > +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> > @@ -32,6 +32,8 @@
> > #define SGX_EPC_PAGE_KVM_GUEST BIT(2)
> > /* page flag to indicate reclaim is in progress */
> > #define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
> > +#define SGX_EPC_PAGE_ENCLAVE BIT(4)
> > +#define SGX_EPC_PAGE_VERSION_ARRAY BIT(5)
>
> Could you please spend some time to clearly document what each bit
> means?
>
> > +static inline void __sgx_epc_page_list_move(struct list_head
> > *list, struct sgx_epc_page *page)
> > +{
> > + list_move_tail(&page->list, list);
> > +}
>
> I'm not sure I get the point of a helper like this. Why not just
> have
> the caller call list_move() directly?
>
> > /*
> > * Must be called with queue lock acquired
> > */
> > @@ -157,6 +167,38 @@ static inline void
> > sgx_epc_push_unreclaimable(struct sgx_epc_lru_lists *lrus,
> > __sgx_epc_page_list_push(&(lrus)->unreclaimable, page);
> > }
> >
> > +/*
> > + * Must be called with queue lock acquired
> > + */
> > +static inline struct sgx_epc_page *
> > __sgx_epc_page_list_peek(struct list_head *list)
> > +{
> > + struct sgx_epc_page *epc_page;
> > +
> > + if (list_empty(list))
> > + return NULL;
> > +
> > + epc_page = list_first_entry(list, struct sgx_epc_page,
> > list);
> > + return epc_page;
> > +}
>
> list_first_entry_or_null() perhaps?
>
> > +static inline struct sgx_epc_page *
> > +sgx_epc_peek_reclaimable(struct sgx_epc_lru_lists *lrus)
> > +{
> > + return __sgx_epc_page_list_peek(&(lrus)->reclaimable);
> > +}
> > +
> > +static inline void sgx_epc_move_reclaimable(struct
> > sgx_epc_lru_lists *lru,
> > + struct sgx_epc_page
> > *page)
> > +{
> > + __sgx_epc_page_list_move(&(lru)->reclaimable, page);
> > +}
> > +
> > +static inline struct sgx_epc_page *
> > +sgx_epc_peek_unreclaimable(struct sgx_epc_lru_lists *lrus)
> > +{
> > + return __sgx_epc_page_list_peek(&(lrus)->unreclaimable);
> > +}
>
> In general, I'm not becoming more fond of these helpers as the series
> goes along. My worry is that they're an abstraction where we don't
> *really* need one. I don't seem them growing much functionality as
> the
> series goes along.
>
> I'll reserve judgement until the end though.
>
The helpers were added because Jarrko requested a queue abstraction for
the sgx_epc_lru_lists data structure in the first round of reviews. the
simple one line inlines are effectively just renaming to make the queue
abstraction more obvious to the reader.
On 12/5/22 08:33, Kristen Carlson Accardi wrote:
> The helpers were added because Jarrko requested a queue abstraction for
> the sgx_epc_lru_lists data structure in the first round of reviews. the
> simple one line inlines are effectively just renaming to make the queue
> abstraction more obvious to the reader.
Jarkko,
Do you have any issues with zapping these helpers? I really don't think
they add to readability. The "reclaimable" versus "unreclaimable"
naming is patently obvious from the structure member names. I'm not
sure what value it adds to have them in the function names too.
On Mon, 2022-12-05 at 09:03 -0800, Dave Hansen wrote:
> On 12/5/22 08:33, Kristen Carlson Accardi wrote:
> > The helpers were added because Jarrko requested a queue abstraction
> > for
> > the sgx_epc_lru_lists data structure in the first round of reviews.
> > the
> > simple one line inlines are effectively just renaming to make the
> > queue
> > abstraction more obvious to the reader.
>
> Jarkko,
>
> Do you have any issues with zapping these helpers? I really don't
> think
> they add to readability. The "reclaimable" versus "unreclaimable"
> naming is patently obvious from the structure member names. I'm not
> sure what value it adds to have them in the function names too.
>
>
Well, there's sort of 2 things I would want clarity on before my next
revision. One is obviously deleting the wrappers for unreclaimable and
reclaimable pushes etc. The other is deleting the wrappers for the list
operations (the push/pop/peek queue abstractions) and whether those are
desired.
@@ -252,7 +252,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
epc_page = sgx_encl_eldu(&encl->secs, NULL);
if (IS_ERR(epc_page))
return ERR_CAST(epc_page);
- sgx_record_epc_page(epc_page, 0);
+ sgx_record_epc_page(epc_page, SGX_EPC_PAGE_ENCLAVE);
}
epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
@@ -260,7 +260,8 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
return ERR_CAST(epc_page);
encl->secs_child_cnt++;
- sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
+ sgx_record_epc_page(entry->epc_page,
+ (SGX_EPC_PAGE_ENCLAVE | SGX_EPC_PAGE_RECLAIMER_TRACKED));
return entry;
}
@@ -1221,7 +1222,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct sgx_encl *encl, bool reclaim)
sgx_encl_free_epc_page(epc_page);
return ERR_PTR(-EFAULT);
}
- sgx_record_epc_page(epc_page, 0);
+ sgx_record_epc_page(epc_page, SGX_EPC_PAGE_VERSION_ARRAY);
return epc_page;
}
@@ -114,7 +114,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
encl->attributes = secs->attributes;
encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
- sgx_record_epc_page(encl->secs.epc_page, 0);
+ sgx_record_epc_page(encl->secs.epc_page, SGX_EPC_PAGE_ENCLAVE);
/* Set only after completion, as encl->lock has not been taken. */
set_bit(SGX_ENCL_CREATED, &encl->flags);
@@ -325,7 +325,8 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
goto err_out;
}
- sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
+ sgx_record_epc_page(encl_page->epc_page,
+ (SGX_EPC_PAGE_ENCLAVE | SGX_EPC_PAGE_RECLAIMER_TRACKED));
mutex_unlock(&encl->lock);
mmap_read_unlock(current->mm);
return ret;
@@ -288,37 +288,43 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
*/
static void __sgx_reclaim_pages(void)
{
- struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
struct sgx_backing backing[SGX_NR_TO_SCAN];
+ struct sgx_epc_page *epc_page, *tmp;
struct sgx_encl_page *encl_page;
- struct sgx_epc_page *epc_page;
pgoff_t page_index;
- int cnt = 0;
+ LIST_HEAD(iso);
int ret;
int i;
spin_lock(&sgx_global_lru.lock);
for (i = 0; i < SGX_NR_TO_SCAN; i++) {
- epc_page = sgx_epc_pop_reclaimable(&sgx_global_lru);
+ epc_page = sgx_epc_peek_reclaimable(&sgx_global_lru);
if (!epc_page)
break;
encl_page = epc_page->encl_owner;
+ if (WARN_ON_ONCE(!(epc_page->flags & SGX_EPC_PAGE_ENCLAVE)))
+ continue;
+
if (kref_get_unless_zero(&encl_page->encl->refcount) != 0) {
epc_page->flags |= SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
- chunk[cnt++] = epc_page;
+ list_move_tail(&epc_page->list, &iso);
} else {
- /* The owner is freeing the page. No need to add the
- * page back to the list of reclaimable pages.
+ /* The owner is freeing the page, remove it from the
+ * LRU list
*/
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
+ list_del_init(&epc_page->list);
}
}
spin_unlock(&sgx_global_lru.lock);
- for (i = 0; i < cnt; i++) {
- epc_page = chunk[i];
+ if (list_empty(&iso))
+ return;
+
+ i = 0;
+ list_for_each_entry_safe(epc_page, tmp, &iso, list) {
encl_page = epc_page->encl_owner;
if (!sgx_reclaimer_age(epc_page))
@@ -333,6 +339,7 @@ static void __sgx_reclaim_pages(void)
goto skip;
}
+ i++;
encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
mutex_unlock(&encl_page->encl->lock);
continue;
@@ -340,31 +347,25 @@ static void __sgx_reclaim_pages(void)
skip:
spin_lock(&sgx_global_lru.lock);
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
- sgx_epc_push_reclaimable(&sgx_global_lru, epc_page);
+ sgx_epc_move_reclaimable(&sgx_global_lru, epc_page);
spin_unlock(&sgx_global_lru.lock);
kref_put(&encl_page->encl->refcount, sgx_encl_release);
-
- chunk[i] = NULL;
}
- for (i = 0; i < cnt; i++) {
- epc_page = chunk[i];
- if (epc_page)
- sgx_reclaimer_block(epc_page);
- }
-
- for (i = 0; i < cnt; i++) {
- epc_page = chunk[i];
- if (!epc_page)
- continue;
-
+ list_for_each_entry(epc_page, &iso, list)
+ sgx_reclaimer_block(epc_page);
+
+ i = 0;
+ list_for_each_entry_safe(epc_page, tmp, &iso, list) {
encl_page = epc_page->encl_owner;
- sgx_reclaimer_write(epc_page, &backing[i]);
+ sgx_reclaimer_write(epc_page, &backing[i++]);
kref_put(&encl_page->encl->refcount, sgx_encl_release);
epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
- SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
+ SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
+ SGX_EPC_PAGE_ENCLAVE |
+ SGX_EPC_PAGE_VERSION_ARRAY);
sgx_free_epc_page(epc_page);
}
@@ -505,6 +506,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
/**
* sgx_record_epc_page() - Add a page to the LRU tracking
* @page: EPC page
+ * @flags: Reclaim flags for the page.
*
* Mark a page with the specified flags and add it to the appropriate
* (un)reclaimable list.
@@ -535,18 +537,19 @@ void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
int sgx_drop_epc_page(struct sgx_epc_page *page)
{
spin_lock(&sgx_global_lru.lock);
- if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
- /* The page is being reclaimed. */
- if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS) {
- spin_unlock(&sgx_global_lru.lock);
- return -EBUSY;
- }
-
- page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
+ if ((page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) &&
+ (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)) {
+ spin_unlock(&sgx_global_lru.lock);
+ return -EBUSY;
}
list_del(&page->list);
spin_unlock(&sgx_global_lru.lock);
+ page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
+ SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
+ SGX_EPC_PAGE_ENCLAVE |
+ SGX_EPC_PAGE_VERSION_ARRAY);
+
return 0;
}
@@ -32,6 +32,8 @@
#define SGX_EPC_PAGE_KVM_GUEST BIT(2)
/* page flag to indicate reclaim is in progress */
#define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
+#define SGX_EPC_PAGE_ENCLAVE BIT(4)
+#define SGX_EPC_PAGE_VERSION_ARRAY BIT(5)
struct sgx_epc_page {
unsigned int section;
@@ -118,6 +120,14 @@ static inline void __sgx_epc_page_list_push(struct list_head *list, struct sgx_e
list_add_tail(&page->list, list);
}
+/*
+ * Must be called with queue lock acquired
+ */
+static inline void __sgx_epc_page_list_move(struct list_head *list, struct sgx_epc_page *page)
+{
+ list_move_tail(&page->list, list);
+}
+
/*
* Must be called with queue lock acquired
*/
@@ -157,6 +167,38 @@ static inline void sgx_epc_push_unreclaimable(struct sgx_epc_lru_lists *lrus,
__sgx_epc_page_list_push(&(lrus)->unreclaimable, page);
}
+/*
+ * Must be called with queue lock acquired
+ */
+static inline struct sgx_epc_page * __sgx_epc_page_list_peek(struct list_head *list)
+{
+ struct sgx_epc_page *epc_page;
+
+ if (list_empty(list))
+ return NULL;
+
+ epc_page = list_first_entry(list, struct sgx_epc_page, list);
+ return epc_page;
+}
+
+static inline struct sgx_epc_page *
+sgx_epc_peek_reclaimable(struct sgx_epc_lru_lists *lrus)
+{
+ return __sgx_epc_page_list_peek(&(lrus)->reclaimable);
+}
+
+static inline void sgx_epc_move_reclaimable(struct sgx_epc_lru_lists *lru,
+ struct sgx_epc_page *page)
+{
+ __sgx_epc_page_list_move(&(lru)->reclaimable, page);
+}
+
+static inline struct sgx_epc_page *
+sgx_epc_peek_unreclaimable(struct sgx_epc_lru_lists *lrus)
+{
+ return __sgx_epc_page_list_peek(&(lrus)->unreclaimable);
+}
+
struct sgx_epc_page *__sgx_alloc_epc_page(void);
void sgx_free_epc_page(struct sgx_epc_page *page);