[v4,11/18] x86/sgx: store unreclaimable pages in LRU lists
Commit Message
From: Kristen Carlson Accardi <kristen@linux.intel.com>
When an OOM event occurs, all pages associated with an enclave will need
to be freed, including pages that are not currently tracked by the
cgroup LRU lists.
Add a new "unreclaimable" list to the sgx_epc_lru_lists struct and
update the sgx_record_epc_page()/sgx_drop_epc_page() usage to add and
remove VA and SECS pages to and from this "unreclaimable" list.
Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
---
V4:
- Updates for patch reordering.
- Revised commit messages.
- Revised comments for the list.
V3:
- Removed tracking of virtual EPC pages in the unreclaimable list, as the
host kernel does not reclaim them. The EPC cgroups implemented later only
block allocation for a guest when the limit is reached, by returning
-ENOMEM from sgx_alloc_epc_page() called by virt_epc, and do nothing
else. Therefore, there is no need to track those pages in LRU lists.
---
arch/x86/kernel/cpu/sgx/encl.c | 2 ++
arch/x86/kernel/cpu/sgx/ioctl.c | 1 +
arch/x86/kernel/cpu/sgx/main.c | 3 +++
arch/x86/kernel/cpu/sgx/sgx.h | 8 +++++++-
4 files changed, 13 insertions(+), 1 deletion(-)
Comments
On Wed Sep 13, 2023 at 7:06 AM EEST, Haitao Huang wrote:
> From: Kristen Carlson Accardi <kristen@linux.intel.com>
>
> When an OOM event occurs, all pages associated with an enclave will need
> to be freed, including pages that are not currently tracked by the
> cgroup LRU lists.
>
> Add a new "unreclaimable" list to the sgx_epc_lru_lists struct and
> update the "sgx_record/drop_epc_pages()" functions for adding/removing
> VA and SECS pages to/from this "unreclaimable" list.
>
> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> ---
> V4:
> - Updates for patch reordering.
> - Revised commit messages.
> - Revised comments for the list.
>
> V3:
> - Removed tracking virtual EPC pages in unreclaimable list as host
> kernel does not reclaim them. The EPC cgroups implemented later only
> blocks allocating for a guest if the limit is reached by returning
> -ENOMEM from sgx_alloc_epc_page() called by virt_epc, and does nothing
> else. Therefore, no need to track those in LRU lists.
> ---
> arch/x86/kernel/cpu/sgx/encl.c | 2 ++
> arch/x86/kernel/cpu/sgx/ioctl.c | 1 +
> arch/x86/kernel/cpu/sgx/main.c | 3 +++
> arch/x86/kernel/cpu/sgx/sgx.h | 8 +++++++-
> 4 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 91f83a5e543d..bf0ac3677ca8 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -748,6 +748,7 @@ void sgx_encl_release(struct kref *ref)
> xa_destroy(&encl->page_array);
>
> if (!encl->secs_child_cnt && encl->secs.epc_page) {
> + sgx_drop_epc_page(encl->secs.epc_page);
> sgx_encl_free_epc_page(encl->secs.epc_page);
> encl->secs.epc_page = NULL;
> }
> @@ -756,6 +757,7 @@ void sgx_encl_release(struct kref *ref)
> va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
> list);
> list_del(&va_page->list);
> + sgx_drop_epc_page(va_page->epc_page);
> sgx_encl_free_epc_page(va_page->epc_page);
> kfree(va_page);
> }
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index 95ec20a6992f..8c23bb524674 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -48,6 +48,7 @@ void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
> encl->page_cnt--;
>
> if (va_page) {
> + sgx_drop_epc_page(va_page->epc_page);
> sgx_encl_free_epc_page(va_page->epc_page);
> list_del(&va_page->list);
> kfree(va_page);
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index ed813288af44..f3a3ed894616 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -268,6 +268,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
> goto out;
>
> sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
> + sgx_drop_epc_page(encl->secs.epc_page);
> sgx_encl_free_epc_page(encl->secs.epc_page);
> encl->secs.epc_page = NULL;
>
> @@ -510,6 +511,8 @@ void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
> page->flags |= flags;
> if (sgx_epc_page_reclaimable(flags))
> list_add_tail(&page->list, &sgx_global_lru.reclaimable);
> + else
> + list_add_tail(&page->list, &sgx_global_lru.unreclaimable);
> spin_unlock(&sgx_global_lru.lock);
> }
>
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index e06b4aadb6a1..e210af77f0cf 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -150,17 +150,23 @@ static inline void *sgx_get_epc_virt_addr(struct sgx_epc_page *page)
> }
>
> /*
> - * Tracks EPC pages reclaimable by the reclaimer (ksgxd).
> + * Contains EPC pages tracked by the reclaimer (ksgxd).
> */
> struct sgx_epc_lru_lists {
> spinlock_t lock;
> struct list_head reclaimable;
> + /*
> + * Tracks SECS, VA pages,etc., pages only freeable after all its
> + * dependent reclaimables are freed.
> + */
> + struct list_head unreclaimable;
> };
>
> static inline void sgx_lru_init(struct sgx_epc_lru_lists *lrus)
> {
> spin_lock_init(&lrus->lock);
> INIT_LIST_HEAD(&lrus->reclaimable);
> + INIT_LIST_HEAD(&lrus->unreclaimable);
> }
>
> struct sgx_epc_page *__sgx_alloc_epc_page(void);
> --
> 2.25.1
LGTM
BR, Jarkko
@@ -748,6 +748,7 @@ void sgx_encl_release(struct kref *ref)
xa_destroy(&encl->page_array);
if (!encl->secs_child_cnt && encl->secs.epc_page) {
+ sgx_drop_epc_page(encl->secs.epc_page);
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
}
@@ -756,6 +757,7 @@ void sgx_encl_release(struct kref *ref)
va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
list);
list_del(&va_page->list);
+ sgx_drop_epc_page(va_page->epc_page);
sgx_encl_free_epc_page(va_page->epc_page);
kfree(va_page);
}
@@ -48,6 +48,7 @@ void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
encl->page_cnt--;
if (va_page) {
+ sgx_drop_epc_page(va_page->epc_page);
sgx_encl_free_epc_page(va_page->epc_page);
list_del(&va_page->list);
kfree(va_page);
@@ -268,6 +268,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
goto out;
sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
+ sgx_drop_epc_page(encl->secs.epc_page);
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
@@ -510,6 +511,8 @@ void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
page->flags |= flags;
if (sgx_epc_page_reclaimable(flags))
list_add_tail(&page->list, &sgx_global_lru.reclaimable);
+ else
+ list_add_tail(&page->list, &sgx_global_lru.unreclaimable);
spin_unlock(&sgx_global_lru.lock);
}
@@ -150,17 +150,23 @@ static inline void *sgx_get_epc_virt_addr(struct sgx_epc_page *page)
}
/*
- * Tracks EPC pages reclaimable by the reclaimer (ksgxd).
+ * Contains EPC pages tracked by the reclaimer (ksgxd).
*/
struct sgx_epc_lru_lists {
spinlock_t lock;
struct list_head reclaimable;
+ /*
+ * Tracks SECS pages, VA pages, etc.: pages that are only freeable after
+ * all of their dependent reclaimable pages are freed.
+ */
+ struct list_head unreclaimable;
};
static inline void sgx_lru_init(struct sgx_epc_lru_lists *lrus)
{
spin_lock_init(&lrus->lock);
INIT_LIST_HEAD(&lrus->reclaimable);
+ INIT_LIST_HEAD(&lrus->unreclaimable);
}
struct sgx_epc_page *__sgx_alloc_epc_page(void);