[v5,06/18] x86/sgx: Introduce EPC page states
Commit Message
Use the lower 3 bits in the flags field of sgx_epc_page struct to
track EPC states in its life cycle and define an enum for possible
states. More state(s) will be added later.
Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
---
V4:
- No changes other than required for patch reordering.
V3:
- This is new in V3 to replace the bit mask based approach (requested by Jarkko)
---
arch/x86/kernel/cpu/sgx/encl.c | 14 +++++++---
arch/x86/kernel/cpu/sgx/ioctl.c | 7 +++--
arch/x86/kernel/cpu/sgx/main.c | 19 +++++++------
arch/x86/kernel/cpu/sgx/sgx.h | 49 ++++++++++++++++++++++++++++++---
4 files changed, 71 insertions(+), 18 deletions(-)
Comments
On Sat Sep 23, 2023 at 6:06 AM EEST, Haitao Huang wrote:
> Use the lower 3 bits in the flags field of sgx_epc_page struct to
> track EPC states in its life cycle and define an enum for possible
> states. More state(s) will be added later.
>
> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
> ---
> V4:
> - No changes other than required for patch reordering.
>
> V3:
> - This is new in V3 to replace the bit mask based approach (requested by Jarkko)
> ---
> arch/x86/kernel/cpu/sgx/encl.c | 14 +++++++---
> arch/x86/kernel/cpu/sgx/ioctl.c | 7 +++--
> arch/x86/kernel/cpu/sgx/main.c | 19 +++++++------
> arch/x86/kernel/cpu/sgx/sgx.h | 49 ++++++++++++++++++++++++++++++---
> 4 files changed, 71 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 97a53e34a8b4..f5afc8d65e22 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -244,8 +244,12 @@ static struct sgx_epc_page *sgx_encl_load_secs(struct sgx_encl *encl)
> {
> struct sgx_epc_page *epc_page = encl->secs.epc_page;
>
> - if (!epc_page)
> + if (!epc_page) {
> epc_page = sgx_encl_eldu(&encl->secs, NULL);
> + if (!IS_ERR(epc_page))
> + sgx_record_epc_page(epc_page,
> + SGX_EPC_PAGE_UNRECLAIMABLE);
Can be a single line probably (less than 100 characters).
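E.g. (just folding the same call onto one line):

	sgx_record_epc_page(epc_page, SGX_EPC_PAGE_UNRECLAIMABLE);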
> + }
>
> return epc_page;
> }
> @@ -272,7 +276,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
> return ERR_CAST(epc_page);
>
> encl->secs_child_cnt++;
> - sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMABLE);
>
> return entry;
> }
> @@ -398,7 +402,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
> encl_page->type = SGX_PAGE_TYPE_REG;
> encl->secs_child_cnt++;
>
> - sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMABLE);
>
> phys_addr = sgx_get_epc_phys_addr(epc_page);
> /*
> @@ -1256,6 +1260,8 @@ struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
> sgx_encl_free_epc_page(epc_page);
> return ERR_PTR(-EFAULT);
> }
> + sgx_record_epc_page(epc_page,
> + SGX_EPC_PAGE_UNRECLAIMABLE);
There is a bunch of these apparently.
>
> return epc_page;
> }
> @@ -1315,7 +1321,7 @@ void sgx_encl_free_epc_page(struct sgx_epc_page *page)
> {
> int ret;
>
> - WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_STATE_MASK);
>
> ret = __eremove(sgx_get_epc_virt_addr(page));
> if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index a75eb44022a3..9a32bf5a1070 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -113,6 +113,9 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
> encl->attributes = secs->attributes;
> encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
>
> + sgx_record_epc_page(encl->secs.epc_page,
> + SGX_EPC_PAGE_UNRECLAIMABLE);
> +
> /* Set only after completion, as encl->lock has not been taken. */
> set_bit(SGX_ENCL_CREATED, &encl->flags);
>
> @@ -322,7 +325,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
> goto err_out;
> }
>
> - sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMABLE);
> mutex_unlock(&encl->lock);
> mmap_read_unlock(current->mm);
> return ret;
> @@ -976,7 +979,7 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
>
> mutex_lock(&encl->lock);
>
> - sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMABLE);
> }
>
> /* Change EPC type */
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index dec1d57cbff6..b26860399402 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -318,7 +318,7 @@ static void sgx_reclaim_pages(void)
> /* The owner is freeing the page. No need to add the
> * page back to the list of reclaimable pages.
> */
> - epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> + sgx_epc_page_reset_state(epc_page);
> }
> spin_unlock(&sgx_global_lru.lock);
>
> @@ -344,6 +344,7 @@ static void sgx_reclaim_pages(void)
>
> skip:
> spin_lock(&sgx_global_lru.lock);
> + sgx_epc_page_set_state(epc_page, SGX_EPC_PAGE_RECLAIMABLE);
> list_add_tail(&epc_page->list, &sgx_global_lru.reclaimable);
> spin_unlock(&sgx_global_lru.lock);
>
> @@ -367,7 +368,7 @@ static void sgx_reclaim_pages(void)
> sgx_reclaimer_write(epc_page, &backing[i]);
>
> kref_put(&encl_page->encl->refcount, sgx_encl_release);
> - epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> + sgx_epc_page_reset_state(epc_page);
>
> sgx_free_epc_page(epc_page);
> }
> @@ -507,9 +508,9 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
> void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
> {
> spin_lock(&sgx_global_lru.lock);
> - WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
> + WARN_ON_ONCE(sgx_epc_page_reclaimable(page->flags));
> page->flags |= flags;
> - if (flags & SGX_EPC_PAGE_RECLAIMER_TRACKED)
> + if (sgx_epc_page_reclaimable(flags))
> list_add_tail(&page->list, &sgx_global_lru.reclaimable);
> spin_unlock(&sgx_global_lru.lock);
> }
> @@ -527,7 +528,7 @@ void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
> int sgx_drop_epc_page(struct sgx_epc_page *page)
> {
> spin_lock(&sgx_global_lru.lock);
> - if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
> + if (sgx_epc_page_reclaimable(page->flags)) {
> /* The page is being reclaimed. */
> if (list_empty(&page->list)) {
> spin_unlock(&sgx_global_lru.lock);
> @@ -535,7 +536,7 @@ int sgx_drop_epc_page(struct sgx_epc_page *page)
> }
>
> list_del(&page->list);
> - page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> + sgx_epc_page_reset_state(page);
> }
> spin_unlock(&sgx_global_lru.lock);
>
> @@ -607,6 +608,8 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
> struct sgx_epc_section *section = &sgx_epc_sections[page->section];
> struct sgx_numa_node *node = section->node;
>
> + WARN_ON_ONCE(page->flags & (SGX_EPC_PAGE_STATE_MASK));
> +
> spin_lock(&node->lock);
>
> page->owner = NULL;
> @@ -614,7 +617,7 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
> list_add(&page->list, &node->sgx_poison_page_list);
> else
> list_add_tail(&page->list, &node->free_page_list);
> - page->flags = SGX_EPC_PAGE_IS_FREE;
> + page->flags = SGX_EPC_PAGE_FREE;
>
> spin_unlock(&node->lock);
> atomic_long_inc(&sgx_nr_free_pages);
> @@ -715,7 +718,7 @@ int arch_memory_failure(unsigned long pfn, int flags)
> * If the page is on a free list, move it to the per-node
> * poison page list.
> */
> - if (page->flags & SGX_EPC_PAGE_IS_FREE) {
> + if (page->flags == SGX_EPC_PAGE_FREE) {
> list_move(&page->list, &node->sgx_poison_page_list);
> goto out;
> }
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index 113d930fd087..2faeb40b345f 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -23,11 +23,36 @@
> #define SGX_NR_LOW_PAGES 32
> #define SGX_NR_HIGH_PAGES 64
>
> -/* Pages, which are being tracked by the page reclaimer. */
> -#define SGX_EPC_PAGE_RECLAIMER_TRACKED BIT(0)
> +enum sgx_epc_page_state {
> + /* Not tracked by the reclaimer:
> + * Pages allocated for virtual EPC which are never tracked by the host
> + * reclaimer; pages just allocated from free list but not yet put in
> + * use; pages just reclaimed, but not yet returned to the free list.
> + * Becomes FREE after sgx_free_epc()
> + * Becomes RECLAIMABLE or UNRECLAIMABLE after sgx_record_epc()
> + */
> + SGX_EPC_PAGE_NOT_TRACKED = 0,
> +
> + /* Page is in the free list, ready for allocation
> + * Becomes NOT_TRACKED after sgx_alloc_epc_page()
> + */
> + SGX_EPC_PAGE_FREE = 1,
> +
> + /* Page is in use and tracked in a reclaimable LRU list
> + * Becomes NOT_TRACKED after sgx_drop_epc()
> + */
> + SGX_EPC_PAGE_RECLAIMABLE = 2,
> +
> + /* Page is in use but tracked in an unreclaimable LRU list. These are
> + * only reclaimable when the whole enclave is OOM killed or the enclave
> + * is released, e.g., VA, SECS pages
> + * Becomes NOT_TRACKED after sgx_drop_epc()
> + */
> + SGX_EPC_PAGE_UNRECLAIMABLE = 3,
>
> -/* Pages on free list */
> -#define SGX_EPC_PAGE_IS_FREE BIT(1)
> +};
> +
> +#define SGX_EPC_PAGE_STATE_MASK GENMASK(2, 0)
>
> struct sgx_epc_page {
> unsigned int section;
> @@ -37,6 +62,22 @@ struct sgx_epc_page {
> struct list_head list;
> };
>
> +static inline void sgx_epc_page_reset_state(struct sgx_epc_page *page)
> +{
> + page->flags &= ~SGX_EPC_PAGE_STATE_MASK;
> +}
> +
> +static inline void sgx_epc_page_set_state(struct sgx_epc_page *page, unsigned long flags)
> +{
> + page->flags &= ~SGX_EPC_PAGE_STATE_MASK;
> + page->flags |= (flags & SGX_EPC_PAGE_STATE_MASK);
> +}
> +
> +static inline bool sgx_epc_page_reclaimable(unsigned long flags)
> +{
> + return SGX_EPC_PAGE_RECLAIMABLE == (flags & SGX_EPC_PAGE_STATE_MASK);
> +}
> +
> /*
> * Contains the tracking data for NUMA nodes having EPC pages. Most importantly,
> * the free page list local to the node is stored here.
> --
> 2.25.1
BR, Jarkko
On Fri, 2023-09-22 at 20:06 -0700, Haitao Huang wrote:
> Use the lower 3 bits in the flags field of sgx_epc_page struct to
> track EPC states in its life cycle and define an enum for possible
> states. More state(s) will be added later.
This patch does more than what the changelog claims to do. AFAICT it does
below:
1) Use the lower 3 bits to track EPC page status
2) Rename SGX_EPC_PAGE_RECLAIMER_TRACKED to SGX_EPC_PAGE_RECLAIMABLE
3) Introduce a new state SGX_EPC_PAGE_UNRECLAIMABLE
4) Track SECS and VA pages as SGX_EPC_PAGE_UNRECLAIMABLE
The changelog only says 1) IIUC.
If we really want to do all these in one patch, then the changelog should at
least mention the justification of all of them.
But I don't see why 3) and 4) need to be done here. Instead, IMHO they should
be done in a separate patch, and do it after the unreclaimable list is
introduced (or you need to bring that patch forward).
For instance, ...
[snip]
> +
> + /* Page is in use but tracked in an unreclaimable LRU list. These are
> + * only reclaimable when the whole enclave is OOM killed or the enclave
> + * is released, e.g., VA, SECS pages
> + * Becomes NOT_TRACKED after sgx_drop_epc()
> + */
> + SGX_EPC_PAGE_UNRECLAIMABLE = 3,
... We don't even have the unreclaimable LRU list yet. It's odd to have this
comment here.
On Wed, 27 Sep 2023 05:28:36 -0500, Huang, Kai <kai.huang@intel.com> wrote:
> On Fri, 2023-09-22 at 20:06 -0700, Haitao Huang wrote:
>> Use the lower 3 bits in the flags field of sgx_epc_page struct to
>> track EPC states in its life cycle and define an enum for possible
>> states. More state(s) will be added later.
>
> This patch does more than what the changelog claims to do. AFAICT it
> does
> below:
>
> 1) Use the lower 3 bits to track EPC page status
> 2) Rename SGX_EPC_PAGE_RECLAIMER_TRACKED to SGX_EPC_PAGE_RECLAIMABLE
> 3) Introduce a new state SGX_EPC_PAGE_UNRECLAIMABLE
> 4) Track SECS and VA pages as SGX_EPC_PAGE_UNRECLAIMABLE
>
> The changelog only says 1) IIUC.
>
I don't quite get why you would view 3) as a separate item from 1).
In my view, 4) is not done as long as there is no separate list to track
it.
Maybe I should make the distinction between "states" and "tracking" clear. States are just
bits in the flags; "tracking" is done using the lists by ksgxd/cgroup. And
this patch is really about "states".
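In code terms, taking the lines from sgx_record_epc_page() in this patch:

	page->flags |= flags;                                      /* "state": bits in the flags field */
	if (sgx_epc_page_reclaimable(flags))
		list_add_tail(&page->list, &sgx_global_lru.reclaimable);  /* "tracking": list membership */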
Would that clarify the intention of the patch?
> If we really want to do all these in one patch, then the changelog
> should at
> least mention the justification of all of them.
>
> But I don't see why 3) and 4) need to be done here. Instead, IMHO they
> should
> be done in a separate patch, and do it after the unreclaimable list is
> introduced (or you need to bring that patch forward).
>
>
> For instance, ...
>
> [snip]
>
>> +
>> + /* Page is in use but tracked in an unreclaimable LRU list. These are
>> + * only reclaimable when the whole enclave is OOM killed or the
>> enclave
>> + * is released, e.g., VA, SECS pages
>> + * Becomes NOT_TRACKED after sgx_drop_epc()
>> + */
>> + SGX_EPC_PAGE_UNRECLAIMABLE = 3,
>
> ... We even don't have the unreclaimable LRU list yet. It's odd to have
> this
> comment here.
>
Yeah, I should take out the mention of the LRUs from the definitions of the
states.
Thanks
Haitao
On Mon, 2023-10-02 at 23:49 -0500, Haitao Huang wrote:
> On Wed, 27 Sep 2023 05:28:36 -0500, Huang, Kai <kai.huang@intel.com> wrote:
>
> > On Fri, 2023-09-22 at 20:06 -0700, Haitao Huang wrote:
> > > Use the lower 3 bits in the flags field of sgx_epc_page struct to
> > > track EPC states in its life cycle and define an enum for possible
> > > states. More state(s) will be added later.
> >
> > This patch does more than what the changelog claims to do. AFAICT it
> > does
> > below:
> >
> > 1) Use the lower 3 bits to track EPC page status
> > 2) Rename SGX_EPC_PAGE_RECLAIMER_TRACKED to SGX_EPC_PAGE_RECLAIMABLE
> > 3) Introduce a new state SGX_EPC_PAGE_UNRECLAIMABLE
> > 4) Track SECS and VA pages as SGX_EPC_PAGE_UNRECLAIMABLE
> >
> > The changelog only says 1) IIUC.
> >
> I don't quite get why you would view 3) as a separate item from 1).
1) is about using some method to track EPC page status, 3) is adding a new
state.
Why can't they be separated?
> In my view, 4) is not done as long as there is not separate list to track
> it.
You are literally doing below:
@@ -113,6 +113,9 @@ static int sgx_encl_create(struct sgx_encl *encl, struct
sgx_secs *secs)
encl->attributes = secs->attributes;
encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
+ sgx_record_epc_page(encl->secs.epc_page,
+ SGX_EPC_PAGE_UNRECLAIMABLE);
+
Which obviously is tracking SECS as an unreclaimable page here.
The only thing you are not doing now is putting it on the actual list, which you
introduced in a later patch.
But why not just do them together?
On Tue, 03 Oct 2023 15:03:48 -0500, Huang, Kai <kai.huang@intel.com> wrote:
> On Mon, 2023-10-02 at 23:49 -0500, Haitao Huang wrote:
>> On Wed, 27 Sep 2023 05:28:36 -0500, Huang, Kai <kai.huang@intel.com>
>> wrote:
>>
>> > On Fri, 2023-09-22 at 20:06 -0700, Haitao Huang wrote:
>> > > Use the lower 3 bits in the flags field of sgx_epc_page struct to
>> > > track EPC states in its life cycle and define an enum for possible
>> > > states. More state(s) will be added later.
>> >
>> > This patch does more than what the changelog claims to do. AFAICT it
>> > does
>> > below:
>> >
>> > 1) Use the lower 3 bits to track EPC page status
>> > 2) Rename SGX_EPC_PAGE_RECLAIMER_TRACKED to SGX_EPC_PAGE_RECLAIMABLE
>> > 3) Introduce a new state SGX_EPC_PAGE_UNRECLAIMABLE
>> > 4) Track SECS and VA pages as SGX_EPC_PAGE_UNRECLAIMABLE
>> >
>> > The changelog only says 1) IIUC.
>> >
>> I don't quite get why you would view 3) as a separate item from 1).
>
> 1) is about using some method to track EPC page status, 3) is adding a
> new
> state.
>
> Why cannot they be separated?
>
>> In my view, 4) is not done as long as there is not separate list to
>> track
>> it.
>
> You are literally doing below:
>
> @@ -113,6 +113,9 @@ static int sgx_encl_create(struct sgx_encl *encl,
> struct
> sgx_secs *secs)
> encl->attributes = secs->attributes;
> encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
> + sgx_record_epc_page(encl->secs.epc_page,
> + SGX_EPC_PAGE_UNRECLAIMABLE);
> +
>
> Which obviously is tracking SECS as unreclaimable page here.
>
> The only thing you are not doing now is to put to the actual list, which
> you
> introduced in a later patch.
>
> But why not just doing them together?
>
>
I see where the problem is now. Initially these states were bit masks, so
UNTRACKED and UNRECLAIMABLE both had no bits set (zero). I'll change
these "record" calls to pass UNTRACKED instead, and later replace it with
UNRECLAIMABLE when the pages are actually added to the list. So the UNRECLAIMABLE
state can also be delayed until the patch that adds the list.
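I.e., for now something like (sketch, using the NOT_TRACKED name from this patch):

	sgx_record_epc_page(encl->secs.epc_page, SGX_EPC_PAGE_NOT_TRACKED);

and switch the argument to SGX_EPC_PAGE_UNRECLAIMABLE only in the patch that adds
the unreclaimable list.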
Thanks.
Haitao
On Wed, 2023-10-04 at 10:24 -0500, Haitao Huang wrote:
> On Tue, 03 Oct 2023 15:03:48 -0500, Huang, Kai <kai.huang@intel.com> wrote:
>
> > On Mon, 2023-10-02 at 23:49 -0500, Haitao Huang wrote:
> > > On Wed, 27 Sep 2023 05:28:36 -0500, Huang, Kai <kai.huang@intel.com>
> > > wrote:
> > >
> > > > On Fri, 2023-09-22 at 20:06 -0700, Haitao Huang wrote:
> > > > > Use the lower 3 bits in the flags field of sgx_epc_page struct to
> > > > > track EPC states in its life cycle and define an enum for possible
> > > > > states. More state(s) will be added later.
> > > >
> > > > This patch does more than what the changelog claims to do. AFAICT it
> > > > does
> > > > below:
> > > >
> > > > 1) Use the lower 3 bits to track EPC page status
> > > > 2) Rename SGX_EPC_PAGE_RECLAIMER_TRACKED to SGX_EPC_PAGE_RECLAIMABLE
> > > > 3) Introduce a new state SGX_EPC_PAGE_UNRECLAIMABLE
> > > > 4) Track SECS and VA pages as SGX_EPC_PAGE_UNRECLAIMABLE
> > > >
> > > > The changelog only says 1) IIUC.
> > > >
> > > I don't quite get why you would view 3) as a separate item from 1).
> >
> > 1) is about using some method to track EPC page status, 3) is adding a
> > new
> > state.
> >
> > Why cannot they be separated?
> >
> > > In my view, 4) is not done as long as there is not separate list to
> > > track
> > > it.
> >
> > You are literally doing below:
> >
> > @@ -113,6 +113,9 @@ static int sgx_encl_create(struct sgx_encl *encl,
> > struct
> > sgx_secs *secs)
> > encl->attributes = secs->attributes;
> > encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
> > + sgx_record_epc_page(encl->secs.epc_page,
> > + SGX_EPC_PAGE_UNRECLAIMABLE);
> > +
> >
> > Which obviously is tracking SECS as unreclaimable page here.
> >
> > The only thing you are not doing now is to put to the actual list, which
> > you
> > introduced in a later patch.
> >
> > But why not just doing them together?
> >
> >
> I see where the problem is now. Initially these states are bit masks so
> UNTRACKED and UNRECLAIMABLE are all not masked (set zero). I'll change
> these "record" calls with UNTRACKED instead, and later replace with
> UNRECLAIMABLE when they are actually added to the list. So UNRECLAIMABLE
> state can also be delayed until that patch with the list added.
I am not sure whether I am following, but could we just delay introducing the
"untracked" or "unreclaimable" until the list is added?
Why do we need to call sgx_record_epc_page() for SECS and VA pages in _this_
patch?
Reading again, I _think_ the reason you added these new states is that you
want to justify using the low 3 bits as EPC page states, i.e., the below code ...
+#define SGX_EPC_PAGE_STATE_MASK GENMASK(2, 0)
But for now we only have two valid states:
- SGX_EPC_PAGE_IS_FREE
- SGX_EPC_PAGE_RECLAIMER_TRACKED
Thus you added two more states: NOT_TRACKED/UNRECLAIMABLE. And more
confusingly, you added calls to sgx_record_epc_page() for SECS and VA pages in
this patch to try to actually use these new states, while the changelog says:
Use the lower 3 bits in the flags field of sgx_epc_page struct to
track EPC states in its life cycle and define an enum for possible
states. More state(s) will be added later.
... which doesn't mention any of above.
But this doesn't stand either because you only need 2 bits for the four states,
not 3. So I don't see how adding the new states could help here.
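(For four states a 2-bit mask would already do, e.g.:

	#define SGX_EPC_PAGE_STATE_MASK	GENMASK(1, 0)	/* values 0-3 fit in 2 bits */

just as a sketch of the point, not what the patch does.)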
So I would suggest two options:
1)
In this patch, you only change the way to track EPC states to reflect your
changelog of this patch (maybe you can add NOT_TRACKED, but I am not sure /
don't care, just give a justification if you do).
And then you have a patch to introduce the new unreclaimable list, the new EPC
state, and call sgx_record_epc_page() *AND* sgx_drop_epc_page() for SECS/VA
pages. The patch could be titled such as:
x86/sgx: Store SECS/VA pages in the unreclaimable list
2)
You do the opposite to option 1): Introduce the patch
x86/sgx: Store SECS/VA pages in the unreclaimable list
... first, and then convert the EPC states to use the lower 3 bits (actually 2
bits are enough; you can extend to 3 bits in a later patch when needed).
Does above make more sense?