[v2,06/18] x86/sgx: Introduce RECLAIM_IN_PROGRESS flag for EPC pages

Message ID 20221202183655.3767674-7-kristen@linux.intel.com
State New
Headers
Series Add Cgroup support for SGX EPC memory |

Commit Message

Kristen Carlson Accardi Dec. 2, 2022, 6:36 p.m. UTC
  From: Sean Christopherson <sean.j.christopherson@intel.com>

When selecting pages to be reclaimed from the page pool (sgx_global_lru),
the list of reclaimable pages is walked, and any page that is both
reclaimable and not in the process of being freed is added to a list of
potential candidates to be reclaimed. After that, this separate list is
further examined and may or may not ultimately be reclaimed. In order
to prevent this page from being removed from the sgx_epc_lru_lists
struct in a separate thread by sgx_drop_epc_page(), keep track of
whether the EPC page is in the middle of being reclaimed with
the addtion of a RECLAIM_IN_PROGRESS flag, and do not delete the page
off the LRU in sgx_drop_epc_page() if it has not yet finished being
reclaimed.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kernel/cpu/sgx/main.c | 15 ++++++++++-----
 arch/x86/kernel/cpu/sgx/sgx.h  |  2 ++
 2 files changed, 12 insertions(+), 5 deletions(-)
  

Comments

Dave Hansen Dec. 2, 2022, 10:15 p.m. UTC | #1
On 12/2/22 10:36, Kristen Carlson Accardi wrote:
> When selecting pages to be reclaimed from the page pool (sgx_global_lru),
> the list of reclaimable pages is walked, and any page that is both
> reclaimable and not in the process of being freed is added to a list of
> potential candidates to be reclaimed. After that, this separate list is
> further examined and may or may not ultimately be reclaimed. In order
> to prevent this page from being removed from the sgx_epc_lru_lists
> struct in a separate thread by sgx_drop_epc_page(), keep track of
> whether the EPC page is in the middle of being reclaimed with
> the addtion of a RECLAIM_IN_PROGRESS flag, and do not delete the page
> off the LRU in sgx_drop_epc_page() if it has not yet finished being
> reclaimed.

This never really comes out and tells us what problem is being addressed.
  
Jarkko Sakkinen Dec. 8, 2022, 3:46 p.m. UTC | #2
On Fri, Dec 02, 2022 at 10:36:42AM -0800, Kristen Carlson Accardi wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> When selecting pages to be reclaimed from the page pool (sgx_global_lru),
> the list of reclaimable pages is walked, and any page that is both
> reclaimable and not in the process of being freed is added to a list of
> potential candidates to be reclaimed. After that, this separate list is
> further examined and may or may not ultimately be reclaimed. In order
> to prevent this page from being removed from the sgx_epc_lru_lists
> struct in a separate thread by sgx_drop_epc_page(), keep track of
> whether the EPC page is in the middle of being reclaimed with
> the addtion of a RECLAIM_IN_PROGRESS flag, and do not delete the page
> off the LRU in sgx_drop_epc_page() if it has not yet finished being
> reclaimed.
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> Cc: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kernel/cpu/sgx/main.c | 15 ++++++++++-----
>  arch/x86/kernel/cpu/sgx/sgx.h  |  2 ++
>  2 files changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index ecd7f8e704cc..bad72498b0a7 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -305,13 +305,15 @@ static void __sgx_reclaim_pages(void)
>  
>  		encl_page = epc_page->encl_owner;
>  
> -		if (kref_get_unless_zero(&encl_page->encl->refcount) != 0)
> +		if (kref_get_unless_zero(&encl_page->encl->refcount) != 0) {
> +			epc_page->flags |= SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
>  			chunk[cnt++] = epc_page;
> -		else
> +		} else {
>  			/* The owner is freeing the page. No need to add the
>  			 * page back to the list of reclaimable pages.
>  			 */
>  			epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> +		}
>  	}
>  	spin_unlock(&sgx_global_lru.lock);
>  
> @@ -337,6 +339,7 @@ static void __sgx_reclaim_pages(void)
>  
>  skip:
>  		spin_lock(&sgx_global_lru.lock);
> +		epc_page->flags &= ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
>  		sgx_epc_push_reclaimable(&sgx_global_lru, epc_page);
>  		spin_unlock(&sgx_global_lru.lock);
>  
> @@ -360,7 +363,8 @@ static void __sgx_reclaim_pages(void)
>  		sgx_reclaimer_write(epc_page, &backing[i]);
>  
>  		kref_put(&encl_page->encl->refcount, sgx_encl_release);
> -		epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> +		epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> +				     SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
>  
>  		sgx_free_epc_page(epc_page);
>  	}
> @@ -508,7 +512,8 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
>  void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
>  {
>  	spin_lock(&sgx_global_lru.lock);
> -	WARN_ON(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
> +	WARN_ON(page->flags & (SGX_EPC_PAGE_RECLAIMER_TRACKED |
> +			       SGX_EPC_PAGE_RECLAIM_IN_PROGRESS));
>  	page->flags |= flags;
>  	if (flags & SGX_EPC_PAGE_RECLAIMER_TRACKED)
>  		sgx_epc_push_reclaimable(&sgx_global_lru, page);
> @@ -532,7 +537,7 @@ int sgx_drop_epc_page(struct sgx_epc_page *page)
>  	spin_lock(&sgx_global_lru.lock);
>  	if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
>  		/* The page is being reclaimed. */
> -		if (list_empty(&page->list)) {
> +		if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS) {
>  			spin_unlock(&sgx_global_lru.lock);
>  			return -EBUSY;
>  		}
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index ba4338b7303f..37d66bc6ca27 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -30,6 +30,8 @@
>  #define SGX_EPC_PAGE_IS_FREE		BIT(1)
>  /* Pages allocated for KVM guest */
>  #define SGX_EPC_PAGE_KVM_GUEST		BIT(2)
> +/* page flag to indicate reclaim is in progress */
> +#define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
>  
>  struct sgx_epc_page {
>  	unsigned int section;
> -- 
> 2.38.1
> 

I would create:

enum sgx_epc_state {    
        SGX_EPC_STATE_READY = 0,
        SGX_EPC_STATE_RECLAIMER_TRACKED = 1,
        SGX_EPC_STATE_RECLAIM_IN_PROGRESS = 2,
};

I.e. not a bitmask because page should have only one state at
a time for any of this to make any sense. We have an FSM,
right?

And then allocate 2 upper bits to store this information from
flags.

And probably would make sense to have inline helper functions
to setting and getting the state that does the bitshifting and
masking shenanigangs.

This would be a patch prepending this.

In this patch you should then describe in the context of FSM
how EPC page moves between these states. With that knowledge
we can then reflect the actual code change.

The point is not to get right but more like a mindset that we
can discuss right or wrong in thee first place so just do your
best but don't stress too much.

BR, Jarkko
  
Kristen Carlson Accardi Dec. 8, 2022, 6:13 p.m. UTC | #3
On Thu, 2022-12-08 at 15:46 +0000, Jarkko Sakkinen wrote:
> On Fri, Dec 02, 2022 at 10:36:42AM -0800, Kristen Carlson Accardi
> wrote:
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > When selecting pages to be reclaimed from the page pool
> > (sgx_global_lru),
> > the list of reclaimable pages is walked, and any page that is both
> > reclaimable and not in the process of being freed is added to a
> > list of
> > potential candidates to be reclaimed. After that, this separate
> > list is
> > further examined and may or may not ultimately be reclaimed. In
> > order
> > to prevent this page from being removed from the sgx_epc_lru_lists
> > struct in a separate thread by sgx_drop_epc_page(), keep track of
> > whether the EPC page is in the middle of being reclaimed with
> > the addtion of a RECLAIM_IN_PROGRESS flag, and do not delete the
> > page
> > off the LRU in sgx_drop_epc_page() if it has not yet finished being
> > reclaimed.
> > 
> > Signed-off-by: Sean Christopherson
> > <sean.j.christopherson@intel.com>
> > Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> > Cc: Sean Christopherson <seanjc@google.com>
> > ---
> >  arch/x86/kernel/cpu/sgx/main.c | 15 ++++++++++-----
> >  arch/x86/kernel/cpu/sgx/sgx.h  |  2 ++
> >  2 files changed, 12 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/x86/kernel/cpu/sgx/main.c
> > b/arch/x86/kernel/cpu/sgx/main.c
> > index ecd7f8e704cc..bad72498b0a7 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -305,13 +305,15 @@ static void __sgx_reclaim_pages(void)
> >  
> >                 encl_page = epc_page->encl_owner;
> >  
> > -               if (kref_get_unless_zero(&encl_page->encl-
> > >refcount) != 0)
> > +               if (kref_get_unless_zero(&encl_page->encl-
> > >refcount) != 0) {
> > +                       epc_page->flags |=
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> >                         chunk[cnt++] = epc_page;
> > -               else
> > +               } else {
> >                         /* The owner is freeing the page. No need
> > to add the
> >                          * page back to the list of reclaimable
> > pages.
> >                          */
> >                         epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > +               }
> >         }
> >         spin_unlock(&sgx_global_lru.lock);
> >  
> > @@ -337,6 +339,7 @@ static void __sgx_reclaim_pages(void)
> >  
> >  skip:
> >                 spin_lock(&sgx_global_lru.lock);
> > +               epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> >                 sgx_epc_push_reclaimable(&sgx_global_lru,
> > epc_page);
> >                 spin_unlock(&sgx_global_lru.lock);
> >  
> > @@ -360,7 +363,8 @@ static void __sgx_reclaim_pages(void)
> >                 sgx_reclaimer_write(epc_page, &backing[i]);
> >  
> >                 kref_put(&encl_page->encl->refcount,
> > sgx_encl_release);
> > -               epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > +               epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED
> > |
> > +                                   
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
> >  
> >                 sgx_free_epc_page(epc_page);
> >         }
> > @@ -508,7 +512,8 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
> >  void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long
> > flags)
> >  {
> >         spin_lock(&sgx_global_lru.lock);
> > -       WARN_ON(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
> > +       WARN_ON(page->flags & (SGX_EPC_PAGE_RECLAIMER_TRACKED |
> > +                              SGX_EPC_PAGE_RECLAIM_IN_PROGRESS));
> >         page->flags |= flags;
> >         if (flags & SGX_EPC_PAGE_RECLAIMER_TRACKED)
> >                 sgx_epc_push_reclaimable(&sgx_global_lru, page);
> > @@ -532,7 +537,7 @@ int sgx_drop_epc_page(struct sgx_epc_page
> > *page)
> >         spin_lock(&sgx_global_lru.lock);
> >         if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
> >                 /* The page is being reclaimed. */
> > -               if (list_empty(&page->list)) {
> > +               if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)
> > {
> >                         spin_unlock(&sgx_global_lru.lock);
> >                         return -EBUSY;
> >                 }
> > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h
> > b/arch/x86/kernel/cpu/sgx/sgx.h
> > index ba4338b7303f..37d66bc6ca27 100644
> > --- a/arch/x86/kernel/cpu/sgx/sgx.h
> > +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> > @@ -30,6 +30,8 @@
> >  #define SGX_EPC_PAGE_IS_FREE           BIT(1)
> >  /* Pages allocated for KVM guest */
> >  #define SGX_EPC_PAGE_KVM_GUEST         BIT(2)
> > +/* page flag to indicate reclaim is in progress */
> > +#define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
> >  
> >  struct sgx_epc_page {
> >         unsigned int section;
> > -- 
> > 2.38.1
> > 
> 
> I would create:
> 
> enum sgx_epc_state {    
>         SGX_EPC_STATE_READY = 0,
>         SGX_EPC_STATE_RECLAIMER_TRACKED = 1,
>         SGX_EPC_STATE_RECLAIM_IN_PROGRESS = 2,
> };
> 
> I.e. not a bitmask because page should have only one state at
> a time for any of this to make any sense. We have an FSM,
> right?
> 

I can experiment with it and see if it can work for the flags to
represent a state machine. They don't work like that right now
obviously, since you can have both RECLAIMER_TRACKED and
RECLAIM_IN_PROGRESS set at the same time.


> And then allocate 2 upper bits to store this information from
> flags.
> 
> And probably would make sense to have inline helper functions
> to setting and getting the state that does the bitshifting and
> masking shenanigangs.
> 
> This would be a patch prepending this.
> 
> In this patch you should then describe in the context of FSM
> how EPC page moves between these states. With that knowledge
> we can then reflect the actual code change.
> 
> The point is not to get right but more like a mindset that we
> can discuss right or wrong in thee first place so just do your
> best but don't stress too much.
> 
> BR, Jarkko
  

Patch

diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index ecd7f8e704cc..bad72498b0a7 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -305,13 +305,15 @@  static void __sgx_reclaim_pages(void)
 
 		encl_page = epc_page->encl_owner;
 
-		if (kref_get_unless_zero(&encl_page->encl->refcount) != 0)
+		if (kref_get_unless_zero(&encl_page->encl->refcount) != 0) {
+			epc_page->flags |= SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
 			chunk[cnt++] = epc_page;
-		else
+		} else {
 			/* The owner is freeing the page. No need to add the
 			 * page back to the list of reclaimable pages.
 			 */
 			epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
+		}
 	}
 	spin_unlock(&sgx_global_lru.lock);
 
@@ -337,6 +339,7 @@  static void __sgx_reclaim_pages(void)
 
 skip:
 		spin_lock(&sgx_global_lru.lock);
+		epc_page->flags &= ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
 		sgx_epc_push_reclaimable(&sgx_global_lru, epc_page);
 		spin_unlock(&sgx_global_lru.lock);
 
@@ -360,7 +363,8 @@  static void __sgx_reclaim_pages(void)
 		sgx_reclaimer_write(epc_page, &backing[i]);
 
 		kref_put(&encl_page->encl->refcount, sgx_encl_release);
-		epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
+		epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
+				     SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
 
 		sgx_free_epc_page(epc_page);
 	}
@@ -508,7 +512,8 @@  struct sgx_epc_page *__sgx_alloc_epc_page(void)
 void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
 {
 	spin_lock(&sgx_global_lru.lock);
-	WARN_ON(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+	WARN_ON(page->flags & (SGX_EPC_PAGE_RECLAIMER_TRACKED |
+			       SGX_EPC_PAGE_RECLAIM_IN_PROGRESS));
 	page->flags |= flags;
 	if (flags & SGX_EPC_PAGE_RECLAIMER_TRACKED)
 		sgx_epc_push_reclaimable(&sgx_global_lru, page);
@@ -532,7 +537,7 @@  int sgx_drop_epc_page(struct sgx_epc_page *page)
 	spin_lock(&sgx_global_lru.lock);
 	if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
 		/* The page is being reclaimed. */
-		if (list_empty(&page->list)) {
+		if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS) {
 			spin_unlock(&sgx_global_lru.lock);
 			return -EBUSY;
 		}
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index ba4338b7303f..37d66bc6ca27 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -30,6 +30,8 @@ 
 #define SGX_EPC_PAGE_IS_FREE		BIT(1)
 /* Pages allocated for KVM guest */
 #define SGX_EPC_PAGE_KVM_GUEST		BIT(2)
+/* page flag to indicate reclaim is in progress */
+#define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
 
 struct sgx_epc_page {
 	unsigned int section;