[RFC,v4,7/9] slub: Optimize deactivate_slab()

Message ID 20231031140741.79387-8-chengming.zhou@linux.dev
State New
Headers
Series slub: Delay freezing of CPU partial slabs |

Commit Message

Chengming Zhou Oct. 31, 2023, 2:07 p.m. UTC
  From: Chengming Zhou <zhouchengming@bytedance.com>

Since the introduce of unfrozen slabs on cpu partial list, we don't
need to synchronize the slab frozen state under the node list_lock.

The caller of deactivate_slab() and the caller of __slab_free() won't
manipulate the slab list concurrently.

So we can get node list_lock in the last stage if we really need to
manipulate the slab list in this path.

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
 mm/slub.c | 76 +++++++++++++++++++------------------------------------
 1 file changed, 26 insertions(+), 50 deletions(-)
  

Comments

Vlastimil Babka Nov. 1, 2023, 1:21 p.m. UTC | #1
On 10/31/23 15:07, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
> 
> Since the introduce of unfrozen slabs on cpu partial list, we don't
> need to synchronize the slab frozen state under the node list_lock.
> 
> The caller of deactivate_slab() and the caller of __slab_free() won't
> manipulate the slab list concurrently.
> 
> So we can get node list_lock in the last stage if we really need to
> manipulate the slab list in this path.
> 
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

> ---
>  mm/slub.c | 76 +++++++++++++++++++------------------------------------
>  1 file changed, 26 insertions(+), 50 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index bcb5b2c4e213..c429f8baba5f 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2468,10 +2468,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
>  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  			    void *freelist)
>  {
> -	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
>  	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
>  	int free_delta = 0;
> -	enum slab_modes mode = M_NONE;
>  	void *nextfree, *freelist_iter, *freelist_tail;
>  	int tail = DEACTIVATE_TO_HEAD;
>  	unsigned long flags = 0;
> @@ -2512,62 +2510,40 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  	 *
>  	 * Ensure that the slab is unfrozen while the list presence
>  	 * reflects the actual number of objects during unfreeze.

I think this we can delete also these two lines. If there's no other
reason for v5, I can do it when merging the series.
  
Chengming Zhou Nov. 2, 2023, 2:10 a.m. UTC | #2
On 2023/11/1 21:21, Vlastimil Babka wrote:
> 
> 
> On 10/31/23 15:07, chengming.zhou@linux.dev wrote:
>> From: Chengming Zhou <zhouchengming@bytedance.com>
>>
>> Since the introduce of unfrozen slabs on cpu partial list, we don't
>> need to synchronize the slab frozen state under the node list_lock.
>>
>> The caller of deactivate_slab() and the caller of __slab_free() won't
>> manipulate the slab list concurrently.
>>
>> So we can get node list_lock in the last stage if we really need to
>> manipulate the slab list in this path.
>>
>> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> 
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
> 
>> ---
>>  mm/slub.c | 76 +++++++++++++++++++------------------------------------
>>  1 file changed, 26 insertions(+), 50 deletions(-)
>>
>> diff --git a/mm/slub.c b/mm/slub.c
>> index bcb5b2c4e213..c429f8baba5f 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -2468,10 +2468,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
>>  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>>  			    void *freelist)
>>  {
>> -	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
>>  	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
>>  	int free_delta = 0;
>> -	enum slab_modes mode = M_NONE;
>>  	void *nextfree, *freelist_iter, *freelist_tail;
>>  	int tail = DEACTIVATE_TO_HEAD;
>>  	unsigned long flags = 0;
>> @@ -2512,62 +2510,40 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>>  	 *
>>  	 * Ensure that the slab is unfrozen while the list presence
>>  	 * reflects the actual number of objects during unfreeze.
> 
> I think this we can delete also these two lines. If there's no other
> reason for v5, I can do it when merging the series.

Ok, I will delete it in v5.

Thanks!
  

Patch

diff --git a/mm/slub.c b/mm/slub.c
index bcb5b2c4e213..c429f8baba5f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2468,10 +2468,8 @@  static void init_kmem_cache_cpus(struct kmem_cache *s)
 static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 			    void *freelist)
 {
-	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
 	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
 	int free_delta = 0;
-	enum slab_modes mode = M_NONE;
 	void *nextfree, *freelist_iter, *freelist_tail;
 	int tail = DEACTIVATE_TO_HEAD;
 	unsigned long flags = 0;
@@ -2512,62 +2510,40 @@  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	 *
 	 * Ensure that the slab is unfrozen while the list presence
 	 * reflects the actual number of objects during unfreeze.
-	 *
-	 * We first perform cmpxchg holding lock and insert to list
-	 * when it succeed. If there is mismatch then the slab is not
-	 * unfrozen and number of objects in the slab may have changed.
-	 * Then release lock and retry cmpxchg again.
 	 */
-redo:
-
-	old.freelist = READ_ONCE(slab->freelist);
-	old.counters = READ_ONCE(slab->counters);
-	VM_BUG_ON(!old.frozen);
-
-	/* Determine target state of the slab */
-	new.counters = old.counters;
-	if (freelist_tail) {
-		new.inuse -= free_delta;
-		set_freepointer(s, freelist_tail, old.freelist);
-		new.freelist = freelist;
-	} else
-		new.freelist = old.freelist;
-
-	new.frozen = 0;
+	do {
+		old.freelist = READ_ONCE(slab->freelist);
+		old.counters = READ_ONCE(slab->counters);
+		VM_BUG_ON(!old.frozen);
+
+		/* Determine target state of the slab */
+		new.counters = old.counters;
+		new.frozen = 0;
+		if (freelist_tail) {
+			new.inuse -= free_delta;
+			set_freepointer(s, freelist_tail, old.freelist);
+			new.freelist = freelist;
+		} else {
+			new.freelist = old.freelist;
+		}
+	} while (!slab_update_freelist(s, slab,
+		old.freelist, old.counters,
+		new.freelist, new.counters,
+		"unfreezing slab"));
 
+	/*
+	 * Stage three: Manipulate the slab list based on the updated state.
+	 */
 	if (!new.inuse && n->nr_partial >= s->min_partial) {
-		mode = M_FREE;
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, slab);
+		stat(s, FREE_SLAB);
 	} else if (new.freelist) {
-		mode = M_PARTIAL;
-		/*
-		 * Taking the spinlock removes the possibility that
-		 * acquire_slab() will see a slab that is frozen
-		 */
 		spin_lock_irqsave(&n->list_lock, flags);
-	} else {
-		mode = M_FULL_NOLIST;
-	}
-
-
-	if (!slab_update_freelist(s, slab,
-				old.freelist, old.counters,
-				new.freelist, new.counters,
-				"unfreezing slab")) {
-		if (mode == M_PARTIAL)
-			spin_unlock_irqrestore(&n->list_lock, flags);
-		goto redo;
-	}
-
-
-	if (mode == M_PARTIAL) {
 		add_partial(n, slab, tail);
 		spin_unlock_irqrestore(&n->list_lock, flags);
 		stat(s, tail);
-	} else if (mode == M_FREE) {
-		stat(s, DEACTIVATE_EMPTY);
-		discard_slab(s, slab);
-		stat(s, FREE_SLAB);
-	} else if (mode == M_FULL_NOLIST) {
+	} else {
 		stat(s, DEACTIVATE_FULL);
 	}
 }