[RFC,v3,7/7] slub: Optimize deactivate_slab()

Message ID 20231024093345.3676493-8-chengming.zhou@linux.dev
State New
Headers
Series slub: Delay freezing of CPU partial slabs |

Commit Message

Chengming Zhou Oct. 24, 2023, 9:33 a.m. UTC
  From: Chengming Zhou <zhouchengming@bytedance.com>

Since the introduction of unfrozen slabs on the cpu partial list, we don't
need to synchronize the slab frozen state under the node list_lock.

The caller of deactivate_slab() and the caller of __slab_free() won't
manipulate the slab list concurrently.

So we can take the node list_lock only in the last stage, and only if we
really need to manipulate the slab list in this path.

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
 mm/slub.c | 70 ++++++++++++++++++++-----------------------------------
 1 file changed, 25 insertions(+), 45 deletions(-)
  

Comments

Vlastimil Babka Oct. 31, 2023, 11:15 a.m. UTC | #1
On 10/24/23 11:33, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
> 
> Since the introduce of unfrozen slabs on cpu partial list, we don't
> need to synchronize the slab frozen state under the node list_lock.
> 
> The caller of deactivate_slab() and the caller of __slab_free() won't
> manipulate the slab list concurrently.
> 
> So we can get node list_lock in the last stage if we really need to
> manipulate the slab list in this path.
> 
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>

Nice simplification!

> ---
>  mm/slub.c | 70 ++++++++++++++++++++-----------------------------------
>  1 file changed, 25 insertions(+), 45 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 486d44421432..64d550e415eb 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2449,10 +2449,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
>  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  			    void *freelist)
>  {
> -	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
>  	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
>  	int free_delta = 0;
> -	enum slab_modes mode = M_NONE;
>  	void *nextfree, *freelist_iter, *freelist_tail;
>  	int tail = DEACTIVATE_TO_HEAD;
>  	unsigned long flags = 0;
> @@ -2499,58 +2497,40 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>  	 * unfrozen and number of objects in the slab may have changed.
>  	 * Then release lock and retry cmpxchg again.
>  	 */

This comment above (including parts not visible here) should be updated as
there is no more list manipulation during unfreeze.

> -redo:
> -
> -	old.freelist = READ_ONCE(slab->freelist);
> -	old.counters = READ_ONCE(slab->counters);
> -	VM_BUG_ON(!old.frozen);
> -
> -	/* Determine target state of the slab */
> -	new.counters = old.counters;
> -	if (freelist_tail) {
> -		new.inuse -= free_delta;
> -		set_freepointer(s, freelist_tail, old.freelist);
> -		new.freelist = freelist;
> -	} else
> -		new.freelist = old.freelist;
> +	do {
> +		old.freelist = READ_ONCE(slab->freelist);
> +		old.counters = READ_ONCE(slab->counters);
> +		VM_BUG_ON(!old.frozen);
> +
> +		/* Determine target state of the slab */
> +		new.counters = old.counters;
> +		new.frozen = 0;
> +		if (freelist_tail) {
> +			new.inuse -= free_delta;
> +			set_freepointer(s, freelist_tail, old.freelist);
> +			new.freelist = freelist;
> +		} else
> +			new.freelist = old.freelist;

Per coding style we should have the else with { } even if it's one line, to
match the if branch. Since we touch the code that was previously violating
the style, we can fix it up.

>  
> -	new.frozen = 0;
> +	} while (!slab_update_freelist(s, slab,
> +		old.freelist, old.counters,
> +		new.freelist, new.counters,
> +		"unfreezing slab"));
>  
> +	/*
> +	 * Stage three: Manipulate the slab list based on the updated state.
> +	 */
>  	if (!new.inuse && n->nr_partial >= s->min_partial) {
> -		mode = M_FREE;
> +		stat(s, DEACTIVATE_EMPTY);
> +		discard_slab(s, slab);
> +		stat(s, FREE_SLAB);
>  	} else if (new.freelist) {
> -		mode = M_PARTIAL;
> -		/*
> -		 * Taking the spinlock removes the possibility that
> -		 * acquire_slab() will see a slab that is frozen
> -		 */
>  		spin_lock_irqsave(&n->list_lock, flags);
> -	} else {
> -		mode = M_FULL_NOLIST;
> -	}
> -
> -
> -	if (!slab_update_freelist(s, slab,
> -				old.freelist, old.counters,
> -				new.freelist, new.counters,
> -				"unfreezing slab")) {
> -		if (mode == M_PARTIAL)
> -			spin_unlock_irqrestore(&n->list_lock, flags);
> -		goto redo;
> -	}
> -
> -
> -	if (mode == M_PARTIAL) {
>  		add_partial(n, slab, tail);
>  		spin_unlock_irqrestore(&n->list_lock, flags);
>  		stat(s, tail);
> -	} else if (mode == M_FREE) {
> -		stat(s, DEACTIVATE_EMPTY);
> -		discard_slab(s, slab);
> -		stat(s, FREE_SLAB);
> -	} else if (mode == M_FULL_NOLIST) {
> +	} else
>  		stat(s, DEACTIVATE_FULL);
> -	}

Same here.

Thanks!

>  }
>  
>  #ifdef CONFIG_SLUB_CPU_PARTIAL
  
Chengming Zhou Oct. 31, 2023, 11:41 a.m. UTC | #2
On 2023/10/31 19:15, Vlastimil Babka wrote:
> On 10/24/23 11:33, chengming.zhou@linux.dev wrote:
>> From: Chengming Zhou <zhouchengming@bytedance.com>
>>
>> Since the introduce of unfrozen slabs on cpu partial list, we don't
>> need to synchronize the slab frozen state under the node list_lock.
>>
>> The caller of deactivate_slab() and the caller of __slab_free() won't
>> manipulate the slab list concurrently.
>>
>> So we can get node list_lock in the last stage if we really need to
>> manipulate the slab list in this path.
>>
>> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> 
> Nice simplification!
> 
>> ---
>>  mm/slub.c | 70 ++++++++++++++++++++-----------------------------------
>>  1 file changed, 25 insertions(+), 45 deletions(-)
>>
>> diff --git a/mm/slub.c b/mm/slub.c
>> index 486d44421432..64d550e415eb 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -2449,10 +2449,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
>>  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>>  			    void *freelist)
>>  {
>> -	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
>>  	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
>>  	int free_delta = 0;
>> -	enum slab_modes mode = M_NONE;
>>  	void *nextfree, *freelist_iter, *freelist_tail;
>>  	int tail = DEACTIVATE_TO_HEAD;
>>  	unsigned long flags = 0;
>> @@ -2499,58 +2497,40 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
>>  	 * unfrozen and number of objects in the slab may have changed.
>>  	 * Then release lock and retry cmpxchg again.
>>  	 */
> 
> This comment above (including parts not visible here) should be updated as
> there is no more list manipulation during unfreeze.

Right!

> 
>> -redo:
>> -
>> -	old.freelist = READ_ONCE(slab->freelist);
>> -	old.counters = READ_ONCE(slab->counters);
>> -	VM_BUG_ON(!old.frozen);
>> -
>> -	/* Determine target state of the slab */
>> -	new.counters = old.counters;
>> -	if (freelist_tail) {
>> -		new.inuse -= free_delta;
>> -		set_freepointer(s, freelist_tail, old.freelist);
>> -		new.freelist = freelist;
>> -	} else
>> -		new.freelist = old.freelist;
>> +	do {
>> +		old.freelist = READ_ONCE(slab->freelist);
>> +		old.counters = READ_ONCE(slab->counters);
>> +		VM_BUG_ON(!old.frozen);
>> +
>> +		/* Determine target state of the slab */
>> +		new.counters = old.counters;
>> +		new.frozen = 0;
>> +		if (freelist_tail) {
>> +			new.inuse -= free_delta;
>> +			set_freepointer(s, freelist_tail, old.freelist);
>> +			new.freelist = freelist;
>> +		} else
>> +			new.freelist = old.freelist;
> 
> Per coding style we should have the else with { } even if it's one line, to
> match the if branch. Since we touch the code that was previously violating
> the style, we can fix up.

Ok, I will fix all these.

Big thanks for your review!

> 
>>  
>> -	new.frozen = 0;
>> +	} while (!slab_update_freelist(s, slab,
>> +		old.freelist, old.counters,
>> +		new.freelist, new.counters,
>> +		"unfreezing slab"));
>>  
>> +	/*
>> +	 * Stage three: Manipulate the slab list based on the updated state.
>> +	 */
>>  	if (!new.inuse && n->nr_partial >= s->min_partial) {
>> -		mode = M_FREE;
>> +		stat(s, DEACTIVATE_EMPTY);
>> +		discard_slab(s, slab);
>> +		stat(s, FREE_SLAB);
>>  	} else if (new.freelist) {
>> -		mode = M_PARTIAL;
>> -		/*
>> -		 * Taking the spinlock removes the possibility that
>> -		 * acquire_slab() will see a slab that is frozen
>> -		 */
>>  		spin_lock_irqsave(&n->list_lock, flags);
>> -	} else {
>> -		mode = M_FULL_NOLIST;
>> -	}
>> -
>> -
>> -	if (!slab_update_freelist(s, slab,
>> -				old.freelist, old.counters,
>> -				new.freelist, new.counters,
>> -				"unfreezing slab")) {
>> -		if (mode == M_PARTIAL)
>> -			spin_unlock_irqrestore(&n->list_lock, flags);
>> -		goto redo;
>> -	}
>> -
>> -
>> -	if (mode == M_PARTIAL) {
>>  		add_partial(n, slab, tail);
>>  		spin_unlock_irqrestore(&n->list_lock, flags);
>>  		stat(s, tail);
>> -	} else if (mode == M_FREE) {
>> -		stat(s, DEACTIVATE_EMPTY);
>> -		discard_slab(s, slab);
>> -		stat(s, FREE_SLAB);
>> -	} else if (mode == M_FULL_NOLIST) {
>> +	} else
>>  		stat(s, DEACTIVATE_FULL);
>> -	}
> 
> Same here.
> 
> Thanks!
> 
>>  }
>>  
>>  #ifdef CONFIG_SLUB_CPU_PARTIAL
>
  

Patch

diff --git a/mm/slub.c b/mm/slub.c
index 486d44421432..64d550e415eb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2449,10 +2449,8 @@  static void init_kmem_cache_cpus(struct kmem_cache *s)
 static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 			    void *freelist)
 {
-	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
 	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
 	int free_delta = 0;
-	enum slab_modes mode = M_NONE;
 	void *nextfree, *freelist_iter, *freelist_tail;
 	int tail = DEACTIVATE_TO_HEAD;
 	unsigned long flags = 0;
@@ -2499,58 +2497,40 @@  static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	 * unfrozen and number of objects in the slab may have changed.
 	 * Then release lock and retry cmpxchg again.
 	 */
-redo:
-
-	old.freelist = READ_ONCE(slab->freelist);
-	old.counters = READ_ONCE(slab->counters);
-	VM_BUG_ON(!old.frozen);
-
-	/* Determine target state of the slab */
-	new.counters = old.counters;
-	if (freelist_tail) {
-		new.inuse -= free_delta;
-		set_freepointer(s, freelist_tail, old.freelist);
-		new.freelist = freelist;
-	} else
-		new.freelist = old.freelist;
+	do {
+		old.freelist = READ_ONCE(slab->freelist);
+		old.counters = READ_ONCE(slab->counters);
+		VM_BUG_ON(!old.frozen);
+
+		/* Determine target state of the slab */
+		new.counters = old.counters;
+		new.frozen = 0;
+		if (freelist_tail) {
+			new.inuse -= free_delta;
+			set_freepointer(s, freelist_tail, old.freelist);
+			new.freelist = freelist;
+		} else
+			new.freelist = old.freelist;
 
-	new.frozen = 0;
+	} while (!slab_update_freelist(s, slab,
+		old.freelist, old.counters,
+		new.freelist, new.counters,
+		"unfreezing slab"));
 
+	/*
+	 * Stage three: Manipulate the slab list based on the updated state.
+	 */
 	if (!new.inuse && n->nr_partial >= s->min_partial) {
-		mode = M_FREE;
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, slab);
+		stat(s, FREE_SLAB);
 	} else if (new.freelist) {
-		mode = M_PARTIAL;
-		/*
-		 * Taking the spinlock removes the possibility that
-		 * acquire_slab() will see a slab that is frozen
-		 */
 		spin_lock_irqsave(&n->list_lock, flags);
-	} else {
-		mode = M_FULL_NOLIST;
-	}
-
-
-	if (!slab_update_freelist(s, slab,
-				old.freelist, old.counters,
-				new.freelist, new.counters,
-				"unfreezing slab")) {
-		if (mode == M_PARTIAL)
-			spin_unlock_irqrestore(&n->list_lock, flags);
-		goto redo;
-	}
-
-
-	if (mode == M_PARTIAL) {
 		add_partial(n, slab, tail);
 		spin_unlock_irqrestore(&n->list_lock, flags);
 		stat(s, tail);
-	} else if (mode == M_FREE) {
-		stat(s, DEACTIVATE_EMPTY);
-		discard_slab(s, slab);
-		stat(s, FREE_SLAB);
-	} else if (mode == M_FULL_NOLIST) {
+	} else
 		stat(s, DEACTIVATE_FULL);
-	}
 }
 
 #ifdef CONFIG_SLUB_CPU_PARTIAL