[09/10] maple_tree: Rework mas_wr_slot_store() to be cleaner and more efficient.

Message ID 20230515131757.60035-10-zhangpeng.00@bytedance.com
State New
Series Clean ups for maple tree

Commit Message

Peng Zhang May 15, 2023, 1:17 p.m. UTC
  The code of mas_wr_slot_store() is messy; make it clearer and more
concise, and add comments. In addition, check whether either of the two
overwritten entries is empty to avoid calling mas_update_gap()
unnecessarily.

Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
---
 lib/maple_tree.c | 79 +++++++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 35 deletions(-)
  

Comments

Liam R. Howlett May 15, 2023, 6:01 p.m. UTC | #1
* Peng Zhang <zhangpeng.00@bytedance.com> [230515 09:18]:
> The code of mas_wr_slot_store() is messy; make it clearer and more
> concise, and add comments. In addition, check whether either of the
> two overwritten entries is empty to avoid calling mas_update_gap()
> unnecessarily.

Please drop the cases from the comments.  These aren't that complicated
to need diagrams.

Case 1: Overwriting the range and over a part of the next range
Case 2: Overwriting a part of the range and over the entire next range

> 
> Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
> ---
>  lib/maple_tree.c | 79 +++++++++++++++++++++++++++---------------------
>  1 file changed, 44 insertions(+), 35 deletions(-)
> 
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index 538e49feafbe4..d558e7bcb6da8 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -4190,53 +4190,62 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
>   * @wr_mas: the maple write state
>   *
>   * Return: True if stored, false otherwise
> + *
> + * Case 1:
> + *                       r_min   r_max    lmax
> + *                 +-------+-------+-------+
> + * original range: |       |offset | end   |
> + *                 +-----------------------+
> + *                         +-----------+
> + * overwrite:              |           |
> + *                         +-----------+
> + *                        index       last
> + *
> + * Case 2:
> + *                       r_min   r_max    lmax
> + *                 +-------+-------+-------+
> + * original range: |       |offset | end   |
> + *                 +-------+---------------+
> + *                             +-----------+
> + * overwrite:                  |           |
> + *                             +-----------+
> + *                           index        last
>   */
>  static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
>  {
>  	struct ma_state *mas = wr_mas->mas;
> -	unsigned long lmax; /* Logical max. */
>  	unsigned char offset = mas->offset;
> +	unsigned char offset_end = wr_mas->offset_end;
> +	unsigned long lmax = wr_mas->end_piv; /* Logical max. */
> +	bool gap = false;
>  
> -	if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
> -				  (offset != wr_mas->node_end)))
> -		return false;
> -
> -	if (offset == wr_mas->node_end - 1)
> -		lmax = mas->max;
> -	else
> -		lmax = wr_mas->pivots[offset + 1];
> -
> -	/* going to overwrite too many slots. */
> -	if (lmax < mas->last)
> +	if (offset_end - offset != 1)
>  		return false;
>  
> -	if (wr_mas->r_min == mas->index) {
> -		/* overwriting two or more ranges with one. */
> -		if (lmax == mas->last)
> -			return false;
> -
> -		/* Overwriting all of offset and a portion of offset + 1. */
> +	if (mas->index == wr_mas->r_min && mas->last < lmax) {
> +		/* Case 1 */
> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
>  		rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
>  		wr_mas->pivots[offset] = mas->last;
> -		goto done;
> -	}
> -
> -	/* Doesn't end on the next range end. */
> -	if (lmax != mas->last)
> +	} else if (mas->index > wr_mas->r_min && mas->last == lmax) {
> +		/* Case 2 */
> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
> +		rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
> +		wr_mas->pivots[offset] = mas->index - 1;

These two lines need to be in opposite order to ensure a reader sees
either the value or the previous value.  If you overwrite something with
a new value, it is possible that a reader looking for the next range
will get the value stored at offset (but not entry).

> +		mas->offset++; /* Keep mas accurate. */
> +	} else {
>  		return false;
> +	}
>  
> -	/* Overwriting a portion of offset and all of offset + 1 */
> -	if ((offset + 1 < mt_pivots[wr_mas->type]) &&
> -	    (wr_mas->entry || wr_mas->pivots[offset + 1]))
> -		wr_mas->pivots[offset + 1] = mas->last;
> -
> -	rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
> -	wr_mas->pivots[offset] = mas->index - 1;
> -	mas->offset++; /* Keep mas accurate. */
> -
> -done:
>  	trace_ma_write(__func__, mas, 0, wr_mas->entry);
> -	mas_update_gap(mas);
> +	/*
> +	 * Only update gap when the new entry is empty or there is an empty
> +	 * entry in the original two ranges.
> +	 */
> +	if (!wr_mas->entry || gap)
> +		mas_update_gap(mas);
>  	return true;
>  }
>  
> @@ -4418,7 +4427,7 @@ static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
>  	if (new_end == wr_mas->node_end + 1 && mas_wr_append(wr_mas))
>  		return;
>  
> -	if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
> +	if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas))
>  		return;
>  	else if (mas_wr_node_store(wr_mas))
>  		return;
> -- 
> 2.20.1
>
  
Peng Zhang May 16, 2023, 7:27 a.m. UTC | #2
On 2023/5/16 02:01, Liam R. Howlett wrote:
> * Peng Zhang <zhangpeng.00@bytedance.com> [230515 09:18]:
>> The code of mas_wr_slot_store() is messy; make it clearer and more
>> concise, and add comments. In addition, check whether either of the
>> two overwritten entries is empty to avoid calling mas_update_gap()
>> unnecessarily.
> 
> Please drop the cases from the comments.  These aren't that complicated
> to need diagrams.
> 
> Case 1: Overwriting the range and over a part of the next range
> Case 2: Overwriting a part of the range and over the entire next range
> 
>>
>> Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
>> ---
>>   lib/maple_tree.c | 79 +++++++++++++++++++++++++++---------------------
>>   1 file changed, 44 insertions(+), 35 deletions(-)
>>
>> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
>> index 538e49feafbe4..d558e7bcb6da8 100644
>> --- a/lib/maple_tree.c
>> +++ b/lib/maple_tree.c
>> @@ -4190,53 +4190,62 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
>>    * @wr_mas: the maple write state
>>    *
>>    * Return: True if stored, false otherwise
>> + *
>> + * Case 1:
>> + *                       r_min   r_max    lmax
>> + *                 +-------+-------+-------+
>> + * original range: |       |offset | end   |
>> + *                 +-----------------------+
>> + *                         +-----------+
>> + * overwrite:              |           |
>> + *                         +-----------+
>> + *                        index       last
>> + *
>> + * Case 2:
>> + *                       r_min   r_max    lmax
>> + *                 +-------+-------+-------+
>> + * original range: |       |offest | end   |
>> + *                 +-------+---------------+
>> + *                             +-----------+
>> + * overwrite:                  |           |
>> + *                             +-----------+
>> + *                           index        last
>>    */
>>   static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
>>   {
>>   	struct ma_state *mas = wr_mas->mas;
>> -	unsigned long lmax; /* Logical max. */
>>   	unsigned char offset = mas->offset;
>> +	unsigned char offset_end = wr_mas->offset_end;
>> +	unsigned long lmax = wr_mas->end_piv; /* Logical max. */
>> +	bool gap = false;
>>   
>> -	if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
>> -				  (offset != wr_mas->node_end)))
>> -		return false;
>> -
>> -	if (offset == wr_mas->node_end - 1)
>> -		lmax = mas->max;
>> -	else
>> -		lmax = wr_mas->pivots[offset + 1];
>> -
>> -	/* going to overwrite too many slots. */
>> -	if (lmax < mas->last)
>> +	if (offset_end - offset != 1)
>>   		return false;
>>   
>> -	if (wr_mas->r_min == mas->index) {
>> -		/* overwriting two or more ranges with one. */
>> -		if (lmax == mas->last)
>> -			return false;
>> -
>> -		/* Overwriting all of offset and a portion of offset + 1. */
>> +	if (mas->index == wr_mas->r_min && mas->last < lmax) {
>> +		/* Case 1 */
>> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
>> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
>>   		rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
>>   		wr_mas->pivots[offset] = mas->last;
>> -		goto done;
>> -	}
>> -
>> -	/* Doesn't end on the next range end. */
>> -	if (lmax != mas->last)
>> +	} else if (mas->index > wr_mas->r_min && mas->last == lmax) {
>> +		/* Case 2 */
>> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
>> +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
>> +		rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
>> +		wr_mas->pivots[offset] = mas->index - 1;
> 
> These two lines need to be in opposite order to ensure a reader sees
> either the value or the previous value.  If you overwrite something with
> a new value, it is possible that a reader looking for the next range
> will get the value stored at offset (but not entry).
Please think again, did you think wrong?
It doesn't happen, and swapping the order would introduce a problem:
if we update the pivot first, part of the range indexed by offset
would temporarily appear to hold the value of the range indexed by
offset + 1, which is illegal.

My assignment order remains the same as the previous version.

> 
>> +		mas->offset++; /* Keep mas accurate. */
>> +	} else {
>>   		return false;
>> +	}
>>   
>> -	/* Overwriting a portion of offset and all of offset + 1 */
>> -	if ((offset + 1 < mt_pivots[wr_mas->type]) &&
>> -	    (wr_mas->entry || wr_mas->pivots[offset + 1]))
>> -		wr_mas->pivots[offset + 1] = mas->last;
>> -
>> -	rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
>> -	wr_mas->pivots[offset] = mas->index - 1;
>> -	mas->offset++; /* Keep mas accurate. */
>> -
>> -done:
>>   	trace_ma_write(__func__, mas, 0, wr_mas->entry);
>> -	mas_update_gap(mas);
>> +	/*
>> +	 * Only update gap when the new entry is empty or there is an empty
>> +	 * entry in the original two ranges.
>> +	 */
>> +	if (!wr_mas->entry || gap)
>> +		mas_update_gap(mas);
>>   	return true;
>>   }
>>   
>> @@ -4418,7 +4427,7 @@ static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
>>   	if (new_end == wr_mas->node_end + 1 && mas_wr_append(wr_mas))
>>   		return;
>>   
>> -	if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
>> +	if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas))
>>   		return;
>>   	else if (mas_wr_node_store(wr_mas))
>>   		return;
>> -- 
>> 2.20.1
>>
>
  
Liam R. Howlett May 16, 2023, 2:17 p.m. UTC | #3
* Peng Zhang <zhangpeng.00@bytedance.com> [230516 03:27]:
> 
> 
> On 2023/5/16 02:01, Liam R. Howlett wrote:
> > * Peng Zhang <zhangpeng.00@bytedance.com> [230515 09:18]:
> > > The code of mas_wr_slot_store() is messy; make it clearer and more
> > > concise, and add comments. In addition, check whether either of the
> > > two overwritten entries is empty to avoid calling mas_update_gap()
> > > unnecessarily.
> > 
> > Please drop the cases from the comments.  These aren't that complicated
> > to need diagrams.
> > 
> > Case 1: Overwriting the range and over a part of the next range
> > Case 2: Overwriting a part of the range and over the entire next range
> > 
> > > 
> > > Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
> > > ---
> > >   lib/maple_tree.c | 79 +++++++++++++++++++++++++++---------------------
> > >   1 file changed, 44 insertions(+), 35 deletions(-)
> > > 
> > > diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> > > index 538e49feafbe4..d558e7bcb6da8 100644
> > > --- a/lib/maple_tree.c
> > > +++ b/lib/maple_tree.c
> > > @@ -4190,53 +4190,62 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
> > >    * @wr_mas: the maple write state
> > >    *
> > >    * Return: True if stored, false otherwise
> > > + *
> > > + * Case 1:
> > > + *                       r_min   r_max    lmax
> > > + *                 +-------+-------+-------+
> > > + * original range: |       |offset | end   |
> > > + *                 +-----------------------+
> > > + *                         +-----------+
> > > + * overwrite:              |           |
> > > + *                         +-----------+
> > > + *                        index       last
> > > + *
> > > + * Case 2:
> > > + *                       r_min   r_max    lmax
> > > + *                 +-------+-------+-------+
> > > + * original range: |       |offset | end   |
> > > + *                 +-------+---------------+
> > > + *                             +-----------+
> > > + * overwrite:                  |           |
> > > + *                             +-----------+
> > > + *                           index        last
> > >    */
> > >   static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
> > >   {
> > >   	struct ma_state *mas = wr_mas->mas;
> > > -	unsigned long lmax; /* Logical max. */
> > >   	unsigned char offset = mas->offset;
> > > +	unsigned char offset_end = wr_mas->offset_end;
> > > +	unsigned long lmax = wr_mas->end_piv; /* Logical max. */
> > > +	bool gap = false;
> > > -	if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
> > > -				  (offset != wr_mas->node_end)))
> > > -		return false;
> > > -
> > > -	if (offset == wr_mas->node_end - 1)
> > > -		lmax = mas->max;
> > > -	else
> > > -		lmax = wr_mas->pivots[offset + 1];
> > > -
> > > -	/* going to overwrite too many slots. */
> > > -	if (lmax < mas->last)
> > > +	if (offset_end - offset != 1)
> > >   		return false;
> > > -	if (wr_mas->r_min == mas->index) {
> > > -		/* overwriting two or more ranges with one. */
> > > -		if (lmax == mas->last)
> > > -			return false;
> > > -
> > > -		/* Overwriting all of offset and a portion of offset + 1. */
> > > +	if (mas->index == wr_mas->r_min && mas->last < lmax) {
> > > +		/* Case 1 */
> > > +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
> > > +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
> > >   		rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
> > >   		wr_mas->pivots[offset] = mas->last;
> > > -		goto done;
> > > -	}
> > > -
> > > -	/* Doesn't end on the next range end. */
> > > -	if (lmax != mas->last)
> > > +	} else if (mas->index > wr_mas->r_min && mas->last == lmax) {
> > > +		/* Case 2 */
> > > +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
> > > +		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
> > > +		rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
> > > +		wr_mas->pivots[offset] = mas->index - 1;
> > 
> > These two lines need to be in opposite order to ensure a reader sees
> > either the value or the previous value.  If you overwrite something with
> > a new value, it is possible that a reader looking for the next range
> > will get the value stored at offset (but not entry).
> Please think again, did you think wrong?

I did, thanks.

> It doesn't happen, and swapping the order would introduce a problem:
> if we update the pivot first, part of the range indexed by offset
> would temporarily appear to hold the value of the range indexed by
> offset + 1, which is illegal.
> 
> My assignment order remains the same as the previous version.

Yes, you are correct.  There is a place where the reverse is required
for RCU, but that's appending.

> 
> > 
> > > +		mas->offset++; /* Keep mas accurate. */
> > > +	} else {
> > >   		return false;
> > > +	}
> > > -	/* Overwriting a portion of offset and all of offset + 1 */
> > > -	if ((offset + 1 < mt_pivots[wr_mas->type]) &&
> > > -	    (wr_mas->entry || wr_mas->pivots[offset + 1]))
> > > -		wr_mas->pivots[offset + 1] = mas->last;
> > > -
> > > -	rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
> > > -	wr_mas->pivots[offset] = mas->index - 1;
> > > -	mas->offset++; /* Keep mas accurate. */
> > > -
> > > -done:
> > >   	trace_ma_write(__func__, mas, 0, wr_mas->entry);
> > > -	mas_update_gap(mas);
> > > +	/*
> > > +	 * Only update gap when the new entry is empty or there is an empty
> > > +	 * entry in the original two ranges.
> > > +	 */
> > > +	if (!wr_mas->entry || gap)
> > > +		mas_update_gap(mas);
> > >   	return true;
> > >   }
> > > @@ -4418,7 +4427,7 @@ static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
> > >   	if (new_end == wr_mas->node_end + 1 && mas_wr_append(wr_mas))
> > >   		return;
> > > -	if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
> > > +	if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas))
> > >   		return;
> > >   	else if (mas_wr_node_store(wr_mas))
> > >   		return;
> > > -- 
> > > 2.20.1
> > > 
> >
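
As an aside on the ordering discussed above, here is a minimal userspace
sketch of the Case 2 store (not kernel code; the two-element arrays and
names below, including case2_store(), are simplified stand-ins for the
node's slots[] and pivots[]). It shows why the new entry is published in
slot offset + 1 before pivots[offset] is lowered: a lockless reader that
picks a slot from the old pivot still finds a value that was valid for
its range.

#include <stdio.h>

/*
 * Two adjacent slots of a node; pivots[i] is the inclusive upper bound
 * of the range stored in slots[i].  Initial ranges: [0..10] -> "old A"
 * and [11..20] -> "old B".
 */
static unsigned long pivots[2] = { 10, 20 };
static void *slots[2] = { "old A", "old B" };

/* Case 2: overwrite [index..20] with new_entry, where 0 < index <= 10. */
static void case2_store(unsigned long index, void *new_entry)
{
	/*
	 * Publish the new entry first (the kernel uses rcu_assign_pointer()
	 * for this).  A reader of [index..20] now sees either "old B" or
	 * new_entry; both were stored for that range at some point.
	 */
	slots[1] = new_entry;

	/*
	 * Only then shrink the first range.  Doing this first would send a
	 * reader of, say, index..10 to slots[1] and hand back "old B" for a
	 * range that never contained it.
	 */
	pivots[0] = index - 1;
}

int main(void)
{
	case2_store(6, "new");
	printf("pivots[0]=%lu slots=[%s, %s]\n",
	       pivots[0], (char *)slots[0], (char *)slots[1]);
	return 0;
}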
  

Patch

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 538e49feafbe4..d558e7bcb6da8 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4190,53 +4190,62 @@  static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
  * @wr_mas: the maple write state
  *
  * Return: True if stored, false otherwise
+ *
+ * Case 1:
+ *                       r_min   r_max    lmax
+ *                 +-------+-------+-------+
+ * original range: |       |offset | end   |
+ *                 +-----------------------+
+ *                         +-----------+
+ * overwrite:              |           |
+ *                         +-----------+
+ *                        index       last
+ *
+ * Case 2:
+ *                       r_min   r_max    lmax
+ *                 +-------+-------+-------+
+ * original range: |       |offset | end   |
+ *                 +-------+---------------+
+ *                             +-----------+
+ * overwrite:                  |           |
+ *                             +-----------+
+ *                           index        last
  */
 static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
 {
 	struct ma_state *mas = wr_mas->mas;
-	unsigned long lmax; /* Logical max. */
 	unsigned char offset = mas->offset;
+	unsigned char offset_end = wr_mas->offset_end;
+	unsigned long lmax = wr_mas->end_piv; /* Logical max. */
+	bool gap = false;
 
-	if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
-				  (offset != wr_mas->node_end)))
-		return false;
-
-	if (offset == wr_mas->node_end - 1)
-		lmax = mas->max;
-	else
-		lmax = wr_mas->pivots[offset + 1];
-
-	/* going to overwrite too many slots. */
-	if (lmax < mas->last)
+	if (offset_end - offset != 1)
 		return false;
 
-	if (wr_mas->r_min == mas->index) {
-		/* overwriting two or more ranges with one. */
-		if (lmax == mas->last)
-			return false;
-
-		/* Overwriting all of offset and a portion of offset + 1. */
+	if (mas->index == wr_mas->r_min && mas->last < lmax) {
+		/* Case 1 */
+		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
+		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
 		rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
 		wr_mas->pivots[offset] = mas->last;
-		goto done;
-	}
-
-	/* Doesn't end on the next range end. */
-	if (lmax != mas->last)
+	} else if (mas->index > wr_mas->r_min && mas->last == lmax) {
+		/* Case 2 */
+		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset);
+		gap |= !mt_slot_locked(mas->tree, wr_mas->slots, offset + 1);
+		rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
+		wr_mas->pivots[offset] = mas->index - 1;
+		mas->offset++; /* Keep mas accurate. */
+	} else {
 		return false;
+	}
 
-	/* Overwriting a portion of offset and all of offset + 1 */
-	if ((offset + 1 < mt_pivots[wr_mas->type]) &&
-	    (wr_mas->entry || wr_mas->pivots[offset + 1]))
-		wr_mas->pivots[offset + 1] = mas->last;
-
-	rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
-	wr_mas->pivots[offset] = mas->index - 1;
-	mas->offset++; /* Keep mas accurate. */
-
-done:
 	trace_ma_write(__func__, mas, 0, wr_mas->entry);
-	mas_update_gap(mas);
+	/*
+	 * Only update gap when the new entry is empty or there is an empty
+	 * entry in the original two ranges.
+	 */
+	if (!wr_mas->entry || gap)
+		mas_update_gap(mas);
 	return true;
 }
 
@@ -4418,7 +4427,7 @@  static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
 	if (new_end == wr_mas->node_end + 1 && mas_wr_append(wr_mas))
 		return;
 
-	if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
+	if (new_end == wr_mas->node_end && mas_wr_slot_store(wr_mas))
 		return;
 	else if (mas_wr_node_store(wr_mas))
 		return;
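
One more note on the mas_update_gap() change above: the write does not
change the number of slots in use in the node (mas_wr_modify() now only
tries mas_wr_slot_store() when new_end == wr_mas->node_end), so the
node's gap information can only change if an empty range is created or
removed. A rough userspace model of the check at the end of
mas_wr_slot_store() (illustrative names, not the kernel API) is:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/*
 * The gap bookkeeping only needs updating if the new entry is empty
 * (NULL) or one of the two overwritten entries was empty.
 */
static bool need_gap_update(const void *new_entry,
			    const void *old0, const void *old1)
{
	bool gap = !old0 || !old1;	/* an overwritten range was empty */

	return !new_entry || gap;	/* ...or the new one is */
}

int main(void)
{
	int a, b;

	assert(!need_gap_update(&a, &a, &b));	/* all occupied: skip */
	assert(need_gap_update(NULL, &a, &b));	/* storing an empty range */
	assert(need_gap_update(&a, &a, NULL));	/* overwrote an empty range */
	return 0;
}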