[1/2] memory tier: Introduce sysfs for tier interleave weights.

Message ID 20230927095002.10245-2-ravis.opensrc@micron.com
State New
Headers
Series mm: mempolicy: Multi-tier interleaving |

Commit Message

Ravi Jonnalagadda Sept. 27, 2023, 9:50 a.m. UTC
  From: Srinivasulu Thanneeru <sthanneeru@micron.com>

Allocating pages across tiers is accomplished by provisioning
interleave weights for each tier, with the distribution based on
these weight values.
By default, every tier has a weight of 1, which corresponds to the
standard page allocation behaviour. Likewise, every node within a
tier has a default weight of 1.

Signed-off-by: Srinivasulu Thanneeru <sthanneeru@micron.com>
Co-authored-by: Ravi Jonnalagadda <ravis.opensrc@micron.com>
---
 include/linux/memory-tiers.h |  2 ++
 mm/memory-tiers.c            | 46 +++++++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)
  

Comments

Jonathan Cameron Oct. 2, 2023, 10:26 a.m. UTC | #1
On Wed, 27 Sep 2023 15:20:01 +0530
Ravi Jonnalagadda <ravis.opensrc@micron.com> wrote:

> From: Srinivasulu Thanneeru <sthanneeru@micron.com>
> 
> Allocating pages across tiers is accomplished by provisioning
> interleave weights for each tier, with the distribution based on
> these weight values.
> By default, all tiers will have a weight of 1, which means
> default standard page allocation. By default all nodes within
> tier will have weight of 1.
> 
> Signed-off-by: Srinivasulu Thanneeru <sthanneeru@micron.com>
> Co-authored-by: Ravi Jonnalagadda <ravis.opensrc@micron.com>

ABI docs? The new sysfs attribute should be documented in:

Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers

A few trivial comments inline.
> ---
>  include/linux/memory-tiers.h |  2 ++
>  mm/memory-tiers.c            | 46 +++++++++++++++++++++++++++++++++++-
>  2 files changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 437441cdf78f..c62d286749d0 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -19,6 +19,8 @@
>   */
>  #define MEMTIER_ADISTANCE_DRAM	((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
>  
> +#define MAX_TIER_INTERLEAVE_WEIGHT 100
> +
>  struct memory_tier;
>  struct memory_dev_type {
>  	/* list of memory types that are part of same tier as this type */
> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
> index 37a4f59d9585..7e06c9e0fa41 100644
> --- a/mm/memory-tiers.c
> +++ b/mm/memory-tiers.c
> @@ -13,6 +13,11 @@ struct memory_tier {
>  	struct list_head list;
>  	/* list of all memory types part of this tier */
>  	struct list_head memory_types;
> +	/*
> +	 * By default all tiers will have weight as 1, which means they
> +	 * follow default standard allocation.
> +	 */
> +	unsigned short interleave_weight;

If you are going to use a fixed-size type, use it consistently:
u16 here (though u8 probably makes more sense, as per the comment below).


>  	/*
>  	 * start value of abstract distance. memory tier maps
>  	 * an abstract distance  range,
> @@ -145,8 +150,45 @@ static ssize_t nodelist_show(struct device *dev,
>  }
>  static DEVICE_ATTR_RO(nodelist);
>  
> +static ssize_t interleave_weight_show(struct device *dev,
> +				      struct device_attribute *attr, char *buf)
> +{
> +	int ret;
> +	struct memory_tier *tier = to_memory_tier(dev);
> +
> +	mutex_lock(&memory_tier_lock);
> +	ret = sysfs_emit(buf, "%u\n", tier->interleave_weight);
> +	mutex_unlock(&memory_tier_lock);

For this one

	guard(mutex)(&memory_tier_lock);
	return sysfs_emit()...

would perhaps be slightly nicer
 (see below)

> +
> +	return ret;
> +}
> +
> +static ssize_t interleave_weight_store(struct device *dev,
> +				       struct device_attribute *attr,
> +				       const char *buf, size_t size)
> +{
> +	unsigned short value;
> +	int ret;
> +	struct memory_tier *tier = to_memory_tier(dev);
> +
> +	ret = kstrtou16(buf, 0, &value);

Why u16?  Max is 100.  I'd not mind if you just put it in an
unsigned int, but it seems odd to choose a specific size and
pick one that is twice as big as needed!

> +
> +	if (ret)
> +		return ret;
> +	if (value > MAX_TIER_INTERLEAVE_WEIGHT)
> +		return -EINVAL;
> +
> +	mutex_lock(&memory_tier_lock);

You could play with the new cleanup.h toys, though it doesn't save a lot here.

	scoped_guard(mutex, &memory_tier_lock)
		tier->interleave_weight = value;

> +	tier->interleave_weight = value;
> +	mutex_unlock(&memory_tier_lock);
> +
> +	return size;
> +}
> +static DEVICE_ATTR_RW(interleave_weight);
> +
>  static struct attribute *memtier_dev_attrs[] = {
>  	&dev_attr_nodelist.attr,
> +	&dev_attr_interleave_weight.attr,
>  	NULL
>  };
>  
> @@ -489,8 +531,10 @@ static struct memory_tier *set_node_memory_tier(int node)
>  	memtype = node_memory_types[node].memtype;
>  	node_set(node, memtype->nodes);
>  	memtier = find_create_memory_tier(memtype);
> -	if (!IS_ERR(memtier))
> +	if (!IS_ERR(memtier)) {
>  		rcu_assign_pointer(pgdat->memtier, memtier);
> +		memtier->interleave_weight = 1;
> +	}
>  	return memtier;
>  }
>
  

Patch

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 437441cdf78f..c62d286749d0 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -19,6 +19,8 @@ 
  */
 #define MEMTIER_ADISTANCE_DRAM	((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
 
+#define MAX_TIER_INTERLEAVE_WEIGHT 100
+
 struct memory_tier;
 struct memory_dev_type {
 	/* list of memory types that are part of same tier as this type */
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 37a4f59d9585..7e06c9e0fa41 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -13,6 +13,11 @@  struct memory_tier {
 	struct list_head list;
 	/* list of all memory types part of this tier */
 	struct list_head memory_types;
+	/*
+	 * By default all tiers will have weight as 1, which means they
+	 * follow default standard allocation.
+	 */
+	unsigned short interleave_weight;
 	/*
 	 * start value of abstract distance. memory tier maps
 	 * an abstract distance  range,
@@ -145,8 +150,45 @@  static ssize_t nodelist_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(nodelist);
 
+static ssize_t interleave_weight_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	int ret;
+	struct memory_tier *tier = to_memory_tier(dev);
+
+	mutex_lock(&memory_tier_lock);
+	ret = sysfs_emit(buf, "%u\n", tier->interleave_weight);
+	mutex_unlock(&memory_tier_lock);
+
+	return ret;
+}
+
+static ssize_t interleave_weight_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	unsigned short value;
+	int ret;
+	struct memory_tier *tier = to_memory_tier(dev);
+
+	ret = kstrtou16(buf, 0, &value);
+
+	if (ret)
+		return ret;
+	if (value > MAX_TIER_INTERLEAVE_WEIGHT)
+		return -EINVAL;
+
+	mutex_lock(&memory_tier_lock);
+	tier->interleave_weight = value;
+	mutex_unlock(&memory_tier_lock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(interleave_weight);
+
 static struct attribute *memtier_dev_attrs[] = {
 	&dev_attr_nodelist.attr,
+	&dev_attr_interleave_weight.attr,
 	NULL
 };
 
@@ -489,8 +531,10 @@  static struct memory_tier *set_node_memory_tier(int node)
 	memtype = node_memory_types[node].memtype;
 	node_set(node, memtype->nodes);
 	memtier = find_create_memory_tier(memtype);
-	if (!IS_ERR(memtier))
+	if (!IS_ERR(memtier)) {
 		rcu_assign_pointer(pgdat->memtier, memtier);
+		memtier->interleave_weight = 1;
+	}
 	return memtier;
 }