[v4,09/12] x86/mtrr: construct a memory map with cache modes

Message ID 20230306163425.8324-10-jgross@suse.com
State New
Series x86/mtrr: fix handling with PAT but without MTRR

Commit Message

Juergen Gross March 6, 2023, 4:34 p.m. UTC
  After MTRR initialization, construct a memory map with cache modes from
the MTRR values. This will speed up lookups via mtrr_type_lookup(),
especially in the case of overlapping MTRRs.

This will be needed when switching the semantics of the "uniform"
parameter of mtrr_type_lookup() from "only covered by one MTRR" to
"memory range has a uniform cache mode", which is what the callers
really want to know. Today this information is not easily available if
the MTRRs are not sorted by base address.

The map will be built in __initdata. When memory management is up, the
map will be moved to dynamically allocated memory in order to avoid the
need for an overly large array. The size of this array is calculated
from the number of variable MTRR registers and the size needed for the
fixed entries.

Only add the map creation and expansion for now. The lookup will be
added later.

When writing new MTRR entries in the running system, rebuild the map
inside the call from mtrr_rendezvous_handler() in order to avoid nasty
race conditions with concurrent lookups.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3:
- new patch
---
 arch/x86/kernel/cpu/mtrr/generic.c | 254 +++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mtrr/mtrr.c    |   6 +-
 arch/x86/kernel/cpu/mtrr/mtrr.h    |   3 +
 3 files changed, 262 insertions(+), 1 deletion(-)
  

Comments

Borislav Petkov March 29, 2023, 12:51 p.m. UTC | #1
On Mon, Mar 06, 2023 at 05:34:22PM +0100, Juergen Gross wrote:
> +struct cache_map {
> +	u64 start;
> +	u64 end;
> +	u8 type;
> +	bool fixed;

Can those last two be a single

	u64 flags;

which contains the type and fixed and later perhaps other things so that we can
have those elements aligned and we don't waste space unnecessarily by having
a bool for a single bit of information?
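
Something like this, maybe (field layout and macro names are just placeholders,
not from the patch):

	struct cache_map {
		u64 start;
		u64 end;
		u64 flags;		/* type in bits 0-7, "fixed" in bit 8 */
	};

	#define CACHE_MAP_TYPE(flags)	((u8)((flags) & 0xff))
	#define CACHE_MAP_FIXED		BIT(8)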

> +};
> +
> +/*
> + * CACHE_MAP_MAX is the maximum number of memory ranges in cache_map, where
> + * no 2 adjacent ranges have the same cache mode (those would be merged).
> + * The number is based on the worst case:
> + * - no two adjacent fixed MTRRs share the same cache mode
> + * - one variable MTRR is spanning a huge area with mode WB
> + * - 255 variable MTRRs with mode UC all overlap with the WB MTRR, creating 2
> + *   additional ranges each (result like "ababababa...aba" with a = WB, b = UC),
> + *   accounting for MTRR_MAX_VAR_RANGES * 2 - 1 range entries
> + * - a TOM2 area (even with overlapping an UC MTRR can't add 2 range entries
> + *   to the possible maximum, as it always starts at 4GB, thus it can't be in
> + *   the middle of that MTRR, unless that MTRR starts at 0, which would remove
> + *   the initial "a" from the "abababa" pattern above)
> + * The map won't contain ranges with no matching MTRR (those fall back to the
> + * default cache mode).
> + */

Nice.

> +#define CACHE_MAP_MAX	(MTRR_NUM_FIXED_RANGES + MTRR_MAX_VAR_RANGES * 2)
> +
> +static struct cache_map init_cache_map[CACHE_MAP_MAX] __initdata;
> +static struct cache_map *cache_map __refdata = init_cache_map;
> +static unsigned int cache_map_size = CACHE_MAP_MAX;
> +static unsigned int cache_map_n;
> +static unsigned int cache_map_fixed;
> +
>  static unsigned long smp_changes_mask;
>  static int mtrr_state_set;
>  u64 mtrr_tom2;
> @@ -78,6 +109,20 @@ static u64 get_mtrr_size(u64 mask)
>  	return size;
>  }
>  
> +static u8 get_var_mtrr_state(unsigned int reg, u64 *start, u64 *size)
> +{
> +	struct mtrr_var_range *mtrr = mtrr_state.var_ranges + reg;
> +
> +	if (!(mtrr->mask_lo & (1 << 11)))

I'm guessing that's the

	MTRR Pair Enable

bit?

Use a macro with a proper name pls.
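
E.g. (macro name is only a suggestion):

	/* Bit 11 of the MTRR PHYSMASK MSR: this variable MTRR is valid. */
	#define MTRR_PHYSMASK_V		BIT(11)

		if (!(mtrr->mask_lo & MTRR_PHYSMASK_V))
			return MTRR_TYPE_INVALID;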

> +		return MTRR_TYPE_INVALID;
> +
> +	*start = (((u64)mtrr->base_hi) << 32) + (mtrr->base_lo & PAGE_MASK);
> +	*size = get_mtrr_size((((u64)mtrr->mask_hi) << 32) +
> +			      (mtrr->mask_lo & PAGE_MASK));
> +
> +	return mtrr->base_lo & 0xff;
> +}
> +
>  static u8 get_effective_type(u8 type1, u8 type2)
>  {
>  	if (type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE)
> @@ -241,6 +286,211 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
>  	return mtrr_state.def_type;
>  }
>  
> +static void rm_map_entry_at(int idx)
> +{
> +	int i;
> +
> +	for (i = idx; i < cache_map_n - 1; i++)
> +		cache_map[i] = cache_map[i + 1];

That can be a memmove, I think. Something like

	memmove((void *)&cache_map[i],
		(void *)&cache_map[i + 1], 
		(cache_map_n - idx) * sizeof(struct cache_map));
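
i.e., something like (note only cache_map_n - idx - 1 entries follow idx):

	memmove(&cache_map[idx], &cache_map[idx + 1],
		(cache_map_n - idx - 1) * sizeof(*cache_map));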


> +
> +	cache_map_n--;
> +}
> +
> +/*
> + * Add an entry into cache_map at a specific index.
> + * Merges adjacent entries if appropriate.
> + * Return the number of merges for correcting the scan index.

Make that a block comment:

* Add an entry into cache_map at a specific index.  Merges adjacent entries if
* appropriate.  Return the number of merges for correcting the scan index.

vim, for example, has the cool "gq}" command for that.

> + */
> +static int add_map_entry_at(u64 start, u64 end, u8 type, int idx)
> +{
> +	bool merge_prev, merge_next;
> +	int i;
> +
> +	if (start >= end)
> +		return 0;
> +
> +	merge_prev = (idx > 0 && !cache_map[idx - 1].fixed &&
> +		      start == cache_map[idx - 1].end &&
> +		      type == cache_map[idx - 1].type);
> +	merge_next = (idx < cache_map_n && !cache_map[idx].fixed &&
> +		      end == cache_map[idx].start &&
> +		      type == cache_map[idx].type);

Uuh, head hurts. How about:

	bool merge_prev = false, merge_next = false;

	...

	if (idx > 0) {
		struct cache_map *prev = &cache_map[idx - 1];

		if (!prev->fixed &&
		     prev->end  == start &&
		     prev->type == type)
			merge_prev = true;
	}

Untested ofc, but you get the idea. It is a lot more readable this way. And the
same with merge_next.
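
The merge_next check would then look along the same lines (equally untested):

	if (idx < cache_map_n) {
		struct cache_map *next = &cache_map[idx];

		if (!next->fixed &&
		     next->start == end &&
		     next->type  == type)
			merge_next = true;
	}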

> +
> +	if (merge_prev && merge_next) {
> +		cache_map[idx - 1].end = cache_map[idx].end;
> +		rm_map_entry_at(idx);
> +		return 2;
> +	}
> +	if (merge_prev) {
> +		cache_map[idx - 1].end = end;
> +		return 1;
> +	}
> +	if (merge_next) {
> +		cache_map[idx].start = start;
> +		return 1;
> +	}
> +
> +	/* Sanity check: the array should NEVER be too small! */
> +	if (cache_map_n == cache_map_size) {
> +		WARN(1, "MTRR cache mode memory map exhausted!\n");
> +		cache_map_n = cache_map_fixed;
> +		return 0;
> +	}
> +
> +	for (i = cache_map_n; i > idx; i--)
> +		cache_map[i] = cache_map[i - 1];

memmove as above.

> +
> +	cache_map[idx].start = start;
> +	cache_map[idx].end = end;
> +	cache_map[idx].type = type;
> +	cache_map[idx].fixed = false;
> +	cache_map_n++;
> +
> +	return 0;
> +}
> +
> +/* Clear a part of an entry. Return 1 if start of entry is still valid. */
> +static int clr_map_range_at(u64 start, u64 end, int idx)
> +{
> +	int ret = start != cache_map[idx].start;
> +	u64 tmp;
> +
> +	if (start == cache_map[idx].start && end == cache_map[idx].end) {
> +		rm_map_entry_at(idx);
> +	} else if (start == cache_map[idx].start) {
> +		cache_map[idx].start = end;
> +	} else if (end == cache_map[idx].end) {
> +		cache_map[idx].end = start;
> +	} else {
> +		tmp = cache_map[idx].end;
> +		cache_map[idx].end = start;
> +		add_map_entry_at(end, tmp, cache_map[idx].type, idx + 1);
> +	}
> +
> +	return ret;
> +}
> +
> +static void add_map_entry(u64 start, u64 end, u8 type)
> +{
> +	int i;
> +	u8 new_type, old_type;
> +	u64 tmp;

"int i;" goes here.

> +
> +	for (i = 0; i < cache_map_n && start < end; i++) {
> +		if (start >= cache_map[i].end)
> +			continue;
> +
> +		if (start < cache_map[i].start) {
> +			/* Region start has no overlap. */
> +			tmp = min(end, cache_map[i].start);
> +			i -= add_map_entry_at(start, tmp,  type, i);

Uff, what happens if i == 0 and becomes negative here?

Can that even happen?

This feels weird: using function retvals as index var decrements. I have
only a vague idea what you're doing in this function but it feels weird.
Maybe I need to play through an example to grok it better...

> +			start = tmp;
> +			continue;
> +		}
> +
> +		new_type = get_effective_type(type, cache_map[i].type);
> +		old_type = cache_map[i].type;
> +
> +		if (cache_map[i].fixed || new_type == old_type) {
> +			/* Cut off start of new entry. */
> +			start = cache_map[i].end;
> +			continue;
> +		}
> +
> +		tmp = min(end, cache_map[i].end);
> +		i += clr_map_range_at(start, tmp, i);
> +		i -= add_map_entry_at(start, tmp, new_type, i);
> +		start = tmp;
> +	}
> +
> +	add_map_entry_at(start, end, type, i);
> +}
> +
> +/* Add variable MTRRs to cache map. */
> +static void map_add_var(void)
> +{
> +	unsigned int i;
> +	u64 start, size;
> +	u8 type;
> +
> +	/* Add AMD magic MTRR. */

Why magic?

> +	if (mtrr_tom2) {
> +		add_map_entry(1ULL << 32, mtrr_tom2 - 1, MTRR_TYPE_WRBACK);

BIT_ULL(32)

> +		cache_map[cache_map_n - 1].fixed = true;
> +	}
> +
> +	for (i = 0; i < num_var_ranges; i++) {
> +		type = get_var_mtrr_state(i, &start, &size);
> +		if (type != MTRR_TYPE_INVALID)
> +			add_map_entry(start, start + size, type);
> +	}
> +}
> +
> +/* Rebuild map by replacing variable entries. */
> +static void rebuild_map(void)
> +{
> +	cache_map_n = cache_map_fixed;
> +
> +	map_add_var();
> +}
> +
> +/* Build the cache_map containing the cache modes per memory range. */
> +void mtrr_build_map(void)
> +{
> +	unsigned int i;
> +	u64 start, end, size;
> +	u8 type;
> +
> +	if (!mtrr_state.enabled)
> +		return;
> +
> +	/* Add fixed MTRRs, optimize for adjacent entries with same type. */
> +	if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) {
> +		start = 0;
> +		end = size = 0x10000;
> +		type = mtrr_state.fixed_ranges[0];
> +
> +		for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) {
> +			if (i == 8 || i == 24)
> +				size >>= 2;
> +
> +			if (mtrr_state.fixed_ranges[i] != type) {
> +				add_map_entry(start, end, type);
> +				start = end;
> +				type = mtrr_state.fixed_ranges[i];
> +			}
> +			end += size;
> +		}
> +		add_map_entry(start, end, type);
> +	}
> +
> +	/* Mark fixed and magic MTRR as fixed, they take precedence. */
> +	for (i = 0; i < cache_map_n; i++)
> +		cache_map[i].fixed = true;
> +	cache_map_fixed = cache_map_n;
> +
> +	map_add_var();

I think it would be useful to issue some stats here:

	pr_info("MTRR map: ... size: ..., ... entries: ..., memory used: ....");

and so on so that we can get some feedback when that happens. We can always
drop it later but for the initial runs it would be prudent to have that.
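
Something along the lines of (field selection is just an example):

	pr_info("MTRR map: %u entries (%u fixed + %u variable; max %u), built from %u variable MTRRs\n",
		cache_map_n, cache_map_fixed, cache_map_n - cache_map_fixed,
		cache_map_size, num_var_ranges);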

> +/* Copy the cache_map from __initdata memory to dynamically allocated one. */
> +void __init mtrr_copy_map(void)
> +{
> +	unsigned int new_size = cache_map_fixed + 2 * num_var_ranges;
> +
> +	if (!mtrr_state.enabled || !new_size) {
> +		cache_map = NULL;
> +		return;
> +	}
> +
> +	mutex_lock(&mtrr_mutex);
> +
> +	cache_map = kcalloc(new_size, sizeof(*cache_map), GFP_KERNEL);
> +	memmove(cache_map, init_cache_map, cache_map_n * sizeof(*cache_map));
> +	cache_map_size = new_size;
> +
> +	mutex_unlock(&mtrr_mutex);
> +}
> +
>  /**
>   * mtrr_overwrite_state - set static MTRR state
>   *
> @@ -815,6 +1065,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
>  
>  	cache_enable();
>  	local_irq_restore(flags);
> +
> +	/* On the first cpu rebuild the cache mode memory map. */

s/cpu/CPU/

> +	if (smp_processor_id() == cpumask_first(cpu_online_mask))

Why not in mtrr_bp_init()? That is the first CPU.

> +		rebuild_map();
>  }
>  
>  int generic_validate_add_page(unsigned long base, unsigned long size,
> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
> index 50cd2287b6e1..1dbb9fdfd87b 100644
> --- a/arch/x86/kernel/cpu/mtrr/mtrr.c
> +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
> @@ -65,7 +65,7 @@ static bool mtrr_enabled(void)
>  }
>  
>  unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
> -static DEFINE_MUTEX(mtrr_mutex);
> +DEFINE_MUTEX(mtrr_mutex);
>  
>  u64 size_or_mask, size_and_mask;
>  
> @@ -668,6 +668,7 @@ void __init mtrr_bp_init(void)
>  		/* Software overwrite of MTRR state, only for generic case. */
>  		mtrr_calc_physbits(true);
>  		init_table();
> +		mtrr_build_map();
>  		pr_info("MTRRs set to read-only\n");
>  
>  		return;
> @@ -705,6 +706,7 @@ void __init mtrr_bp_init(void)
>  			if (get_mtrr_state()) {
>  				memory_caching_control |= CACHE_MTRR;
>  				changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr);
> +				mtrr_build_map();
>  			} else {
>  				mtrr_if = NULL;
>  				why = "by BIOS";
> @@ -733,6 +735,8 @@ void mtrr_save_state(void)
>  
>  static int __init mtrr_init_finialize(void)
>  {
> +	mtrr_copy_map();

Move that...

> +
>  	if (!mtrr_enabled())
>  		return 0;

... here.

So yeah, I like the general direction but this is a complex patch. Lemme
add some printks to it in order to get a better idea of what happens.

Thx.
  
Juergen Gross March 29, 2023, 1:39 p.m. UTC | #2
On 29.03.23 14:51, Borislav Petkov wrote:
> On Mon, Mar 06, 2023 at 05:34:22PM +0100, Juergen Gross wrote:
>> +struct cache_map {
>> +	u64 start;
>> +	u64 end;
>> +	u8 type;
>> +	bool fixed;
> 
> Can those last two be a single
> 
> 	u64 flags;
> 
> which contains the type and fixed and later perhaps other things so that we can
> have those elements aligned and we don't waste space unnecessarily by having
> a bool for a single bit of information?

Yes, of course.

> 
>> +};
>> +
>> +/*
>> + * CACHE_MAP_MAX is the maximum number of memory ranges in cache_map, where
>> + * no 2 adjacent ranges have the same cache mode (those would be merged).
>> + * The number is based on the worst case:
>> + * - no two adjacent fixed MTRRs share the same cache mode
>> + * - one variable MTRR is spanning a huge area with mode WB
>> + * - 255 variable MTRRs with mode UC all overlap with the WB MTRR, creating 2
>> + *   additional ranges each (result like "ababababa...aba" with a = WB, b = UC),
>> + *   accounting for MTRR_MAX_VAR_RANGES * 2 - 1 range entries
>> + * - a TOM2 area (even with overlapping an UC MTRR can't add 2 range entries
>> + *   to the possible maximum, as it always starts at 4GB, thus it can't be in
>> + *   the middle of that MTRR, unless that MTRR starts at 0, which would remove
>> + *   the initial "a" from the "abababa" pattern above)
>> + * The map won't contain ranges with no matching MTRR (those fall back to the
>> + * default cache mode).
>> + */
> 
> Nice.
> 
>> +#define CACHE_MAP_MAX	(MTRR_NUM_FIXED_RANGES + MTRR_MAX_VAR_RANGES * 2)
>> +
>> +static struct cache_map init_cache_map[CACHE_MAP_MAX] __initdata;
>> +static struct cache_map *cache_map __refdata = init_cache_map;
>> +static unsigned int cache_map_size = CACHE_MAP_MAX;
>> +static unsigned int cache_map_n;
>> +static unsigned int cache_map_fixed;
>> +
>>   static unsigned long smp_changes_mask;
>>   static int mtrr_state_set;
>>   u64 mtrr_tom2;
>> @@ -78,6 +109,20 @@ static u64 get_mtrr_size(u64 mask)
>>   	return size;
>>   }
>>   
>> +static u8 get_var_mtrr_state(unsigned int reg, u64 *start, u64 *size)
>> +{
>> +	struct mtrr_var_range *mtrr = mtrr_state.var_ranges + reg;
>> +
>> +	if (!(mtrr->mask_lo & (1 << 11)))
> 
> I'm guessing that's the
> 
> 	MTRR Pair Enable
> 
> bit?
> 
> Use a macro with a proper name pls.

Okay.

> 
>> +		return MTRR_TYPE_INVALID;
>> +
>> +	*start = (((u64)mtrr->base_hi) << 32) + (mtrr->base_lo & PAGE_MASK);
>> +	*size = get_mtrr_size((((u64)mtrr->mask_hi) << 32) +
>> +			      (mtrr->mask_lo & PAGE_MASK));
>> +
>> +	return mtrr->base_lo & 0xff;
>> +}
>> +
>>   static u8 get_effective_type(u8 type1, u8 type2)
>>   {
>>   	if (type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE)
>> @@ -241,6 +286,211 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
>>   	return mtrr_state.def_type;
>>   }
>>   
>> +static void rm_map_entry_at(int idx)
>> +{
>> +	int i;
>> +
>> +	for (i = idx; i < cache_map_n - 1; i++)
>> +		cache_map[i] = cache_map[i + 1];
> 
> That can be a memmove, I think. Something like
> 
> 	memmove((void *)&cache_map[i],
> 		(void *)&cache_map[i + 1],
> 		(cache_map_n - idx) * sizeof(struct cache_map));

Okay.

> 
> 
>> +
>> +	cache_map_n--;
>> +}
>> +
>> +/*
>> + * Add an entry into cache_map at a specific index.
>> + * Merges adjacent entries if appropriate.
>> + * Return the number of merges for correcting the scan index.
> 
> Make that a block comment:
> 
> * Add an entry into cache_map at a specific index.  Merges adjacent entries if
> * appropriate.  Return the number of merges for correcting the scan index.

Okay.

> 
> vim, for example, has the cool "gq}" command for that.
> 
>> + */
>> +static int add_map_entry_at(u64 start, u64 end, u8 type, int idx)
>> +{
>> +	bool merge_prev, merge_next;
>> +	int i;
>> +
>> +	if (start >= end)
>> +		return 0;
>> +
>> +	merge_prev = (idx > 0 && !cache_map[idx - 1].fixed &&
>> +		      start == cache_map[idx - 1].end &&
>> +		      type == cache_map[idx - 1].type);
>> +	merge_next = (idx < cache_map_n && !cache_map[idx].fixed &&
>> +		      end == cache_map[idx].start &&
>> +		      type == cache_map[idx].type);
> 
> Uuh, head hurts. How about:
> 
> 	bool merge_prev = false, merge_next = false;
> 
> 	...
> 
> 	if (idx > 0) {
> 		struct cache_map *prev = &cache_map[idx - 1];
> 
> 		if (!prev->fixed &&
> 		     prev->end  == start &&
> 		     prev->type == type)
> 			merge_prev = true;
> 	}
> 
> Untested ofc, but you get the idea. It is a lot more readable this way. And the
> same with merge_next.

Fine with me.

> 
>> +
>> +	if (merge_prev && merge_next) {
>> +		cache_map[idx - 1].end = cache_map[idx].end;
>> +		rm_map_entry_at(idx);
>> +		return 2;
>> +	}
>> +	if (merge_prev) {
>> +		cache_map[idx - 1].end = end;
>> +		return 1;
>> +	}
>> +	if (merge_next) {
>> +		cache_map[idx].start = start;
>> +		return 1;
>> +	}
>> +
>> +	/* Sanity check: the array should NEVER be too small! */
>> +	if (cache_map_n == cache_map_size) {
>> +		WARN(1, "MTRR cache mode memory map exhausted!\n");
>> +		cache_map_n = cache_map_fixed;
>> +		return 0;
>> +	}
>> +
>> +	for (i = cache_map_n; i > idx; i--)
>> +		cache_map[i] = cache_map[i - 1];
> 
> memmove as above.

Okay.

> 
>> +
>> +	cache_map[idx].start = start;
>> +	cache_map[idx].end = end;
>> +	cache_map[idx].type = type;
>> +	cache_map[idx].fixed = false;
>> +	cache_map_n++;
>> +
>> +	return 0;
>> +}
>> +
>> +/* Clear a part of an entry. Return 1 if start of entry is still valid. */
>> +static int clr_map_range_at(u64 start, u64 end, int idx)
>> +{
>> +	int ret = start != cache_map[idx].start;
>> +	u64 tmp;
>> +
>> +	if (start == cache_map[idx].start && end == cache_map[idx].end) {
>> +		rm_map_entry_at(idx);
>> +	} else if (start == cache_map[idx].start) {
>> +		cache_map[idx].start = end;
>> +	} else if (end == cache_map[idx].end) {
>> +		cache_map[idx].end = start;
>> +	} else {
>> +		tmp = cache_map[idx].end;
>> +		cache_map[idx].end = start;
>> +		add_map_entry_at(end, tmp, cache_map[idx].type, idx + 1);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static void add_map_entry(u64 start, u64 end, u8 type)
>> +{
>> +	int i;
>> +	u8 new_type, old_type;
>> +	u64 tmp;
> 
> "int i;" goes here.

Okay.

> 
>> +
>> +	for (i = 0; i < cache_map_n && start < end; i++) {
>> +		if (start >= cache_map[i].end)
>> +			continue;
>> +
>> +		if (start < cache_map[i].start) {
>> +			/* Region start has no overlap. */
>> +			tmp = min(end, cache_map[i].start);
>> +			i -= add_map_entry_at(start, tmp,  type, i);
> 
> Uff, what happens if i == 0 and becomes negative here?
> 
> Can that even happen?

No. :-)

> 
> This feels weird: using function retvals as index var decrements. I have
> only a vague idea what you're doing in this function but it feels weird.
> Maybe I need to play through an example to grok it better...

The final form of the code is the result of an iterative process. :-)

It looked much worse in the beginning.

> 
>> +			start = tmp;
>> +			continue;
>> +		}
>> +
>> +		new_type = get_effective_type(type, cache_map[i].type);
>> +		old_type = cache_map[i].type;
>> +
>> +		if (cache_map[i].fixed || new_type == old_type) {
>> +			/* Cut off start of new entry. */
>> +			start = cache_map[i].end;
>> +			continue;
>> +		}
>> +
>> +		tmp = min(end, cache_map[i].end);
>> +		i += clr_map_range_at(start, tmp, i);
>> +		i -= add_map_entry_at(start, tmp, new_type, i);
>> +		start = tmp;
>> +	}
>> +
>> +	add_map_entry_at(start, end, type, i);
>> +}
>> +
>> +/* Add variable MTRRs to cache map. */
>> +static void map_add_var(void)
>> +{
>> +	unsigned int i;
>> +	u64 start, size;
>> +	u8 type;
>> +
>> +	/* Add AMD magic MTRR. */
> 
> Why magic?

I've reused the wording from cleanup.c (just above amd_special_default_mtrr()).

> 
>> +	if (mtrr_tom2) {
>> +		add_map_entry(1ULL << 32, mtrr_tom2 - 1, MTRR_TYPE_WRBACK);
> 
> BIT_ULL(32)

Okay.

> 
>> +		cache_map[cache_map_n - 1].fixed = true;
>> +	}
>> +
>> +	for (i = 0; i < num_var_ranges; i++) {
>> +		type = get_var_mtrr_state(i, &start, &size);
>> +		if (type != MTRR_TYPE_INVALID)
>> +			add_map_entry(start, start + size, type);
>> +	}
>> +}
>> +
>> +/* Rebuild map by replacing variable entries. */
>> +static void rebuild_map(void)
>> +{
>> +	cache_map_n = cache_map_fixed;
>> +
>> +	map_add_var();
>> +}
>> +
>> +/* Build the cache_map containing the cache modes per memory range. */
>> +void mtrr_build_map(void)
>> +{
>> +	unsigned int i;
>> +	u64 start, end, size;
>> +	u8 type;
>> +
>> +	if (!mtrr_state.enabled)
>> +		return;
>> +
>> +	/* Add fixed MTRRs, optimize for adjacent entries with same type. */
>> +	if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) {
>> +		start = 0;
>> +		end = size = 0x10000;
>> +		type = mtrr_state.fixed_ranges[0];
>> +
>> +		for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) {
>> +			if (i == 8 || i == 24)
>> +				size >>= 2;
>> +
>> +			if (mtrr_state.fixed_ranges[i] != type) {
>> +				add_map_entry(start, end, type);
>> +				start = end;
>> +				type = mtrr_state.fixed_ranges[i];
>> +			}
>> +			end += size;
>> +		}
>> +		add_map_entry(start, end, type);
>> +	}
>> +
>> +	/* Mark fixed and magic MTRR as fixed, they take precedence. */
>> +	for (i = 0; i < cache_map_n; i++)
>> +		cache_map[i].fixed = true;
>> +	cache_map_fixed = cache_map_n;
>> +
>> +	map_add_var();
> 
> I think it would be useful to issue some stats here:
> 
> 	pr_info("MTRR map: ... size: ..., ... entries: ..., memory used: ....");
> 
> and so on so that we can get some feedback when that happens. We can always
> drop it later but for the initial runs it would be prudent to have that.

Fine with me.

> 
>> +/* Copy the cache_map from __initdata memory to dynamically allocated one. */
>> +void __init mtrr_copy_map(void)
>> +{
>> +	unsigned int new_size = cache_map_fixed + 2 * num_var_ranges;
>> +
>> +	if (!mtrr_state.enabled || !new_size) {
>> +		cache_map = NULL;
>> +		return;
>> +	}
>> +
>> +	mutex_lock(&mtrr_mutex);
>> +
>> +	cache_map = kcalloc(new_size, sizeof(*cache_map), GFP_KERNEL);
>> +	memmove(cache_map, init_cache_map, cache_map_n * sizeof(*cache_map));
>> +	cache_map_size = new_size;
>> +
>> +	mutex_unlock(&mtrr_mutex);
>> +}
>> +
>>   /**
>>    * mtrr_overwrite_state - set static MTRR state
>>    *
>> @@ -815,6 +1065,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
>>   
>>   	cache_enable();
>>   	local_irq_restore(flags);
>> +
>> +	/* On the first cpu rebuild the cache mode memory map. */
> 
> s/cpu/CPU/
> 
>> +	if (smp_processor_id() == cpumask_first(cpu_online_mask))
> 
> Why not in mtrr_bp_init()? That is the first CPU.

Yeah, but generic_set_mtrr() can be called after boot, too.

> 
>> +		rebuild_map();
>>   }
>>   
>>   int generic_validate_add_page(unsigned long base, unsigned long size,
>> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
>> index 50cd2287b6e1..1dbb9fdfd87b 100644
>> --- a/arch/x86/kernel/cpu/mtrr/mtrr.c
>> +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
>> @@ -65,7 +65,7 @@ static bool mtrr_enabled(void)
>>   }
>>   
>>   unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
>> -static DEFINE_MUTEX(mtrr_mutex);
>> +DEFINE_MUTEX(mtrr_mutex);
>>   
>>   u64 size_or_mask, size_and_mask;
>>   
>> @@ -668,6 +668,7 @@ void __init mtrr_bp_init(void)
>>   		/* Software overwrite of MTRR state, only for generic case. */
>>   		mtrr_calc_physbits(true);
>>   		init_table();
>> +		mtrr_build_map();
>>   		pr_info("MTRRs set to read-only\n");
>>   
>>   		return;
>> @@ -705,6 +706,7 @@ void __init mtrr_bp_init(void)
>>   			if (get_mtrr_state()) {
>>   				memory_caching_control |= CACHE_MTRR;
>>   				changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr);
>> +				mtrr_build_map();
>>   			} else {
>>   				mtrr_if = NULL;
>>   				why = "by BIOS";
>> @@ -733,6 +735,8 @@ void mtrr_save_state(void)
>>   
>>   static int __init mtrr_init_finialize(void)
>>   {
>> +	mtrr_copy_map();
> 
> Move that...
> 
>> +
>>   	if (!mtrr_enabled())
>>   		return 0;
> 
> ... here.

Umm, not really. I want to do the copy even in the Xen PV case.

> 
> So yeah, I like the general direction but this is a complex patch. Lemme
> add some printks to it in order to get a better idea of what happens.

Yeah, those were part of my iterative process mentioned above.


Juergen
  
Borislav Petkov March 31, 2023, 12:55 p.m. UTC | #3
On Wed, Mar 29, 2023 at 03:39:35PM +0200, Juergen Gross wrote:
> No. :-)

Because?

> The final form of the code is the result of an iterative process. :-)

I have a similar iterative process: until it has been reviewed and
explained properly, this is not going anywhere.

So however you wanna do it, fine by me.

> I've reused the wording from cleanup.c (just above amd_special_default_mtrr()).

That got added with K8. K8 is ancient history so nothing magic about
that anymore. It is basically a bit in the SYSCFG MSR which says that

	[4G ... TOP_MEM2]

is WB.
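
(IIRC the check in amd_special_default_mtrr() is roughly

	rdmsr(MSR_AMD64_SYSCFG, lo, hi);
	if ((lo & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
	    (Tom2Enabled | Tom2ForceMemTypeWB))
		/* [4G, TOM2) is WB */

i.e. both Tom2Enabled and Tom2ForceMemTypeWB need to be set.)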

> > Why not in mtrr_bp_init()? That is the first CPU.
> 
> Yeah, but generic_set_mtrr() can be called after boot, too.

That function sets a single MTRR register so you'd have to merge the
ranges, AFAICT. Not rebuild the whole map...

> Umm, not really. I want to do the copy even in the Xen PV case.

How about some comments? Or you're expecting me to be able to read your
mind?!

;-\
  
Borislav Petkov March 31, 2023, 12:57 p.m. UTC | #4
On Mon, Mar 06, 2023 at 05:34:22PM +0100, Juergen Gross wrote:
> +/* Build the cache_map containing the cache modes per memory range. */
> +void mtrr_build_map(void)
> +{
> +	unsigned int i;
> +	u64 start, end, size;
> +	u8 type;
> +
> +	if (!mtrr_state.enabled)
	     ^^^^^^^^^^^^^^^^^^^

> +		return;
> +
> +	/* Add fixed MTRRs, optimize for adjacent entries with same type. */
> +	if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) {
	    ^^^^^^^^^^^^^^^^^^

First check can go.

> +		start = 0;
> +		end = size = 0x10000;
> +		type = mtrr_state.fixed_ranges[0];
> +
> +		for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) {

Loop starts at 1. Comment explaining why pls.

> +			if (i == 8 || i == 24)

Magic numbers.

Ok, I'll stop here.

Please send a new version with the review comments incorporated. I need
to look more at that map thing more.

Thx.
  
Juergen Gross March 31, 2023, 1:23 p.m. UTC | #5
On 31.03.23 14:55, Borislav Petkov wrote:
> On Wed, Mar 29, 2023 at 03:39:35PM +0200, Juergen Gross wrote:
>> No. :-)
> 
> Because?

In general the critical case is add_map_entry_at() returning 2 (in the
case it is returning 1, the index can be set to -1, but there is always
the "continue" statement right after that, which would execute the "i++"
of the "for" statement).

add_map_entry_at() can return 2 only, if it detects "merge_prev" and
"merge_next". "merge_prev" can be set only if the current index was > 0,
which makes it impossible to return 2 if the index was 0.
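
To illustrate with a small (made-up) example: adding UC over [c, d) into a
map containing a single WB entry [a, b) with a < c < d < b does

	{ [a,b) WB }			i = 0
	clr_map_range_at(c, d, 0)	-> { [a,c) WB, [d,b) WB }, returns 1, i = 1
	add_map_entry_at(c, d, UC, 1)	-> { [a,c) WB, [c,d) UC, [d,b) WB }, returns 0

so the index corrections just keep the scan index consistent after entries
have been split or merged.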

>> The final form of the code is the result of an iterative process. :-)
> 
> I have a similar iterative process: until it has been reviewed and
> explained properly, this is not going anywhere.

Of course.

> So however you wanna do it, fine by me.
> 
>> I've reused the wording from cleanup.c (just above amd_special_default_mtrr()).
> 
> That got added with K8. K8 is ancient history so nothing magic about
> that anymore. It is basically a bit in the SYSCFG MSR which says that
> 
> 	[4G ... TOP_MEM2]
> 
> is WB.

How should it be named? AMD TOP_MEM2 MSR?

>>> Why not in mtrr_bp_init()? That is the first CPU.
>>
>> Yeah, but generic_set_mtrr() can be called after boot, too.
> 
> That function sets a single MTRR register so you'd have to merge the
> ranges, AFAICT. Not rebuild the whole map...

The problem isn't an added MTRR register, but a possibly replaced or removed
one. Handling that is much more complicated, so I've chosen to do it the simple
way.

In the end I'd expect setting of MTRRs to be a rare event, so there shouldn't be
a performance issue with that approach.

> 
>> Umm, not really. I want to do the copy even in the Xen PV case.
> 
> How about some comments? Or you're expecting me to be able to read your
> mind?!

Okay, I'll add some more comments.

OTOH, what was hard to write should be hard to read (just kidding).


Juergen
  
Juergen Gross March 31, 2023, 1:35 p.m. UTC | #6
On 31.03.23 14:57, Borislav Petkov wrote:
> On Mon, Mar 06, 2023 at 05:34:22PM +0100, Juergen Gross wrote:
>> +/* Build the cache_map containing the cache modes per memory range. */
>> +void mtrr_build_map(void)
>> +{
>> +	unsigned int i;
>> +	u64 start, end, size;
>> +	u8 type;
>> +
>> +	if (!mtrr_state.enabled)
> 	     ^^^^^^^^^^^^^^^^^^^
> 
>> +		return;
>> +
>> +	/* Add fixed MTRRs, optimize for adjacent entries with same type. */
>> +	if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) {
> 	    ^^^^^^^^^^^^^^^^^^
> 
> First check can go.

Hmm, this makes it much harder to see that the code really does nothing
if enabled isn't set.

> 
>> +		start = 0;
>> +		end = size = 0x10000;
>> +		type = mtrr_state.fixed_ranges[0];
>> +
>> +		for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) {
> 
> Loop starts at 1. Comment explaining why pls.

Okay.

> 
>> +			if (i == 8 || i == 24)
> 
> Magic numbers.

I'll add more comments.
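
Something like this, probably:

	/*
	 * Start at index 1, fixed_ranges[0] was used to seed start/end/type.
	 * fixed_ranges[] covers the low 1MB: entries 0-7 have 64K granularity,
	 * entries 8-23 16K, and entries 24-87 4K, hence the size adjustment
	 * at i == 8 and i == 24.
	 */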

> 
> Ok, I'll stop here.
> 
> Please send a new version with the review comments incorporated. I need
> to look more at that map thing more.

Will do.


Thanks,

Juergen
  
Borislav Petkov April 1, 2023, 2:24 p.m. UTC | #7
On Fri, Mar 31, 2023 at 03:23:13PM +0200, Juergen Gross wrote:
> In general the critical case is add_map_entry_at() returning 2 (in the
> case it is returning 1, the index can be set to -1, but there is always
> the "continue" statement right after that, which would execute the "i++"
> of the "for" statement).
> 
> add_map_entry_at() can return 2 only, if it detects "merge_prev" and
> "merge_next". "merge_prev" can be set only if the current index was > 0,
> which makes it impossible to return 2 if the index was 0.

Yeah, in the meantime I did add some debug printks in order to find my
way around that code...

> How should it be named? AMD TOP_MEM2 MSR?

It is already called that way - see "git grep TOP_MEM2" output.

> The problem isn't an added MTRR register, but a possibly replaced or removed
> one. Handling that is much more complicated, so I've chosen to do it the simple
> way.

Pls put that blurb over the function: needs to be called when MTRRs get
modified so that the map is kept valid, yadda yadda...
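
E.g. something like:

	/*
	 * Rebuild the cache_map from scratch: must be called whenever an MTRR
	 * is changed (added, replaced or removed) so that the map stays valid.
	 */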

> In the end I'd expect setting of MTRRs to be a rare event, so there shouldn't be
> a performance issue with that approach.

Ack.

> Okay, I'll add some more comments.

Thx.

> OTOH, what was hard to write should be hard to read (just kidding).

I'm not kidding - it is not super easy.

:-)
  
Borislav Petkov April 1, 2023, 2:26 p.m. UTC | #8
On Fri, Mar 31, 2023 at 03:35:07PM +0200, Juergen Gross wrote:
> Hmm, this makes it much harder to see that the code really does nothing
> if enabled isn't set.

Both callsites in mtrr_bp_init() are behind such a check already.

Thx.
  
Juergen Gross April 3, 2023, 6:57 a.m. UTC | #9
On 01.04.23 16:24, Borislav Petkov wrote:
> On Fri, Mar 31, 2023 at 03:23:13PM +0200, Juergen Gross wrote:
>> In general the critical case is add_map_entry_at() returning 2 (in the
>> case it is returning 1, the index can be set to -1, but there is always
>> the "continue" statement right after that, which would execute the "i++"
>> of the "for" statement).
>>
>> add_map_entry_at() can return 2 only, if it detects "merge_prev" and
>> "merge_next". "merge_prev" can be set only if the current index was > 0,
>> which makes it impossible to return 2 if the index was 0.
> 
> Yeah, in the meantime I did add some debug printks in order to find my
> way around that code...
> 
>> How should it be named? AMD TOP_MEM2 MSR?
> 
> It is already called that way - see "git grep TOP_MEM2" output.
> 
>> The problem isn't an added MTRR register, but a possibly replaced or removed
>> one. Handling that is much more complicated, so I've chosen to do it the simple
>> way.
> 
> Pls put that blurb over the function: needs to be called when MTRRs get
> modified so that the map is kept valid, yadda yadda...

Okay.


Juergen
  
Juergen Gross April 3, 2023, 7:02 a.m. UTC | #10
On 01.04.23 16:26, Borislav Petkov wrote:
> On Fri, Mar 31, 2023 at 03:35:07PM +0200, Juergen Gross wrote:
>> Hmm, this makes it much harder to see that the code really does nothing
>> if enabled isn't set.
> 
> Both callsites in mtrr_bp_init() are behind such a check already.

Oh, indeed they are.

I'll remove the check.


Juergen
  

Patch

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index d271e0c73775..13bc637a50e9 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -33,6 +33,37 @@  static struct fixed_range_block fixed_range_blocks[] = {
 	{}
 };
 
+struct cache_map {
+	u64 start;
+	u64 end;
+	u8 type;
+	bool fixed;
+};
+
+/*
+ * CACHE_MAP_MAX is the maximum number of memory ranges in cache_map, where
+ * no 2 adjacent ranges have the same cache mode (those would be merged).
+ * The number is based on the worst case:
+ * - no two adjacent fixed MTRRs share the same cache mode
+ * - one variable MTRR is spanning a huge area with mode WB
+ * - 255 variable MTRRs with mode UC all overlap with the WB MTRR, creating 2
+ *   additional ranges each (result like "ababababa...aba" with a = WB, b = UC),
+ *   accounting for MTRR_MAX_VAR_RANGES * 2 - 1 range entries
+ * - a TOM2 area (even with overlapping an UC MTRR can't add 2 range entries
+ *   to the possible maximum, as it always starts at 4GB, thus it can't be in
+ *   the middle of that MTRR, unless that MTRR starts at 0, which would remove
+ *   the initial "a" from the "abababa" pattern above)
+ * The map won't contain ranges with no matching MTRR (those fall back to the
+ * default cache mode).
+ */
+#define CACHE_MAP_MAX	(MTRR_NUM_FIXED_RANGES + MTRR_MAX_VAR_RANGES * 2)
+
+static struct cache_map init_cache_map[CACHE_MAP_MAX] __initdata;
+static struct cache_map *cache_map __refdata = init_cache_map;
+static unsigned int cache_map_size = CACHE_MAP_MAX;
+static unsigned int cache_map_n;
+static unsigned int cache_map_fixed;
+
 static unsigned long smp_changes_mask;
 static int mtrr_state_set;
 u64 mtrr_tom2;
@@ -78,6 +109,20 @@  static u64 get_mtrr_size(u64 mask)
 	return size;
 }
 
+static u8 get_var_mtrr_state(unsigned int reg, u64 *start, u64 *size)
+{
+	struct mtrr_var_range *mtrr = mtrr_state.var_ranges + reg;
+
+	if (!(mtrr->mask_lo & (1 << 11)))
+		return MTRR_TYPE_INVALID;
+
+	*start = (((u64)mtrr->base_hi) << 32) + (mtrr->base_lo & PAGE_MASK);
+	*size = get_mtrr_size((((u64)mtrr->mask_hi) << 32) +
+			      (mtrr->mask_lo & PAGE_MASK));
+
+	return mtrr->base_lo & 0xff;
+}
+
 static u8 get_effective_type(u8 type1, u8 type2)
 {
 	if (type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE)
@@ -241,6 +286,211 @@  static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
 	return mtrr_state.def_type;
 }
 
+static void rm_map_entry_at(int idx)
+{
+	int i;
+
+	for (i = idx; i < cache_map_n - 1; i++)
+		cache_map[i] = cache_map[i + 1];
+
+	cache_map_n--;
+}
+
+/*
+ * Add an entry into cache_map at a specific index.
+ * Merges adjacent entries if appropriate.
+ * Return the number of merges for correcting the scan index.
+ */
+static int add_map_entry_at(u64 start, u64 end, u8 type, int idx)
+{
+	bool merge_prev, merge_next;
+	int i;
+
+	if (start >= end)
+		return 0;
+
+	merge_prev = (idx > 0 && !cache_map[idx - 1].fixed &&
+		      start == cache_map[idx - 1].end &&
+		      type == cache_map[idx - 1].type);
+	merge_next = (idx < cache_map_n && !cache_map[idx].fixed &&
+		      end == cache_map[idx].start &&
+		      type == cache_map[idx].type);
+
+	if (merge_prev && merge_next) {
+		cache_map[idx - 1].end = cache_map[idx].end;
+		rm_map_entry_at(idx);
+		return 2;
+	}
+	if (merge_prev) {
+		cache_map[idx - 1].end = end;
+		return 1;
+	}
+	if (merge_next) {
+		cache_map[idx].start = start;
+		return 1;
+	}
+
+	/* Sanity check: the array should NEVER be too small! */
+	if (cache_map_n == cache_map_size) {
+		WARN(1, "MTRR cache mode memory map exhausted!\n");
+		cache_map_n = cache_map_fixed;
+		return 0;
+	}
+
+	for (i = cache_map_n; i > idx; i--)
+		cache_map[i] = cache_map[i - 1];
+
+	cache_map[idx].start = start;
+	cache_map[idx].end = end;
+	cache_map[idx].type = type;
+	cache_map[idx].fixed = false;
+	cache_map_n++;
+
+	return 0;
+}
+
+/* Clear a part of an entry. Return 1 if start of entry is still valid. */
+static int clr_map_range_at(u64 start, u64 end, int idx)
+{
+	int ret = start != cache_map[idx].start;
+	u64 tmp;
+
+	if (start == cache_map[idx].start && end == cache_map[idx].end) {
+		rm_map_entry_at(idx);
+	} else if (start == cache_map[idx].start) {
+		cache_map[idx].start = end;
+	} else if (end == cache_map[idx].end) {
+		cache_map[idx].end = start;
+	} else {
+		tmp = cache_map[idx].end;
+		cache_map[idx].end = start;
+		add_map_entry_at(end, tmp, cache_map[idx].type, idx + 1);
+	}
+
+	return ret;
+}
+
+static void add_map_entry(u64 start, u64 end, u8 type)
+{
+	int i;
+	u8 new_type, old_type;
+	u64 tmp;
+
+	for (i = 0; i < cache_map_n && start < end; i++) {
+		if (start >= cache_map[i].end)
+			continue;
+
+		if (start < cache_map[i].start) {
+			/* Region start has no overlap. */
+			tmp = min(end, cache_map[i].start);
+			i -= add_map_entry_at(start, tmp,  type, i);
+			start = tmp;
+			continue;
+		}
+
+		new_type = get_effective_type(type, cache_map[i].type);
+		old_type = cache_map[i].type;
+
+		if (cache_map[i].fixed || new_type == old_type) {
+			/* Cut off start of new entry. */
+			start = cache_map[i].end;
+			continue;
+		}
+
+		tmp = min(end, cache_map[i].end);
+		i += clr_map_range_at(start, tmp, i);
+		i -= add_map_entry_at(start, tmp, new_type, i);
+		start = tmp;
+	}
+
+	add_map_entry_at(start, end, type, i);
+}
+
+/* Add variable MTRRs to cache map. */
+static void map_add_var(void)
+{
+	unsigned int i;
+	u64 start, size;
+	u8 type;
+
+	/* Add AMD magic MTRR. */
+	if (mtrr_tom2) {
+		add_map_entry(1ULL << 32, mtrr_tom2 - 1, MTRR_TYPE_WRBACK);
+		cache_map[cache_map_n - 1].fixed = true;
+	}
+
+	for (i = 0; i < num_var_ranges; i++) {
+		type = get_var_mtrr_state(i, &start, &size);
+		if (type != MTRR_TYPE_INVALID)
+			add_map_entry(start, start + size, type);
+	}
+}
+
+/* Rebuild map by replacing variable entries. */
+static void rebuild_map(void)
+{
+	cache_map_n = cache_map_fixed;
+
+	map_add_var();
+}
+
+/* Build the cache_map containing the cache modes per memory range. */
+void mtrr_build_map(void)
+{
+	unsigned int i;
+	u64 start, end, size;
+	u8 type;
+
+	if (!mtrr_state.enabled)
+		return;
+
+	/* Add fixed MTRRs, optimize for adjacent entries with same type. */
+	if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) {
+		start = 0;
+		end = size = 0x10000;
+		type = mtrr_state.fixed_ranges[0];
+
+		for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) {
+			if (i == 8 || i == 24)
+				size >>= 2;
+
+			if (mtrr_state.fixed_ranges[i] != type) {
+				add_map_entry(start, end, type);
+				start = end;
+				type = mtrr_state.fixed_ranges[i];
+			}
+			end += size;
+		}
+		add_map_entry(start, end, type);
+	}
+
+	/* Mark fixed and magic MTRR as fixed, they take precedence. */
+	for (i = 0; i < cache_map_n; i++)
+		cache_map[i].fixed = true;
+	cache_map_fixed = cache_map_n;
+
+	map_add_var();
+}
+
+/* Copy the cache_map from __initdata memory to dynamically allocated one. */
+void __init mtrr_copy_map(void)
+{
+	unsigned int new_size = cache_map_fixed + 2 * num_var_ranges;
+
+	if (!mtrr_state.enabled || !new_size) {
+		cache_map = NULL;
+		return;
+	}
+
+	mutex_lock(&mtrr_mutex);
+
+	cache_map = kcalloc(new_size, sizeof(*cache_map), GFP_KERNEL);
+	memmove(cache_map, init_cache_map, cache_map_n * sizeof(*cache_map));
+	cache_map_size = new_size;
+
+	mutex_unlock(&mtrr_mutex);
+}
+
 /**
  * mtrr_overwrite_state - set static MTRR state
  *
@@ -815,6 +1065,10 @@  static void generic_set_mtrr(unsigned int reg, unsigned long base,
 
 	cache_enable();
 	local_irq_restore(flags);
+
+	/* On the first cpu rebuild the cache mode memory map. */
+	if (smp_processor_id() == cpumask_first(cpu_online_mask))
+		rebuild_map();
 }
 
 int generic_validate_add_page(unsigned long base, unsigned long size,
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 50cd2287b6e1..1dbb9fdfd87b 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -65,7 +65,7 @@  static bool mtrr_enabled(void)
 }
 
 unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
-static DEFINE_MUTEX(mtrr_mutex);
+DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
 
@@ -668,6 +668,7 @@  void __init mtrr_bp_init(void)
 		/* Software overwrite of MTRR state, only for generic case. */
 		mtrr_calc_physbits(true);
 		init_table();
+		mtrr_build_map();
 		pr_info("MTRRs set to read-only\n");
 
 		return;
@@ -705,6 +706,7 @@  void __init mtrr_bp_init(void)
 			if (get_mtrr_state()) {
 				memory_caching_control |= CACHE_MTRR;
 				changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr);
+				mtrr_build_map();
 			} else {
 				mtrr_if = NULL;
 				why = "by BIOS";
@@ -733,6 +735,8 @@  void mtrr_save_state(void)
 
 static int __init mtrr_init_finialize(void)
 {
+	mtrr_copy_map();
+
 	if (!mtrr_enabled())
 		return 0;
 
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index a3c362d3d5bf..6246a1d8650b 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -53,6 +53,7 @@  bool get_mtrr_state(void);
 
 extern u64 size_or_mask, size_and_mask;
 extern const struct mtrr_ops *mtrr_if;
+extern struct mutex mtrr_mutex;
 
 extern unsigned int num_var_ranges;
 extern u64 mtrr_tom2;
@@ -61,6 +62,8 @@  extern struct mtrr_state_type mtrr_state;
 void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
 void mtrr_wrmsr(unsigned, unsigned, unsigned);
+void mtrr_build_map(void);
+void mtrr_copy_map(void);
 
 /* CPU specific mtrr_ops vectors. */
 extern const struct mtrr_ops amd_mtrr_ops;