@@ -268,25 +268,33 @@ boot := arch/x86/boot
BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage
-PHONY += bzImage $(BOOT_TARGETS)
-
-# Default kernel to build
-all: bzImage
+PHONY += bzImage vmlinuz.efi $(BOOT_TARGETS)
# KBUILD_IMAGE specify target image being built
+ifeq ($(CONFIG_EFI_ZBOOT),)
KBUILD_IMAGE := $(boot)/bzImage
+else
+KBUILD_IMAGE := arch/x86/zboot/vmlinuz.efi
+endif
+
+# Default kernel to build
+all: $(notdir $(KBUILD_IMAGE))
bzImage: vmlinux
ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
$(Q)$(MAKE) $(build)=arch/x86/tools posttest
endif
- $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$(@)
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
+vmlinuz.efi: zboot := arch/x86/zboot
+vmlinuz.efi: vmlinux
+ $(Q)$(MAKE) $(build)=$(zboot) $(zboot)/$@
+
PHONY += install
install:
$(call cmd,install)
@@ -471,4 +471,9 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
#endif
+static inline unsigned long efi_get_kimg_min_align(void)
+{
+ return SZ_2M;
+}
+
#endif /* _ASM_X86_EFI_H */
@@ -64,6 +64,17 @@ SYM_CODE_START_NOALIGN(startup_64)
/* Set up the stack for verify_cpu(), similar to initial_stack below */
leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
+#ifdef CONFIG_EFI_ZBOOT
+ /*
+ * The generic EFI zboot code expects a __le32 at offset 0x10 of the
+ * decompressed image describing the size in memory of the kernel
+ * image. This is typically part of the image header, but we don't have
+ * such a header on x86 so just put the bare number here, encoded in a
+ * NOP instruction.
+ */
+ .org startup_64 + 0x10 - 3, BYTES_NOP1
+ nopl (_end - startup_64)(%rax)
+#endif
leaq _text(%rip), %rdi
/*
new file mode 100644
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023 Google LLC. <ardb@google.com>
+#
+
+$(obj)/Image: OBJCOPYFLAGS := -O binary -S \
+ -R .note -R .note.gnu.build-id -R .comment
+$(obj)/Image: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+CMD_RELOCS := arch/x86/tools/relocs
+
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = $(CMD_RELOCS) $< > $@
+
+$(obj)/vmlinux.relocs: vmlinux FORCE
+ $(call if_changed,relocs)
+
+efi-zboot-relocs-$(CONFIG_X86_NEED_RELOCS) := $(obj)/vmlinux.relocs
+EFI_ZBOOT_PAYLOAD_TRAILER := $(efi-zboot-relocs-y)
+
+EFI_ZBOOT_PAYLOAD := Image
+EFI_ZBOOT_BFD_TARGET := elf64-x86-64
+EFI_ZBOOT_MACH_TYPE := AMD64
+EFI_ZBOOT_FORWARD_CFI := $(CONFIG_X86_KERNEL_IBT)
+
+targets := Image vmlinux.relocs
+
+include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot
@@ -74,7 +74,7 @@ config EFI_GENERIC_STUB
config EFI_ZBOOT
bool "Enable the generic EFI decompressor"
- depends on EFI_GENERIC_STUB && !ARM
+ depends on (EFI_GENERIC_STUB && !ARM) || X86_64
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
select HAVE_KERNEL_LZMA
@@ -9,6 +9,9 @@
# non-x86 reuses KBUILD_CFLAGS, x86 does not
cflags-y := $(KBUILD_CFLAGS)
+cflags-x86-$(CONFIG_X86_KERNEL_IBT) := \
+ $(call cc-option,-fcf-protection=branch -fno-jump-tables)
+
cflags-$(CONFIG_X86_32) := -march=i386
cflags-$(CONFIG_X86_64) := -mcmodel=small
cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ \
@@ -18,7 +21,7 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ \
$(call cc-disable-warning, address-of-packed-member) \
$(call cc-disable-warning, gnu) \
-fno-asynchronous-unwind-tables \
- $(CLANG_FLAGS)
+ $(CLANG_FLAGS) $(cflags-x86-y)
# arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly
# disable the stackleak plugin
@@ -82,8 +85,8 @@ lib-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdt.o \
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
-lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o \
- screen_info.o efi-stub-entry.o
+lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o efi-stub-entry.o screen_info.o
+lib-y += string.o intrinsics.o systable.o
lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o smbios.o
@@ -91,8 +94,12 @@ lib-$(CONFIG_X86) += x86.o x86-stub.o
lib-$(CONFIG_RISCV) += riscv.o riscv-stub.o
lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o
+cflags-zboot-$(CONFIG_X86) := -Defi_zboot_entry=__efistub_efi_zboot_entry
+CFLAGS_zboot.o := $(cflags-zboot-y)
+
CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+zboot-obj-$(CONFIG_X86_64) := x86-zboot.o
zboot-obj-$(CONFIG_RISCV) := lib-clz_ctz.o lib-ashldi3.o
lib-$(CONFIG_EFI_ZBOOT) += zboot.o $(zboot-obj-y)
@@ -29,7 +29,7 @@ zboot-size-len-y := 4
zboot-method-$(CONFIG_KERNEL_GZIP) := gzip
zboot-size-len-$(CONFIG_KERNEL_GZIP) := 0
-$(obj)/vmlinuz: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinuz: $(obj)/vmlinux.bin $(EFI_ZBOOT_PAYLOAD_TRAILER) FORCE
$(call if_changed,$(zboot-method-y))
OBJCOPYFLAGS_vmlinuz.o := -I binary -O $(EFI_ZBOOT_BFD_TARGET) \
@@ -16,6 +16,7 @@
#include "efistub.h"
+bool efi_no5lvl;
bool efi_nochunk;
bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE);
bool efi_novamap;
@@ -73,6 +74,8 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
} else if (!strcmp(param, "noinitrd")) {
efi_noinitrd = true;
+ } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+ efi_no5lvl = true;
} else if (!strcmp(param, "efi") && val) {
efi_nochunk = parse_option_str(val, "nochunk");
efi_novamap |= parse_option_str(val, "novamap");
@@ -19,7 +19,6 @@
/* Maximum physical address for 64-bit kernel with 4-level paging */
#define MAXMEM_X86_64_4LEVEL (1ull << 46)
-const efi_system_table_t *efi_system_table;
const efi_dxe_services_table_t *efi_dxe_table;
u32 image_offset __section(".data");
static efi_loaded_image_t *image = NULL;
new file mode 100644
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Google LLC. <ardb@google.com>
+ */
+
+#include <linux/efi.h>
+#include <linux/pci.h>
+#include <linux/stddef.h>
+
+#include <asm/efi.h>
+#include <asm/e820/types.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+#include <asm/boot.h>
+
+#include "efistub.h"
+
+extern char _gzdata_end[];
+extern bool efi_no5lvl;
+
+static const struct desc_struct gdt[] = {
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+};
+
+static void (*la57_toggle)(void *cr3, void *gdt);
+
+#ifdef CONFIG_EFI_MIXED
+const bool efi_is64 = true;
+
+u64 __efi64_thunk(u32 func, ...)
+{
+ return EFI_UNSUPPORTED;
+}
+#endif
+
+efi_status_t efi_handle_cmdline(efi_loaded_image_t *image, char **cmdline_ptr)
+{
+ int options_size = 0;
+ efi_status_t status;
+
+ /* Convert unicode cmdline to ascii */
+ *cmdline_ptr = efi_convert_cmdline(image, &options_size);
+ if (!*cmdline_ptr)
+ return EFI_OUT_OF_RESOURCES;
+
+#ifdef CONFIG_CMDLINE_BOOL
+ status = efi_parse_options(CONFIG_CMDLINE);
+ if (status != EFI_SUCCESS) {
+ efi_err("Failed to parse options\n");
+ return status;
+ }
+#endif
+ if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
+ status = efi_parse_options(*cmdline_ptr);
+ if (status != EFI_SUCCESS)
+ efi_err("Failed to parse options\n");
+ }
+ return status;
+}
+
+void efi_cache_sync_image(unsigned long image_base, unsigned long alloc_size)
+{
+ const u32 payload_size = *(u32 *)(_gzdata_end - 4);
+ const u32 image_size = *(u32 *)(image_base + 0x10);
+ const s32 *reloc = (s32 *)(image_base + payload_size);
+ u64 va_offset = __START_KERNEL - image_base;
+ u64 range, delta;
+ u32 seed;
+
+ if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE) ||
+ image_size == payload_size ||
+ efi_get_random_bytes(sizeof(seed), (u8 *)&seed) != EFI_SUCCESS)
+ return;
+
+ range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - image_size;
+ delta = LOAD_PHYSICAL_ADDR + ((seed * range) >> 32UL);
+ delta &= ~(CONFIG_PHYSICAL_ALIGN - 1);
+
+ /*
+ * Process relocations: 32 bit relocations first then 64 bit after.
+ * Three sets of binary relocations are added to the end of the kernel
+ * before compression. Each relocation table entry is the kernel
+ * address of the location which needs to be updated stored as a
+ * 32-bit value which is sign extended to 64 bits.
+ *
+ * Format is:
+ *
+ * kernel bits...
+ * 0 - zero terminator for 64 bit relocations
+ * 64 bit relocation repeated
+ * 0 - zero terminator for inverse 32 bit relocations
+ * 32 bit inverse relocation repeated
+ * 0 - zero terminator for 32 bit relocations
+ * 32 bit relocation repeated
+ *
+ * So we work backwards from the end of the decompressed image.
+ */
+ while (*--reloc)
+ *(u32 *)((s64)*reloc - va_offset) += delta;
+
+ while (*--reloc)
+ *(u32 *)((s64)*reloc - va_offset) -= delta;
+
+ while (*--reloc)
+ *(u64 *)((s64)*reloc - va_offset) += delta;
+
+ efi_free(alloc_size - image_size, image_base + image_size);
+}
+
+static void __naked tmpl_toggle(void *cr3, void *gdt)
+{
+ /*
+ * This is template code that will be copied into a 32-bit addressable
+ * buffer, allowing us to drop to 32-bit mode with paging disabled,
+ * which is required to be able to toggle the CR4.LA57 bit.
+ *
+ * The first MOVB instruction is only there to capture the size of the
+ * sequence, and implicitly, the offset to the LJMP's immediate, which
+ * will be populated with the correct absolute address after copying.
+ */
+ asm("0: movb $(3f - .), %%al \n\t"
+ " lgdt (%%rsi) \n\t"
+ " movw %[ds], %%ax \n\t"
+ " movw %%ax, %%ds \n\t"
+ " movw %%ax, %%ss \n\t"
+ " leaq 2f(%%rip), %%rax \n\t"
+ " pushq %[cs32] \n\t"
+ " pushq %%rax \n\t"
+ " lretq \n\t"
+ "1: retq \n\t"
+ " .code32 \n\t"
+ "2: movl %%cr0, %%eax \n\t"
+ " btrl %[pg], %%eax \n\t"
+ " movl %%eax, %%cr0 \n\t"
+ " movl %%cr4, %%ecx \n\t"
+ " btcl %[la57], %%ecx \n\t"
+ " movl %%ecx, %%cr4 \n\t"
+ " movl %%edi, %%cr3 \n\t"
+ " btsl %[pg], %%eax \n\t"
+ " movl %%eax, %%cr0 \n\t"
+ " ljmpl %[cs], $(1b - 0b) \n\t"
+ "3: .code64"
+ :
+ : [cs32] "i"(__KERNEL32_CS),
+ [cs] "i"(__KERNEL_CS),
+ [ds] "i"(__KERNEL_DS),
+ [pg] "i"(X86_CR0_PG_BIT),
+ [la57] "i"(X86_CR4_LA57_BIT));
+}
+
+/*
+ * Enabling (or disabling) 5 level paging is tricky, because it can only be
+ * done from 32-bit mode with paging disabled. This means not only that the
+ * code itself must be running from 32-bit addressable physical memory, but
+ * also that the root page table must be 32-bit addressable, as we cannot
+ * program a 64-bit value into CR3 when running in 32-bit mode.
+ */
+static efi_status_t efi_setup_5level_paging(void)
+{
+ bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+ bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+ const u8 tmpl_size = ((u8 *)tmpl_toggle)[1];
+ efi_status_t status;
+ u8 *la57_code;
+
+ /* check for 5 level paging support */
+ if (native_cpuid_eax(0) < 7 ||
+ !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return EFI_SUCCESS;
+
+ /*
+ * If LA57 is supported but disabled, and we have no interest in
+ * enabling it, we can bail here. In all other cases, we need to
+ * prepare the toggle support routine, even for the case where LA57 is
+ * currently on and we want to keep it on, as the firmware might return
+ * from ExitBootServices() with LA57 disabled.
+ */
+ if (!want_la57 && !have_la57)
+ return EFI_SUCCESS;
+
+ /* allocate some 32-bit addressable memory for code and a page table */
+ status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
+ U32_MAX);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ la57_toggle = memcpy(la57_code, tmpl_toggle, tmpl_size);
+ memset(la57_code + tmpl_size, 0x0, 2 * PAGE_SIZE - tmpl_size);
+
+ /*
+ * To avoid having to allocate a 32-bit addressable stack, we use a
+ * ljmp to switch back to long mode. However, this takes an absolute
+ * address, so we have to poke it in at runtime. The dummy MOVB
+ * instruction at the beginning can be used to locate the immediate.
+ */
+ *(u32 *)&la57_code[tmpl_size - 6] += (u64)la57_code;
+
+ return EFI_SUCCESS;
+}
+
+static void efi_5level_switch(void)
+{
+ bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+ bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+ u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
+ u64 *cr3 = (u64 *)__native_read_cr3();
+ struct desc_ptr desc;
+ u64 *new_cr3;
+
+ if (!la57_toggle || (want_la57 && have_la57))
+ return;
+
+ if (!have_la57) {
+ /*
+ * We are going to enable 5 level paging, so we need to
+ * allocate a root level page from the 32-bit addressable
+ * physical region, and plug the existing hierarchy into it.
+ */
+ new_cr3 = pgt;
+ new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
+ } else {
+ // take the new root table pointer from the current entry #0
+ new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
+
+ // copy the new root level table if it is not 32-bit addressable
+ if ((u64)new_cr3 > U32_MAX) {
+ for (int i = 0; i < PTRS_PER_PGD; i++)
+ pgt[i] = new_cr3[i];
+ new_cr3 = pgt;
+ }
+ }
+
+ desc.size = sizeof(gdt) - 1;
+ desc.address = (u64)gdt;
+
+ la57_toggle(new_cr3, &desc);
+}
+
+efi_status_t efi_stub_common(efi_handle_t handle,
+ efi_loaded_image_t *image,
+ unsigned long image_addr,
+ char *cmdline_ptr)
+{
+ void __noreturn (*startup_64)(void *, struct boot_params *);
+ const struct linux_efi_initrd *initrd = NULL;
+ struct boot_params *boot_params;
+ struct setup_header *hdr;
+ efi_status_t status;
+
+ status = efi_setup_5level_paging();
+ if (status != EFI_SUCCESS) {
+ efi_err("efi_setup_5level_paging() failed!\n");
+ return status;
+ }
+
+ boot_params = efi_alloc_boot_params();
+ if (!boot_params)
+ return EFI_OUT_OF_RESOURCES;
+
+ hdr = &boot_params->hdr;
+ hdr->type_of_loader = 0x21;
+
+ efi_set_u64_split((unsigned long)cmdline_ptr,
+ &hdr->cmd_line_ptr, &boot_params->ext_cmd_line_ptr);
+
+ status = efi_load_initrd(image, hdr->initrd_addr_max, ULONG_MAX,
+ &initrd);
+ if (status != EFI_SUCCESS)
+ goto fail;
+ if (initrd && initrd->size > 0) {
+ efi_set_u64_split(initrd->base, &hdr->ramdisk_image,
+ &boot_params->ext_ramdisk_image);
+ efi_set_u64_split(initrd->size, &hdr->ramdisk_size,
+ &boot_params->ext_ramdisk_size);
+ }
+
+ status = efi_x86_stub_common(boot_params, handle);
+ if (status != EFI_SUCCESS)
+ goto fail;
+
+ efi_5level_switch();
+
+ startup_64 = (void *)image_addr;
+ startup_64(NULL, boot_params);
+fail:
+ efi_free(sizeof(struct boot_params), (unsigned long)boot_params);
+ return status;
+}
+
+struct screen_info *__alloc_screen_info(void)
+{
+ return NULL;
+}
@@ -65,6 +65,7 @@ asmlinkage efi_status_t __efiapi
efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
{
unsigned long compressed_size = _gzdata_end - _gzdata_start;
+ efi_guid_t loaded_image = LOADED_IMAGE_PROTOCOL_GUID;
unsigned long image_base, alloc_size;
efi_loaded_image_t *image;
efi_status_t status;
@@ -77,7 +78,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
free_mem_end_ptr = free_mem_ptr + sizeof(zboot_heap);
status = efi_bs_call(handle_protocol, handle,
- &LOADED_IMAGE_PROTOCOL_GUID, (void **)&image);
+ &loaded_image, (void **)&image);
if (status != EFI_SUCCESS) {
error("Failed to locate parent's loaded image protocol");
return status;
@@ -14,8 +14,10 @@ SECTIONS
.rodata : ALIGN(8) {
__efistub__gzdata_start = .;
+ _gzdata_start = .;
*(.gzdata)
__efistub__gzdata_end = .;
+ _gzdata_end = .;
*(.rodata* .init.rodata* .srodata*)
_etext = ALIGN(4096);
. = _etext;
@@ -35,11 +37,14 @@ SECTIONS
/DISCARD/ : {
*(.modinfo .init.modinfo)
+ *(.discard*)
}
}
PROVIDE(__efistub__gzdata_size =
ABSOLUTE(__efistub__gzdata_end - __efistub__gzdata_start));
+PROVIDE(_gzdata_size = __efistub__gzdata_size);
+
PROVIDE(__data_rawsize = ABSOLUTE(_edata - _etext));
PROVIDE(__data_size = ABSOLUTE(_end - _etext));