Skip to content

Commit

Permalink
stage2: Relocate stage1/stage2 above 8 MB
Browse files Browse the repository at this point in the history
The stage2 binary has grown to the point where it can no longer fit below
640 KB together with its heap.  Stage1, stage2, and all of the initial
kernel images are therefore relocated in their entirety to a base address
of 8 MB.  Stage 1 and the initial stage 2 stack consume the first 24 KB,
followed by the secrets page and CPUID page, and then come the stage 2
image (with enough room for a heap) and the kernel images.

Signed-off-by: Jon Lange <jlange@microsoft.com>
  • Loading branch information
msft-jlange committed Aug 14, 2024
1 parent 8f04e01 commit 8baf808
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 104 deletions.
63 changes: 19 additions & 44 deletions igvmbuilder/src/gpa_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ impl GpaRange {

#[derive(Debug)]
pub struct GpaMap {
pub base_addr: u64,
pub stage1_image: GpaRange,
pub low_memory: GpaRange,
pub stage2_stack: GpaRange,
pub stage2_image: GpaRange,
pub stage2_free: GpaRange,
Expand All @@ -73,16 +73,15 @@ impl GpaMap {
options: &CmdOptions,
firmware: &Option<Box<dyn Firmware>>,
) -> Result<Self, Box<dyn Error>> {
// 0x000000-0x00EFFF: zero-filled (must be pre-validated)
// 0x00F000-0x00FFFF: initial stage 2 stack page
// 0x010000-0x0nnnnn: stage 2 image
// 0x0nnnnn-0x09DFFF: zero-filled (must be pre-validated)
// 0x09E000-0x09EFFF: Secrets page
// 0x09F000-0x09FFFF: CPUID page
// 0x100000-0x1nnnnn: kernel
// 0x1nnnnn-0x1nnnnn: filesystem
// 0x1nnnnn-0x1nnnnn: IGVM parameter block
// 0x1nnnnn-0x1nnnnn: general and memory map parameter pages
// 0x800000-0x804FFF: zero-filled (must be pre-validated)
// 0x805000-0x805FFF: initial stage 2 stack page
// 0x806000-0x806FFF: Secrets page
// 0x807000-0x807FFF: CPUID page
// 0x808000-0x8nnnnn: stage 2 image
// 0x8nnnnn-0x89FFFF: zero-filled (stage 2 heap)
// 0x8A0000-0x8nnnnn: kernel
// 0x8nnnnn-0x8nnnnn: filesystem
// 0x8nnnnn-0x8nnnnn: IGVM parameter block
// 0x8nnnnn-0x8nnnnn: general and memory map parameter pages
// 0xFFnn0000-0xFFFFFFFF: [TDX stage 1 +] OVMF firmware (QEMU only, if specified)

let stage1_image = if let Some(stage1) = &options.tdx_stage1 {
Expand All @@ -107,35 +106,11 @@ impl GpaMap {
0
};

let stage2_image = GpaRange::new(0x10000, stage2_len as u64)?;
let stage2_image = GpaRange::new(0x808000, stage2_len as u64)?;

// Calculate the firmware range
let firmware_range = if let Some(firmware) = firmware {
let fw_start = firmware.get_fw_info().start as u64;
let fw_size = firmware.get_fw_info().size as u64;
GpaRange::new(fw_start, fw_size)?
} else {
GpaRange::new(0, 0)?
};

let kernel_address = match options.hypervisor {
Hypervisor::Qemu => {
// Plan to load the kernel image at a base address of 1 MB unless it must
// be relocated due to firmware.
1 << 20
}
Hypervisor::HyperV => {
// Load the kernel image after the firmware, but no lower than
// 1 MB.
let firmware_end = firmware_range.get_end();
let addr_1mb = 1 << 20;
if firmware_end < addr_1mb {
addr_1mb
} else {
firmware_end
}
}
};
// The kernel image is loaded beyond the end of the stage2 heap,
// at 0x8A0000.
let kernel_address = 0x8A0000;
let kernel_elf = GpaRange::new(kernel_address, kernel_elf_len as u64)?;
let kernel_fs = GpaRange::new(kernel_elf.get_end(), kernel_fs_len as u64)?;

Expand Down Expand Up @@ -174,13 +149,13 @@ impl GpaMap {
};

let gpa_map = Self {
base_addr: 0x800000,
stage1_image,
low_memory: GpaRange::new(0, 0xf000)?,
stage2_stack: GpaRange::new_page(0xf000)?,
stage2_stack: GpaRange::new_page(0x805000)?,
stage2_image,
stage2_free: GpaRange::new(stage2_image.get_end(), 0x9e000 - &stage2_image.get_end())?,
secrets_page: GpaRange::new_page(0x9e000)?,
cpuid_page: GpaRange::new_page(0x9f000)?,
stage2_free: GpaRange::new(stage2_image.get_end(), 0x8a0000 - &stage2_image.get_end())?,
secrets_page: GpaRange::new_page(0x806000)?,
cpuid_page: GpaRange::new_page(0x807000)?,
kernel_elf,
kernel_fs,
igvm_param_block,
Expand Down
37 changes: 22 additions & 15 deletions igvmbuilder/src/igvm_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ use igvm::{
Arch, IgvmDirectiveHeader, IgvmFile, IgvmInitializationHeader, IgvmPlatformHeader, IgvmRevision,
};
use igvm_defs::{
IgvmPageDataFlags, IgvmPageDataType, IgvmPlatformType, IGVM_VHS_PARAMETER,
IGVM_VHS_PARAMETER_INSERT, IGVM_VHS_SUPPORTED_PLATFORM, PAGE_SIZE_4K,
IgvmNativeVpContextX64, IgvmPageDataFlags, IgvmPageDataType, IgvmPlatformType,
IGVM_VHS_PARAMETER, IGVM_VHS_PARAMETER_INSERT, IGVM_VHS_SUPPORTED_PLATFORM, PAGE_SIZE_4K,
};
use zerocopy::AsBytes;

Expand Down Expand Up @@ -118,7 +118,13 @@ impl IgvmBuilder {

pub fn build(mut self) -> Result<(), Box<dyn Error>> {
let param_block = self.create_param_block()?;
self.build_directives(&param_block)?;

// Construct a native context object to capture the start context.
let start_rip = self.gpa_map.stage2_image.get_start();
let start_rsp = self.gpa_map.stage2_stack.get_end() - size_of::<Stage2Stack>() as u64;
let start_context = construct_start_context(start_rip, start_rsp);

self.build_directives(&param_block, start_context)?;
self.build_initialization()?;
self.build_platforms(&param_block);

Expand Down Expand Up @@ -275,7 +281,11 @@ impl IgvmBuilder {
Ok(())
}

fn build_directives(&mut self, param_block: &IgvmParamBlock) -> Result<(), Box<dyn Error>> {
fn build_directives(
&mut self,
param_block: &IgvmParamBlock,
start_context: Box<IgvmNativeVpContextX64>,
) -> Result<(), Box<dyn Error>> {
// Populate firmware directives.
if let Some(firmware) = &self.firmware {
self.directives.extend_from_slice(firmware.directives());
Expand Down Expand Up @@ -334,9 +344,6 @@ impl IgvmBuilder {
},
));

// Construct a native context object to capture the start context.
let start_context = construct_start_context();

if COMPATIBILITY_MASK.contains(SNP_COMPATIBILITY_MASK) {
// Add the VMSA.
self.directives.push(construct_vmsa(
Expand Down Expand Up @@ -404,6 +411,14 @@ impl IgvmBuilder {
)?;
}

// Populate the empty region below the stage2 stack.
self.add_empty_pages(
self.gpa_map.base_addr,
self.gpa_map.stage2_stack.get_start() - self.gpa_map.base_addr,
COMPATIBILITY_MASK.get(),
IgvmPageDataType::NORMAL,
)?;

// Populate the empty region above the stage 2 binary.
self.add_empty_pages(
self.gpa_map.stage2_free.get_start(),
Expand Down Expand Up @@ -444,14 +459,6 @@ impl IgvmBuilder {
);
}

// Populate the empty region at the bottom of RAM.
self.add_empty_pages(
self.gpa_map.low_memory.get_start(),
self.gpa_map.low_memory.get_size(),
COMPATIBILITY_MASK.get(),
IgvmPageDataType::NORMAL,
)?;

Ok(())
}

Expand Down
9 changes: 3 additions & 6 deletions igvmbuilder/src/vmsa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
//
// Author: Roy Hopkins <roy.hopkins@suse.com>

use std::mem::size_of;

use igvm::snp_defs::{SevFeatures, SevVmsa};
use igvm::IgvmDirectiveHeader;
use igvm_defs::IgvmNativeVpContextX64;
use zerocopy::FromZeroes;

use crate::cmd_options::SevExtraFeatures;
use crate::stage2_stack::Stage2Stack;

pub fn construct_start_context() -> Box<IgvmNativeVpContextX64> {
pub fn construct_start_context(start_rip: u64, start_rsp: u64) -> Box<IgvmNativeVpContextX64> {
let mut context_box = IgvmNativeVpContextX64::new_box_zeroed();
let context = context_box.as_mut();

Expand All @@ -35,8 +32,8 @@ pub fn construct_start_context() -> Box<IgvmNativeVpContextX64> {
context.cr4 = 0x40;

context.rflags = 2;
context.rip = 0x10000;
context.rsp = context.rip - size_of::<Stage2Stack>() as u64;
context.rip = start_rip;
context.rsp = start_rsp;

context_box
}
Expand Down
8 changes: 2 additions & 6 deletions kernel/src/stage2.lds
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ OUTPUT_ARCH(i386:x86-64)

SECTIONS
{
. = 64k;
/* Base address is 8 MB + 32 KB */
. = 8224k;
.stext = .;
.text : {
*(.startup.*)
Expand All @@ -35,11 +36,6 @@ SECTIONS
}
. = ALIGN(4096);
heap_start = .;

. = 632k;
SECRETS_PAGE = .;
. = 636k;
_reserved_for_cpuid_page = .;
}

ENTRY(startup_32)
16 changes: 11 additions & 5 deletions kernel/src/stage2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,16 @@ fn setup_env(
.env_setup(debug_serial_port)
.expect("Early environment setup failed");

// Validate the first 640 KB of memory so it can be used if necessary.
let region = MemoryRegion::<VirtAddr>::new(VirtAddr::from(0u64), 640 * 1024);
platform
.validate_page_range(region)
.expect("failed to validate low 640 KB");

init_kernel_mapping_info(
VirtAddr::null(),
VirtAddr::from(640 * 1024usize),
PhysAddr::null(),
VirtAddr::from(0x808000u64),
VirtAddr::from(0x8A0000u64),
PhysAddr::from(0x808000u64),
);

let cpuid_page = unsafe {
Expand All @@ -100,8 +106,8 @@ fn setup_env(

set_init_pgtable(PageTableRef::shared(unsafe { addr_of_mut!(pgtable) }));

// The end of the heap is the base of the secrets page.
setup_stage2_allocator(0x9e000);
// The end of the heap is the base of the kernel image.
setup_stage2_allocator(launch_info.kernel_elf_start as u64);
init_percpu(platform).expect("Failed to initialize per-cpu area");

// Init IDT again with handlers requiring GHCB (eg. #VC handler)
Expand Down
10 changes: 8 additions & 2 deletions kernel/src/svsm_paging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,16 @@ pub fn invalidate_early_boot_memory(
// invalidate stage 2 memory, unless firmware is loaded into low memory.
// Also invalidate the boot data if required.
if !config.fw_in_low_memory() {
let stage2_region = MemoryRegion::new(PhysAddr::null(), 640 * 1024);
invalidate_boot_memory_region(platform, config, stage2_region)?;
let lowmem_region = MemoryRegion::new(PhysAddr::null(), 640 * 1024);
invalidate_boot_memory_region(platform, config, lowmem_region)?;
}

let stage2_base: usize = 0x800000;
let stage2_len =
usize::try_from(launch_info.kernel_elf_stage2_virt_start).unwrap() - stage2_base;
let stage2_region = MemoryRegion::new(PhysAddr::new(stage2_base), stage2_len);
invalidate_boot_memory_region(platform, config, stage2_region)?;

if config.invalidate_boot_data() {
let kernel_elf_size =
launch_info.kernel_elf_stage2_virt_end - launch_info.kernel_elf_stage2_virt_start;
Expand Down
20 changes: 8 additions & 12 deletions stage1/stage1.S
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,10 @@

#include "types.h"

/* Use first 640kb of memory for stage2 loader */
#define STAGE2_RANGE_START 0
#define STAGE2_RANGE_PAGES 158
/* Stage2 is loaded at 8 MB + 32 KB */
#define STAGE2_START 0x808000

/* Stage2 is loaded at 64kb */
#define STAGE2_START 0x10000

#define STAGE1_STACK 0x10000
#define STAGE1_STACK 0x806000

.text
.section ".startup.text","ax"
Expand Down Expand Up @@ -49,7 +45,7 @@ startup_32:
rep movsl

/* Setup stack for stage 2*/
movl $STAGE2_START, %esp
movl $STAGE1_STACK, %esp

/* Write startup information to stage2 stack */
xorl %eax, %eax
Expand All @@ -68,11 +64,11 @@ startup_32:
leal kernel_elf(%ebp), %edi
pushl %edi

/* Push the location of the secrets page. It is always at 9E000 */
pushl $0x9E000
/* Push the location of the secrets page. It is at 8 MB plus 24 KB */
pushl $0x806000

/* Push the location of the CPUID page. It is always at 9F000 */
pushl $0x9F000
/* Push the location of the CPUID page. It is at 8 MB plus 28 KB */
pushl $0x807000

/* Push the value 1 to indicate SNP */
pushl $1
Expand Down
1 change: 1 addition & 0 deletions stage1/stage1.lds
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ OUTPUT_ARCH(i386:x86-64)

SECTIONS
{
. = 0x800000;
.stext = ALIGN(.sdata - SIZEOF(.text) - 4095, 4096);
. = .stext;
.text : { *(.startup.*) *(.text) *(.text.*) }
Expand Down
18 changes: 4 additions & 14 deletions utils/gen_meta.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,15 +194,15 @@ void init_sev_meta(struct svsm_meta_data *svsm_meta)
svsm_meta->version = 1;
svsm_meta->num_desc = NUM_DESCS;

svsm_meta->descs[0].base = 0;
svsm_meta->descs[0].len = 632 * 1024;
svsm_meta->descs[0].base = 8192 * 1024;
svsm_meta->descs[0].len = 8832 * 1024;
svsm_meta->descs[0].type = SEV_DESC_TYPE_SNP_SEC_MEM;

svsm_meta->descs[1].base = 632 * 1024;
svsm_meta->descs[1].base = 8200 * 1024;
svsm_meta->descs[1].len = 4096;
svsm_meta->descs[1].type = SEV_DESC_TYPE_SNP_SECRETS;

svsm_meta->descs[2].base = 636 * 1024;
svsm_meta->descs[2].base = 8204 * 1024;
svsm_meta->descs[2].len = 4096;
svsm_meta->descs[2].type = SEV_DESC_TYPE_CPUID;
}
Expand Down Expand Up @@ -232,16 +232,6 @@ void fill_buffer(struct meta_buffer *meta)
secret.size = 0;
add_table(meta, SEV_SECRET_GUID, (char *)&secret, sizeof(secret));

#if 0
boot_block.pre_validated_start = 0;
boot_block.pre_validated_end = 636 * 1024;
boot_block.secrets_addr = 636 * 1024;
boot_block.secrets_len = 4096;
boot_block.cpuid_addr = 632 * 1024;
boot_block.cpuid_len = 4096;
add_table(meta, SEV_SNP_BOOT_BLOCK_GUID, (char *)&boot_block, sizeof(boot_block));
#endif

svsm_info.launch_offset = 0;
add_table(meta, SVSM_INFO_GUID, (char *)&svsm_info, sizeof(svsm_info));

Expand Down

0 comments on commit 8baf808

Please sign in to comment.