Skip to content

Commit

Permalink
riscv: Move kernel mapping outside of linear mapping
Browse files Browse the repository at this point in the history
This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at the PAGE_OFFSET address, so the linear
mapping could also serve as the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space. BPF
is now always placed after the kernel, and modules use the 2GB memory range
right before the kernel, so the BPF and modules regions do not overlap. The
KASLR implementation will simply have to move the kernel within the last 2GB
range and take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
  • Loading branch information
AlexGhiti authored and palmer-dabbelt committed Apr 26, 2021
1 parent 8a07ac3 commit 2bfc6cd
Show file tree
Hide file tree
Showing 12 changed files with 182 additions and 36 deletions.
3 changes: 2 additions & 1 deletion arch/riscv/boot/loader.lds.S
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */

#include <asm/page.h>
#include <asm/pgtable.h>

OUTPUT_ARCH(riscv)
ENTRY(_start)

SECTIONS
{
. = PAGE_OFFSET;
. = KERNEL_LINK_ADDR;

.payload : {
*(.payload)
Expand Down
26 changes: 24 additions & 2 deletions arch/riscv/include/asm/page.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,37 @@ typedef struct page *pgtable_t;

/*
 * Offsets used by the virtual <-> physical address conversion macros.
 *
 * With an MMU they are variables defined elsewhere; without an MMU they
 * are the constant 0, so the conversions become the identity.
 */
#ifdef CONFIG_MMU
/* Added to a physical address to obtain its linear-mapping virtual address. */
extern unsigned long va_pa_offset;
#ifdef CONFIG_64BIT
/* Same role as va_pa_offset, but for the kernel mapping (64-bit only). */
extern unsigned long va_kernel_pa_offset;
#endif
/* Backs ARCH_PFN_OFFSET when an MMU is present. */
extern unsigned long pfn_base;
#define ARCH_PFN_OFFSET (pfn_base)
#else
#define va_pa_offset 0
#ifdef CONFIG_64BIT
#define va_kernel_pa_offset 0
#endif
/* No MMU: the PFN offset is derived directly from PAGE_OFFSET. */
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */

#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
#ifdef CONFIG_64BIT
/*
 * Boundary used by __va_to_pa_nodebug(): virtual addresses below it are
 * translated with the linear-mapping offset, addresses at or above it
 * with the kernel-mapping offset.
 */
extern unsigned long kernel_virt_addr;

/* Physical -> virtual: one helper per mapping, the linear one is the default. */
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset))
#define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_kernel_pa_offset))
#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)

/* Virtual -> physical: one helper per mapping. */
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
#define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - va_kernel_pa_offset)

/*
 * Pick the mapping from the address itself. The argument is evaluated
 * exactly once and the statement expression yields an unsigned long.
 */
#define __va_to_pa_nodebug(x) ({ \
	unsigned long _vaddr = x; \
	unsigned long _paddr; \
	if (_vaddr >= kernel_virt_addr) \
		_paddr = kernel_mapping_va_to_pa(_vaddr); \
	else \
		_paddr = linear_mapping_va_to_pa(_vaddr); \
	_paddr; \
})
#else
/* 32-bit: a single offset covers every translation. */
#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
#endif

#ifdef CONFIG_DEBUG_VIRTUAL
extern phys_addr_t __virt_to_phys(unsigned long x);
Expand Down
39 changes: 31 additions & 8 deletions arch/riscv/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,38 @@

#include <asm/pgtable-bits.h>

#ifndef __ASSEMBLY__
#ifndef CONFIG_MMU
#define KERNEL_LINK_ADDR PAGE_OFFSET
#else

/* Page Upper Directory not used in RISC-V */
#include <asm-generic/pgtable-nopud.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
#define ADDRESS_SPACE_END (UL(-1))

#ifdef CONFIG_MMU
#ifdef CONFIG_64BIT
/*
 * Leave 2GB for kernel and BPF at the end of the address space.
 * ADDRESS_SPACE_END is the highest address (UL(-1)), so the link address
 * is the first byte of the last 2GB.
 */
#define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1)
#else
/* 32-bit: the kernel is still linked at PAGE_OFFSET. */
#define KERNEL_LINK_ADDR PAGE_OFFSET
#endif

#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)

/* Size of the virtual address window reserved for the BPF JIT. */
#define BPF_JIT_REGION_SIZE (SZ_128M)
#ifdef CONFIG_64BIT
/*
 * 64-bit: the region starts at the PFN_ALIGN'ed address of _end and
 * extends BPF_JIT_REGION_SIZE bytes upwards from there.
 */
/* KASLR should leave at least 128MB for BPF after the kernel */
#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
#else
/*
 * 32-bit: the region is the last BPF_JIT_REGION_SIZE bytes below
 * PAGE_OFFSET and ends at VMALLOC_END.
 */
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END (VMALLOC_END)
#endif

/*
 * Modules always live before the kernel.
 *
 * On 64-bit the module area starts 2GB below the PFN_ALIGN'ed address of
 * _end and stops at the PFN_ALIGN'ed address of _start. These bounds are
 * not defined for 32-bit kernels.
 */
#ifdef CONFIG_64BIT
#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
#endif

/*
* Roughly size the vmemmap space to be large enough to fit enough
Expand Down Expand Up @@ -57,9 +72,16 @@
#define FIXADDR_SIZE PGDIR_SIZE
#endif
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)

#endif

#ifndef __ASSEMBLY__

/* Page Upper Directory not used in RISC-V */
#include <asm-generic/pgtable-nopud.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>

#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
Expand Down Expand Up @@ -484,6 +506,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,

#define kern_addr_valid(addr) (1) /* FIXME */

extern char _start[];
extern void *dtb_early_va;
extern uintptr_t dtb_early_pa;
void setup_bootmem(void);
Expand Down
1 change: 1 addition & 0 deletions arch/riscv/include/asm/set_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
int set_memory_rw_nx(unsigned long addr, int numpages);
void protect_kernel_text_data(void);
void protect_kernel_linear_mapping_text_rodata(void);
#else
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
Expand Down
3 changes: 2 additions & 1 deletion arch/riscv/kernel/head.S
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ pe_head_start:
#ifdef CONFIG_MMU
relocate:
/* Relocate return address */
li a1, PAGE_OFFSET
la a1, kernel_virt_addr
REG_L a1, 0(a1)
la a2, _start
sub a1, a1, a2
add ra, ra, a1
Expand Down
6 changes: 2 additions & 4 deletions arch/riscv/kernel/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,12 +408,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
}

#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
#define VMALLOC_MODULE_START \
max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
void *module_alloc(unsigned long size)
{
return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
VMALLOC_END, GFP_KERNEL,
return __vmalloc_node_range(size, 1, MODULES_VADDR,
MODULES_END, GFP_KERNEL,
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
Expand Down
7 changes: 6 additions & 1 deletion arch/riscv/kernel/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,13 @@ void __init setup_arch(char **cmdline_p)

sbi_init();

if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
protect_kernel_text_data();
#if defined(CONFIG_64BIT) && defined(CONFIG_MMU)
protect_kernel_linear_mapping_text_rodata();
#endif
}

#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
#endif
Expand Down
3 changes: 2 additions & 1 deletion arch/riscv/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
* Copyright (C) 2017 SiFive
*/

#define LOAD_OFFSET PAGE_OFFSET
#include <asm/pgtable.h>
#define LOAD_OFFSET KERNEL_LINK_ADDR
#include <asm/vmlinux.lds.h>
#include <asm/page.h>
#include <asm/cache.h>
Expand Down
13 changes: 13 additions & 0 deletions arch/riscv/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,19 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
return;
}

#ifdef CONFIG_64BIT
/*
* Modules in 64bit kernels lie in their own virtual region which is not
* in the vmalloc region, but dealing with page faults in this region
* or the vmalloc region amounts to doing the same thing: checking that
* the mapping exists in init_mm.pgd and updating user page table, so
* just use vmalloc_fault.
*/
if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
vmalloc_fault(regs, code, addr);
return;
}
#endif
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
local_irq_enable();
Expand Down
Loading

0 comments on commit 2bfc6cd

Please sign in to comment.