From b4c622af3be25516f5442c5af64e235295576d34 Mon Sep 17 00:00:00 2001
From: Andy Soffer
Date: Fri, 2 Feb 2024 17:47:34 -0500
Subject: [PATCH] Ensure instruction set does not need to depend on
 architectures.

Move the per-instruction x64 code generation out of
examples/brainfuck/instructions.h and into a separate functor,
bf::X64CodeGenerator, so that the instruction definitions no longer include
anything from jasmin/compile/x64. The //jasmin/compile/x64:code_generator
target becomes :function_emitter; jasmin::x64::FunctionEmitter is constructed
from the instruction set together with a generator object and dispatches each
SSA instruction to that object's matching operator() overload.
---
 examples/brainfuck/BUILD                | 15 ++++-
 examples/brainfuck/instructions.h       | 60 -----------------
 examples/brainfuck/jit.cc               | 11 ++--
 examples/brainfuck/x64_code_generator.h | 76 ++++++++++++++++++++++
 jasmin/compile/x64/BUILD                |  6 +-
 jasmin/compile/x64/function_emitter.cc  | 74 +++++++++++++++++++++
 jasmin/compile/x64/function_emitter.h   | 86 +++++++++++++++++++++++++
 7 files changed, 258 insertions(+), 70 deletions(-)
 create mode 100644 examples/brainfuck/x64_code_generator.h
 create mode 100644 jasmin/compile/x64/function_emitter.cc
 create mode 100644 jasmin/compile/x64/function_emitter.h

diff --git a/examples/brainfuck/BUILD b/examples/brainfuck/BUILD
index e2b1a62..b7c8f23 100644
--- a/examples/brainfuck/BUILD
+++ b/examples/brainfuck/BUILD
@@ -26,8 +26,6 @@ cc_library(
     hdrs = ["instructions.h"],
     deps = [
         "//jasmin/core:instruction",
-        "//jasmin/compile/x64:code_generator",
-        "//jasmin/compile/x64:location_map",
     ],
 )
 
@@ -50,9 +48,20 @@ cc_binary(
         ":build",
         ":file",
         ":instructions",
+        ":x64_code_generator",
         "//jasmin/core:function",
         "//jasmin/ssa",
-        "//jasmin/compile/x64:code_generator",
+        "//jasmin/compile/x64:function_emitter",
         "@nth_cc//nth/dynamic:jit_function",
     ],
 )
+
+cc_library(
+    name = "x64_code_generator",
+    hdrs = ["x64_code_generator.h"],
+    deps = [
+        "//jasmin/core:instruction",
+        "//jasmin/compile/x64:function_emitter",
+        "//jasmin/compile/x64:location_map",
+    ],
+)
diff --git a/examples/brainfuck/instructions.h b/examples/brainfuck/instructions.h
index 417317a..a33c23d 100644
--- a/examples/brainfuck/instructions.h
+++ b/examples/brainfuck/instructions.h
@@ -1,8 +1,6 @@
 #ifndef EXAMPLES_BRAINFUCK_INSTRUCTIONS_H
 #define EXAMPLES_BRAINFUCK_INSTRUCTIONS_H
 
-#include "jasmin/compile/x64/code_generator.h"
-#include "jasmin/compile/x64/location_map.h"
 #include "jasmin/core/instruction.h"
 #include "nth/debug/debug.h"
 
@@ -19,19 +17,6 @@ struct Initialize : jasmin::Instruction {
                       jasmin::Output out) {
     out.set(state.buffer);
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({0x48, 0x81, 0xec, 0x30, 0x75, 0x00, 0x00});  // sub rsp, 0x7530
-    gen.mov(jasmin::x64::Register::rsi, jasmin::x64::Register::rsp);
-    gen.write({
-        0xba, 0x01, 0x00, 0x00, 0x00,              // mov edx, 0x1
-        0x48, 0x89, 0xe7,                          // mov rdi, rsp
-        0x31, 0xc0,                                // xor eax eax
-        0x48, 0xc7, 0xc1, 0x30, 0x75, 0x00, 0x00,  // mov rcx, 0x1000
-        0xf3, 0xaa,                                // rep stos BYTE PTR es:[rdi],al
-    });
-  }
 };
 
 struct Increment : jasmin::Instruction {
@@ -40,11 +25,6 @@ struct Increment : jasmin::Instruction {
                       jasmin::Output<>) {
     ++*in.get<0>();
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({0x80, 0x06, 0x01});  // add BYTE PTR [rsi], 0x1
-  }
 };
 
 struct Decrement : jasmin::Instruction {
@@ -53,11 +33,6 @@ struct Decrement : jasmin::Instruction {
                       jasmin::Output<>) {
     --*in.get<0>();
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({0x80, 0x2e, 0x01});  // sub BYTE PTR [rsi], 0x1
-  }
 };
 
 struct Left : jasmin::Instruction {
@@ -68,11 +43,6 @@ struct Left : jasmin::Instruction {
     NTH_REQUIRE(ptr != state.buffer);
     out.set(ptr - 1);
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({0x48, 0x8d, 0x76, 0xff});  // lea rsi, [rsi - 1]
-  }
 };
 
 struct Right : jasmin::Instruction {
@@ -83,11 +53,6 @@ struct Right : jasmin::Instruction {
     NTH_REQUIRE(ptr < state.buffer + state.buffer_size);
     out.set(ptr + 1);
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({0x48, 0x8d, 0x76, 0x01});  // lea rsi, [rsi + 1]
-  }
 };
 
 struct Input : jasmin::Instruction {
@@ -97,16 +62,6 @@ struct Input : jasmin::Instruction {
     auto [ptr] = in;
     *ptr = static_cast(std::getchar());
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({
-        0x48, 0xc7, 0xc0, 0x00, 0x00, 0x00, 0x00,  // mov rax, 0x0
-        0x48, 0xc7, 0xc7, 0x00, 0x00, 0x00, 0x00,  // mov rdi, 0x0
-        0x48, 0xc7, 0xc2, 0x01, 0x00, 0x00, 0x00,  // mov rdx, 0x1
-    });
-    gen.syscall();
-  }
 };
 
 struct Output : jasmin::Instruction {
@@ -115,16 +70,6 @@ struct Output : jasmin::Instruction {
                       jasmin::Output<>) {
     std::putchar(static_cast(*in.get<0>()));
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({
-        0x48, 0xc7, 0xc0, 0x01, 0x00, 0x00, 0x00,  // mov rax, 0x1
-        0x48, 0xc7, 0xc7, 0x01, 0x00, 0x00, 0x00,  // mov rdi, 0x1
-        0x48, 0xc7, 0xc2, 0x01, 0x00, 0x00, 0x00,  // mov rdx, 0x1
-    });
-    gen.syscall();
-  }
 };
 
 struct Zero : jasmin::Instruction {
@@ -133,11 +78,6 @@ struct Zero : jasmin::Instruction {
                       jasmin::Output out) {
     out.set(*in.get<0>() == 0);
   }
-
-  static void generate_code(jasmin::x64::CodeGenerator &gen,
-                            jasmin::LocationMap const &) {
-    gen.write({0x8a, 0x06});  // mov al, BYTE PTR [rsi]
-  }
 };
 
 using Instructions =
diff --git a/examples/brainfuck/jit.cc b/examples/brainfuck/jit.cc
index 6d07910..c1da325 100644
--- a/examples/brainfuck/jit.cc
+++ b/examples/brainfuck/jit.cc
@@ -3,7 +3,8 @@
 #include "examples/brainfuck/build.h"
 #include "examples/brainfuck/file.h"
 #include "examples/brainfuck/instructions.h"
-#include "jasmin/compile/x64/code_generator.h"
+#include "examples/brainfuck/x64_code_generator.h"
+#include "jasmin/compile/compiled_function.h"
 #include "jasmin/ssa/ssa.h"
 #include "nth/dynamic/jit_function.h"
 
@@ -23,10 +24,12 @@ int main(int argc, char* argv[]) {
 
   auto& f = std::get>(fn_or_parse_error);
 
-  // Generate executable code for the function, storing it in `code`.
   jasmin::CompiledFunction code;
-  jasmin::x64::CodeGenerator gen(nth::type);
-  gen.function(jasmin::SsaFunction(f), code);
+  {  // Generate executable code for the function, storing it in `code`.
+    bf::X64CodeGenerator gen;
+    jasmin::x64::FunctionEmitter emitter(nth::type, gen);
+    emitter.emit(jasmin::SsaFunction(f), code);
+  }
 
   // Construct a JIT-compiled function from the code.
   nth::jit_function jitted_fn(code);
diff --git a/examples/brainfuck/x64_code_generator.h b/examples/brainfuck/x64_code_generator.h
new file mode 100644
index 0000000..c1a838b
--- /dev/null
+++ b/examples/brainfuck/x64_code_generator.h
@@ -0,0 +1,76 @@
+#ifndef EXAMPLES_BRAINFUCK_X64_CODE_GENERATOR_H
+#define EXAMPLES_BRAINFUCK_X64_CODE_GENERATOR_H
+
+#include "jasmin/compile/x64/function_emitter.h"
+#include "jasmin/compile/x64/location_map.h"
+#include "nth/meta/type.h"
+
+namespace bf {
+
+struct X64CodeGenerator {
+  void operator()(decltype(nth::type),
+                  jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({0x48, 0x81, 0xec, 0x30, 0x75, 0x00, 0x00});  // sub rsp, 0x7530
+    gen.mov(jasmin::x64::Register::rsi, jasmin::x64::Register::rsp);
+    gen.write({
+        0xba, 0x01, 0x00, 0x00, 0x00,              // mov edx, 0x1
+        0x48, 0x89, 0xe7,                          // mov rdi, rsp
+        0x31, 0xc0,                                // xor eax, eax
+        0x48, 0xc7, 0xc1, 0x30, 0x75, 0x00, 0x00,  // mov rcx, 0x7530
+        0xf3, 0xaa,                                // rep stos BYTE PTR es:[rdi],al
+    });
+  }
+
+  void operator()(decltype(nth::type),
+                  jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({0x80, 0x06, 0x01});  // add BYTE PTR [rsi], 0x1
+  }
+
+  void operator()(decltype(nth::type),
+                  jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({0x80, 0x2e, 0x01});  // sub BYTE PTR [rsi], 0x1
+  }
+
+  void operator()(decltype(nth::type), jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({0x48, 0x8d, 0x76, 0xff});  // lea rsi, [rsi - 1]
+  }
+
+  void operator()(decltype(nth::type), jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({0x48, 0x8d, 0x76, 0x01});  // lea rsi, [rsi + 1]
+  }
+
+  void operator()(decltype(nth::type),
+                  jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({
+        0x48, 0xc7, 0xc0, 0x01, 0x00, 0x00, 0x00,  // mov rax, 0x1
+        0x48, 0xc7, 0xc7, 0x01, 0x00, 0x00, 0x00,  // mov rdi, 0x1
+        0x48, 0xc7, 0xc2, 0x01, 0x00, 0x00, 0x00,  // mov rdx, 0x1
+    });
+    gen.syscall();
+  }
+
+  void operator()(decltype(nth::type), jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({
+        0x48, 0xc7, 0xc0, 0x00, 0x00, 0x00, 0x00,  // mov rax, 0x0
+        0x48, 0xc7, 0xc7, 0x00, 0x00, 0x00, 0x00,  // mov rdi, 0x0
+        0x48, 0xc7, 0xc2, 0x01, 0x00, 0x00, 0x00,  // mov rdx, 0x1
+    });
+    gen.syscall();
+  }
+
+  void operator()(decltype(nth::type), jasmin::x64::FunctionEmitter &gen,
+                  jasmin::LocationMap const &) {
+    gen.write({0x8a, 0x06});  // mov al, BYTE PTR [rsi]
+  }
+};
+
+}  // namespace bf
+
+#endif  // EXAMPLES_BRAINFUCK_X64_CODE_GENERATOR_H
diff --git a/jasmin/compile/x64/BUILD b/jasmin/compile/x64/BUILD
index 8a1e25f..6d1d66f 100644
--- a/jasmin/compile/x64/BUILD
+++ b/jasmin/compile/x64/BUILD
@@ -1,9 +1,9 @@
 package(default_visibility = ["//visibility:private"])
 
 cc_library(
-    name = "code_generator",
-    hdrs = ["code_generator.h"],
-    srcs = ["code_generator.cc"],
+    name = "function_emitter",
+    hdrs = ["function_emitter.h"],
+    srcs = ["function_emitter.cc"],
     visibility = ["//visibility:public"],
     deps = [
         ":location_map",
diff --git a/jasmin/compile/x64/function_emitter.cc b/jasmin/compile/x64/function_emitter.cc
new file mode 100644
index 0000000..a9a2c43
--- /dev/null
+++ b/jasmin/compile/x64/function_emitter.cc
@@ -0,0 +1,74 @@
+#include "jasmin/compile/x64/function_emitter.h"
+
+namespace jasmin::x64 {
+
+void FunctionEmitter::write(std::initializer_list instructions) {
+  fn_->write(instructions);
+}
+
+void FunctionEmitter::push(Register reg) {
+  write({static_cast(static_cast(reg) + 0x50)});
+}
+
+void FunctionEmitter::pop(Register reg) {
+  write({static_cast(static_cast(reg) + 0x58)});
+}
+
+void FunctionEmitter::mov(Register destination, Register source) {
+  write({0x48, 0x89,
+         static_cast(0xc0 + static_cast(destination) +
+                     8 * static_cast(source))});
+}
+
+void FunctionEmitter::syscall() { write({0x0f, 0x05}); }
+
+void FunctionEmitter::ret() { write({0xc3}); }
+
+void FunctionEmitter::emit(SsaFunction const &fn, CompiledFunction &c) {
+  fn_ = &c;
+  push(Register::rbp);
+  mov(Register::rbp, Register::rsp);
+
+  block_starts_.reserve(fn.blocks().size());
+  LocationMap loc_map;
+  for (auto const &block : fn.blocks()) {
+    block_starts_.push_back(fn_->size());
+    for (auto const &inst : block.instructions()) {
+      generators_[metadata_.opcode(inst.op_code())](generator_, *this, loc_map);
+    }
+    switch (block.branch().kind()) {
+      case SsaBranchKind::Return:
+        mov(Register::rsp, Register::rbp);
+        pop(Register::rbp);
+        ret();
+        break;
+      case SsaBranchKind::Conditional: {
+        auto const &cond = block.branch().AsConditional();
+        // TODO: Support arbitrary register choices here.
+        // TODO: Prefer fallthroughs when we can make that happen.
+        write({
+            0x84, 0xc0,                          // test al, al
+            0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,  // jz __
+        });
+        block_jumps_.emplace(fn_->size(), cond.true_block);
+        write({
+            0xe9, 0x00, 0x00, 0x00, 0x00,  // jmp ___
+        });
+        block_jumps_.emplace(fn_->size(), cond.false_block);
+      } break;
+      case SsaBranchKind::Unconditional: {
+        NTH_UNIMPLEMENTED();
+      } break;
+      case SsaBranchKind::Unreachable: break;
+    }
+  }
+
+  for (auto const &[offset, block_number] : block_jumps_) {
+    fn_->write_at(offset - 4,
+                  static_cast(block_starts_[block_number] - offset));
+  }
+
+  fn_ = nullptr;
+}
+
+}  // namespace jasmin::x64
diff --git a/jasmin/compile/x64/function_emitter.h b/jasmin/compile/x64/function_emitter.h
new file mode 100644
index 0000000..50e5c86
--- /dev/null
+++ b/jasmin/compile/x64/function_emitter.h
@@ -0,0 +1,86 @@
+#ifndef JASMIN_COMPILE_X64_FUNCTION_EMITTER_H
+#define JASMIN_COMPILE_X64_FUNCTION_EMITTER_H
+
+#include
+#include
+#include
+
+#include "absl/container/flat_hash_map.h"
+#include "jasmin/compile/compiled_function.h"
+#include "jasmin/compile/x64/location_map.h"
+#include "jasmin/core/instruction.h"
+#include "jasmin/ssa/ssa.h"
+#include "nth/meta/type.h"
+
+namespace jasmin::x64 {
+
+enum class Register : uint8_t {
+  rax = 0,
+  rcx = 1,
+  rdx = 2,
+  rbx = 3,
+  rsp = 4,
+  rbp = 5,
+  rsi = 6,
+  rdi = 7,
+};
+
+struct FunctionEmitter {
+  FunctionEmitter(nth::Type auto instruction_set, auto &generator)
+      : metadata_(Metadata>()),
+        generator_(&generator) {
+    using generator_type = std::remove_reference_t;
+    nth::type_t::instructions.reduce([this](auto... ts) {
+      generators_ = {Generate(ts)...};
+    });
+  }
+
+  void emit(SsaFunction const &fn, CompiledFunction &f);
+
+  void write(std::initializer_list instructions);
+
+  void push(Register reg);
+  void pop(Register reg);
+  void mov(Register destination, Register source);
+  void ret();
+  void syscall();
+
+ private:
+  template
+  static auto Generate(nth::Type auto t)
+      -> void (*)(void *, FunctionEmitter &, LocationMap const &);
+
+  CompiledFunction *fn_ = nullptr;
+  std::vector block_starts_;
+  absl::flat_hash_map block_jumps_;
+  InstructionSetMetadata const &metadata_;
+  void *generator_;
+  std::vector
+      generators_;
+};
+
+// Implementation
+
+template
+auto FunctionEmitter::Generate(nth::Type auto t)
+    -> void (*)(void *, FunctionEmitter &, LocationMap const &) {
+  if constexpr (t == nth::type) {
+    return nullptr;
+  } else if constexpr (t == nth::type) {
+    return nullptr;
+  } else if constexpr (t == nth::type) {
+    return nullptr;
+  } else if constexpr (t == nth::type) {
+    return nullptr;
+  } else if constexpr (t == nth::type) {
+    return nullptr;
+  } else {
+    return +[](void *gen, FunctionEmitter &cg, LocationMap const &map) {
+      (*reinterpret_cast(gen))(decltype(t){}, cg, map);
+    };
+  }
+}
+
+}  // namespace jasmin::x64
+
+#endif  // JASMIN_COMPILE_X64_FUNCTION_EMITTER_H