diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 48812b10b9eacd..9033d0c869cbe7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3733,6 +3733,8 @@ static void RenderDebugOptions(const ToolChain &TC, const Driver &D, if (T.getArch() == llvm::Triple::amdgcn) { CmdArgs.push_back("-disable-O0-optnone"); CmdArgs.push_back("-disable-O0-noinline"); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-amdgpu-spill-cfi-saved-regs"); // -ggdb with AMDGCN does not currently compose with options that // affect the debug info kind. The behavior of commands like `-ggdb // -g` may be surprising (the -g is effectively ignored). diff --git a/clang/test/Driver/amdgpu-debug.cl b/clang/test/Driver/amdgpu-debug.cl index a68c72ce701705..1b51b9a76dc31c 100644 --- a/clang/test/Driver/amdgpu-debug.cl +++ b/clang/test/Driver/amdgpu-debug.cl @@ -6,6 +6,7 @@ // CHECK-SIMPLE-DAG: "-debug-info-kind=line-tables-only" // CHECK-SIMPLE-DAG: "-disable-O0-optnone" // CHECK-SIMPLE-DAG: "-disable-O0-noinline" +// CHECK-SIMPLE-DAG: "-mllvm" "-amdgpu-spill-cfi-saved-regs" // CHECK-SIMPLE-DAG: "-debugger-tuning=gdb" // Check that a debug-related option which does not affect the debug-info-kind @@ -15,6 +16,7 @@ // CHECK-DWARF2-DAG: "-debug-info-kind=line-tables-only" // CHECK-DWARF2-DAG: "-disable-O0-optnone" // CHECK-DWARF2-DAG: "-disable-O0-noinline" +// CHECK-DWARF2-DAG: "-mllvm" "-amdgpu-spill-cfi-saved-regs" // CHECK-DWARF2-DAG: "-debugger-tuning=gdb" // CHECK-DWARF2-DAG: "-dwarf-version=2" @@ -34,6 +36,7 @@ // CHECK-LLDBBEFORE-DAG: "-debug-info-kind=line-tables-only" // CHECK-LLDBBEFORE-DAG: "-disable-O0-optnone" // CHECK-LLDBBEFORE-DAG: "-disable-O0-noinline" +// CHECK-LLDBBEFORE-DAG: "-mllvm" "-amdgpu-spill-cfi-saved-regs" // CHECK-LLDBBEFORE-DAG: "-debugger-tuning=gdb" // RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -ggdb -glldb %s 2>&1 | FileCheck -check-prefix=CHECK-LLDBAFTER %s 
// CHECK-LLDBAFTER: "-cc1" diff --git a/clang/test/Driver/hip-debug.hip b/clang/test/Driver/hip-debug.hip index 0f6627c0628942..21e065c0f95852 100644 --- a/clang/test/Driver/hip-debug.hip +++ b/clang/test/Driver/hip-debug.hip @@ -12,12 +12,15 @@ // CHECK-DAG: "-debug-info-kind=line-tables-only" // CHECK-DAG: "-disable-O0-optnone" // CHECK-DAG: "-disable-O0-noinline" +// CHECK-DAG: "-mllvm" "-amdgpu-spill-cfi-saved-regs" // CHECK-DAG: "-debugger-tuning=gdb" // CHECK-LABEL: clang-offload-bundler // CHECK: {{.*}}clang{{.*}}"-triple" "x86_64-unknown-linux-gnu" // CHECK-NOT: "-disable-O0-optnone" // CHECK-NOT: "-disable-O0-noinline" +// CHECK-NOT: "-amdgpu-spill-cfi-saved-regs" // CHECK-DAG: "-debug-info-kind=limited" // CHECK-DAG: "-debugger-tuning=gdb" // CHECK-NOT: "-disable-O0-optnone" // CHECK-NOT: "-disable-O0-noinline" +// CHECK-NOT: "-amdgpu-spill-cfi-saved-regs" diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 956c67c3e1376c..13f15d1cd76c28 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -418,6 +418,13 @@ HANDLE_DW_AT(0x3e02, LLVM_sysroot, 0, LLVM) HANDLE_DW_AT(0x3e03, LLVM_tag_offset, 0, LLVM) // The missing numbers here are reserved for ptrauth support. HANDLE_DW_AT(0x3e07, LLVM_apinotes, 0, APPLE) +// Heterogeneous Debugging Extension defined at +// https://llvm.org/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.html. +HANDLE_DW_AT(0x3e08, LLVM_active_lane, 0, LLVM) +HANDLE_DW_AT(0x3e09, LLVM_augmentation, 0, LLVM) +HANDLE_DW_AT(0x3e0a, LLVM_lanes, 0, LLVM) +HANDLE_DW_AT(0x3e0b, LLVM_lane_pc, 0, LLVM) +HANDLE_DW_AT(0x3e0c, LLVM_vector_size, 0, LLVM) // Apple extensions. @@ -669,6 +676,23 @@ HANDLE_DW_OP(0xf3, GNU_entry_value, 0, GNU) // Extensions for Fission proposal. 
HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU) HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU) +// Heterogeneous Debugging Extension defined at +// https://llvm.org/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.html. +// These collide with some HP and PGI vendor extensions, but this ambiguity is +// resolved by ensuring CIE augmentation strings and compilation unit +// DW_AT_LLVM_augmentation strings include "[llvm:v0.0]". +HANDLE_DW_OP(0xe1, LLVM_form_aspace_address, 0, LLVM) +HANDLE_DW_OP(0xe2, LLVM_push_lane, 0, LLVM) +HANDLE_DW_OP(0xe3, LLVM_offset, 0, LLVM) +HANDLE_DW_OP(0xe4, LLVM_offset_uconst, 0, LLVM) +HANDLE_DW_OP(0xe5, LLVM_bit_offset, 0, LLVM) +HANDLE_DW_OP(0xe6, LLVM_call_frame_entry_reg, 0, LLVM) +HANDLE_DW_OP(0xe7, LLVM_undefined, 0, LLVM) +HANDLE_DW_OP(0xe8, LLVM_aspace_bregx, 0, LLVM) +HANDLE_DW_OP(0xe9, LLVM_aspace_implicit_pointer, 0, LLVM) +HANDLE_DW_OP(0xea, LLVM_piece_end, 0, LLVM) +HANDLE_DW_OP(0xeb, LLVM_extend, 0, LLVM) +HANDLE_DW_OP(0xec, LLVM_select_bit_piece, 0, LLVM) // DWARF languages. HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF) @@ -895,6 +919,10 @@ HANDLE_DW_CFA_PRED(0x1d, MIPS_advance_loc8, SELECT_MIPS64) HANDLE_DW_CFA_PRED(0x2d, GNU_window_save, SELECT_SPARC) HANDLE_DW_CFA_PRED(0x2d, AARCH64_negate_ra_state, SELECT_AARCH64) HANDLE_DW_CFA_PRED(0x2e, GNU_args_size, SELECT_X86) +// Heterogeneous Debugging Extension defined at +// https://llvm.org/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.html +HANDLE_DW_CFA(0x30, LLVM_def_aspace_cfa) +HANDLE_DW_CFA(0x31, LLVM_def_aspace_cfa_sf) // Apple Objective-C Property Attributes. // Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! 
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index 1eb22b2eae8030..7f4bc45445d4cd 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -98,6 +98,15 @@ class CFIProgram { Instructions.back().Ops.push_back(Operand2); } + /// Add a new instruction that has three operands. + void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2, + uint64_t Operand3) { + Instructions.push_back(Instruction(Opcode)); + Instructions.back().Ops.push_back(Operand1); + Instructions.back().Ops.push_back(Operand2); + Instructions.back().Ops.push_back(Operand3); + } + /// Types of operands to CFI instructions /// In DWARF, this type is implicitly tied to a CFI instruction opcode and /// thus this type doesn't need to be explictly written to the file (this is @@ -113,12 +122,13 @@ class CFIProgram { OT_SignedFactDataOffset, OT_UnsignedFactDataOffset, OT_Register, + OT_AddressSpace, OT_Expression }; /// Retrieve the array describing the types of operands according to the enum /// above. This is indexed by opcode. - static ArrayRef getOperandTypes(); + static ArrayRef getOperandTypes(); /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. void printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 18f1637deb5024..cfb9c6a2b44e5f 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -353,6 +353,11 @@ class MCAsmInfo { /// false. bool SupportsDebugInformation = false; + /// True if target supports emitting .debug_frame unwind information when + /// ExceptionsType = ExceptionHandling::None and debug info is requested. + /// Defaults to false. + bool SupportsDebugUnwindInformation = false; + /// Exception handling format for the target. Defaults to None. 
ExceptionHandling ExceptionsType = ExceptionHandling::None; @@ -379,6 +384,11 @@ class MCAsmInfo { /// location is allowed. bool SupportsExtendedDwarfLocDirective = true; + /// True if the target supports the extensions defined at + /// https://llvm.org/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.html. + /// Defaults to false. + bool SupportsHeterogeneousDebuggingExtensions = false; + //===--- Prologue State ----------------------------------------------===// std::vector InitialFrameState; @@ -609,6 +619,10 @@ class MCAsmInfo { bool doesSupportDebugInformation() const { return SupportsDebugInformation; } + bool doesSupportDebugUnwindInformation() const { + return SupportsDebugUnwindInformation; + } + bool doesSupportExceptionHandling() const { return ExceptionsType != ExceptionHandling::None; } @@ -643,6 +657,9 @@ class MCAsmInfo { bool supportsExtendedDwarfLocDirective() const { return SupportsExtendedDwarfLocDirective; } + bool supportsHeterogeneousDebuggingExtensions() const { + return SupportsHeterogeneousDebuggingExtensions; + } void addInitialFrameState(const MCCFIInstruction &Inst); diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index 2950a7b0b13a93..c8a08790487ce7 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -444,6 +444,7 @@ class MCCFIInstruction { OpRememberState, OpRestoreState, OpOffset, + OpLLVMDefAspaceCfa, OpDefCfaRegister, OpDefCfaOffset, OpDefCfa, @@ -466,12 +467,13 @@ class MCCFIInstruction { int Offset; unsigned Register2; }; + unsigned AddressSpace; std::vector Values; MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V) : Operation(Op), Label(L), Register(R), Offset(O), Values(V.begin(), V.end()) { - assert(Op != OpRegister); + assert(Op != OpRegister && Op != OpLLVMDefAspaceCfa); } MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2) @@ -479,6 +481,11 @@ class MCCFIInstruction { assert(Op == OpRegister); } + MCCFIInstruction(OpType Op, 
MCSymbol *L, unsigned R, int O, unsigned AS) + : Operation(Op), Label(L), Register(R), Offset(O), AddressSpace(AS) { + assert(Op == OpLLVMDefAspaceCfa); + } + public: /// .cfi_def_cfa defines a rule for computing CFA as: take address from /// Register and add Offset to it. @@ -507,6 +514,17 @@ class MCCFIInstruction { return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, ""); } + // FIXME: Update the remaining docs to use the new proposal wording. + /// .cfi_llvm_def_aspace_cfa defines the rule for computing the CFA to + /// be the result of evaluating the DWARF operation expression + /// `DW_OP_constu AS; DW_OP_aspace_bregx R, B` as a location description. + static MCCFIInstruction createLLVMDefAspaceCfa(MCSymbol *L, unsigned Register, + int Offset, + unsigned AddressSpace) { + return MCCFIInstruction(OpLLVMDefAspaceCfa, L, Register, -Offset, + AddressSpace); + } + /// .cfi_offset Previous value of Register is saved at offset Offset /// from CFA. static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, @@ -586,7 +604,8 @@ class MCCFIInstruction { assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRestore || Operation == OpUndefined || Operation == OpSameValue || Operation == OpDefCfaRegister || - Operation == OpRelOffset || Operation == OpRegister); + Operation == OpRelOffset || Operation == OpRegister || + Operation == OpLLVMDefAspaceCfa); return Register; } @@ -595,10 +614,16 @@ class MCCFIInstruction { return Register2; } + unsigned getAddressSpace() const { + assert(Operation == OpLLVMDefAspaceCfa); + return AddressSpace; + } + int getOffset() const { assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRelOffset || Operation == OpDefCfaOffset || - Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize); + Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize || + Operation == OpLLVMDefAspaceCfa); return Offset; } diff --git a/llvm/include/llvm/MC/MCStreamer.h 
b/llvm/include/llvm/MC/MCStreamer.h index 4a34e1497ccf72..00f27319b6ac9c 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -942,6 +942,8 @@ class MCStreamer { virtual void emitCFIDefCfa(int64_t Register, int64_t Offset); virtual void emitCFIDefCfaOffset(int64_t Offset); virtual void emitCFIDefCfaRegister(int64_t Register); + virtual void emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset, + int64_t AddressSpace); virtual void emitCFIOffset(int64_t Register, int64_t Offset); virtual void emitCFIPersonality(const MCSymbol *Sym, unsigned Encoding); virtual void emitCFILsda(const MCSymbol *Sym, unsigned Encoding); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b7bd6c9ad9f58b..72569068458638 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -14,6 +14,7 @@ #include "CodeViewDebug.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "UnwindStreamer.h" #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" @@ -139,6 +140,8 @@ static const char *const DWARFGroupName = "dwarf"; static const char *const DWARFGroupDescription = "DWARF Emission"; static const char *const DbgTimerName = "emit"; static const char *const DbgTimerDescription = "Debug Info Emission"; +static const char *const UnwindTimerName = "write_unwind"; +static const char *const UnwindTimerDescription = "DWARF Unwind Writer"; static const char *const EHTimerName = "write_exception"; static const char *const EHTimerDescription = "DWARF Exception Writer"; static const char *const CFGuardName = "Control Flow Guard"; @@ -328,6 +331,16 @@ bool AsmPrinter::doInitialization(Module &M) { } } + if (MMI->hasDebugInfo() && + MAI->getExceptionHandlingType() == ExceptionHandling::None && + MAI->doesSupportDebugUnwindInformation()) { + isCFIMoveForDebugging = true; + Handlers.emplace_back(std::make_unique(this), + 
UnwindTimerName, UnwindTimerDescription, + DWARFGroupName, DWARFGroupDescription); + return false; + } + switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: @@ -995,7 +1008,8 @@ bool AsmPrinter::needsSEHMoves() { void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType(); - if (ExceptionHandlingType != ExceptionHandling::DwarfCFI && + if (!MAI->doesSupportDebugUnwindInformation() && + ExceptionHandlingType != ExceptionHandling::DwarfCFI && ExceptionHandlingType != ExceptionHandling::ARM) return; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 2a8ab091bf9a7b..a8d6d23a7d8b0f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -221,6 +221,10 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpDefCfaRegister: OutStreamer->emitCFIDefCfaRegister(Inst.getRegister()); break; + case MCCFIInstruction::OpLLVMDefAspaceCfa: + OutStreamer->emitCFILLVMDefAspaceCfa(Inst.getRegister(), Inst.getOffset(), + Inst.getAddressSpace()); + break; case MCCFIInstruction::OpOffset: OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset()); break; diff --git a/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt b/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt index 1445bf4fbf2003..eae778ce577fa7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_component_library(LLVMAsmPrinter WinException.cpp CodeViewDebug.cpp WasmException.cpp + UnwindStreamer.cpp DEPENDS intrinsics_gen diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e75bf95dd196a7..0ca4925b201dbb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ 
b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -375,6 +375,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) UseLocSection = !TT.isNVPTX(); HasAppleExtensionAttributes = tuneForLLDB(); + HasHeterogeneousExtensionAttributes = + Asm->MAI->supportsHeterogeneousDebuggingExtensions(); // Handle split DWARF. HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty(); @@ -972,6 +974,10 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, dwarf::DW_FORM_data1, RVer); } + if (useHeterogeneousExtensionAttributes()) { + NewCU.addString(Die, dwarf::DW_AT_LLVM_augmentation, "[llvm:v0.0]"); + } + if (DIUnit->getDWOId()) { // This CU is either a clang module DWO or a skeleton CU. NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index e2c6c85dfb997e..6bd4570f1b1fc0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -379,6 +379,9 @@ class DwarfDebug : public DebugHandlerBase { AccelTableKind TheAccelTableKind; bool HasAppleExtensionAttributes; bool HasSplitDwarf; + // Enables extensions defined at + // https://llvm.org/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.html + bool HasHeterogeneousExtensionAttributes; /// Whether to generate the DWARF v5 string offsets table. /// It consists of a series of contributions, each preceded by a header. @@ -702,6 +705,13 @@ class DwarfDebug : public DebugHandlerBase { return HasAppleExtensionAttributes; } + /// Returns whether extensions defined at + /// https://llvm.org/docs/AMDGPUDwarfProposalForHeterogeneousDebugging.html + /// are enabled. + bool useHeterogeneousExtensionAttributes() const { + return HasHeterogeneousExtensionAttributes; + } + /// Returns whether or not to change the current debug info for the /// split dwarf proposal support. 
bool useSplitDwarf() const { return HasSplitDwarf; } diff --git a/llvm/lib/CodeGen/AsmPrinter/UnwindStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/UnwindStreamer.cpp new file mode 100644 index 00000000000000..fd6df30e9f18c3 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/UnwindStreamer.cpp @@ -0,0 +1,38 @@ +//===- CodeGen/AsmPrinter/UnwindStreamer.cpp - Unwind Directive Streamer --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing unwind info into assembly files. +// +//===----------------------------------------------------------------------===// + +#include "UnwindStreamer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +namespace llvm { +class MachineFunction; +} // end namespace llvm + +UnwindStreamer::UnwindStreamer(AsmPrinter *A) : Asm(A) {} + +UnwindStreamer::~UnwindStreamer() = default; + +void UnwindStreamer::beginFunction(const MachineFunction *MF) { + assert(Asm->needsCFIMoves() == AsmPrinter::CFI_M_Debug); + if (!HasEmittedDebugFrame) { + Asm->OutStreamer->emitCFISections(false, true); + HasEmittedDebugFrame = true; + } + Asm->OutStreamer->emitCFIStartProc(/*IsSimple=*/false); +} + +void UnwindStreamer::endFunction(const MachineFunction *MF) { + Asm->OutStreamer->emitCFIEndProc(); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/UnwindStreamer.h b/llvm/lib/CodeGen/AsmPrinter/UnwindStreamer.h new file mode 100644 index 00000000000000..0b76eb565b8693 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/UnwindStreamer.h @@ -0,0 +1,50 @@ +//===- UnwindStreamer.h - Unwind Directive Streamer -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing unwind info into assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_UNWINDSTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_UNWINDSTREAMER_H + +#include "llvm/CodeGen/AsmPrinterHandler.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class AsmPrinter; +class MachineInstr; +class MCSymbol; + +/// Emits unwind info directives. +class LLVM_LIBRARY_VISIBILITY UnwindStreamer : public AsmPrinterHandler { +protected: + /// Target of directive emission. + AsmPrinter *Asm; + + /// Per-module flag to indicate if .debug_frame has been emitted yet. + bool HasEmittedDebugFrame = false; + +public: + UnwindStreamer(AsmPrinter *A); + ~UnwindStreamer() override; + + // Unused. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + void endModule() override {} + void beginInstruction(const MachineInstr *MI) override {} + void endInstruction() override {} + + void beginFunction(const MachineFunction *MF) override; + void endFunction(const MachineFunction *MF) override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_UNWINDSTREAMER_H diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index ef548c84d3c005..295670904f593e 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -181,6 +181,14 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { SetRegister = CFI.getRegister(); SetOffset = CFI.getOffset(); break; + case MCCFIInstruction::OpLLVMDefAspaceCfa: + // TODO: Add support for handling cfi_def_aspace_cfa. 
+#ifndef NDEBUG + report_fatal_error( + "Support for cfi_llvm_def_aspace_cfa not implemented! Value of CFA " + "may be incorrect!\n"); +#endif + break; case MCCFIInstruction::OpRememberState: // TODO: Add support for handling cfi_remember_state. #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index e4852d3069418d..e215328297cb4f 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -215,9 +215,9 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("contract", MIToken::kw_contract) .Case("afn", MIToken::kw_afn) .Case("reassoc", MIToken::kw_reassoc) - .Case("nuw" , MIToken::kw_nuw) - .Case("nsw" , MIToken::kw_nsw) - .Case("exact" , MIToken::kw_exact) + .Case("nuw", MIToken::kw_nuw) + .Case("nsw", MIToken::kw_nsw) + .Case("exact", MIToken::kw_exact) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) @@ -228,13 +228,15 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) .Case("escape", MIToken::kw_cfi_escape) .Case("def_cfa", MIToken::kw_cfi_def_cfa) + .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa) .Case("remember_state", MIToken::kw_cfi_remember_state) .Case("restore", MIToken::kw_cfi_restore) .Case("restore_state", MIToken::kw_cfi_restore_state) .Case("undefined", MIToken::kw_cfi_undefined) .Case("register", MIToken::kw_cfi_register) .Case("window_save", MIToken::kw_cfi_window_save) - .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) + .Case("negate_ra_sign_state", + MIToken::kw_cfi_aarch64_negate_ra_sign_state) .Case("blockaddress", MIToken::kw_blockaddress) .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h 
b/llvm/lib/CodeGen/MIRParser/MILexer.h index c804e1604f7b3b..f8440b8eed49e4 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -83,6 +83,7 @@ struct MIToken { kw_cfi_adjust_cfa_offset, kw_cfi_escape, kw_cfi_def_cfa, + kw_cfi_llvm_def_aspace_cfa, kw_cfi_register, kw_cfi_remember_state, kw_cfi_restore, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 68de9f49fe4885..90813bcd7d6ca6 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -475,6 +475,7 @@ class MIParser { bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); bool parseCFIRegister(Register &Reg); + bool parseCFIAddressSpace(unsigned &AddressSpace); bool parseCFIEscapeValues(std::string& Values); bool parseCFIOperand(MachineOperand &Dest); bool parseIRBlock(BasicBlock *&BB, const Function &F); @@ -2189,6 +2190,16 @@ bool MIParser::parseCFIRegister(Register &Reg) { return false; } +bool MIParser::parseCFIAddressSpace(unsigned &AddressSpace) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected a cfi address space"); + if (Token.integerValue().isSigned()) + return error("expected an unsigned integer (cfi address space)"); + AddressSpace = Token.integerValue().getZExtValue(); + lex(); + return false; +} + bool MIParser::parseCFIEscapeValues(std::string &Values) { do { if (Token.isNot(MIToken::HexLiteral)) @@ -2209,6 +2220,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { lex(); int Offset; Register Reg; + unsigned AddressSpace; unsigned CFIIndex; switch (Kind) { case MIToken::kw_cfi_same_value: @@ -2257,6 +2269,15 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); break; + case MIToken::kw_cfi_llvm_def_aspace_cfa: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset) || 
expectAndConsume(MIToken::comma) || + parseCFIAddressSpace(AddressSpace)) + return true; + // NB: MCCFIInstruction::createDefCfa negates the offset. + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMDefAspaceCfa( + nullptr, Reg, -Offset, AddressSpace)); + break; case MIToken::kw_cfi_remember_state: CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); break; @@ -2604,6 +2625,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, case MIToken::kw_cfi_adjust_cfa_offset: case MIToken::kw_cfi_escape: case MIToken::kw_cfi_def_cfa: + case MIToken::kw_cfi_llvm_def_aspace_cfa: case MIToken::kw_cfi_register: case MIToken::kw_cfi_remember_state: case MIToken::kw_cfi_restore: diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 5e4d5edb9ce682..7ff811c0d90078 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -642,6 +642,14 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, printCFIRegister(CFI.getRegister(), OS, TRI); OS << ", " << CFI.getOffset(); break; + case MCCFIInstruction::OpLLVMDefAspaceCfa: + OS << "llvm_def_aspace_cfa "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + OS << ", " << CFI.getAddressSpace(); + break; case MCCFIInstruction::OpRelOffset: OS << "rel_offset "; if (MCSymbol *Label = CFI.getLabel()) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 7c1280dd19442c..0f05c795ffb2ad 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -101,6 +101,16 @@ Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset, // Operands: SLEB128 addInstruction(Opcode, Data.getSLEB128(Offset)); break; + case DW_CFA_LLVM_def_aspace_cfa: + case DW_CFA_LLVM_def_aspace_cfa_sf: { + 
auto RegNum = Data.getULEB128(Offset); + auto CfaOffset = Opcode == DW_CFA_LLVM_def_aspace_cfa + ? Data.getULEB128(Offset) + : Data.getSLEB128(Offset); + auto AddressSpace = Data.getULEB128(Offset); + addInstruction(Opcode, RegNum, CfaOffset, AddressSpace); + break; + } case DW_CFA_offset_extended: case DW_CFA_register: case DW_CFA_def_cfa: @@ -160,19 +170,22 @@ namespace { } // end anonymous namespace -ArrayRef CFIProgram::getOperandTypes() { - static OperandType OpTypes[DW_CFA_restore+1][2]; +ArrayRef CFIProgram::getOperandTypes() { + static OperandType OpTypes[DW_CFA_restore + 1][3]; static bool Initialized = false; if (Initialized) { - return ArrayRef(&OpTypes[0], DW_CFA_restore+1); + return ArrayRef(&OpTypes[0], DW_CFA_restore + 1); } Initialized = true; -#define DECLARE_OP2(OP, OPTYPE0, OPTYPE1) \ - do { \ - OpTypes[OP][0] = OPTYPE0; \ - OpTypes[OP][1] = OPTYPE1; \ +#define DECLARE_OP3(OP, OPTYPE0, OPTYPE1, OPTYPE2) \ + do { \ + OpTypes[OP][0] = OPTYPE0; \ + OpTypes[OP][1] = OPTYPE1; \ + OpTypes[OP][2] = OPTYPE2; \ } while (false) +#define DECLARE_OP2(OP, OPTYPE0, OPTYPE1) \ + DECLARE_OP3(OP, OPTYPE0, OPTYPE1, OT_None) #define DECLARE_OP1(OP, OPTYPE0) DECLARE_OP2(OP, OPTYPE0, OT_None) #define DECLARE_OP0(OP) DECLARE_OP1(OP, OT_None) @@ -185,6 +198,10 @@ ArrayRef CFIProgram::getOperandTypes() { DECLARE_OP2(DW_CFA_def_cfa, OT_Register, OT_Offset); DECLARE_OP2(DW_CFA_def_cfa_sf, OT_Register, OT_SignedFactDataOffset); DECLARE_OP1(DW_CFA_def_cfa_register, OT_Register); + DECLARE_OP3(DW_CFA_LLVM_def_aspace_cfa, OT_Register, OT_Offset, + OT_AddressSpace); + DECLARE_OP3(DW_CFA_LLVM_def_aspace_cfa_sf, OT_Register, + OT_SignedFactDataOffset, OT_AddressSpace); DECLARE_OP1(DW_CFA_def_cfa_offset, OT_Offset); DECLARE_OP1(DW_CFA_def_cfa_offset_sf, OT_SignedFactDataOffset); DECLARE_OP1(DW_CFA_def_cfa_expression, OT_Expression); @@ -210,14 +227,14 @@ ArrayRef CFIProgram::getOperandTypes() { #undef DECLARE_OP1 #undef DECLARE_OP2 - return ArrayRef(&OpTypes[0], 
DW_CFA_restore+1); + return ArrayRef(&OpTypes[0], DW_CFA_restore + 1); } /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, const Instruction &Instr, unsigned OperandIdx, uint64_t Operand) const { - assert(OperandIdx < 2); + assert(OperandIdx < 3); uint8_t Opcode = Instr.Opcode; OperandType Type = getOperandTypes()[Opcode][OperandIdx]; @@ -263,6 +280,9 @@ void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, case OT_Register: OS << format(" reg%" PRId64, Operand); break; + case OT_AddressSpace: + OS << format(" as%" PRId64, Operand); + break; case OT_Expression: assert(Instr.Expression && "missing DWARFExpression object"); OS << " "; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp index 0a658034b67b8a..8c57eadb7babee 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -100,6 +100,21 @@ static DescVector getDescriptions() { Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB); Descriptions[DW_OP_GNU_const_index] = Desc(Op::Dwarf4, Op::SizeLEB); Descriptions[DW_OP_GNU_entry_value] = Desc(Op::Dwarf4, Op::SizeLEB); + Descriptions[DW_OP_LLVM_form_aspace_address] = Desc(Op::Dwarf4); + Descriptions[DW_OP_LLVM_push_lane] = Desc(Op::Dwarf4); + Descriptions[DW_OP_LLVM_offset] = Desc(Op::Dwarf4); + Descriptions[DW_OP_LLVM_offset_uconst] = Desc(Op::Dwarf4, Op::SizeLEB); + Descriptions[DW_OP_LLVM_bit_offset] = Desc(Op::Dwarf4); + Descriptions[DW_OP_LLVM_call_frame_entry_reg] = Desc(Op::Dwarf4, Op::SizeLEB); + Descriptions[DW_OP_LLVM_undefined] = Desc(Op::Dwarf4); + Descriptions[DW_OP_LLVM_aspace_bregx] = + Desc(Op::Dwarf4, Op::SizeLEB, Op::SizeLEB); + Descriptions[DW_OP_LLVM_aspace_implicit_pointer] = + Desc(Op::Dwarf4, Op::SizeRefAddr, Op::SignedSizeLEB); + Descriptions[DW_OP_LLVM_piece_end] = Desc(Op::Dwarf4); + 
Descriptions[DW_OP_LLVM_extend] = Desc(Op::Dwarf4, Op::SizeLEB, Op::SizeLEB); + Descriptions[DW_OP_LLVM_select_bit_piece] = + Desc(Op::Dwarf4, Op::SizeLEB, Op::SizeLEB); Descriptions[DW_OP_convert] = Desc(Op::Dwarf5, Op::BaseTypeRef); Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB); @@ -213,8 +228,10 @@ static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, uint8_t Opcode, unsigned OpNum = 0; if (Opcode == DW_OP_bregx || Opcode == DW_OP_regx || - Opcode == DW_OP_regval_type) + Opcode == DW_OP_regval_type || Opcode == DW_OP_LLVM_aspace_bregx) DwarfRegNum = Operands[OpNum++]; + else if (Opcode == DW_OP_LLVM_call_frame_entry_reg) + DwarfRegNum = Operands[OpNum]; else if (Opcode >= DW_OP_breg0 && Opcode < DW_OP_bregx) DwarfRegNum = Opcode - DW_OP_breg0; else @@ -223,7 +240,7 @@ static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, uint8_t Opcode, if (Optional LLVMRegNum = MRI->getLLVMRegNum(DwarfRegNum, isEH)) { if (const char *RegName = MRI->getName(*LLVMRegNum)) { if ((Opcode >= DW_OP_breg0 && Opcode <= DW_OP_breg31) || - Opcode == DW_OP_bregx) + Opcode == DW_OP_bregx || Opcode == DW_OP_LLVM_aspace_bregx) OS << format(" %s%+" PRId64, RegName, Operands[OpNum]); else OS << ' ' << RegName; @@ -254,7 +271,9 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, if ((Opcode >= DW_OP_breg0 && Opcode <= DW_OP_breg31) || (Opcode >= DW_OP_reg0 && Opcode <= DW_OP_reg31) || Opcode == DW_OP_bregx || Opcode == DW_OP_regx || - Opcode == DW_OP_regval_type) + Opcode == DW_OP_regval_type || + Opcode == DW_OP_LLVM_call_frame_entry_reg || + Opcode == DW_OP_LLVM_aspace_bregx) if (prettyPrintRegisterOp(U, OS, Opcode, Operands, RegInfo, isEH)) return true; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 3792655d0bf293..ddf6ab15b5b666 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -295,6 +295,8 @@ class MCAsmStreamer final : public MCStreamer { void emitCFIDefCfa(int64_t Register, 
int64_t Offset) override; void emitCFIDefCfaOffset(int64_t Offset) override; void emitCFIDefCfaRegister(int64_t Register) override; + void emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset, + int64_t AddressSpace) override; void emitCFIOffset(int64_t Register, int64_t Offset) override; void emitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) override; void emitCFILsda(const MCSymbol *Sym, unsigned Encoding) override; @@ -1580,6 +1582,19 @@ void MCAsmStreamer::emitCFIDefCfaOffset(int64_t Offset) { EmitEOL(); } +void MCAsmStreamer::emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset, + int64_t AddressSpace) { + MCStreamer::emitCFILLVMDefAspaceCfa(Register, Offset, AddressSpace); + OS << "\t.cfi_llvm_def_aspace_cfa "; + EmitRegisterName(Register); + OS << ", " << Offset; + OS << ", " << AddressSpace; + // FIXME: When going from ASM->ASM a lot of these directives seem to add too + // much whitespace; maybe something isn't consuming whitespace eagerly enough + // while parsing? 
+ EmitEOL(); +} + static void PrintCFIEscape(llvm::formatted_raw_ostream &OS, StringRef Values) { OS << "\t.cfi_escape "; if (!Values.empty()) { diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 3c51020056bc54..959d8c4ac637bc 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1424,6 +1424,17 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { return; } + case MCCFIInstruction::OpLLVMDefAspaceCfa: { + unsigned Reg = Instr.getRegister(); + unsigned AddressSpace = Instr.getAddressSpace(); + Streamer.emitIntValue(dwarf::DW_CFA_LLVM_def_aspace_cfa, 1); + Streamer.emitULEB128IntValue(Reg); + CFAOffset = -Instr.getOffset(); + Streamer.emitULEB128IntValue(CFAOffset); + Streamer.emitULEB128IntValue(AddressSpace); + + return; + } case MCCFIInstruction::OpOffset: case MCCFIInstruction::OpRelOffset: { const bool IsRelative = @@ -1592,6 +1603,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) { MCContext &context = Streamer.getContext(); const MCRegisterInfo *MRI = context.getRegisterInfo(); const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); + const MCAsmInfo *MAI = context.getAsmInfo(); MCSymbol *sectionStart = context.createTempSymbol(); Streamer.emitLabel(sectionStart); @@ -1611,8 +1623,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) { uint8_t CIEVersion = getCIEVersion(IsEH, context.getDwarfVersion()); Streamer.emitInt8(CIEVersion); + SmallString<8> Augmentation; if (IsEH) { - SmallString<8> Augmentation; Augmentation += "z"; if (Frame.Personality) Augmentation += "P"; @@ -1623,8 +1635,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) { Augmentation += "S"; if (Frame.IsBKeyFrame) Augmentation += "B"; - Streamer.emitBytes(Augmentation); } + if (MAI->supportsHeterogeneousDebuggingExtensions()) + Augmentation += "[llvm:v0.0]"; + Streamer.emitBytes(Augmentation); Streamer.emitInt8(0); if (CIEVersion >= 4) { @@ -1688,7 
+1702,6 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) { // Initial Instructions - const MCAsmInfo *MAI = context.getAsmInfo(); if (!Frame.IsSimple) { const std::vector &Instructions = MAI->getInitialFrameState(); diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index c5c44a7861f004..bf7ae9b8f1a23d 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -481,6 +481,7 @@ class AsmParser : public MCAsmParser { DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER, + DK_CFI_LLVM_DEF_ASPACE_CFA, DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, @@ -580,6 +581,7 @@ class AsmParser : public MCAsmParser { bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc); bool parseDirectiveCFIAdjustCfaOffset(); bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc); + bool parseDirectiveCFILLVMDefAspaceCfa(SMLoc DirectiveLoc); bool parseDirectiveCFIOffset(SMLoc DirectiveLoc); bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc); bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality); @@ -2100,6 +2102,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveCFIAdjustCfaOffset(); case DK_CFI_DEF_CFA_REGISTER: return parseDirectiveCFIDefCfaRegister(IDLoc); + case DK_CFI_LLVM_DEF_ASPACE_CFA: + return parseDirectiveCFILLVMDefAspaceCfa(IDLoc); case DK_CFI_OFFSET: return parseDirectiveCFIOffset(IDLoc); case DK_CFI_REL_OFFSET: @@ -4176,6 +4180,21 @@ bool AsmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) { return false; } +/// parseDirectiveCFILLVMDefAspaceCfa +/// ::= .cfi_llvm_def_aspace_cfa register, offset, address_space +bool AsmParser::parseDirectiveCFILLVMDefAspaceCfa(SMLoc DirectiveLoc) { + int64_t Register = 0, Offset = 0, AddressSpace = 0; + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || + parseToken(AsmToken::Comma, "unexpected token in directive") || + parseAbsoluteExpression(Offset) || + 
parseToken(AsmToken::Comma, "unexpected token in directive") || + parseAbsoluteExpression(AddressSpace)) + return true; + + getStreamer().emitCFILLVMDefAspaceCfa(Register, Offset, AddressSpace); + return false; +} + /// parseDirectiveCFIOffset /// ::= .cfi_offset register, offset bool AsmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) { @@ -5445,6 +5464,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET; DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET; DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER; + DirectiveKindMap[".cfi_llvm_def_aspace_cfa"] = DK_CFI_LLVM_DEF_ASPACE_CFA; DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET; DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET; DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY; diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index af5249dd551956..a4cc149d3020ba 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -500,6 +500,18 @@ void MCStreamer::emitCFIDefCfaRegister(int64_t Register) { CurFrame->CurrentCfaRegister = static_cast(Register); } +void MCStreamer::emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset, + int64_t AddressSpace) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMDefAspaceCfa( + Label, Register, Offset, AddressSpace); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(Instruction); + CurFrame->CurrentCfaRegister = static_cast(Register); +} + void MCStreamer::emitCFIOffset(int64_t Register, int64_t Offset) { MCSymbol *Label = emitCFILabel(); MCCFIInstruction Instruction = diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 9ac1b1badee1cd..4dd4996d5e1ad7 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -44,6 +44,9 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, //===--- Dwarf Emission Directives -----------------------------------===// SupportsDebugInformation = true; DwarfRegNumForCFI = true; + SupportsHeterogeneousDebuggingExtensions = true; + if (TT.getArch() == Triple::amdgcn) + SupportsDebugUnwindInformation = true; } bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const { diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 93f79f682058f4..675b875b9f64b7 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -13,11 +13,15 @@ #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/LEB128.h" using namespace llvm; @@ -332,6 +336,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, const SIInstrInfo *TII = ST.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); + const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); assert(MFI->isEntryFunction()); @@ -379,6 +384,24 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, DebugLoc DL; MachineBasicBlock::iterator I = MBB.begin(); + // On entry the SP/FP are not set up, so we need to define the CFA in terms + // of a literal location expression. 
+ static const char CFAEncodedInst[] = { + dwarf::DW_CFA_def_cfa_expression, + 3, // length + static_cast(dwarf::DW_OP_lit0), + static_cast( + dwarf::DW_OP_lit6), // DW_ASPACE_AMDGPU_private_wave FIXME: + // should be defined elsewhere + static_cast(dwarf::DW_OP_LLVM_form_aspace_address)}; + buildCFI(MBB, I, DL, + MCCFIInstruction::createEscape( + nullptr, StringRef(CFAEncodedInst, sizeof(CFAEncodedInst)))); + // Unwinding halts when the return address (PC) is undefined. + buildCFI(MBB, I, DL, + MCCFIInstruction::createUndefined( + nullptr, MCRI->getDwarfRegNum(AMDGPU::PC_REG, false))); + if (MF.getFrameInfo().hasCalls()) { Register SPReg = MFI->getStackPtrOffsetReg(); assert(SPReg != AMDGPU::SP_REG); @@ -581,6 +604,64 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { llvm_unreachable("Invalid TargetStackID::Value"); } +void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL) const { + const MachineFunction &MF = *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); + Register StackPtrReg = + MF.getInfo()->getStackPtrOffsetReg(); + + // DW_ASPACE_AMDGPU_private_wave FIXME: should be defined elsewhere + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createLLVMDefAspaceCfa( + nullptr, MCRI->getDwarfRegNum(StackPtrReg, false), 0, 6)); + + static const char PCEncodedInst[] = { + dwarf::DW_CFA_expression, + 16, // PC 64 + 8, // length + static_cast(dwarf::DW_OP_regx), + 62, // SGPR30 + static_cast(dwarf::DW_OP_piece), + 4, // 32 bits + static_cast(dwarf::DW_OP_regx), + 63, // SGPR31 + static_cast(dwarf::DW_OP_piece), + 4 // 32 bits + }; + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createEscape( + nullptr, StringRef(PCEncodedInst, sizeof(PCEncodedInst)))); + + static const MCPhysReg CallerSavedRegs[] = { + AMDGPU::VGPR0, AMDGPU::VGPR1, AMDGPU::VGPR2, AMDGPU::VGPR3, + AMDGPU::VGPR4, 
AMDGPU::VGPR5, AMDGPU::VGPR6, AMDGPU::VGPR7, + AMDGPU::VGPR8, AMDGPU::VGPR9, AMDGPU::VGPR10, AMDGPU::VGPR11, + AMDGPU::VGPR12, AMDGPU::VGPR13, AMDGPU::VGPR14, AMDGPU::VGPR15, + AMDGPU::VGPR16, AMDGPU::VGPR17, AMDGPU::VGPR18, AMDGPU::VGPR19, + AMDGPU::VGPR20, AMDGPU::VGPR21, AMDGPU::VGPR22, AMDGPU::VGPR23, + AMDGPU::VGPR24, AMDGPU::VGPR25, AMDGPU::VGPR26, AMDGPU::VGPR27, + AMDGPU::VGPR28, AMDGPU::VGPR29, AMDGPU::VGPR30, AMDGPU::VGPR31, + AMDGPU::SGPR0, AMDGPU::SGPR1, AMDGPU::SGPR2, AMDGPU::SGPR3, + AMDGPU::SGPR4, AMDGPU::SGPR5, AMDGPU::SGPR6, AMDGPU::SGPR7, + AMDGPU::SGPR8, AMDGPU::SGPR9, AMDGPU::SGPR10, AMDGPU::SGPR11, + AMDGPU::SGPR12, AMDGPU::SGPR13, AMDGPU::SGPR14, AMDGPU::SGPR15, + AMDGPU::SGPR16, AMDGPU::SGPR17, AMDGPU::SGPR18, AMDGPU::SGPR19, + AMDGPU::SGPR20, AMDGPU::SGPR21, AMDGPU::SGPR22, AMDGPU::SGPR23, + AMDGPU::SGPR24, AMDGPU::SGPR25, AMDGPU::SGPR26, AMDGPU::SGPR27, + AMDGPU::SGPR28, AMDGPU::SGPR29, AMDGPU::SGPR30, AMDGPU::SGPR31, + AMDGPU::NoRegister}; + for (int I = 0; CallerSavedRegs[I]; ++I) { + if (!MRI.isPhysRegModified(CallerSavedRegs[I])) + continue; + MCRegister DwarfReg = MCRI->getDwarfRegNum(CallerSavedRegs[I], false); + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createUndefined(nullptr, DwarfReg)); + } +}; + void SIFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { SIMachineFunctionInfo *FuncInfo = MF.getInfo(); @@ -594,6 +675,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); + const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); unsigned FramePtrReg = FuncInfo->getFrameOffsetReg(); @@ -609,11 +691,56 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // turn on all lanes before doing the spill to memory. 
unsigned ScratchExecCopy = AMDGPU::NoRegister; + emitPrologueEntryCFI(MBB, MBBI, DL); + // Emit the copy if we need an FP, and are using a free SGPR to save it. if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) { BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy) .addReg(FramePtrReg) .setMIFlag(MachineInstr::FrameSetup); + buildCFI( + MBB, MBBI, DL, + MCCFIInstruction::createRegister( + nullptr, MCRI->getDwarfRegNum(FramePtrReg, false), + MCRI->getDwarfRegNum(FuncInfo->SGPRForFPSaveRestoreCopy, false))); + } + + if (TRI.isCFISavedRegsSpillEnabled()) { + MCRegister ReturnAddressReg = TRI.getReturnAddressReg(MF); + ArrayRef ReturnAddressSpill = + FuncInfo->getSGPRToVGPRSpills(FuncInfo->ReturnAddressSaveIndex); + assert(ReturnAddressSpill.size() == 2); + BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), + ReturnAddressSpill[0].VGPR) + .addReg(TRI.getSubReg(ReturnAddressReg, TRI.getSubRegFromChannel(0))) + .addImm(ReturnAddressSpill[0].Lane) + .addReg(ReturnAddressSpill[0].VGPR, RegState::Undef); + BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), + ReturnAddressSpill[1].VGPR) + .addReg(TRI.getSubReg(ReturnAddressReg, TRI.getSubRegFromChannel(1))) + .addImm(ReturnAddressSpill[1].Lane) + .addReg(ReturnAddressSpill[1].VGPR, RegState::Undef); + buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, AMDGPU::PC_REG, + ReturnAddressSpill); + + ArrayRef EXECSpill = + FuncInfo->getSGPRToVGPRSpills(FuncInfo->EXECSaveIndex); + assert(EXECSpill.size()); + BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), + EXECSpill[0].VGPR) + .addReg(AMDGPU::EXEC_LO) + .addImm(EXECSpill[0].Lane) + .addReg(EXECSpill[0].VGPR, RegState::Undef); + if (!ST.isWave32()) { + assert(EXECSpill.size() == 2); + BuildMI(MBB, MBBI, DL, + TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), + EXECSpill[1].VGPR) + .addReg(AMDGPU::EXEC_HI) + .addImm(EXECSpill[1].Lane) + .addReg(EXECSpill[1].VGPR, 
RegState::Undef); + } + buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, AMDGPU::EXEC, EXECSpill); } for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg @@ -641,10 +768,16 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .addImm(-1); } + int FI = Reg.FI.getValue(); + buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR, - FuncInfo->getScratchRSrcReg(), - StackPtrReg, - Reg.FI.getValue()); + FuncInfo->getScratchRSrcReg(), StackPtrReg, FI); + + // We spill the entire VGPR, so we can get away with just cfi_offset + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset( + nullptr, MCRI->getDwarfRegNum(Reg.VGPR, false), + MFI.getObjectOffset(FI) * ST.getWavefrontSize())); } if (ScratchExecCopy != AMDGPU::NoRegister) { @@ -672,6 +805,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .addReg(FramePtrReg) .addImm(Spill[0].Lane) .addReg(Spill[0].VGPR, RegState::Undef); + + buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, FramePtrReg, Spill[0].VGPR, + Spill[0].Lane); } if (TRI.needsStackRealignment(MF)) { @@ -711,6 +847,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + if (HasFP) { + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaRegister( + nullptr, MCRI->getDwarfRegNum(FramePtrReg, false))); + } + if (HasFP && RoundedSize != 0) { BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) .addReg(StackPtrReg) @@ -736,6 +878,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo(); MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); LivePhysRegs LiveRegs; DebugLoc DL; @@ -745,6 +888,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, uint32_t RoundedSize = FuncInfo->isStackRealigned() ? 
NumBytes + MFI.getMaxAlign().value() : NumBytes; + const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); + const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); if (RoundedSize != 0 && hasFP(MF)) { const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); @@ -755,9 +900,9 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, } if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg()) - .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) - .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) + .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) + .setMIFlag(MachineInstr::FrameSetup); } if (FuncInfo->FramePointerSaveIndex) { @@ -770,9 +915,15 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, = FuncInfo->getSGPRToVGPRSpills(FI); assert(Spill.size() == 1); BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), - FuncInfo->getFrameOffsetReg()) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); + FramePtrReg) + .addReg(Spill[0].VGPR) + .addImm(Spill[0].Lane); + } + + if (hasFP(MF)) { + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createDefCfaRegister( + nullptr, MCRI->getDwarfRegNum(StackPtrReg, false))); } unsigned ScratchExecCopy = AMDGPU::NoRegister; @@ -884,6 +1035,18 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( } } +static void allocateCFISave(MachineFunction &MF, int &FI, Register Reg) { + SIMachineFunctionInfo *MFI = MF.getInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + int NewFI = MF.getFrameInfo().CreateStackObject( + TRI->getSpillSize(*RC), TRI->getSpillAlignment(*RC), true); + if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) + llvm_unreachable("allocate SGPR spill should have worked"); + FI = NewFI; +} + // Only report VGPRs to generic code. 
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedVGPRs, @@ -917,6 +1080,13 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, for (auto SSpill : MFI->getSGPRSpillVGPRs()) SavedVGPRs.reset(SSpill.VGPR); + if (TRI->isCFISavedRegsSpillEnabled()) { + allocateCFISave(MF, MFI->ReturnAddressSaveIndex, + TRI->getReturnAddressReg(MF)); + allocateCFISave(MF, MFI->EXECSaveIndex, + ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC); + } + const bool HasFP = WillHaveFP || hasFP(MF); if (!HasFP) return; @@ -1029,6 +1199,30 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(I); } +bool SIFrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const ArrayRef CSI, const TargetRegisterInfo *TRI) const { + MachineFunction &MF = *MBB.getParent(); + const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + for (const CalleeSavedInfo &CS : CSI) { + // Insert the spill to the stack frame. 
+ unsigned Reg = CS.getReg(); + + if (CS.isSpilledToReg()) { + BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + CS.getDstReg()) + .addReg(Reg, getKillRegState(true)); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII->storeRegToStackSlotCFI(MBB, MBBI, Reg, true, CS.getFrameIdx(), RC, + TRI); + } + } + + return true; +} + bool SIFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -1049,3 +1243,159 @@ bool SIFrameLowering::hasFP(const MachineFunction &MF) const { MF.getSubtarget().getRegisterInfo()->needsStackRealignment(MF) || MF.getTarget().Options.DisableFramePointerElim(MF); } + +void SIFrameLowering::buildCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const MCCFIInstruction &CFIInst) const { + MachineFunction &MF = *MBB.getParent(); + const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MF.addFrameInst(CFIInst)) + .setMIFlag(MachineInstr::FrameSetup); +} + +static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) { + if (DwarfReg < 32) { + OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg); + } else { + OS << uint8_t(dwarf::DW_OP_regx); + encodeULEB128(DwarfReg, OS); + } +} + +void SIFrameLowering::buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register SGPR, const Register VGPR, + const int Lane) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getMMI().getContext().getRegisterInfo(); + int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false); + int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false); + + // CFI for an SGPR spilled to a single lane of a VGPR is implemented as an + // expression(E) rule where E is a register location description referencing + // a VGPR register location storage at a byte offset of the lane index + // 
multiplied by the size of an SGPR (4 bytes). In other words we generate + // the following DWARF: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_LLVM_offset_uconst *4) + // + // The memory location description for the current CFA is pushed on the + // stack before E is evaluated, but we choose not to drop it as it would + // require a longer expression E and DWARF defines the result of the + // evaulation to be the location description on the top of the stack (i.e. the + // implictly pushed one is just ignored.) + SmallString<20> CFIInst; + raw_svector_ostream OSCFIInst(CFIInst); + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + + OSCFIInst << uint8_t(dwarf::DW_CFA_expression); + encodeULEB128(DwarfSGPR, OSCFIInst); + + encodeDwarfRegisterLocation(DwarfVGPR, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_offset_uconst); + // FIXME: + const unsigned SGPRByteSize = 4; + encodeULEB128(Lane * SGPRByteSize, OSBlock); + + encodeULEB128(Block.size(), OSCFIInst); + OSCFIInst << Block; + + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createEscape(nullptr, OSCFIInst.str())); +} + +void SIFrameLowering::buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register SGPR, + ArrayRef VGPRSpills) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getMMI().getContext().getRegisterInfo(); + int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false); + + // CFI for an SGPR spilled to a multiple lanes of VGPRs is implemented as an + // expression(E) rule where E is a composite location description + // with multiple parts each referencing + // VGPR register location storage with a bit offset of the lane index + // multiplied by the size of an SGPR (32 bits). In other words we generate + // the following DWARF: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_bit_piece 32, *32) + // (DW_OP_regx ) (DW_OP_bit_piece 32, *32) + // ... 
+ // (DW_OP_regx ) (DW_OP_bit_piece 32, *32) + // + // The memory location description for the current CFA is pushed on the + // stack before E is evaluated, but we choose not to drop it as it would + // require a longer expression E and DWARF defines the result of the + // evaulation to be the location description on the top of the stack (i.e. the + // implictly pushed one is just ignored.) + SmallString<20> CFIInst; + raw_svector_ostream OSCFIInst(CFIInst); + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + + OSCFIInst << uint8_t(dwarf::DW_CFA_expression); + encodeULEB128(DwarfSGPR, OSCFIInst); + + // TODO: Detect when we can merge multiple adjacent pieces, or even reduce + // this to a register location description (when all pieces are adjacent). + for (SIMachineFunctionInfo::SpilledReg Spill : VGPRSpills) { + encodeDwarfRegisterLocation(MCRI.getDwarfRegNum(Spill.VGPR, false), + OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_bit_piece); + // FIXME: + const unsigned SGPRBitSize = 32; + encodeULEB128(SGPRBitSize, OSBlock); + encodeULEB128(SGPRBitSize * Spill.Lane, OSBlock); + } + + encodeULEB128(Block.size(), OSCFIInst); + OSCFIInst << Block; + + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createEscape(nullptr, OSCFIInst.str())); +} + +void SIFrameLowering::buildCFIForVGPRToVMEMSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned VGPR, int64_t Offset) const { + MachineFunction &MF = *MBB.getParent(); + const GCNSubtarget &ST = MF.getSubtarget(); + const MCRegisterInfo &MCRI = *MF.getMMI().getContext().getRegisterInfo(); + int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false); + + SmallString<20> CFIInst; + raw_svector_ostream OSCFIInst(CFIInst); + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + + OSCFIInst << uint8_t(dwarf::DW_CFA_expression); + encodeULEB128(DwarfVGPR, OSCFIInst); + + encodeDwarfRegisterLocation(DwarfVGPR, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_swap); + OSBlock << 
uint8_t(dwarf::DW_OP_LLVM_offset_uconst); + encodeULEB128(Offset, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_call_frame_entry_reg); + encodeULEB128(MCRI.getDwarfRegNum( + ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false), + OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size); + OSBlock << uint8_t(ST.getWavefrontSize() / CHAR_BIT); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_select_bit_piece); + // FIXME: + const unsigned VGPRLaneBitSize = 32; + encodeULEB128(VGPRLaneBitSize, OSBlock); + encodeULEB128(ST.getWavefrontSize(), OSBlock); + + encodeULEB128(Block.size(), OSCFIInst); + OSCFIInst << Block; + + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createEscape(nullptr, OSCFIInst.str())); +} diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 5bd3b0b6ca991f..16fc6e6f0e2e41 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -10,11 +10,11 @@ #define LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H #include "AMDGPUFrameLowering.h" +#include "SIMachineFunctionInfo.h" namespace llvm { class SIInstrInfo; -class SIMachineFunctionInfo; class SIRegisterInfo; class GCNSubtarget; @@ -54,6 +54,11 @@ class SIFrameLowering final : public AMDGPUFrameLowering { MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const ArrayRef CSI, + const TargetRegisterInfo *TRI) const override; + private: void emitEntryFunctionFlatScratchInit(MachineFunction &MF, MachineBasicBlock &MBB, @@ -71,8 +76,34 @@ class SIFrameLowering final : public AMDGPUFrameLowering { Register PreloadedPrivateBufferReg, Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const; + void emitPrologueEntryCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL) const; + public: bool hasFP(const MachineFunction &MF) const override; + + /// Create a CFI index for CFIInst and 
build a MachineInstr around it. + void buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const MCCFIInstruction &CFIInst) const; + /// Create a CFI index describing a spill of an SGPR to a single lane of + /// a VGPR and build a MachineInstr around it. + void buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register SGPR, + const Register VGPR, const int Lane) const; + /// Create a CFI index describing a spill of an SGPR to multiple lanes of + /// VGPRs and build a MachineInstr around it. + void buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register SGPR, + ArrayRef VGPRSpills) const; + /// Create a CFI index describing a spill of a VGPR to VMEM and + /// build a MachineInstr around it. + void buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned VGPR, + int64_t Offset) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5cf4909bd9b7eb..fbea8d7d567404 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1640,13 +1640,15 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { // TODO: Could insert earlier and schedule more liberally with operations // that only use caller preserved registers. 
MachineBasicBlock &EntryBB = MF.front(); + MachineBasicBlock::iterator I = EntryBB.begin(); + for (MachineBasicBlock::iterator E = EntryBB.end(); + I != E && (I->isPHI() || I->isMetaInstruction()); ++I) + ; + BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0); if (ST->hasVscnt()) - BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), - TII->get(AMDGPU::S_WAITCNT_VSCNT)) - .addReg(AMDGPU::SGPR_NULL, RegState::Undef) - .addImm(0); - BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); + BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT)) + .addReg(AMDGPU::SGPR_NULL, RegState::Undef) + .addImm(0); Modified = true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index fb1b4b9f5f03f2..07b077a0b26f6a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1126,47 +1126,57 @@ const MCInstrDesc &SIInstrInfo::getIndirectRegWritePseudo( return get(getIndirectVGPRWritePseudoOpc(VecSize)); } -static unsigned getSGPRSpillSaveOpcode(unsigned Size) { +static unsigned getSGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 4: - return AMDGPU::SI_SPILL_S32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S32_CFI_SAVE : AMDGPU::SI_SPILL_S32_SAVE; case 8: - return AMDGPU::SI_SPILL_S64_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S64_CFI_SAVE : AMDGPU::SI_SPILL_S64_SAVE; case 12: - return AMDGPU::SI_SPILL_S96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S96_CFI_SAVE : AMDGPU::SI_SPILL_S96_SAVE; case 16: - return AMDGPU::SI_SPILL_S128_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S128_CFI_SAVE + : AMDGPU::SI_SPILL_S128_SAVE; case 20: - return AMDGPU::SI_SPILL_S160_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S160_CFI_SAVE + : AMDGPU::SI_SPILL_S160_SAVE; case 32: - return AMDGPU::SI_SPILL_S256_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_S256_CFI_SAVE + : AMDGPU::SI_SPILL_S256_SAVE; case 64: - return AMDGPU::SI_SPILL_S512_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S512_CFI_SAVE + : AMDGPU::SI_SPILL_S512_SAVE; case 128: - return AMDGPU::SI_SPILL_S1024_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S1024_CFI_SAVE + : AMDGPU::SI_SPILL_S1024_SAVE; default: llvm_unreachable("unknown register size"); } } -static unsigned getVGPRSpillSaveOpcode(unsigned Size) { +static unsigned getVGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 4: - return AMDGPU::SI_SPILL_V32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V32_CFI_SAVE : AMDGPU::SI_SPILL_V32_SAVE; case 8: - return AMDGPU::SI_SPILL_V64_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V64_CFI_SAVE : AMDGPU::SI_SPILL_V64_SAVE; case 12: - return AMDGPU::SI_SPILL_V96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V96_CFI_SAVE : AMDGPU::SI_SPILL_V96_SAVE; case 16: - return AMDGPU::SI_SPILL_V128_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V128_CFI_SAVE + : AMDGPU::SI_SPILL_V128_SAVE; case 20: - return AMDGPU::SI_SPILL_V160_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V160_CFI_SAVE + : AMDGPU::SI_SPILL_V160_SAVE; case 32: - return AMDGPU::SI_SPILL_V256_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V256_CFI_SAVE + : AMDGPU::SI_SPILL_V256_SAVE; case 64: - return AMDGPU::SI_SPILL_V512_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V512_CFI_SAVE + : AMDGPU::SI_SPILL_V512_SAVE; case 128: - return AMDGPU::SI_SPILL_V1024_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_V1024_CFI_SAVE + : AMDGPU::SI_SPILL_V1024_SAVE; default: llvm_unreachable("unknown register size"); } @@ -1189,12 +1199,10 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) { } } -void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - Register SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SIInstrInfo::storeRegToStackSlotImpl( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, bool NeedsCFI) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); @@ -1213,7 +1221,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. - const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); + const MCInstrDesc &OpDesc = + get(getSGPRSpillSaveOpcode(SpillSize, NeedsCFI)); // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. @@ -1236,8 +1245,9 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, return; } - unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize) - : getVGPRSpillSaveOpcode(SpillSize); + unsigned Opcode = RI.hasAGPRs(RC) + ? 
getAGPRSpillSaveOpcode(SpillSize) + : getVGPRSpillSaveOpcode(SpillSize, NeedsCFI); MFI->setHasSpilledVGPRs(); auto MIB = BuildMI(MBB, MI, DL, get(Opcode)); @@ -1254,6 +1264,24 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addMemOperand(MMO); } +void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, false); +} + +void SIInstrInfo::storeRegToStackSlotCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, true); +} + static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { case 4: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 9fcc5caf7dfdd2..195a5e942066c9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -219,12 +219,27 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const; +private: + void storeRegToStackSlotImpl(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + bool NeedsCFI) const; + +public: void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + void storeRegToStackSlotCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) 
const; + void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 6e6b2e77b0fe2e..a0e079c572055e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -614,6 +614,13 @@ multiclass SI_SPILL_SGPR { let mayLoad = 0; } + def _CFI_SAVE : PseudoInstSI < + (outs), + (ins sgpr_class:$data, i32imm:$addr)> { + let mayStore = 1; + let mayLoad = 0; + } + def _RESTORE : PseudoInstSI < (outs sgpr_class:$data), (ins i32imm:$addr)> { @@ -651,6 +658,18 @@ multiclass SI_SPILL_VGPR { let Size = !if(!le(MaxSize, 256), MaxSize, 252); } + def _CFI_SAVE : VPseudoInstSI < + (outs), + (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc, + SReg_32:$soffset, i32imm:$offset)> { + let mayStore = 1; + let mayLoad = 0; + // (2 * 4) + (8 * num_subregs) bytes maximum + int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8); + // Size field is unsigned char and cannot fit more. 
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252); + } + def _RESTORE : VPseudoInstSI < (outs vgpr_class:$vdata), (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset, diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 8fbc953bcdae49..02decc7aa34675 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -89,7 +89,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, ArrayRef CSI, LiveIntervals *LIS) { MachineFunction &MF = *SaveBlock.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const SIInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); @@ -102,8 +102,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, MachineInstrSpan MIS(I, &SaveBlock); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, - TRI); + TII.storeRegToStackSlotCFI(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, + TRI); if (LIS) { assert(std::distance(MIS.begin(), I) == 1); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 589b2742737558..6076a89b0b350a 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -488,6 +488,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { Register VGPRReservedForSGPRSpill; bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg); + int ReturnAddressSaveIndex; + int EXECSaveIndex; + public: SIMachineFunctionInfo(const MachineFunction &MF); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 1e7a4a3eca8104..2f02bd2839d03f 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -14,14 +14,15 @@ #include "SIRegisterInfo.h" #include "AMDGPURegisterBankInfo.h" #include "AMDGPUSubtarget.h" -#include "SIInstrInfo.h" -#include "SIMachineFunctionInfo.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/Function.h" @@ -38,6 +39,11 @@ static cl::opt EnableSpillSGPRToVGPR( cl::ReallyHidden, cl::init(true)); +static cl::opt EnableSpillCFISavedRegs( + "amdgpu-spill-cfi-saved-regs", + cl::desc("Enable spilling the registers required for CFI emission"), + cl::ReallyHidden, cl::init(false)); + SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { @@ -187,6 +193,10 @@ unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel, return SubRegFromChannelTable[NumRegIndex][Channel]; } +bool SIRegisterInfo::isCFISavedRegsSpillEnabled() const { + return EnableSpillCFISavedRegs; +} + MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; @@ -483,51 +493,67 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { switch (Op) { case AMDGPU::SI_SPILL_S1024_SAVE: + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_V1024_SAVE: + case AMDGPU::SI_SPILL_V1024_CFI_SAVE: case AMDGPU::SI_SPILL_V1024_RESTORE: case AMDGPU::SI_SPILL_A1024_SAVE: case AMDGPU::SI_SPILL_A1024_RESTORE: return 32; case AMDGPU::SI_SPILL_S512_SAVE: + 
case AMDGPU::SI_SPILL_S512_CFI_SAVE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_V512_SAVE: + case AMDGPU::SI_SPILL_V512_CFI_SAVE: case AMDGPU::SI_SPILL_V512_RESTORE: case AMDGPU::SI_SPILL_A512_SAVE: case AMDGPU::SI_SPILL_A512_RESTORE: return 16; case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_V256_SAVE: + case AMDGPU::SI_SPILL_V256_CFI_SAVE: case AMDGPU::SI_SPILL_V256_RESTORE: return 8; case AMDGPU::SI_SPILL_S160_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_V160_SAVE: + case AMDGPU::SI_SPILL_V160_CFI_SAVE: case AMDGPU::SI_SPILL_V160_RESTORE: return 5; case AMDGPU::SI_SPILL_S128_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_V128_SAVE: + case AMDGPU::SI_SPILL_V128_CFI_SAVE: case AMDGPU::SI_SPILL_V128_RESTORE: case AMDGPU::SI_SPILL_A128_SAVE: case AMDGPU::SI_SPILL_A128_RESTORE: return 4; case AMDGPU::SI_SPILL_S96_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: case AMDGPU::SI_SPILL_S96_RESTORE: case AMDGPU::SI_SPILL_V96_SAVE: + case AMDGPU::SI_SPILL_V96_CFI_SAVE: case AMDGPU::SI_SPILL_V96_RESTORE: return 3; case AMDGPU::SI_SPILL_S64_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: case AMDGPU::SI_SPILL_S64_RESTORE: case AMDGPU::SI_SPILL_V64_SAVE: + case AMDGPU::SI_SPILL_V64_CFI_SAVE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_A64_SAVE: case AMDGPU::SI_SPILL_A64_RESTORE: return 2; case AMDGPU::SI_SPILL_S32_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: case AMDGPU::SI_SPILL_S32_RESTORE: case AMDGPU::SI_SPILL_V32_SAVE: + case AMDGPU::SI_SPILL_V32_CFI_SAVE: case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_A32_SAVE: case AMDGPU::SI_SPILL_A32_RESTORE: @@ -661,20 +687,16 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, return true; } -void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, - unsigned LoadStoreOp, - int 
Index, - Register ValueReg, - bool IsKill, - MCRegister ScratchRsrcReg, - MCRegister ScratchOffsetReg, - int64_t InstOffset, - MachineMemOperand *MMO, - RegScavenger *RS) const { +void SIRegisterInfo::buildSpillLoadStore( + MachineBasicBlock::iterator MI, unsigned LoadStoreOp, int Index, + Register ValueReg, bool IsKill, MCRegister ScratchRsrcReg, + MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO, + RegScavenger *RS, bool NeedsCFI) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); const SIInstrInfo *TII = ST.getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); + const SIFrameLowering *TFL = ST.getFrameLowering(); const MCInstrDesc &Desc = TII->get(LoadStoreOp); const DebugLoc &DL = MI->getDebugLoc(); @@ -689,6 +711,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned Size = NumSubRegs * EltSize; int64_t Offset = InstOffset + MFI.getObjectOffset(Index); int64_t ScratchOffsetRegDelta = 0; + int64_t AdditionalCFIOffset = 0; Align Alignment = MFI.getObjectAlign(Index); const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo(); @@ -729,6 +752,8 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, Scavenged = true; } + AdditionalCFIOffset = Offset; + if (ScratchOffsetReg == AMDGPU::NoRegister) { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset) .addImm(Offset); @@ -787,6 +812,11 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // swz .addMemOperand(NewMMO); + if (IsStore && NeedsCFI) + TFL->buildCFIForVGPRToVMEMSpill(*MBB, MI, DL, SubReg, + Offset * ST.getWavefrontSize() + + AdditionalCFIOffset); + if (!IsStore && TmpReg != AMDGPU::NoRegister) MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), FinalReg) @@ -805,14 +835,14 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, } } -bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator 
MI, - int Index, - RegScavenger *RS, - bool OnlyToVGPR) const { +bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, + RegScavenger *RS, bool OnlyToVGPR, + bool NeedsCFI) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MBB->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); DenseSet SGPRSpillVGPRDefinedSet; + const SIFrameLowering *TFL = ST.getFrameLowering(); ArrayRef VGPRSpills = MFI->getSGPRToVGPRSpills(Index); @@ -868,6 +898,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, .addImm(Spill.Lane) .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef); + if (NeedsCFI) + TFL->buildCFIForSGPRToVGPRSpill(*MBB, MI, DL, SubReg, Spill.VGPR, + Spill.Lane); + // FIXME: Since this spills to another register instead of an actual // frame index, we should delete the frame index when all references to // it are fixed. @@ -907,6 +941,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, .addReg(MFI->getStackPtrOffsetReg()) // soffset .addImm(i * 4) // offset .addMemOperand(MMO); + + // TODO: Implement CFI for SpillToVMEM if/when it is fully supported. 
} } @@ -1004,7 +1040,18 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const { + bool NeedsCFI = false; switch (MI->getOpcode()) { + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: + NeedsCFI = true; + LLVM_FALLTHROUGH; case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: @@ -1013,7 +1060,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: - return spillSGPR(MI, FI, RS, true); + return spillSGPR(MI, FI, RS, true, NeedsCFI); case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: @@ -1045,8 +1092,21 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, Register FrameReg = getFrameRegister(*MF); + bool NeedsCFI = false; + switch (MI->getOpcode()) { // SGPR register spill + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: { + NeedsCFI = true; + LLVM_FALLTHROUGH; + } case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: @@ -1073,6 +1133,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } // VGPR register spill + case AMDGPU::SI_SPILL_V1024_CFI_SAVE: + case AMDGPU::SI_SPILL_V512_CFI_SAVE: + case AMDGPU::SI_SPILL_V256_CFI_SAVE: + case AMDGPU::SI_SPILL_V160_CFI_SAVE: + case 
AMDGPU::SI_SPILL_V128_CFI_SAVE: + case AMDGPU::SI_SPILL_V96_CFI_SAVE: + case AMDGPU::SI_SPILL_V64_CFI_SAVE: + case AMDGPU::SI_SPILL_V32_CFI_SAVE: + NeedsCFI = true; + LLVM_FALLTHROUGH; case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: @@ -1091,14 +1161,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == MFI->getStackPtrOffsetReg()); - buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, - Index, - VData->getReg(), VData->isKill(), - TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), - FrameReg, - TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), - *MI->memoperands_begin(), - RS); + buildSpillLoadStore( + MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, Index, VData->getReg(), + VData->isKill(), + TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), FrameReg, + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), + *MI->memoperands_begin(), RS, NeedsCFI); MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); MI->eraseFromParent(); break; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 65915b89fb5fbc..9b0e3e64d2c92b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -47,6 +47,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { return SpillSGPRToVGPR; } + bool isCFISavedRegsSpillEnabled() const; + /// Return the end register initially reserved for the scratch buffer in case /// spilling is needed. 
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; @@ -95,9 +97,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { const MachineFunction &MF, unsigned Kind = 0) const override; /// If \p OnlyToVGPR is true, this will only succeed if this - bool spillSGPR(MachineBasicBlock::iterator MI, - int FI, RegScavenger *RS, - bool OnlyToVGPR = false) const; + bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, + bool OnlyToVGPR = false, bool NeedsCFI = false) const; bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, @@ -290,16 +291,12 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { } private: - void buildSpillLoadStore(MachineBasicBlock::iterator MI, - unsigned LoadStoreOp, - int Index, - Register ValueReg, - bool ValueIsKill, + void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, + int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchRsrcReg, - MCRegister ScratchOffsetReg, - int64_t InstrOffset, - MachineMemOperand *MMO, - RegScavenger *RS) const; + MCRegister ScratchOffsetReg, int64_t InstrOffset, + MachineMemOperand *MMO, RegScavenger *RS, + bool NeedsCFI = false) const; }; } // End namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll new file mode 100644 index 00000000000000..01491a4961dbd0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll @@ -0,0 +1,106 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -amdgpu-spill-cfi-saved-regs -o - %s | FileCheck --check-prefixes=CHECK,WAVE64 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=asm -amdgpu-spill-cfi-saved-regs -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s + +; CHECK-LABEL: kern: +; CHECK: .cfi_startproc +; CHECK-NOT: .cfi_{{.*}} +; CHECK: %bb.0: +; CHECK-NEXT: .cfi_escape 0x0f, 0x03, 0x30, 
0x36, 0xe1 +; CHECK-NEXT: .cfi_undefined 16 +; CHECK-NOT: .cfi_{{.*}} +; CHECK: .cfi_endproc +define protected amdgpu_kernel void @kern() #0 { +entry: + ret void +} + +; CHECK-LABEL: func: +; CHECK: .cfi_startproc +; CHECK-NOT: .cfi_{{.*}} +; CHECK: %bb.0: +; SGPR32 = 64 +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + + +; FIXME: ideally this would not care what VGPR we spill to, but since we are +; using .cfi_escape it isn't trivial/possible to make this general yet + +; CHECK: v_writelane_b32 v0, s30, 0 +; CHECK-NEXT: v_writelane_b32 v0, s31, 1 + +; DW_CFA_expression [0x10] +; PC_64 ULEB128(17)=[0x10] +; BLOCK_LENGTH ULEB128(12)=[0x0c] +; DW_OP_regx [0x90] +; VGPR0_wave64 ULEB128(2560)=[0x80, 0x14] +; DW_OP_bit_piece [0x9d] +; PIECE_SIZE [0x20] +; PIECE_OFFSET [0x00] +; DW_OP_regx [0x90] +; VGPR0_wave64 ULEB128(2560)=[0x80, 0x14] +; DW_OP_bit_piece [0x9d] +; PIECE_SIZE [0x20] +; PIECE_OFFSET [0x20] +; WAVE64-NEXT: .cfi_escape 0x10, 0x10, 0x0c, 0x90, 0x80, 0x14, 0x9d, 0x20, 0x00, 0x90, 0x80, 0x14, 0x9d, 0x20, 0x20 + +; DW_CFA_expression [0x10] +; PC_64 ULEB128(17)=[0x10] +; BLOCK_LENGTH ULEB128(12)=[0x0c] +; DW_OP_regx [0x90] +; VGPR0_wave32 ULEB128(1536)=[0x80, 0x0c] +; DW_OP_bit_piece [0x9d] +; PIECE_SIZE [0x20] +; PIECE_OFFSET [0x00] +; DW_OP_regx [0x90] +; VGPR0_wave32 ULEB128(1536)=[0x80, 0x0c] +; DW_OP_bit_piece [0x9d] +; PIECE_SIZE [0x20] +; PIECE_OFFSET [0x20] +; WAVE32-NEXT: .cfi_escape 0x10, 0x10, 0x0c, 0x90, 0x80, 0x0c, 0x9d, 0x20, 0x00, 0x90, 0x80, 0x0c, 0x9d, 0x20, 0x20 + + +; WAVE64: v_writelane_b32 v0, exec_lo, 2 +; WAVE64-NEXT: v_writelane_b32 v0, exec_hi, 3 +; DW_CFA_expression [0x10] +; EXEC_MASK_wave64 ULEB128(17)=[0x11] +; BLOCK_LENGTH ULEB128(12)=[0x0c] +; DW_OP_regx [0x90] +; VGPR0_wave64 ULEB128(2560)=[0x80, 0x14] +; DW_OP_bit_piece [0x9d] +; PIECE_SIZE [0x20] +; PIECE_OFFSET [0x40] +; DW_OP_regx [0x90] +; VGPR0_wave64 ULEB128(2560)=[0x80, 0x14] +; 
DW_OP_bit_piece [0x9d] +; PIECE_SIZE [0x20] +; PIECE_OFFSET [0x60] +; WAVE64-NEXT: .cfi_escape 0x10, 0x11, 0x0c, 0x90, 0x80, 0x14, 0x9d, 0x20, 0x40, 0x90, 0x80, 0x14, 0x9d, 0x20, 0x60 + +; WAVE32: v_writelane_b32 v0, exec_lo, 2 +; DW_CFA_expression [0x10] +; EXEC_MASK_wave32 ULEB128(1)=[0x01] +; BLOCK_LENGTH ULEB128(6)=[0x06] +; DW_OP_regx [0x90] +; VGPR0_wave32 ULEB128(1536)=[0x80, 0x0c] +; DW_OP_bit_piece [0x9d] +; PIECE_SIZE [0x20] +; PIECE_OFFSET [0x40] +; WAVE32-NEXT: .cfi_escape 0x10, 0x01, 0x06, 0x90, 0x80, 0x0c, 0x9d, 0x20, 0x40 + +; CHECK-NOT: .cfi_{{.*}} +; CHECK: .cfi_endproc +define hidden void @func() #0 { +entry: + ret void +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "filename", directory: "directory") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index 56faa3b8c129f3..654475c026d49f 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -197,7 +197,7 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: v_readlane_b32 s42, v0, 0 ; GCN-NEXT: s_setpc_b64 -define void @spill_only_csr_sgpr() { +define void @spill_only_csr_sgpr() #0 { call void asm sideeffect "; clobber s42", "~{s42}"() ret void } @@ -207,16 +207,16 @@ define void @spill_only_csr_sgpr() { ; GCN: s_waitcnt ; GCN-NEXT:s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x300 +; GCN: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:8 +; GCN: 
buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:8 ; GCN: ;;#ASMSTART ; GCN-NEXT: ; clobber v41 ; GCN-NEXT: ;;#ASMEND ; GCN: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -233,14 +233,14 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; GCN: s_waitcnt ; GCN-NEXT: v_writelane_b32 v1, s33, 63 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x300 ; GCN: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-COUNT-63: v_writelane_b32 v1 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:8 ; GCN: ;;#ASMSTART ; GCN-COUNT-63: v_readlane_b32 s{{[0-9]+}}, v1 -; GCN: s_add_u32 s32, s32, 0x300 -; GCN-NEXT: s_sub_u32 s32, s32, 0x300 +; GCN: s_sub_u32 s32, s32, 0x300 ; GCN-NEXT: v_readlane_b32 s33, v1, 63 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 @@ -265,6 +265,7 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; GCN: s_waitcnt ; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-COUNT-64: v_writelane_b32 v1, @@ -273,7 +274,6 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1 ; GCN: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -296,8 +296,8 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; GCN-LABEL: {{^}}realign_stack_no_fp_elim: ; GCN: s_waitcnt -; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0 ; GCN-NEXT: s_mov_b32 s4, s33 +; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0 ; GCN-NEXT: s_and_b32 s33, [[SCRATCH]], 0xfff80000 ; GCN-NEXT: s_add_u32 s32, s32, 0x100000 ; GCN-NEXT: v_mov_b32_e32 
[[ZERO:v[0-9]+]], 0 @@ -315,14 +315,14 @@ define void @realign_stack_no_fp_elim() #1 { ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp: ; GCN: s_waitcnt ; GCN-NEXT: v_writelane_b32 v1, s33, 2 -; GCN-NEXT: v_writelane_b32 v1, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 v1, s30, 0 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; GCN: v_writelane_b32 v1, s31, 1 ; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4 ; GCN: ;;#ASMSTART -; GCN: v_readlane_b32 s4, v1, 0 -; GCN-NEXT: s_add_u32 s32, s32, 0x200 +; GCN: s_add_u32 s32, s32, 0x200 +; GCN-NEXT: v_readlane_b32 s4, v1, 0 ; GCN-NEXT: v_readlane_b32 s5, v1, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x200 ; GCN-NEXT: v_readlane_b32 s33, v1, 2 @@ -349,8 +349,8 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN-DAG: buffer_store_dword @@ -396,8 +396,8 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN-DAG: s_add_u32 s32, s32, 0x40300{{$}} ; GCN-DAG: buffer_store_dword diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 79ea9dbee2fff4..e18120754c44eb 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ 
b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -31,8 +31,8 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 v40, s33, 2 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4 @@ -65,8 +65,8 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 v40, s33, 2 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4 @@ -99,8 +99,8 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 v40, s33, 2 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4 @@ -133,8 +133,8 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 v40, s33, 2 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4 diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll 
b/llvm/test/CodeGen/AMDGPU/debug-frame.ll new file mode 100644 index 00000000000000..bceb9ce9ee1e00 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll @@ -0,0 +1,229 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,WAVE64 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s + +; CHECK-LABEL: kern1: +; CHECK: .cfi_startproc + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: %bb.0: +; DW_CFA_def_cfa_expression [0x0f] +; BLOCK_LENGTH ULEB128(3)=[0x03] +; DW_OP_lit0 [0x30] +; DW_OP_lit6 [0x36] +; DW_OP_LLVM_form_aspace_address [0xe1] +; CHECK-NEXT: .cfi_escape 0x0f, 0x03, 0x30, 0x36, 0xe1 +; PC_64 = 16 +; CHECK-NEXT: .cfi_undefined 16 + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: .cfi_endproc +define protected amdgpu_kernel void @kern1() #0 { +entry: + ret void +} + +; CHECK-LABEL: func1: +; CHECK: .cfi_startproc + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: %bb.0: +; SGPR32 = 64 +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; DW_CFA_expression [0x10] +; PC_64 ULEB128(16)=[0x10] +; BLOCK_LENGTH ULEB128(8)=[0x08] +; DW_OP_regx [0x90] +; SGPR30 ULEB128(62)=[0x3e] +; DW_OP_piece [0x93] +; PIECE_SIZE [0x04] +; DW_OP_regx [0x90] +; SGPR31 ULEB128(63)=[0x3f] +; DW_OP_piece [0x93] +; PIECE_SIZE [0x04] +; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: .cfi_endproc +define hidden void @func1() #0 { +entry: + ret void +} + +declare hidden void @ex() #0 + +; CHECK-LABEL: func2: +; CHECK: .cfi_startproc + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; VGPR0_wave64 = 2560 +; WAVE64-NEXT: .cfi_undefined 2560 +; WAVE64-NEXT: .cfi_undefined 2561 +; WAVE64-NEXT: .cfi_undefined 2562 +; WAVE64-NEXT: 
.cfi_undefined 2563 +; WAVE64-NEXT: .cfi_undefined 2564 +; WAVE64-NEXT: .cfi_undefined 2565 +; WAVE64-NEXT: .cfi_undefined 2566 +; WAVE64-NEXT: .cfi_undefined 2567 +; WAVE64-NEXT: .cfi_undefined 2568 +; WAVE64-NEXT: .cfi_undefined 2569 +; WAVE64-NEXT: .cfi_undefined 2570 +; WAVE64-NEXT: .cfi_undefined 2571 +; WAVE64-NEXT: .cfi_undefined 2572 +; WAVE64-NEXT: .cfi_undefined 2573 +; WAVE64-NEXT: .cfi_undefined 2574 +; WAVE64-NEXT: .cfi_undefined 2575 +; WAVE64-NEXT: .cfi_undefined 2576 +; WAVE64-NEXT: .cfi_undefined 2577 +; WAVE64-NEXT: .cfi_undefined 2578 +; WAVE64-NEXT: .cfi_undefined 2579 +; WAVE64-NEXT: .cfi_undefined 2580 +; WAVE64-NEXT: .cfi_undefined 2581 +; WAVE64-NEXT: .cfi_undefined 2582 +; WAVE64-NEXT: .cfi_undefined 2583 +; WAVE64-NEXT: .cfi_undefined 2584 +; WAVE64-NEXT: .cfi_undefined 2585 +; WAVE64-NEXT: .cfi_undefined 2586 +; WAVE64-NEXT: .cfi_undefined 2587 +; WAVE64-NEXT: .cfi_undefined 2588 +; WAVE64-NEXT: .cfi_undefined 2589 +; WAVE64-NEXT: .cfi_undefined 2590 +; WAVE64-NEXT: .cfi_undefined 2591 +; VGPR0_wave32 = 1536 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; 
WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; SGPR0 = 32 +; CHECK-NEXT: .cfi_undefined 32 +; CHECK-NEXT: .cfi_undefined 33 +; CHECK-NEXT: .cfi_undefined 34 +; CHECK-NEXT: .cfi_undefined 35 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 +; CHECK-NEXT: .cfi_undefined 40 +; CHECK-NEXT: .cfi_undefined 41 +; CHECK-NEXT: .cfi_undefined 42 +; CHECK-NEXT: .cfi_undefined 43 +; CHECK-NEXT: .cfi_undefined 44 +; CHECK-NEXT: .cfi_undefined 45 +; CHECK-NEXT: .cfi_undefined 46 +; CHECK-NEXT: .cfi_undefined 47 +; CHECK-NEXT: .cfi_undefined 48 +; CHECK-NEXT: .cfi_undefined 49 +; CHECK-NEXT: .cfi_undefined 50 +; CHECK-NEXT: .cfi_undefined 51 +; CHECK-NEXT: .cfi_undefined 52 +; CHECK-NEXT: .cfi_undefined 53 +; CHECK-NEXT: .cfi_undefined 54 +; CHECK-NEXT: .cfi_undefined 55 +; CHECK-NEXT: .cfi_undefined 56 +; CHECK-NEXT: .cfi_undefined 57 +; CHECK-NEXT: .cfi_undefined 58 +; CHECK-NEXT: .cfi_undefined 59 +; CHECK-NEXT: .cfi_undefined 60 +; CHECK-NEXT: .cfi_undefined 61 +; CHECK-NEXT: .cfi_undefined 62 +; CHECK-NEXT: .cfi_undefined 63 + +; CHECK-NOT: .cfi_{{.*}} + +; WAVE64: s_or_saveexec_b64 s[4:5], -1 +; WAVE32: s_or_saveexec_b32 s4, -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; VGPR40_wave64 = 2600 +; WAVE64-NEXT: .cfi_offset 2600, 0 +; VGPR40_wave32 = 1576 +; WAVE32-NEXT: .cfi_offset 1576, 0 +; CHECK-NOT: .cfi_{{.*}} +; WAVE64: s_mov_b64 exec, s[4:5] +; WAVE32: s_mov_b32 exec_lo, s4 + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: v_writelane_b32 v40, s33, 2 + +; DW_CFA_expression [0x10] SGPR33 ULEB128(65)=[0x41] +; BLOCK_LENGTH ULEB128(5)=[0x05] +; DW_OP_regx [0x90] +; VGPR40_wave64 ULEB128(2600)=[0xa8, 0x14] +; DW_OP_LLVM_offset_uconst [0xe4] +; OFFSET ULEB128(0x08) [0x08] +; WAVE64-NEXT: .cfi_escape 
0x10, 0x41, 0x05, 0x90, 0xa8, 0x14, 0xe4, 0x08 + +; DW_CFA_expression [0x10] SGPR33 ULEB128(65)=[0x41] +; BLOCK_LENGTH ULEB128(5)=[0x05] +; DW_OP_regx [0x90] +; VGPR40_wave32 ULEB128(1576)=[0xa8, 0x0c] +; DW_OP_LLVM_offset_uconst [0xe4] +; OFFSET ULEB128(0x08) [0x08] +; WAVE32-NEXT: .cfi_escape 0x10, 0x41, 0x05, 0x90, 0xa8, 0x0c, 0xe4, 0x08 + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: s_mov_b32 s33, s32 +; SGPR33 = 65 +; CHECK-NEXT: .cfi_def_cfa_register 65 + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: s_sub_u32 s32, s32, +; CHECK-NEXT: v_readlane_b32 s33, v40, 2 +; SGPR32 = 64 +; CHECK-NEXT: .cfi_def_cfa_register 64 + +; CHECK-NOT: .cfi_{{.*}} + +; CHECK: .cfi_endproc +define hidden void @func2() #0 { +entry: + call void @ex() #0 + ret void +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "filename", directory: "directory") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll index 697fdfb3695ec9..06ec3b1fed8176 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -21,6 +21,8 @@ entry: ; GCN-LABEL: {{^}}only_undef_dbg_value: ; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef] undef +; NOOPT-NEXT: .cfi_escape 0x0f, 0x03, 0x30, 0x36, 0xe1 +; NOOPT-NEXT: .cfi_undefined 16 ; NOOPT-NEXT: s_endpgm ; OPT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 7328529596a225..cb64be4a260ab7 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -196,10 +196,10 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 
offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v43, s34, 0 +; GFX9-NEXT: v_writelane_b32 v43, s35, 1 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+4 -; GFX9-NEXT: v_writelane_b32 v43, s35, 1 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v40, v1 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir index 5a022efb38a35b..4e56a28472563b 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir @@ -2,9 +2,13 @@ # CHECK-LABEL: name: empty_entry_block # CHECK: V_WRITELANE +# CHECK-NEXT: CFI_INSTRUCTION # CHECK-NEXT: V_WRITELANE +# CHECK-NEXT: CFI_INSTRUCTION # CHECK-NEXT: V_WRITELANE +# CHECK-NEXT: CFI_INSTRUCTION # CHECK-NEXT: V_WRITELANE +# CHECK-NEXT: CFI_INSTRUCTION # CHECK: V_READLANE # CHECK-NEXT: V_READLANE # CHECK-NEXT: V_READLANE diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll index 06a4d90dfeb384..7962ec658ed8d2 100644 --- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll @@ -6,16 +6,21 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna ; GCN: .Lfunc_begin0: ; GCN-NEXT: .file 0 ; GCN-NEXT: .loc 0 3 0 ; /tmp/dbg.cl:3:0 +; GCN-NEXT: .cfi_sections .debug_frame +; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp0: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: 
split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp0: ; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp1: +; GCN: .cfi_endproc call void @llvm.dbg.value(metadata <4 x float> %arg, metadata !18, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !19 ret <4 x float> %arg, !dbg !20 } @@ -24,15 +29,22 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> ; GCN-LABEL: split_v4f32_multi_arg: ; GCN: .Lfunc_begin1: ; GCN-NEXT: .loc 0 7 0 ; /tmp/dbg.cl:7:0 +; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp2: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr5 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr4 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; 
GCN-NEXT: .cfi_undefined 2560 +; GCN-NEXT: .cfi_undefined 2561 +; GCN-NEXT: .cfi_undefined 2562 +; GCN-NEXT: .cfi_undefined 2563 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp2: ; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17 ; GCN-NEXT: v_add_f32_e32 v0, v4, v0 ; GCN-NEXT: .Ltmp3: @@ -45,6 +57,7 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> ; GCN-NEXT: .loc 0 8 5 is_stmt 0 ; /tmp/dbg.cl:8:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp7: +; GCN: .cfi_endproc call void @llvm.dbg.value(metadata <4 x float> %arg0, metadata !29, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31 call void @llvm.dbg.value(metadata <2 x float> %arg1, metadata !30, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31 %tmp = shufflevector <2 x float> %arg1, <2 x float> undef, <4 x i32> , !dbg !32 @@ -56,11 +69,14 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname ; GCN-LABEL: split_v4f16_arg: ; GCN: .Lfunc_begin2: ; GCN-NEXT: .loc 0 11 0 is_stmt 1 ; /tmp/dbg.cl:11:0 +; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp8: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp8: ; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp9: @@ -72,14 +88,18 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0 ; GCN-LABEL: split_f64_arg: ; GCN: .Lfunc_begin3: ; GCN-NEXT: .loc 0 15 0 ; 
/tmp/dbg.cl:15:0 +; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp10: ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp10: ; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp11: +; GCN: .cfi_endproc call void @llvm.dbg.value(metadata double %arg, metadata !50, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !51 ret double %arg, !dbg !52 } @@ -88,13 +108,16 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un ; GCN-LABEL: split_v2f64_arg: ; GCN: .Lfunc_begin4: ; GCN-NEXT: .loc 0 19 0 ; /tmp/dbg.cl:19:0 +; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp12: ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp12: ; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5 ; 
GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp13: @@ -106,14 +129,18 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg ! ; GCN-LABEL: split_i64_arg: ; GCN: .Lfunc_begin5: ; GCN-NEXT: .loc 0 23 0 ; /tmp/dbg.cl:23:0 +; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp14: ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp14: ; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .Ltmp15: +; GCN: .cfi_endproc call void @llvm.dbg.value(metadata i64 %arg, metadata !67, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !68 ret i64 %arg, !dbg !69 } @@ -122,14 +149,18 @@ define hidden i8 addrspace(1)* @split_ptr_arg(i8 addrspace(1)* readnone returned ; GCN-LABEL: split_ptr_arg: ; GCN: .Lfunc_begin6: ; GCN-NEXT: .loc 0 27 0 ; /tmp/dbg.cl:27:0 +; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: .Ltmp16: ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: .Ltmp16: ; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5 ; GCN-NEXT: s_setpc_b64 s[30:31] ; 
GCN-NEXT: .Ltmp17: +; GCN: .cfi_endproc call void @llvm.dbg.value(metadata i8 addrspace(1)* %arg, metadata !76, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !77 ret i8 addrspace(1)* %arg, !dbg !78 } diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 2a3cfe7a09928c..69d29a4e9e08ef 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -124,8 +124,8 @@ define amdgpu_kernel void @kernel_call_align4_from_5() { } ; GCN-LABEL: {{^}}default_realign_align128: -; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0 -; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN-DAG: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0 +; GCN-DAG: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN-NEXT: s_and_b32 s33, [[TMP]], 0xffffe000 ; GCN-NEXT: s_add_u32 s32, s32, 0x4000 ; GCN-NOT: s33 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll index c3f41a467a2015..b3a77138954ff1 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GCN -define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) { +define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 { ; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: ; implicit-def: $vcc_hi ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: ; implicit-def: $vcc_hi ; GCN-NEXT: BB0_1: ; %bb0 ; GCN-NEXT: ; =>This Loop Header: Depth=1 ; GCN-NEXT: ; Child Loop BB0_2 Depth 2 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir 
b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir new file mode 100644 index 00000000000000..00a2b78e903f6f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir @@ -0,0 +1,96 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck %s + +# Ensure we insert waitcnts after any meta instructions at the start of +# non-kernel functions. Without this, the inserted waitcnts can affect e.g. the +# PC ranges covered by CFI and debug values. + +--- +# CHECK-LABEL: name: skip_implicit_def{{$}} +# CHECK: IMPLICIT_DEF +# CHECK: S_WAITCNT +name: skip_implicit_def +machineFunctionInfo: +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF +... +--- +# CHECK-LABEL: name: skip_kill{{$}} +# CHECK: KILL +# CHECK: S_WAITCNT +name: skip_kill +machineFunctionInfo: +body: | + bb.0: + KILL $sgpr0 +... +--- +# CHECK-LABEL: name: skip_cfi{{$}} +# CHECK: CFI_INSTRUCTION +# CHECK: S_WAITCNT +name: skip_cfi +machineFunctionInfo: +body: | + bb.0: + CFI_INSTRUCTION undefined $sgpr0 +... +--- +# CHECK-LABEL: name: skip_eh_label{{$}} +# CHECK: EH_LABEL +# CHECK: S_WAITCNT +name: skip_eh_label +machineFunctionInfo: +body: | + bb.0: + EH_LABEL 0 +... +--- +# CHECK-LABEL: name: skip_gc_label{{$}} +# CHECK: GC_LABEL +# CHECK: S_WAITCNT +name: skip_gc_label +machineFunctionInfo: +body: | + bb.0: + GC_LABEL 0 +... +--- +# CHECK-LABEL: name: skip_dbg_value{{$}} +# CHECK: DBG_VALUE +# CHECK: S_WAITCNT +name: skip_dbg_value +machineFunctionInfo: +body: | + bb.0: + DBG_VALUE 0 +... +--- +# CHECK-LABEL: name: skip_dbg_label{{$}} +# CHECK: DBG_LABEL +# CHECK: S_WAITCNT +name: skip_dbg_label +machineFunctionInfo: +body: | + bb.0: + DBG_LABEL 0 +... +--- +# CHECK-LABEL: name: skip_lifetime_start{{$}} +# CHECK: LIFETIME_START +# CHECK: S_WAITCNT +name: skip_lifetime_start +machineFunctionInfo: +body: | + bb.0: + LIFETIME_START 0 +... 
+--- +# CHECK-LABEL: name: skip_lifetime_end{{$}} +# CHECK: LIFETIME_END +# CHECK: S_WAITCNT +name: skip_lifetime_end +machineFunctionInfo: +body: | + bb.0: + LIFETIME_END 0 +... diff --git a/llvm/test/CodeGen/MIR/AArch64/cfi.mir b/llvm/test/CodeGen/MIR/AArch64/cfi.mir index 04380e07f3ee3c..b4ae7e1dad15ba 100644 --- a/llvm/test/CodeGen/MIR/AArch64/cfi.mir +++ b/llvm/test/CodeGen/MIR/AArch64/cfi.mir @@ -23,6 +23,8 @@ body: | frame-setup CFI_INSTRUCTION def_cfa_register $w29 ; CHECK: CFI_INSTRUCTION def_cfa_offset -8 frame-setup CFI_INSTRUCTION def_cfa_offset -8 + ; CHECK: CFI_INSTRUCTION llvm_def_aspace_cfa $w29, 16, 6 + frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $w29, 16, 6 ; CHECK: CFI_INSTRUCTION offset $w30, -8 frame-setup CFI_INSTRUCTION offset $w30, -8 ; CHECK: CFI_INSTRUCTION rel_offset $w30, -8 diff --git a/llvm/test/DebugInfo/AMDGPU/cfi.ll b/llvm/test/DebugInfo/AMDGPU/cfi.ll new file mode 100644 index 00000000000000..d28f7b87916c39 --- /dev/null +++ b/llvm/test/DebugInfo/AMDGPU/cfi.ll @@ -0,0 +1,35 @@ +; RUN: llc -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - %s | llvm-dwarfdump -debug-frame - | FileCheck %s + +; CHECK: .debug_frame contents: +; CHECK: 00000000 0000001c ffffffff CIE +; CHECK-NEXT: Version: 4 +; CHECK-NEXT: Augmentation: "[llvm:v0.0]" +; CHECK-NEXT: Address size: 8 +; CHECK-NEXT: Segment desc size: 0 +; CHECK-NEXT: Code alignment factor: 4 +; CHECK-NEXT: Data alignment factor: 4 +; CHECK-NEXT: Return address column: 16 +; CHECK-EMPTY: +; CHECK-NEXT: DW_CFA_nop: +; CHECK-NEXT: DW_CFA_nop: +; CHECK-NEXT: DW_CFA_nop: +; CHECK-NEXT: DW_CFA_nop: +; CHECK-NEXT: DW_CFA_nop: +; CHECK-NEXT: DW_CFA_nop: +; CHECK-EMPTY: +; CHECK-NEXT: 00000020 {{[0-9]+}} 00000000 FDE cie=00000000 pc=00000000...{{[0-9]+}} +; CHECK: .eh_frame contents: + +define void @func() #0 { + ret void +} + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info 
Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, emissionKind: FullDebug) +!3 = !DIFile(filename: "file", directory: "dir") diff --git a/llvm/test/DebugInfo/AMDGPU/cu-augmentation.ll b/llvm/test/DebugInfo/AMDGPU/cu-augmentation.ll new file mode 100644 index 00000000000000..72eb1df7a1b7cd --- /dev/null +++ b/llvm/test/DebugInfo/AMDGPU/cu-augmentation.ll @@ -0,0 +1,21 @@ +; RUN: llc -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - %s | llvm-dwarfdump --debug-info - | FileCheck %s + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_LLVM_augmentation ("[llvm:v0.0]") +; CHECK: DW_TAG_subprogram +define void @func() #0 !dbg !4 { + ret void +} + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, emissionKind: FullDebug) +!3 = !DIFile(filename: "file", directory: "dir") +!4 = distinct !DISubprogram(name: "func", scope: !3, file: !3, line: 0, type: !5, scopeLine: 0, unit: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{} diff --git a/llvm/test/MC/ELF/AMDGPU/cfi.s b/llvm/test/MC/ELF/AMDGPU/cfi.s new file mode 100644 index 00000000000000..ba0a0f3ccdd8fc --- /dev/null +++ b/llvm/test/MC/ELF/AMDGPU/cfi.s @@ -0,0 +1,58 @@ +// RUN: llvm-mc -filetype=asm -mcpu=gfx900 -triple amdgcn-amd-amdhsa %s -o - | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -filetype=obj -mcpu=gfx900 -triple amdgcn-amd-amdhsa %s -o - | llvm-readobj -S --sr --sd | FileCheck --check-prefix=READOBJ %s + +f: + .cfi_sections .debug_frame + .cfi_startproc + s_nop 0 + .cfi_endproc + +// ASM: f: +// ASM-NEXT: .cfi_sections .debug_frame +// FIXME Why emit an extra empty line? +// ASM-EMPTY: +// ASM-NEXT: .cfi_startproc +// ASM-NEXT: s_nop 0 +// FIXME Why emit an extra empty line? 
+// ASM-EMPTY: +// ASM-NEXT: .cfi_endproc + +// READOBJ: Section { +// READOBJ: Name: .debug_frame +// READOBJ-NEXT: Type: SHT_PROGBITS +// READOBJ-NEXT: Flags [ +// READOBJ-NEXT: ] +// READOBJ-NEXT: Address: 0x0 +// READOBJ-NEXT: Offset: 0x48 +// READOBJ-NEXT: Size: 56 +// READOBJ-NEXT: Link: 0 +// READOBJ-NEXT: Info: 0 +// READOBJ-NEXT: AddressAlignment: 8 +// READOBJ-NEXT: EntrySize: 0 +// READOBJ-NEXT: Relocations [ +// READOBJ-NEXT: ] +// READOBJ-NEXT: SectionData ( +// READOBJ-NEXT: 0000: 1C000000 FFFFFFFF 045B6C6C 766D3A76 |.........[llvm:v| +// READOBJ-NEXT: 0010: 302E305D 00080004 04100000 00000000 |0.0]............| +// READOBJ-NEXT: 0020: 14000000 00000000 00000000 00000000 |................| +// READOBJ-NEXT: 0030: 04000000 00000000 |........| +// READOBJ-NEXT: ) +// READOBJ-NEXT: } + +// READOBJ: Section { +// READOBJ: Name: .rela.debug_frame +// READOBJ-NEXT: Type: SHT_RELA +// READOBJ-NEXT: Flags [ +// READOBJ-NEXT: ] +// READOBJ-NEXT: Address: 0x0 +// READOBJ-NEXT: Offset: +// READOBJ-NEXT: Size: 48 +// READOBJ-NEXT: Link: +// READOBJ-NEXT: Info: +// READOBJ-NEXT: AddressAlignment: 8 +// READOBJ-NEXT: EntrySize: 24 +// READOBJ-NEXT: Relocations [ +// READOBJ-NEXT: 0x24 R_AMDGPU_ABS32 .debug_frame 0x0 +// READOBJ-NEXT: 0x28 R_AMDGPU_ABS64 .text 0x0 +// READOBJ-NEXT: ] +// READOBJ: } diff --git a/llvm/test/MC/ELF/AMDGPU/lit.local.cfg b/llvm/test/MC/ELF/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000000..2500c96b678aa4 --- /dev/null +++ b/llvm/test/MC/ELF/AMDGPU/lit.local.cfg @@ -0,0 +1,3 @@ +# We have to reset config.unsupported here because the parent directory is +# predicated on 'X86'. 
+config.unsupported = not 'AMDGPU' in config.root.targets diff --git a/llvm/test/MC/ELF/cfi-llvm-def-cfa-aspace.s b/llvm/test/MC/ELF/cfi-llvm-def-cfa-aspace.s new file mode 100644 index 00000000000000..aff309eff60f99 --- /dev/null +++ b/llvm/test/MC/ELF/cfi-llvm-def-cfa-aspace.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu %s -o - | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -S --sr --sd | FileCheck --check-prefix=READOBJ %s + +f: + .cfi_startproc + nop + .cfi_llvm_def_aspace_cfa %rcx, 0, 6 + nop + .cfi_endproc + +// ASM: f: +// ASM-NEXT: .cfi_startproc +// ASM-NEXT: nop +// ASM-NEXT: .cfi_llvm_def_aspace_cfa %rcx, 0, 6 +// FIXME Why emit an extra empty line? +// ASM-EMPTY: +// ASM-NEXT: nop +// ASM-NEXT: .cfi_endproc + +// READOBJ: Section { +// READOBJ: Name: .eh_frame +// READOBJ-NEXT: Type: SHT_X86_64_UNWIND +// READOBJ-NEXT: Flags [ +// READOBJ-NEXT: SHF_ALLOC +// READOBJ-NEXT: ] +// READOBJ-NEXT: Address: 0x0 +// READOBJ-NEXT: Offset: 0x48 +// READOBJ-NEXT: Size: 48 +// READOBJ-NEXT: Link: 0 +// READOBJ-NEXT: Info: 0 +// READOBJ-NEXT: AddressAlignment: 8 +// READOBJ-NEXT: EntrySize: 0 +// READOBJ-NEXT: Relocations [ +// READOBJ-NEXT: ] +// READOBJ-NEXT: SectionData ( +// READOBJ-NEXT: 0000: 14000000 00000000 017A5200 01781001 +// READOBJ-NEXT: 0010: 1B0C0708 90010000 14000000 1C000000 +// READOBJ-NEXT: 0020: 00000000 02000000 00413002 00060000 +// READOBJ-NEXT: ) +// READOBJ-NEXT: } + +// READOBJ: Section { +// READOBJ: Name: .rela.eh_frame +// READOBJ-NEXT: Type: SHT_RELA +// READOBJ-NEXT: Flags [ +// READOBJ-NEXT: ] +// READOBJ-NEXT: Address: 0x0 +// READOBJ-NEXT: Offset: +// READOBJ-NEXT: Size: 24 +// READOBJ-NEXT: Link: +// READOBJ-NEXT: Info: +// READOBJ-NEXT: AddressAlignment: 8 +// READOBJ-NEXT: EntrySize: 24 +// READOBJ-NEXT: Relocations [ +// READOBJ-NEXT: 0x20 R_X86_64_PC32 .text 0x0 +// READOBJ-NEXT: ] +// READOBJ: } diff --git 
a/llvm/test/tools/llvm-dwarfdump/X86/debug_frame_LLVM_def_cfa_aspace.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_frame_LLVM_def_cfa_aspace.s new file mode 100644 index 00000000000000..1c105a190cf24d --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_frame_LLVM_def_cfa_aspace.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc %s -filetype=obj -triple=i686-pc-linux -o %t +# RUN: llvm-dwarfdump -v %t | FileCheck %s + +# CHECK: .eh_frame contents: +# CHECK: FDE +# CHECK-NEXT: DW_CFA_LLVM_def_aspace_cfa: reg2 +0 as6 +# CHECK-NEXT: DW_CFA_nop: + +.text +.globl foo +.type foo,@function +foo: + .cfi_startproc +.cfi_llvm_def_aspace_cfa %edx, 0, 6 + .cfi_endproc diff --git a/llvm/test/tools/llvm-dwarfdump/X86/heterogeneous_proposal.s b/llvm/test/tools/llvm-dwarfdump/X86/heterogeneous_proposal.s new file mode 100644 index 00000000000000..314700b29cb7e7 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/heterogeneous_proposal.s @@ -0,0 +1,40 @@ +# RUN: llvm-mc %s -filetype=obj -triple=i686-pc-linux -o %t +# RUN: llvm-dwarfdump -v %t | FileCheck %s + +# Check that we can decode new ops described at +# llvm/docs/AMDGPUUsage.rst#expression-operation-encodings + +# FIXME: Is there a better approach than using `DW_CFA_expression reg0 `? 
+ +# CHECK: .eh_frame contents: +# CHECK: FDE + +foo: + .cfi_startproc + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_form_aspace_address + .cfi_escape 0x10, 0x00, 0x01, 0xe1 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_push_lane + .cfi_escape 0x10, 0x00, 0x01, 0xe2 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_offset + .cfi_escape 0x10, 0x00, 0x01, 0xe3 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_offset_uconst 0x0 + .cfi_escape 0x10, 0x00, 0x02, 0xe4, 0x00 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_bit_offset + .cfi_escape 0x10, 0x00, 0x01, 0xe5 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_call_frame_entry_reg EAX + .cfi_escape 0x10, 0x00, 0x02, 0xe6, 0x00 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_undefined + .cfi_escape 0x10, 0x00, 0x01, 0xe7 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_aspace_bregx EAX+2 + .cfi_escape 0x10, 0x00, 0x03, 0xe8, 0x0, 0x2 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_aspace_implicit_pointer 0x1 +2 + .cfi_escape 0x10, 0x00, 0x06, 0xe9, 0x1, 0x0, 0x0, 0x0, 0x2 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_piece_end + .cfi_escape 0x10, 0x00, 0x01, 0xea + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_extend 0x0 0x0 + .cfi_escape 0x10, 0x00, 0x03, 0xeb, 0x0, 0x0 + # CHECK-NEXT: DW_CFA_expression: reg0 DW_OP_LLVM_select_bit_piece 0x0 0x0 + .cfi_escape 0x10, 0x00, 0x03, 0xec, 0x0, 0x0 + .cfi_endproc + +# CHECK-NEXT: DW_CFA_nop: