From 11546898e2ffc761acedad887ff00991a9affb62 Mon Sep 17 00:00:00 2001 From: diggerlin Date: Tue, 21 Jul 2020 16:03:04 -0400 Subject: [PATCH] [AIX][XCOFF]emit extern linkage for the llvm intrinsic symbol SUMMARY: When we call memset, memcpy, memmove, etc. (these are LLVM intrinsic functions) in the C source code, LLVM will generate IR like call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.S* @s to i8*), i8 %1, i32 %2, i1 false) For example, for the C source code bash> cat test_memset.call struct S{ int a; int b; }; extern struct S s; void bar() { memset(&s, s.b, s.b); } the generated IR looks like %struct.S = type { i32, i32 } @s = external global %struct.S, align 4 ; Function Attrs: noinline nounwind optnone define void @bar() #0 { entry: %0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 %1 = trunc i32 %0 to i8 %2 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.S* @s to i8*), i8 %1, i32 %2, i1 false) ret void } declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) #1 If we want the AIX assembler (as) to accept the generated assembly without -u, the output needs to contain the following assembly code: .extern .memset (Without this patch, we do not output extern linkage for LLVM intrinsic functions. Even when we output the extern linkage for an LLVM intrinsic function, we should not emit .extern llvm.memset.p0i8.i32; instead we should emit .extern .memset.) The same applies to other LLVM builtin functions such as __floatdidf: even if we do not call __floatdidf in the C source code (and the generated IR does not contain a call to __floatdidf either), the function call is generated by LLVM during optimization. 
The function is not in the function list of the Module, but we still need to emit .extern .__floatdidf. The solution is: transformCallee() lowers such external symbols to their "."-prefixed entry point names, PPCAIXAsmPrinter::emitInstruction() records every external symbol referenced by a call instruction, and PPCAIXAsmPrinter::doFinalization(Module &M) emits .extern for all the recorded symbols. Reviewers: jasonliu, Sean Fertile, hubert.reinterpretcast, Differential Revision: https://reviews.llvm.org/D78929 --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 59 ++++++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 40 ++++++----- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 4 ++ llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll | 10 +-- .../aix-external-sym-sdnode-lowering.ll | 4 +- .../CodeGen/PowerPC/aix-llvm-intrinsic.ll | 70 +++++++++++++++++++ .../PowerPC/aix-user-defined-memcpy.ll | 6 ++ 7 files changed, 168 insertions(+), 25 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index bf5fe741bac8a3..d26df9a6c7f75e 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -27,6 +27,7 @@ #include "PPCTargetStreamer.h" #include "TargetInfo/PowerPCTargetInfo.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -47,6 +48,7 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" @@ -147,6 +149,10 @@ class PPCLinuxAsmPrinter : public PPCAsmPrinter { class PPCAIXAsmPrinter : public PPCAsmPrinter { private: + /// Symbols lowered from ExternalSymbolSDNodes, we will need to emit extern + /// linkage for them in AIX. 
+ SmallPtrSet ExtSymSDNodeSymbols; + static void ValidateGV(const GlobalVariable *GV); public: @@ -170,6 +176,10 @@ class PPCAIXAsmPrinter : public PPCAsmPrinter { void emitEndOfAsmFile(Module &) override; void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override; + + void emitInstruction(const MachineInstr *MI) override; + + bool doFinalization(Module &M) override; }; } // end anonymous namespace @@ -1812,6 +1822,55 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) { return Result; } +void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + break; + case PPC::BL8: + case PPC::BL: + case PPC::BL8_NOP: + case PPC::BL_NOP: { + const MachineOperand &MO = MI->getOperand(0); + if (MO.isSymbol()) { + MCSymbolXCOFF *S = + cast(OutContext.getOrCreateSymbol(MO.getSymbolName())); + if (!S->hasRepresentedCsectSet()) { + // On AIX, an undefined symbol needs to be associated with a + // MCSectionXCOFF to get the correct storage mapping class. + // In this case, XCOFF::XMC_PR. 
+ MCSectionXCOFF *Sec = OutContext.getXCOFFSection( + S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, XCOFF::C_EXT, + SectionKind::getMetadata()); + S->setRepresentedCsect(Sec); + } + ExtSymSDNodeSymbols.insert(S); + } + } break; + case PPC::BL_TLS: + case PPC::BL8_TLS: + case PPC::BL8_TLS_: + case PPC::BL8_NOP_TLS: + report_fatal_error("TLS call not yet implemented"); + case PPC::TAILB: + case PPC::TAILB8: + case PPC::TAILBA: + case PPC::TAILBA8: + case PPC::TAILBCTR: + case PPC::TAILBCTR8: + if (MI->getOperand(0).isSymbol()) + report_fatal_error("Tail call for extern symbol not yet supported."); + break; + } + return PPCAsmPrinter::emitInstruction(MI); +} + +bool PPCAIXAsmPrinter::doFinalization(Module &M) { + bool Ret = PPCAsmPrinter::doFinalization(M); + for (MCSymbol *Sym : ExtSymSDNodeSymbols) + OutStreamer->emitSymbolAttribute(Sym, MCSA_Extern); + return Ret; +} + /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code /// for a MachineFunction to the given output stream, in a format that the /// Darwin assembler can deal with. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ddfbd04e1ebc59..126364ab1943b4 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5335,13 +5335,18 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, // On AIX, direct function calls reference the symbol for the function's // entry point, which is named by prepending a "." before the function's // C-linkage name. 
+ const auto getFunctionEntryPointSymbol = [&](StringRef SymName) { + auto &Context = DAG.getMachineFunction().getMMI().getContext(); + return cast( + Context.getOrCreateSymbol(Twine(".") + Twine(SymName))); + }; + const auto getAIXFuncEntryPointSymbolSDNode = [&](StringRef FuncName, bool IsDeclaration, const XCOFF::StorageClass &SC) { - auto &Context = DAG.getMachineFunction().getMMI().getContext(); + MCSymbolXCOFF *S = getFunctionEntryPointSymbol(FuncName); - MCSymbolXCOFF *S = cast( - Context.getOrCreateSymbol(Twine(".") + Twine(FuncName))); + auto &Context = DAG.getMachineFunction().getMMI().getContext(); if (IsDeclaration && !S->hasRepresentedCsectSet()) { // On AIX, an undefined symbol needs to be associated with a @@ -5376,22 +5381,21 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *SymName = S->getSymbol(); - if (!Subtarget.isAIXABI()) - return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(), - UsePlt ? PPCII::MO_PLT : 0); - - // If there exists a user-declared function whose name is the same as the - // ExternalSymbol's, then we pick up the user-declared version. - const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); - if (const Function *F = - dyn_cast_or_null(Mod->getNamedValue(SymName))) { - const XCOFF::StorageClass SC = - TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F); - return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(), - SC); + if (Subtarget.isAIXABI()) { + // If there exists a user-declared function whose name is the same as the + // ExternalSymbol's, then we pick up the user-declared version. 
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + if (const Function *F = + dyn_cast_or_null(Mod->getNamedValue(SymName))) { + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F); + return getAIXFuncEntryPointSymbolSDNode(F->getName(), + F->isDeclaration(), SC); + } + SymName = getFunctionEntryPointSymbol(SymName)->getName().data(); } - - return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT); + return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(), + UsePlt ? PPCII::MO_PLT : 0); } // No transformation needed. diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 673ab63039cf72..c49e7a3dc6c236 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3246,9 +3246,13 @@ def : Pat<(PPCcall (i32 texternalsym:$dst)), // Calls for AIX only def : Pat<(PPCcall (i32 mcsym:$dst)), (BL mcsym:$dst)>; + def : Pat<(PPCcall_nop (i32 mcsym:$dst)), (BL_NOP mcsym:$dst)>; +def : Pat<(PPCcall_nop (i32 texternalsym:$dst)), + (BL_NOP texternalsym:$dst)>; + def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll index 53f18cd39ae0ae..82927fdda86aee 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll @@ -88,7 +88,7 @@ entry: ; 32BIT-DAG: $r3 = COPY %0 ; 32BIT-DAG: $r4 = COPY %1 ; 32BIT-DAG: $r5 = COPY %2 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 +; 32BIT-NEXT: BL_NOP &.memcpy, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit 
$r1 ; 32BIT: ADJCALLSTACKDOWN 312, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-DAG: $r3 = COPY %{{[0-9]+}} @@ -120,7 +120,7 @@ entry: ; 64BIT-DAG: $x3 = COPY %0 ; 64BIT-DAG: $x4 = COPY %1 ; 64BIT-DAG: $x5 = COPY %2 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 +; 64BIT-NEXT: BL8_NOP &.memcpy, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT: ADJCALLSTACKDOWN 368, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-DAG: $x3 = COPY %{{[0-9]+}} @@ -187,7 +187,7 @@ entry: ; 32BIT-DAG: $r3 = COPY %2 ; 32BIT-DAG: $r4 = COPY %1 ; 32BIT-DAG: $r5 = COPY %3 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 +; 32BIT-NEXT: BL_NOP &.memcpy, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: ADJCALLSTACKDOWN 92, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-DAG: $r3 = COPY %{{[0-9]+}} @@ -305,7 +305,7 @@ entry: ; 32BIT-DAG: $r3 = COPY %3 ; 32BIT-DAG: $r4 = COPY %4 ; 32BIT-DAG: $r5 = COPY %5 -; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 +; 32BIT-NEXT: BL_NOP &.memcpy, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: ADJCALLSTACKDOWN 316, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-DAG: $r3 = COPY %{{[0-9]+}} @@ -349,7 +349,7 @@ 
entry: ; 64BIT-DAG: $x3 = COPY %2 ; 64BIT-DAG: $x4 = COPY %1 ; 64BIT-DAG: $x5 = COPY %3 -; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 +; 64BIT-NEXT: BL8_NOP &.memcpy, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT: ADJCALLSTACKDOWN 344, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-DAG: $x3 = COPY %{{[0-9]+}} diff --git a/llvm/test/CodeGen/PowerPC/aix-external-sym-sdnode-lowering.ll b/llvm/test/CodeGen/PowerPC/aix-external-sym-sdnode-lowering.ll index a4957a514e0929..690ff99d053048 100644 --- a/llvm/test/CodeGen/PowerPC/aix-external-sym-sdnode-lowering.ll +++ b/llvm/test/CodeGen/PowerPC/aix-external-sym-sdnode-lowering.ll @@ -14,5 +14,5 @@ entry: declare double @llvm.ceil.f64(double) -; 32BIT: BL_NOP -; 64BIT: BL8_NOP +; 32BIT: BL_NOP &.ceil +; 64BIT: BL8_NOP &.ceil diff --git a/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll b/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll new file mode 100644 index 00000000000000..cc0baf5d0436c4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll @@ -0,0 +1,70 @@ +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 -mattr=-altivec < %s | \ +; RUN: FileCheck %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr4 -mattr=-altivec < %s | \ +; RUN: FileCheck %s + +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \ +; RUN: -mattr=-altivec -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --symbols %t.o | FileCheck --check-prefix=CHECKSYM %s +; RUN: llvm-objdump -r -d --symbol-description %t.o | FileCheck --check-prefix=CHECKRELOC %s + +; RUN: not --crash llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff \ +; RUN: -mattr=-altivec 
-filetype=obj -o %t.o 2>&1 < %s | \ +; RUN: FileCheck --check-prefix=XCOFF64 %s +; XCOFF64: LLVM ERROR: 64-bit XCOFF object files are not supported yet. + +%struct.S = type { i32, i32 } + +@s = external global %struct.S, align 4 + +define void @bar() { +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 + %1 = trunc i32 %0 to i8 + %2 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 + call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.S* @s to i8*), i8 %1, i32 %2, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) + +; CHECK-LABEL: .bar: +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 + +; CHECK: bl .memset + +; CHECK: .extern .memset + +; CHECKSYM: Symbol { +; CHECKSYM-NEXT: Index: 0 +; CHECKSYM-NEXT: Name: .memset +; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0 +; CHECKSYM-NEXT: Section: N_UNDEF +; CHECKSYM-NEXT: Type: 0x0 +; CHECKSYM-NEXT: StorageClass: C_EXT (0x2) +; CHECKSYM-NEXT: NumberOfAuxEntries: 1 +; CHECKSYM-NEXT: CSECT Auxiliary Entry { +; CHECKSYM-NEXT: Index: 1 +; CHECKSYM-NEXT: SectionLen: 0 +; CHECKSYM-NEXT: ParameterHashIndex: 0x0 +; CHECKSYM-NEXT: TypeChkSectNum: 0x0 +; CHECKSYM-NEXT: SymbolAlignmentLog2: 0 +; CHECKSYM-NEXT: SymbolType: XTY_ER (0x0) +; CHECKSYM-NEXT: StorageMappingClass: XMC_PR (0x0) +; CHECKSYM-NEXT: StabInfoIndex: 0x0 +; CHECKSYM-NEXT: StabSectNum: 0x0 +; CHECKSYM-NEXT: } +; CHECKSYM-NEXT: } + +; CHECKRELOC: 00000000 (idx: 6) .bar: +; CHECKRELOC-NEXT: 0: 7c 08 02 a6 mflr 0 +; CHECKRELOC-NEXT: 4: 90 01 00 08 stw 0, 8(1) +; CHECKRELOC-NEXT: 8: 94 21 ff c0 stwu 1, -64(1) +; CHECKRELOC-NEXT: c: 80 62 00 00 lwz 3, 0(2) +; CHECKRELOC-NEXT: 0000000e: R_TOC (idx: 12) s[TC] +; CHECKRELOC-NEXT: 10: 80 83 00 04 lwz 4, 4(3) +; CHECKRELOC-NEXT: 14: 7c 85 23 78 mr 5, 4 +; CHECKRELOC-NEXT: 18: 4b ff ff e9 bl 0x0 +; CHECKRELOC-NEXT: 00000018: R_RBR (idx: 0) .memset[PR] diff --git 
a/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll b/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll index 0e6fbcb6a3efa9..c24108895c2aca 100644 --- a/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll +++ b/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll @@ -8,6 +8,10 @@ ; RUN: llvm-objdump -D %t.o | FileCheck --check-prefix=32-DIS %s +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -mcpu=pwr4 -mattr=-altivec < %s | \ +; RUN: FileCheck %s + ; RUN: not --crash llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff \ ; RUN: -mcpu=pwr4 -mattr=-altivec -filetype=obj < %s 2>&1 | FileCheck \ ; RUN: --check-prefix=64-CHECK %s @@ -35,6 +39,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture r ; 2. There is no relocation associated with the call, since callee is defined. ; 3. Branch instruction in raw data is branching back to the right callee location. +; CHECK-NOT: .extern .memcpy + ; 32-SYM: Symbol {{[{][[:space:]] *}}Index: [[#Index:]]{{[[:space:]] *}}Name: .memcpy ; 32-SYM-NEXT: Value (RelocatableAddress): 0x0 ; 32-SYM-NEXT: Section: .text