diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 37a5b937ebcaa3..9677530919b90d 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -200,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() { } unsigned ProbeTrack = AP.second.size(); - std::list::iterator Probe = AP.second.begin(); + auto Probe = llvm::map_iterator( + AP.second.begin(), + [](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); }); while (ProbeTrack != 0) { if (Probe->isBlock()) { Probe->setAddress(BlkOutputAddress); @@ -218,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() { } while (CallOutputAddress != CallOutputAddresses.second) { - AP.second.push_back(*Probe); - AP.second.back().setAddress(CallOutputAddress->second); - Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); + ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second); CallOutputAddress = std::next(CallOutputAddress); } } @@ -332,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() { ProbeDecoder.getDummyInlineRoot(); for (auto Child = Root.getChildren().begin(); Child != Root.getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (auto Inlinee : Inlinees) // INT64_MAX is "placeholder" of unused callsite index field in the pair @@ -358,25 +358,37 @@ void PseudoProbeRewriter::encodePseudoProbes() { EmitInt(Cur->Guid, 8); // Emit number of probes in this node uint64_t Deleted = 0; - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) if (Probe->getAddress() == INT64_MAX) Deleted++; LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n"); - uint64_t ProbesSize = Cur->getProbes().size() - Deleted; + size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur); + uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes; 
EmitULEB128IntValue(ProbesSize); // Emit number of direct inlinees EmitULEB128IntValue(Cur->getChildren().size()); // Emit probes in this group - for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(Cur->getProbes())) { if (Probe->getAddress() == INT64_MAX) continue; EmitDecodedPseudoProbe(Probe); LastProbe = Probe; } + if (InjectedProbes) { + for (MCDecodedPseudoProbe *&Probe : + llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) { + if (Probe->getAddress() == INT64_MAX) + continue; + EmitDecodedPseudoProbe(Probe); + LastProbe = Probe; + } + } for (auto Child = Cur->getChildren().begin(); Child != Cur->getChildren().end(); ++Child) - Inlinees[Child->first] = Child->second.get(); + Inlinees[Child->getInlineSite()] = &*Child; for (const auto &Inlinee : Inlinees) { assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid"); NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second}); diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 3dd10c0717679b..66ad9db4860d8a 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -54,20 +54,21 @@ #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/Support/ErrorOr.h" -#include +#include #include #include #include #include #include #include -#include #include namespace llvm { @@ -103,14 +104,15 @@ using MCPseudoProbeInlineStack = SmallVector; using GUIDProbeFunctionMap = std::unordered_map; // Address to pseudo probes map. 
-using AddressProbesMap = std::map>; +using AddressProbesMap = + std::map>>; class MCDecodedPseudoProbeInlineTree; class MCPseudoProbeBase { protected: - uint64_t Guid; - uint64_t Index; + uint32_t Index; uint32_t Discriminator; uint8_t Attributes; uint8_t Type; @@ -120,14 +122,12 @@ class MCPseudoProbeBase { const static uint32_t PseudoProbeFirstId = 1; public: - MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D) - : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {} + MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D) + : Index(I), Discriminator(D), Attributes(At), Type(T) {} bool isEntry() const { return Index == PseudoProbeFirstId; } - uint64_t getGuid() const { return Guid; } - - uint64_t getIndex() const { return Index; } + uint32_t getIndex() const { return Index; } uint32_t getDiscriminator() const { return Discriminator; } @@ -157,18 +157,20 @@ class MCPseudoProbeBase { /// uses an address from a temporary label created at the current address in the /// current section. 
class MCPseudoProbe : public MCPseudoProbeBase { + uint64_t Guid; MCSymbol *Label; public: MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attributes, uint32_t Discriminator) - : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator), + : MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid), Label(Label) { assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); assert(Attributes <= 0xFF && "Probe attributes too big to encode, exceeding 2^16"); } + uint64_t getGuid() const { return Guid; }; MCSymbol *getLabel() const { return Label; } void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; }; @@ -181,11 +183,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { MCDecodedPseudoProbeInlineTree *InlineTree; public: - MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, - uint8_t At, uint32_t D, - MCDecodedPseudoProbeInlineTree *Tree) - : MCPseudoProbeBase(G, I, At, static_cast(K), D), Address(Ad), + MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At, + uint32_t D, MCDecodedPseudoProbeInlineTree *Tree) + : MCPseudoProbeBase(I, At, static_cast(K), D), Address(Ad), InlineTree(Tree){}; + uint64_t getGuid() const; uint64_t getAddress() const { return Address; } @@ -211,21 +213,14 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase { bool ShowName) const; }; -template +template class MCPseudoProbeInlineTreeBase { - struct InlineSiteHash { - uint64_t operator()(const InlineSite &Site) const { - return std::get<0>(Site) ^ std::get<1>(Site); - } - }; - protected: // Track children (e.g. inlinees) of current context - using InlinedProbeTreeMap = std::unordered_map< - InlineSite, std::unique_ptr, InlineSiteHash>; InlinedProbeTreeMap Children; // Set of probes that come with the function. 
- std::vector Probes; + ProbesType Probes; MCPseudoProbeInlineTreeBase() { static_assert(std::is_base_of::value, @@ -240,12 +235,10 @@ class MCPseudoProbeInlineTreeBase { bool isRoot() const { return Guid == 0; } InlinedProbeTreeMap &getChildren() { return Children; } const InlinedProbeTreeMap &getChildren() const { return Children; } - std::vector &getProbes() { return Probes; } - const std::vector &getProbes() const { return Probes; } - void addProbes(ProbeType Probe) { Probes.push_back(Probe); } + const ProbesType &getProbes() const { return Probes; } // Caller node of the inline site - MCPseudoProbeInlineTreeBase *Parent = - nullptr; + MCPseudoProbeInlineTreeBase *Parent = nullptr; DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { auto Ret = Children.emplace( Site, std::make_unique(Site)); @@ -259,9 +252,17 @@ class MCPseudoProbeInlineTreeBase { // instance is created as the root of a tree. // A real instance of this class is created for each function, either a // not inlined function that has code in .text section or an inlined function. 
+struct InlineSiteHash { + uint64_t operator()(const InlineSite &Site) const { + return std::get<0>(Site) ^ std::get<1>(Site); + } +}; class MCPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { + : public MCPseudoProbeInlineTreeBase< + std::vector, MCPseudoProbeInlineTree, + std::unordered_map, + InlineSiteHash>> { public: MCPseudoProbeInlineTree() = default; MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } @@ -277,16 +278,31 @@ class MCPseudoProbeInlineTree // inline tree node for the decoded pseudo probe class MCDecodedPseudoProbeInlineTree - : public MCPseudoProbeInlineTreeBase { -public: - InlineSite ISite; + : public MCPseudoProbeInlineTreeBase< + MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree, + MutableArrayRef> { + uint32_t NumProbes = 0; + uint32_t ProbeId = 0; +public: MCDecodedPseudoProbeInlineTree() = default; - MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; + MCDecodedPseudoProbeInlineTree(const InlineSite &Site, + MCDecodedPseudoProbeInlineTree *Parent) + : ProbeId(std::get<1>(Site)) { + this->Guid = std::get<0>(Site); + this->Parent = Parent; + } // Return false if it's a dummy inline site bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } + InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); } + void setProbes(MutableArrayRef ProbesRef) { + Probes = ProbesRef.data(); + NumProbes = ProbesRef.size(); + } + auto getProbes() const { + return MutableArrayRef(Probes, NumProbes); + } }; /// Instances of this class represent the pseudo probes inserted into a compile @@ -336,6 +352,20 @@ class MCPseudoProbeTable { }; class MCPseudoProbeDecoder { + // Decoded pseudo probes vector. + std::vector PseudoProbeVec; + // Injected pseudo probes, identified by the containing inline tree node. 
+ // Need to keep injected probes separately for two reasons: + // 1) Probes cannot be added to the PseudoProbeVec: appending may cause + // reallocation so that pointers to its elements will become invalid. + // 2) Probes belonging to function record must be contiguous in PseudoProbeVec + // as owning InlineTree references them with an ArrayRef to save space. + std::unordered_map> + InjectedProbeMap; + // Decoded inline records vector. + std::vector InlineTreeVec; + // GUID to PseudoProbeFuncDesc map. GUIDProbeFunctionMap GUID2FuncDescMap; @@ -382,10 +412,6 @@ class MCPseudoProbeDecoder { const Uint64Set &GuildFilter, const Uint64Map &FuncStartAddrs); - bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, - uint64_t &LastAddr, const Uint64Set &GuildFilter, - const Uint64Map &FuncStartAddrs); - // Print pseudo_probe_desc section info void printGUID2FuncDescMap(raw_ostream &OS); @@ -428,6 +454,34 @@ class MCPseudoProbeDecoder { const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { return DummyInlineRoot; } + + void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) { + const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode(); + InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address); + } + + size_t + getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const { + auto It = InjectedProbeMap.find(Parent); + if (It == InjectedProbeMap.end()) + return 0; + return It->second.size(); + } + + auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) { + auto It = InjectedProbeMap.find(Parent); + assert(It != InjectedProbeMap.end()); + return iterator_range(It->second); + } + +private: + // Recursively parse an inlining tree encoded in pseudo_probe section. Returns + // whether the top-level node should be skipped. 
+ template + bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, + uint64_t &LastAddr, const Uint64Set &GuildFilter, + const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex); }; } // end namespace llvm diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 77ac1fee4120f5..1031dac331bb1c 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -49,6 +49,8 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer *MCOS, const MCSymbol *A, return AddrDelta; } +uint64_t MCDecodedPseudoProbe::getGuid() const { return InlineTree->Guid; } + void MCPseudoProbe::emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const { bool IsSentinel = isSentinelProbe(getAttributes()); @@ -289,8 +291,8 @@ void MCDecodedPseudoProbe::getInlineContext( // Note that it won't include the probe's belonging function(leaf location) while (Cur->hasInlineSite()) { StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid); - ContextStack.emplace_back( - MCPseudoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite))); + ContextStack.emplace_back(MCPseudoProbeFrameLocation( + FuncName, std::get<1>(Cur->getInlineSite()))); Cur = static_cast(Cur->Parent); } // Make the ContextStack in caller-callee order @@ -318,10 +320,10 @@ void MCDecodedPseudoProbe::print(raw_ostream &OS, bool ShowName) const { OS << "FUNC: "; if (ShowName) { - StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Guid); + StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, getGuid()); OS << FuncName.str() << " "; } else { - OS << Guid << " "; + OS << getGuid() << " "; } OS << "Index: " << Index << " "; if (Discriminator) @@ -417,17 +419,18 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start, return true; } +template bool MCPseudoProbeDecoder::buildAddress2ProbeMap( MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, - const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { + const Uint64Set 
&GuidFilter, const Uint64Map &FuncStartAddrs, + const uint32_t CurChildIndex) { // The pseudo_probe section encodes an inline forest and each tree has a // format defined in MCPseudoProbe.h uint32_t Index = 0; - bool IsTopLevelFunc = Cur == &DummyInlineRoot; if (IsTopLevelFunc) { // Use a sequential id for top level inliner. - Index = Cur->getChildren().size(); + Index = CurChildIndex; } else { // Read inline site for inlinees Index = cantFail(errorOrToExpected(readUnsignedNumber())); @@ -443,8 +446,9 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // If the incoming node is null, all its children nodes should be disgarded. if (Cur) { // Switch/add to a new tree node(inlinee) - Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index)); - Cur->Guid = Guid; + Cur->getChildren()[CurChildIndex] = + MCDecodedPseudoProbeInlineTree(InlineSite(Guid, Index), Cur); + Cur = &Cur->getChildren()[CurChildIndex]; if (IsTopLevelFunc && !EncodingIsAddrBased) { if (auto V = FuncStartAddrs.lookup(Guid)) LastAddr = V; @@ -454,6 +458,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( // Read number of probes in the current node. 
uint32_t NodeCount = cantFail(errorOrToExpected(readUnsignedNumber())); + uint32_t CurrentProbeCount = 0; // Read number of direct inlinees uint32_t ChildrenToProcess = cantFail(errorOrToExpected(readUnsignedNumber())); @@ -494,19 +499,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( } if (Cur && !isSentinelProbe(Attr)) { - // Populate Address2ProbesMap - auto &Probes = Address2ProbesMap[Addr]; - Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr, - Discriminator, Cur); - Cur->addProbes(&Probes.back()); + PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr, + Discriminator, Cur); + Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back()); + ++CurrentProbeCount; } LastAddr = Addr; } + if (Cur) { + Cur->setProbes( + MutableArrayRef(PseudoProbeVec).take_back(CurrentProbeCount)); + InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess); + Cur->getChildren() = + MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess); + } for (uint32_t I = 0; I < ChildrenToProcess; I++) { - buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs); + buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I); } - return true; + return Cur; } template @@ -605,14 +616,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( TopLevelFuncs += !Discard; } assert(Data == End && "Have unprocessed data in pseudo_probe section"); + PseudoProbeVec.reserve(ProbeCount); + InlineTreeVec.reserve(InlinedCount); + + // Allocate top-level function records as children of DummyInlineRoot. 
+ InlineTreeVec.resize(TopLevelFuncs); + DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec); Data = Start; End = Data + Size; uint64_t LastAddr = 0; + uint32_t CurChildIndex = 0; while (Data < End) - buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter, - FuncStartAddrs); + CurChildIndex += buildAddress2ProbeMap( + &DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex); assert(Data == End && "Have unprocessed data in pseudo_probe section"); + assert(PseudoProbeVec.size() == ProbeCount && + "Mismatching probe count pre- and post-parsing"); + assert(InlineTreeVec.size() == InlinedCount && + "Mismatching function records count pre- and post-parsing"); return true; } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 5094871a1d415d..ea7b9b9c7bd528 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -1293,9 +1293,9 @@ void CSProfileGenerator::populateBodySamplesWithProbes( // and will be inferred by the compiler. 
for (auto &I : FrameSamples) { for (auto *FunctionProfile : I.second) { - for (auto *Probe : I.first->getProbes()) { - FunctionProfile->addBodySamples(Probe->getIndex(), - Probe->getDiscriminator(), 0); + for (const MCDecodedPseudoProbe &Probe : I.first->getProbes()) { + FunctionProfile->addBodySamples(Probe.getIndex(), + Probe.getDiscriminator(), 0); } } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index a458ffcb96b41a..e4fc3816cd0c45 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( MCPseudoProbeDecoder &ProbeDecoder) { ProbeFrameStack ProbeContext; for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) - trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext); } void BinarySizeContextTracker::trackInlineesOptimizedAway( @@ -160,9 +160,9 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway( // DFS down the probe inline tree for (const auto &ChildNode : ProbeNode.getChildren()) { - InlineSite Location = ChildNode.first; + InlineSite Location = ChildNode.getInlineSite(); ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, ProbeContext); + trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext); } ProbeContext.pop_back(); @@ -454,8 +454,8 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe // is available if (TrackFuncContextSize) { - for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { - auto *Frame = Child.second.get(); + for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { + auto *Frame = &Child; StringRef FuncName = 
ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName; TopLevelProbeFrameMap[FuncName] = Frame;