Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MC][NFC] Statically allocate storage for decoded pseudo probes and function records #102789

24 changes: 16 additions & 8 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ void PseudoProbeRewriter::parsePseudoProbe() {
if (!ProbeDecoder.buildAddress2ProbeMap(
reinterpret_cast<const uint8_t *>(Contents.data()), Contents.size(),
GuidFilter, FuncStartAddrs)) {
ProbeDecoder.getAddress2ProbesMap().clear();
errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n";
return;
}
Expand Down Expand Up @@ -201,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

unsigned ProbeTrack = AP.second.size();
std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
auto Probe = llvm::map_iterator(
AP.second.begin(),
[](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
while (ProbeTrack != 0) {
if (Probe->isBlock()) {
Probe->setAddress(BlkOutputAddress);
Expand All @@ -219,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

while (CallOutputAddress != CallOutputAddresses.second) {
AP.second.push_back(*Probe);
AP.second.back().setAddress(CallOutputAddress->second);
Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this change? It seems unrelated to decoding pseudo probes. Or is it because we allocate a fixed-size vector of MCDecodedPseudoProbe, so there is no way to add additional probes to that vector later, and to address this we have to use a new container, InjectedProbeMap, to save the new probes? If so, could you add comments to explain this (maybe at the definition of InjectedProbeMap)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's required because appending to ProbeVec may cause its reallocation so that pointers to its elements will become invalid. I'll leave a comment.

CallOutputAddress = std::next(CallOutputAddress);
}
}
Expand Down Expand Up @@ -333,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
ProbeDecoder.getDummyInlineRoot();
for (auto Child = Root.getChildren().begin();
Child != Root.getChildren().end(); ++Child)
Inlinees[Child->first] = Child->second.get();
Inlinees[Child->getInlineSite()] = &*Child;

for (auto Inlinee : Inlinees)
// INT64_MAX is "placeholder" of unused callsite index field in the pair
Expand Down Expand Up @@ -363,7 +362,8 @@ void PseudoProbeRewriter::encodePseudoProbes() {
if (Probe->getAddress() == INT64_MAX)
Deleted++;
LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur);
uint64_t ProbesSize = Cur->NumProbes + InjectedProbes - Deleted;
EmitULEB128IntValue(ProbesSize);
// Emit number of direct inlinees
EmitULEB128IntValue(Cur->getChildren().size());
Expand All @@ -374,10 +374,18 @@ void PseudoProbeRewriter::encodePseudoProbes() {
EmitDecodedPseudoProbe(Probe);
LastProbe = Probe;
}
if (InjectedProbes) {
for (MCDecodedPseudoProbe *&Probe : ProbeDecoder.getInjectedProbes(Cur)) {
if (Probe->getAddress() == INT64_MAX)
continue;
EmitDecodedPseudoProbe(Probe);
LastProbe = Probe;
}
}

for (auto Child = Cur->getChildren().begin();
Child != Cur->getChildren().end(); ++Child)
Inlinees[Child->first] = Child->second.get();
Inlinees[Child->getInlineSite()] = &*Child;
for (const auto &Inlinee : Inlinees) {
assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
Expand Down
105 changes: 81 additions & 24 deletions llvm/include/llvm/MC/MCPseudoProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,21 @@
#ifndef LLVM_MC_MCPSEUDOPROBE_H
#define LLVM_MC_MCPSEUDOPROBE_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/ErrorOr.h"
#include <list>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace llvm {
Expand Down Expand Up @@ -103,14 +104,15 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
using GUIDProbeFunctionMap =
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
// Address to pseudo probes map.
using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>;
using AddressProbesMap =
std::map<uint64_t,
std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;

class MCDecodedPseudoProbeInlineTree;

class MCPseudoProbeBase {
protected:
uint64_t Guid;
uint64_t Index;
uint32_t Index;
uint32_t Discriminator;
uint8_t Attributes;
uint8_t Type;
Expand All @@ -120,14 +122,12 @@ class MCPseudoProbeBase {
const static uint32_t PseudoProbeFirstId = 1;

public:
MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D)
: Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {}
MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D)
: Index(I), Discriminator(D), Attributes(At), Type(T) {}

bool isEntry() const { return Index == PseudoProbeFirstId; }

uint64_t getGuid() const { return Guid; }

uint64_t getIndex() const { return Index; }
uint32_t getIndex() const { return Index; }

uint32_t getDiscriminator() const { return Discriminator; }

Expand Down Expand Up @@ -157,18 +157,20 @@ class MCPseudoProbeBase {
/// uses an address from a temporary label created at the current address in the
/// current section.
class MCPseudoProbe : public MCPseudoProbeBase {
uint64_t Guid;
MCSymbol *Label;

public:
MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attributes, uint32_t Discriminator)
: MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator),
: MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid),
Label(Label) {
assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
assert(Attributes <= 0xFF &&
"Probe attributes too big to encode, exceeding 2^16");
}

uint64_t getGuid() const { return Guid; };
MCSymbol *getLabel() const { return Label; }
void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
};
Expand All @@ -181,11 +183,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
MCDecodedPseudoProbeInlineTree *InlineTree;

public:
MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
uint8_t At, uint32_t D,
MCDecodedPseudoProbeInlineTree *Tree)
: MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad),
MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At,
uint32_t D, MCDecodedPseudoProbeInlineTree *Tree)
: MCPseudoProbeBase(I, At, static_cast<uint8_t>(K), D), Address(Ad),
InlineTree(Tree){};
uint64_t getGuid() const;

uint64_t getAddress() const { return Address; }

Expand Down Expand Up @@ -276,17 +278,35 @@ class MCPseudoProbeInlineTree
};

// inline tree node for the decoded pseudo probe
class MCDecodedPseudoProbeInlineTree
: public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
MCDecodedPseudoProbeInlineTree> {
class MCDecodedPseudoProbeInlineTree {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that this class is no longer derived from MCPseudoProbeInlineTreeBase, MCPseudoProbeInlineTree will be the only class derived from MCPseudoProbeInlineTreeBase. Is there still a need for the base class? We may need to refactor it.

public:
InlineSite ISite;
uint64_t Guid = 0;

// Caller node of the inline site
MCDecodedPseudoProbeInlineTree *Parent = nullptr;

MCDecodedPseudoProbeInlineTree() = default;
MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
MCDecodedPseudoProbeInlineTree(uint64_t Guid, uint32_t ProbeId,
MCDecodedPseudoProbeInlineTree *Parent)
: Guid(Guid), Parent(Parent), ProbeId(ProbeId) {}

// Track children (e.g. inlinees) of current context
MutableArrayRef<MCDecodedPseudoProbeInlineTree> Children;
// Set of probes that come with the function.
MCDecodedPseudoProbe *Probes = nullptr;
uint32_t NumProbes = 0;
uint32_t ProbeId = 0;

// Root node has a GUID 0.
bool isRoot() const { return Guid == 0; }
// Return false if it's a dummy inline site
bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
auto getChildren() const { return Children; }
auto getProbes() const {
return llvm::make_pointer_range(
MutableArrayRef<MCDecodedPseudoProbe>(Probes, NumProbes));
}
};

/// Instances of this class represent the pseudo probes inserted into a compile
Expand Down Expand Up @@ -336,6 +356,15 @@ class MCPseudoProbeTable {
};

class MCPseudoProbeDecoder {
// Decoded pseudo probes vector.
std::vector<MCDecodedPseudoProbe> PseudoProbeVec;
// Injected pseudo probes, identified by the containing inline tree node.
std::unordered_map<const MCDecodedPseudoProbeInlineTree *,
std::vector<MCDecodedPseudoProbe>>
InjectedProbeMap;
// Decoded inline records vector.
std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec;

// GUID to PseudoProbeFuncDesc map.
GUIDProbeFunctionMap GUID2FuncDescMap;

Expand Down Expand Up @@ -370,16 +399,18 @@ class MCPseudoProbeDecoder {
// Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);

// Decode pseudo_probe section to count the number of probes and inlined
// function records for each function record.
template <bool IsTopLevelFunc>
bool countRecords(bool &Discard, uint32_t &ProbeCount, uint32_t &InlinedCount,
const Uint64Set &GuidFilter);

// Decode pseudo_probe section to build address to probes map for specified
// functions only.
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs);

bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr, const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs);

// Print pseudo_probe_desc section info
void printGUID2FuncDescMap(raw_ostream &OS);

Expand Down Expand Up @@ -422,6 +453,32 @@ class MCPseudoProbeDecoder {
const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
return DummyInlineRoot;
}

void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) {
const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode();
InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address);
}

// Return how many injected probes are recorded for Parent, or 0 when Parent
// has no entry in InjectedProbeMap.
size_t
getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const {
  const auto Entry = InjectedProbeMap.find(Parent);
  return Entry != InjectedProbeMap.end() ? Entry->second.size() : 0;
}

// Return the injected probes recorded for Parent as a range of pointers to
// the stored probes. Parent must already have an entry in InjectedProbeMap;
// this is only checked by the assert.
auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) {
  auto Entry = InjectedProbeMap.find(Parent);
  assert(Entry != InjectedProbeMap.end());
  auto &Probes = Entry->second;
  return llvm::make_pointer_range(iterator_range(Probes));
}

private:
template <bool IsTopLevelFunc>
void buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr, const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs,
uint32_t &CurChild);
};

} // end namespace llvm
Expand Down
Loading
Loading