Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MC][NFC] Statically allocate storage for decoded pseudo probes and function records #102789

31 changes: 21 additions & 10 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ void PseudoProbeRewriter::parsePseudoProbe() {
if (!ProbeDecoder.buildAddress2ProbeMap(
reinterpret_cast<const uint8_t *>(Contents.data()), Contents.size(),
GuidFilter, FuncStartAddrs)) {
ProbeDecoder.getAddress2ProbesMap().clear();
errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n";
return;
}
Expand Down Expand Up @@ -201,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

unsigned ProbeTrack = AP.second.size();
std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
auto Probe = llvm::map_iterator(
AP.second.begin(),
[](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
while (ProbeTrack != 0) {
if (Probe->isBlock()) {
Probe->setAddress(BlkOutputAddress);
Expand All @@ -219,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

while (CallOutputAddress != CallOutputAddresses.second) {
AP.second.push_back(*Probe);
AP.second.back().setAddress(CallOutputAddress->second);
Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this change? It doesn't seem to be related to decoding pseudo probes. Or is it because we allocate a fixed-size vector of MCDecodedPseudoProbe, so there is no way to add additional probes to that vector later, and we therefore have to use a new container, InjectedProbeMap, to save the new probes? If so, could you add comments to explain this (maybe at the definition of InjectedProbeMap)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's required because appending to ProbeVec may cause its reallocation so that pointers to its elements will become invalid. I'll leave a comment.

CallOutputAddress = std::next(CallOutputAddress);
}
}
Expand Down Expand Up @@ -333,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
ProbeDecoder.getDummyInlineRoot();
for (auto Child = Root.getChildren().begin();
Child != Root.getChildren().end(); ++Child)
Inlinees[Child->first] = Child->second.get();
Inlinees[Child->getInlineSite()] = &*Child;

for (auto Inlinee : Inlinees)
// INT64_MAX is "placeholder" of unused callsite index field in the pair
Expand All @@ -359,25 +358,37 @@ void PseudoProbeRewriter::encodePseudoProbes() {
EmitInt(Cur->Guid, 8);
// Emit number of probes in this node
uint64_t Deleted = 0;
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes())
for (MCDecodedPseudoProbe *&Probe :
llvm::make_pointer_range(Cur->getProbes()))
if (Probe->getAddress() == INT64_MAX)
Deleted++;
LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur);
uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes;
EmitULEB128IntValue(ProbesSize);
// Emit number of direct inlinees
EmitULEB128IntValue(Cur->getChildren().size());
// Emit probes in this group
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) {
for (MCDecodedPseudoProbe *&Probe :
llvm::make_pointer_range(Cur->getProbes())) {
if (Probe->getAddress() == INT64_MAX)
continue;
EmitDecodedPseudoProbe(Probe);
LastProbe = Probe;
}
if (InjectedProbes) {
for (MCDecodedPseudoProbe *&Probe :
llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) {
if (Probe->getAddress() == INT64_MAX)
continue;
EmitDecodedPseudoProbe(Probe);
LastProbe = Probe;
}
}

for (auto Child = Cur->getChildren().begin();
Child != Cur->getChildren().end(); ++Child)
Inlinees[Child->first] = Child->second.get();
Inlinees[Child->getInlineSite()] = &*Child;
for (const auto &Inlinee : Inlinees) {
assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
Expand Down
143 changes: 102 additions & 41 deletions llvm/include/llvm/MC/MCPseudoProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,21 @@
#ifndef LLVM_MC_MCPSEUDOPROBE_H
#define LLVM_MC_MCPSEUDOPROBE_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/ErrorOr.h"
#include <list>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace llvm {
Expand Down Expand Up @@ -103,14 +104,15 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
using GUIDProbeFunctionMap =
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
// Address to pseudo probes map.
using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>;
using AddressProbesMap =
std::map<uint64_t,
std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;

class MCDecodedPseudoProbeInlineTree;

class MCPseudoProbeBase {
protected:
uint64_t Guid;
uint64_t Index;
uint32_t Index;
uint32_t Discriminator;
uint8_t Attributes;
uint8_t Type;
Expand All @@ -120,14 +122,12 @@ class MCPseudoProbeBase {
const static uint32_t PseudoProbeFirstId = 1;

public:
MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D)
: Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {}
MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D)
: Index(I), Discriminator(D), Attributes(At), Type(T) {}

bool isEntry() const { return Index == PseudoProbeFirstId; }

uint64_t getGuid() const { return Guid; }

uint64_t getIndex() const { return Index; }
uint32_t getIndex() const { return Index; }

uint32_t getDiscriminator() const { return Discriminator; }

Expand Down Expand Up @@ -157,18 +157,20 @@ class MCPseudoProbeBase {
/// uses an address from a temporary label created at the current address in the
/// current section.
class MCPseudoProbe : public MCPseudoProbeBase {
  // GUID supplied at construction; stored here rather than in the base class.
  uint64_t Guid;
  // Label supplied at construction.
  MCSymbol *Label;

public:
  /// Builds a probe from its label, GUID, index, type, attributes and
  /// discriminator. \p Type and \p Attributes are accepted as 64-bit values
  /// but the base class stores each of them in a uint8_t, so both are
  /// asserted to fit in 8 bits.
  MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
                uint64_t Attributes, uint32_t Discriminator)
      : MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid),
        Label(Label) {
    assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
    // The bound checked here is 0xFF, i.e. 2^8 (the message used to say 2^16).
    assert(Attributes <= 0xFF &&
           "Probe attributes too big to encode, exceeding 2^8");
  }

  /// Returns the GUID passed to the constructor.
  uint64_t getGuid() const { return Guid; }
  /// Returns the label passed to the constructor.
  MCSymbol *getLabel() const { return Label; }
  void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
};
Expand All @@ -181,11 +183,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
MCDecodedPseudoProbeInlineTree *InlineTree;

public:
MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
uint8_t At, uint32_t D,
MCDecodedPseudoProbeInlineTree *Tree)
: MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad),
MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At,
uint32_t D, MCDecodedPseudoProbeInlineTree *Tree)
: MCPseudoProbeBase(I, At, static_cast<uint8_t>(K), D), Address(Ad),
InlineTree(Tree){};
uint64_t getGuid() const;

uint64_t getAddress() const { return Address; }

Expand All @@ -211,21 +213,14 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
bool ShowName) const;
};

template <typename ProbeType, typename DerivedProbeInlineTreeType>
template <typename ProbesType, typename DerivedProbeInlineTreeType,
typename InlinedProbeTreeMap>
class MCPseudoProbeInlineTreeBase {
struct InlineSiteHash {
uint64_t operator()(const InlineSite &Site) const {
return std::get<0>(Site) ^ std::get<1>(Site);
}
};

protected:
// Track children (e.g. inlinees) of current context
using InlinedProbeTreeMap = std::unordered_map<
InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
InlinedProbeTreeMap Children;
// Set of probes that come with the function.
std::vector<ProbeType> Probes;
ProbesType Probes;
MCPseudoProbeInlineTreeBase() {
static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
DerivedProbeInlineTreeType>::value,
Expand All @@ -240,12 +235,11 @@ class MCPseudoProbeInlineTreeBase {
bool isRoot() const { return Guid == 0; }
InlinedProbeTreeMap &getChildren() { return Children; }
const InlinedProbeTreeMap &getChildren() const { return Children; }
std::vector<ProbeType> &getProbes() { return Probes; }
const std::vector<ProbeType> &getProbes() const { return Probes; }
void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
ProbesType &getProbes() { return Probes; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know where we require this function (i.e., anywhere the call can't be replaced with the const version)? I thought we don't change a probe after it's decoded. Is it possible to keep only one getProbes and one getChildren (the const versions)?

const ProbesType &getProbes() const { return Probes; }
// Caller node of the inline site
MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent =
nullptr;
MCPseudoProbeInlineTreeBase<ProbesType, DerivedProbeInlineTreeType,
InlinedProbeTreeMap> *Parent = nullptr;
DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
auto Ret = Children.emplace(
Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
Expand All @@ -259,9 +253,17 @@ class MCPseudoProbeInlineTreeBase {
// instance is created as the root of a tree.
// A real instance of this class is created for each function, either a
// not inlined function that has code in .text section or an inlined function.
struct InlineSiteHash {
uint64_t operator()(const InlineSite &Site) const {
return std::get<0>(Site) ^ std::get<1>(Site);
}
};
class MCPseudoProbeInlineTree
: public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
MCPseudoProbeInlineTree> {
: public MCPseudoProbeInlineTreeBase<
std::vector<MCPseudoProbe>, MCPseudoProbeInlineTree,
std::unordered_map<InlineSite,
std::unique_ptr<MCPseudoProbeInlineTree>,
InlineSiteHash>> {
public:
MCPseudoProbeInlineTree() = default;
MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
Expand All @@ -277,16 +279,31 @@ class MCPseudoProbeInlineTree

// inline tree node for the decoded pseudo probe
class MCDecodedPseudoProbeInlineTree
: public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
MCDecodedPseudoProbeInlineTree> {
public:
InlineSite ISite;
: public MCPseudoProbeInlineTreeBase<
MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree,
MutableArrayRef<MCDecodedPseudoProbeInlineTree>> {
uint32_t NumProbes = 0;
uint32_t ProbeId = 0;

public:
MCDecodedPseudoProbeInlineTree() = default;
MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
MCDecodedPseudoProbeInlineTree(const InlineSite &Site,
MCDecodedPseudoProbeInlineTree *Parent)
: ProbeId(std::get<1>(Site)) {
this->Guid = std::get<0>(Site);
this->Parent = Parent;
}

// Return false if it's a dummy inline site
bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
void setProbes(MutableArrayRef<MCDecodedPseudoProbe> ProbesRef) {
Probes = ProbesRef.data();
NumProbes = ProbesRef.size();
}
auto getProbes() const {
return MutableArrayRef<MCDecodedPseudoProbe>(Probes, NumProbes);
}
};

/// Instances of this class represent the pseudo probes inserted into a compile
Expand Down Expand Up @@ -336,6 +353,20 @@ class MCPseudoProbeTable {
};

class MCPseudoProbeDecoder {
// Decoded pseudo probes vector.
std::vector<MCDecodedPseudoProbe> PseudoProbeVec;
// Injected pseudo probes, identified by the containing inline tree node.
// Need to keep injected probes separately for two reasons:
// 1) Probes cannot be added to the PseudoProbeVec: appending may cause
// reallocation so that pointers to its elements will become invalid.
// 2) Probes belonging to function record must be contiguous in PseudoProbeVec
// as owning InlineTree references them with an ArrayRef to save space.
std::unordered_map<const MCDecodedPseudoProbeInlineTree *,
std::vector<MCDecodedPseudoProbe>>
InjectedProbeMap;
// Decoded inline records vector.
std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec;

// GUID to PseudoProbeFuncDesc map.
GUIDProbeFunctionMap GUID2FuncDescMap;

Expand Down Expand Up @@ -370,16 +401,18 @@ class MCPseudoProbeDecoder {
// Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);

// Decode pseudo_probe section to count the number of probes and inlined
// function records for each function record.
template <bool IsTopLevelFunc>
bool countRecords(bool &Discard, uint32_t &ProbeCount, uint32_t &InlinedCount,
const Uint64Set &GuidFilter);

// Decode pseudo_probe section to build address to probes map for specified
// functions only.
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs);

bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr, const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs);

// Print pseudo_probe_desc section info
void printGUID2FuncDescMap(raw_ostream &OS);

Expand Down Expand Up @@ -422,6 +455,34 @@ class MCPseudoProbeDecoder {
const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
return DummyInlineRoot;
}

void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) {
const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode();
InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address);
}

// Returns how many probes have been injected for the given inline tree node,
// or 0 when the node has no entry in InjectedProbeMap.
size_t
getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const {
  const auto Found = InjectedProbeMap.find(Parent);
  return Found != InjectedProbeMap.end() ? Found->second.size() : 0;
}

// Returns a range over the probes injected for the given inline tree node.
// The node must already have an entry in InjectedProbeMap (asserted).
auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) {
  auto It = InjectedProbeMap.find(Parent);
  assert(It != InjectedProbeMap.end());
  auto &Injected = It->second;
  return iterator_range(Injected);
}

private:
// Recursively parse an inlining tree encoded in pseudo_probe section. Returns
// whether the top-level node should be skipped.
template <bool IsTopLevelFunc>
bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr, const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs,
const uint32_t CurChildIndex);
};

} // end namespace llvm
Expand Down
Loading
Loading