Skip to content

Commit

Permalink
[MC][NFC] Statically allocate storage for decoded pseudo probes and f…
Browse files Browse the repository at this point in the history
…unction records

Use llvm#102774 to allocate storage for decoded probes (`PseudoProbeVec`)
and function records (`InlineTreeVec`).

Leverage that to also shrink sizes of `MCDecodedPseudoProbe`:
- Drop Guid since it's accessible via `InlineTree`.

`MCDecodedPseudoProbeInlineTree`:
- Keep track of probes and inlinees using `ArrayRef`s now that probes
  and function records belonging to the same function are allocated
  contiguously.

This reduces peak RSS from 13.7 GiB to 9.7 GiB and pseudo probe parsing
time (as part of perf2bolt) from 15.3s to 9.6s for a large binary with a
400 MiB .pseudo_probe section containing 43M probes and 25M function
records.

Depends on:
llvm#102774
llvm#102787
llvm#102788

Reviewers: maksfb, rafaelauler, dcci, ayermolo, wlei-llvm

Reviewed By: wlei-llvm

Pull Request: llvm#102789
  • Loading branch information
aaupov authored and dmpolukhin committed Sep 2, 2024
1 parent 8a2c098 commit 6c53bf8
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 76 deletions.
30 changes: 21 additions & 9 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

unsigned ProbeTrack = AP.second.size();
std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
auto Probe = llvm::map_iterator(
AP.second.begin(),
[](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
while (ProbeTrack != 0) {
if (Probe->isBlock()) {
Probe->setAddress(BlkOutputAddress);
Expand All @@ -218,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

while (CallOutputAddress != CallOutputAddresses.second) {
AP.second.push_back(*Probe);
AP.second.back().setAddress(CallOutputAddress->second);
Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}
}
Expand Down Expand Up @@ -332,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
ProbeDecoder.getDummyInlineRoot();
for (auto Child = Root.getChildren().begin();
Child != Root.getChildren().end(); ++Child)
Inlinees[Child->first] = Child->second.get();
Inlinees[Child->getInlineSite()] = &*Child;

for (auto Inlinee : Inlinees)
// INT64_MAX is a "placeholder" for the unused callsite index field in the pair
Expand All @@ -358,25 +358,37 @@ void PseudoProbeRewriter::encodePseudoProbes() {
EmitInt(Cur->Guid, 8);
// Emit number of probes in this node
uint64_t Deleted = 0;
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes())
for (MCDecodedPseudoProbe *&Probe :
llvm::make_pointer_range(Cur->getProbes()))
if (Probe->getAddress() == INT64_MAX)
Deleted++;
LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur);
uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes;
EmitULEB128IntValue(ProbesSize);
// Emit number of direct inlinees
EmitULEB128IntValue(Cur->getChildren().size());
// Emit probes in this group
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) {
for (MCDecodedPseudoProbe *&Probe :
llvm::make_pointer_range(Cur->getProbes())) {
if (Probe->getAddress() == INT64_MAX)
continue;
EmitDecodedPseudoProbe(Probe);
LastProbe = Probe;
}
if (InjectedProbes) {
for (MCDecodedPseudoProbe *&Probe :
llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) {
if (Probe->getAddress() == INT64_MAX)
continue;
EmitDecodedPseudoProbe(Probe);
LastProbe = Probe;
}
}

for (auto Child = Cur->getChildren().begin();
Child != Cur->getChildren().end(); ++Child)
Inlinees[Child->first] = Child->second.get();
Inlinees[Child->getInlineSite()] = &*Child;
for (const auto &Inlinee : Inlinees) {
assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
Expand Down
136 changes: 95 additions & 41 deletions llvm/include/llvm/MC/MCPseudoProbe.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,21 @@
#ifndef LLVM_MC_MCPSEUDOPROBE_H
#define LLVM_MC_MCPSEUDOPROBE_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/ErrorOr.h"
#include <list>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace llvm {
Expand Down Expand Up @@ -103,14 +104,15 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
using GUIDProbeFunctionMap =
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
// Address to pseudo probes map.
using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>;
using AddressProbesMap =
std::map<uint64_t,
std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;

class MCDecodedPseudoProbeInlineTree;

class MCPseudoProbeBase {
protected:
uint64_t Guid;
uint64_t Index;
uint32_t Index;
uint32_t Discriminator;
uint8_t Attributes;
uint8_t Type;
Expand All @@ -120,14 +122,12 @@ class MCPseudoProbeBase {
const static uint32_t PseudoProbeFirstId = 1;

public:
MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D)
: Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {}
MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D)
: Index(I), Discriminator(D), Attributes(At), Type(T) {}

bool isEntry() const { return Index == PseudoProbeFirstId; }

uint64_t getGuid() const { return Guid; }

uint64_t getIndex() const { return Index; }
uint32_t getIndex() const { return Index; }

uint32_t getDiscriminator() const { return Discriminator; }

Expand Down Expand Up @@ -157,18 +157,20 @@ class MCPseudoProbeBase {
/// uses an address from a temporary label created at the current address in the
/// current section.
class MCPseudoProbe : public MCPseudoProbeBase {
uint64_t Guid;
MCSymbol *Label;

public:
MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attributes, uint32_t Discriminator)
: MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator),
: MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid),
Label(Label) {
assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
assert(Attributes <= 0xFF &&
"Probe attributes too big to encode, exceeding 2^16");
}

uint64_t getGuid() const { return Guid; };
MCSymbol *getLabel() const { return Label; }
void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
};
Expand All @@ -181,11 +183,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
MCDecodedPseudoProbeInlineTree *InlineTree;

public:
MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
uint8_t At, uint32_t D,
MCDecodedPseudoProbeInlineTree *Tree)
: MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad),
MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At,
uint32_t D, MCDecodedPseudoProbeInlineTree *Tree)
: MCPseudoProbeBase(I, At, static_cast<uint8_t>(K), D), Address(Ad),
InlineTree(Tree){};
uint64_t getGuid() const;

uint64_t getAddress() const { return Address; }

Expand All @@ -211,21 +213,14 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
bool ShowName) const;
};

template <typename ProbeType, typename DerivedProbeInlineTreeType>
template <typename ProbesType, typename DerivedProbeInlineTreeType,
typename InlinedProbeTreeMap>
class MCPseudoProbeInlineTreeBase {
struct InlineSiteHash {
uint64_t operator()(const InlineSite &Site) const {
return std::get<0>(Site) ^ std::get<1>(Site);
}
};

protected:
// Track children (e.g. inlinees) of current context
using InlinedProbeTreeMap = std::unordered_map<
InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
InlinedProbeTreeMap Children;
// Set of probes that come with the function.
std::vector<ProbeType> Probes;
ProbesType Probes;
MCPseudoProbeInlineTreeBase() {
static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
DerivedProbeInlineTreeType>::value,
Expand All @@ -240,12 +235,10 @@ class MCPseudoProbeInlineTreeBase {
bool isRoot() const { return Guid == 0; }
InlinedProbeTreeMap &getChildren() { return Children; }
const InlinedProbeTreeMap &getChildren() const { return Children; }
std::vector<ProbeType> &getProbes() { return Probes; }
const std::vector<ProbeType> &getProbes() const { return Probes; }
void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
const ProbesType &getProbes() const { return Probes; }
// Caller node of the inline site
MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent =
nullptr;
MCPseudoProbeInlineTreeBase<ProbesType, DerivedProbeInlineTreeType,
InlinedProbeTreeMap> *Parent = nullptr;
DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
auto Ret = Children.emplace(
Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
Expand All @@ -259,9 +252,17 @@ class MCPseudoProbeInlineTreeBase {
// instance is created as the root of a tree.
// A real instance of this class is created for each function, either a
// not inlined function that has code in .text section or an inlined function.
struct InlineSiteHash {
  // Hash functor for InlineSite keys: combines tuple elements 0 and 1 of the
  // site with a bitwise XOR and returns the result as a 64-bit value.
  uint64_t operator()(const InlineSite &Site) const {
    const auto &First = std::get<0>(Site);
    const auto &Second = std::get<1>(Site);
    return First ^ Second;
  }
};
class MCPseudoProbeInlineTree
: public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
MCPseudoProbeInlineTree> {
: public MCPseudoProbeInlineTreeBase<
std::vector<MCPseudoProbe>, MCPseudoProbeInlineTree,
std::unordered_map<InlineSite,
std::unique_ptr<MCPseudoProbeInlineTree>,
InlineSiteHash>> {
public:
MCPseudoProbeInlineTree() = default;
MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
Expand All @@ -277,16 +278,31 @@ class MCPseudoProbeInlineTree

// inline tree node for the decoded pseudo probe
class MCDecodedPseudoProbeInlineTree
: public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
MCDecodedPseudoProbeInlineTree> {
public:
InlineSite ISite;
: public MCPseudoProbeInlineTreeBase<
MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree,
MutableArrayRef<MCDecodedPseudoProbeInlineTree>> {
uint32_t NumProbes = 0;
uint32_t ProbeId = 0;

public:
MCDecodedPseudoProbeInlineTree() = default;
MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
MCDecodedPseudoProbeInlineTree(const InlineSite &Site,
MCDecodedPseudoProbeInlineTree *Parent)
: ProbeId(std::get<1>(Site)) {
this->Guid = std::get<0>(Site);
this->Parent = Parent;
}

// Return false if it's a dummy inline site
bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
void setProbes(MutableArrayRef<MCDecodedPseudoProbe> ProbesRef) {
Probes = ProbesRef.data();
NumProbes = ProbesRef.size();
}
auto getProbes() const {
return MutableArrayRef<MCDecodedPseudoProbe>(Probes, NumProbes);
}
};

/// Instances of this class represent the pseudo probes inserted into a compile
Expand Down Expand Up @@ -336,6 +352,20 @@ class MCPseudoProbeTable {
};

class MCPseudoProbeDecoder {
// Decoded pseudo probes vector.
std::vector<MCDecodedPseudoProbe> PseudoProbeVec;
// Injected pseudo probes, identified by the containing inline tree node.
// Need to keep injected probes separately for two reasons:
// 1) Probes cannot be added to the PseudoProbeVec: appending may cause
// reallocation so that pointers to its elements will become invalid.
// 2) Probes belonging to function record must be contiguous in PseudoProbeVec
// as owning InlineTree references them with an ArrayRef to save space.
std::unordered_map<const MCDecodedPseudoProbeInlineTree *,
std::vector<MCDecodedPseudoProbe>>
InjectedProbeMap;
// Decoded inline records vector.
std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec;

// GUID to PseudoProbeFuncDesc map.
GUIDProbeFunctionMap GUID2FuncDescMap;

Expand Down Expand Up @@ -382,10 +412,6 @@ class MCPseudoProbeDecoder {
const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs);

bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr, const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs);

// Print pseudo_probe_desc section info
void printGUID2FuncDescMap(raw_ostream &OS);

Expand Down Expand Up @@ -428,6 +454,34 @@ class MCPseudoProbeDecoder {
const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
return DummyInlineRoot;
}

void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) {
const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode();
InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address);
}

// Returns how many injected probes are recorded for Parent, or 0 when the
// node has no entry in InjectedProbeMap. Never inserts into the map.
size_t
getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const {
  const auto Found = InjectedProbeMap.find(Parent);
  return Found != InjectedProbeMap.end() ? Found->second.size() : 0;
}

// Returns a range over the injected probes recorded for Parent.
// Precondition: Parent must already have an entry in InjectedProbeMap; this
// is only checked by the assert, so callers should test
// getNumInjectedProbes(Parent) first.
auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) {
  auto Entry = InjectedProbeMap.find(Parent);
  assert(Entry != InjectedProbeMap.end());
  auto &ProbesForNode = Entry->second;
  return iterator_range(ProbesForNode);
}

private:
// Recursively parse an inlining tree encoded in pseudo_probe section. Returns
// whether the top-level node should be skipped.
template <bool IsTopLevelFunc>
bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
uint64_t &LastAddr, const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs,
const uint32_t CurChildIndex);
};

} // end namespace llvm
Expand Down
Loading

0 comments on commit 6c53bf8

Please sign in to comment.