Skip to content

Commit

Permalink
feat: demangle symbols
Browse files Browse the repository at this point in the history
  • Loading branch information
alandtse committed Oct 12, 2024
1 parent d5c0951 commit 0d116c6
Showing 1 changed file with 156 additions and 7 deletions.
163 changes: 156 additions & 7 deletions src/Crash/PDB/PdbHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,158 @@ namespace Crash
return converter.to_bytes(utf16);
}

// Returns a copy of `str` without leading/trailing spaces, tabs, newlines
// and carriage returns. A string made up only of those characters (or an
// empty string) yields an empty result.
[[nodiscard]] static std::string trim(const std::string& str)
{
	const char* const blanks = " \t\n\r";

	const std::string::size_type first = str.find_first_not_of(blanks);
	if (first == std::string::npos) {
		// Nothing but blanks (or nothing at all).
		return "";
	}

	// A non-blank character exists, so this search cannot fail.
	const std::string::size_type last = str.find_last_not_of(blanks);
	const std::string::size_type count = last - first + 1;
	return str.substr(first, count);
}

// Returns a copy of `wstr` with leading and trailing whitespace (as
// classified by std::iswspace) removed.
//
// Empty and all-whitespace inputs yield an empty string. Both scans are
// bounded by each other, so no iterator is ever moved before begin() or
// dereferenced at end().
std::wstring trim(const std::wstring& wstr)
{
	auto first = wstr.begin();
	auto last = wstr.end();  // one past the last character that is kept

	// Skip leading whitespace.
	while (first != last && std::iswspace(*first)) {
		++first;
	}

	// Skip trailing whitespace; `last != first` guarantees `last - 1` is valid.
	while (last != first && std::iswspace(*(last - 1))) {
		--last;
	}

	return std::wstring(first, last);
}

// Demangles a (possibly MSVC-decorated) symbol name.
//
// `mangled` is split on whitespace and every token is passed to
// UnDecorateSymbolNameW. A token is replaced by its undecorated form unless
// undecoration failed, produced "<unknown>"/"UNKNOWN", produced one of the
// bare builtin type names in `ignoredTypes`, or produced text starting with
// "?? ::"; in those cases the original token is kept.
//
// If the rebuilt text ends in "::name" and the first "?...@" segment of
// `mangled` ends with that same `name`, the tail is replaced by that full
// segment (the code treats the shorter tail as a truncated name).
//
// Returns "<demangled> (mangled: <original>)" as UTF-8 when at least one
// token was replaced and the result differs from the input; otherwise the
// input converted to UTF-8.
[[nodiscard]] static std::string demangle(const std::wstring& mangled)
{
	// Set of ignored demangled types
	static const std::unordered_set<std::wstring> ignoredTypes = {
		L"float",
		L"char",
		L"signed char",
		L"unsigned char",
		L"void",
		L"short",
		L"unsigned short",
		L"double",
		L"unsigned char volatile",
	};

	// Matches a trailing "::identifier". Built once instead of on every call,
	// since constructing a std::wregex is comparatively expensive.
	static const std::wregex endsWithPattern(L"::[\\w]+$");

	const auto demangle_single = [](const wchar_t* a_in, wchar_t* a_out, std::uint32_t a_size) {
		// Calls are serialised through this mutex.
		// NOTE(review): presumably because DbgHelp routines are not thread-safe — confirm.
		static std::mutex m;
		std::lock_guard l{ m };
		return UnDecorateSymbolNameW(
			a_in,
			a_out,
			a_size,
			(UNDNAME_NO_MS_KEYWORDS) |
				(UNDNAME_NO_FUNCTION_RETURNS) |
				(UNDNAME_NO_ALLOCATION_MODEL) |
				(UNDNAME_NO_ALLOCATION_LANGUAGE) |
				(UNDNAME_NO_THISTYPE) |
				(UNDNAME_NO_ACCESS_SPECIFIERS) |
				(UNDNAME_NO_THROW_SIGNATURES) |
				(UNDNAME_NO_RETURN_UDT_MODEL) |
				(UNDNAME_NAME_ONLY) |
				(UNDNAME_NO_ARGUMENTS) |
				// NOTE(review): raw flag with no named constant here; believed to be
				// UNDNAME_NO_ECSU — confirm against the DbgHelp headers.
				static_cast<std::uint32_t>(0x8000));
	};

	// Buffer to store demangled result
	std::array<wchar_t, 0x1000> buf{ L'\0' };
	std::wistringstream wiss(mangled);
	std::wostringstream woss;

	std::wstring word;
	bool hasDemangled = false;

	while (wiss >> word) {
		// Log the word before demangling
		logger::info("Demangling word: {}", utf16_to_utf8(word));

		// Attempt to demangle each word
		const auto len = demangle_single(word.c_str(), buf.data(), static_cast<std::uint32_t>(buf.size()));

		// Ensure null-termination; clamp the index so an unexpected return value
		// can never write outside the buffer.
		const std::size_t terminator = (len < buf.size()) ? static_cast<std::size_t>(len) : buf.size() - 1;
		buf[terminator] = L'\0';

		std::wstring demangledWord{ buf.data() };

		// Trim the demangled word. Skipped for an empty string (e.g. len == 0)
		// so the trim helper is never handed an empty range.
		if (!demangledWord.empty()) {
			demangledWord = trim(demangledWord);
		}

		// Log the demangled word
		logger::info("Demangled result: {}", utf16_to_utf8(demangledWord));

		// A result is usable when undecoration succeeded and did not yield a placeholder.
		const bool usable = len != 0 && demangledWord != L"<unknown>" && demangledWord != L"UNKNOWN";
		// Usable results may still be rejected as uninformative.
		const bool ignored = usable &&
		                     (ignoredTypes.find(demangledWord) != ignoredTypes.end() || demangledWord.starts_with(L"?? ::"));

		if (usable && !ignored) {
			woss << demangledWord << L" ";
			hasDemangled = true;
		} else {
			woss << word << L" ";  // Keep the original mangled word
		}
	}

	// Prepare the final result. `result` is empty when `mangled` held no
	// tokens; do not pass an empty string to trim in that case.
	const std::wstring result = woss.str();
	std::wstring trimmedResult = result.empty() ? std::wstring{} : trim(result);

	// Extract the potential replacement word from the mangled string if we have a valid demangled result
	std::wstring replacementWord;
	if (hasDemangled && mangled.starts_with(L'?')) {
		const size_t start = 1;  // Skip the initial '?'
		const size_t atPos = mangled.find(L'@', start);
		if (atPos != std::wstring::npos) {
			replacementWord = mangled.substr(start, atPos - start);
			logger::info("Found potential replacement word: {}", utf16_to_utf8(replacementWord));
		}
	}

	// Nothing was demangled, or the output equals the input: return the original.
	if (!hasDemangled || trimmedResult == mangled) {
		return utf16_to_utf8(mangled);
	}

	// Check for potential truncation of the trailing "::name" component.
	std::wsmatch match;
	if (std::regex_search(trimmedResult, match, endsWithPattern)) {
		// Extract the word after '::'
		const std::wstring truncatedPart = match.str(0).substr(2);  // Remove '::'
		logger::info("Found potential truncatedPart: {}", utf16_to_utf8(truncatedPart));

		// Compare the truncated part with the replacement word
		if (!replacementWord.empty() && !truncatedPart.empty() && replacementWord.ends_with(truncatedPart)) {
			// Replace the truncated part with the replacement word
			trimmedResult.replace(match.position(0), match.length(0), L"::" + replacementWord);
		}
	}

	// If the demangled string is (still) different from the original, return both
	if (trimmedResult != mangled) {
		return fmt::format("{} (mangled: {})", utf16_to_utf8(trimmedResult), utf16_to_utf8(mangled));
	}

	// Otherwise, return the original mangled string
	return utf16_to_utf8(mangled);
}

// Appends a textual description of `a_symbol` to `a_result` and returns a copy of `a_result`.
//
// Visible behaviour: reads the symbol name, converts it with ConvertBSTRToMBS and
// demangles it; uses `a_rva` when it is non-zero, otherwise asks the symbol for its
// relative virtual address; when the symbol reports a length, looks up line-number
// records and appends " file:line name", " file name" or " unk_:line name"; when
// neither a source file nor a line was found, appends the name followed by the RVA in
// upper-case hex (prefixed with '_' unless the name already ends with it). Finally
// logs either "No symbol found ..." (using `a_name` and `a_offset`) or the result.
//
// NOTE(review): this text comes from a rendered commit diff. The "Expand All @@" lines
// are hunk markers hiding code that is not visible here, and several statements appear
// twice (a `convertedName` variant directly followed by a `demangledName` variant),
// which looks like removed/added line pairs shown without +/- markers — confirm
// against the real file before editing.
// NOTE(review): `a_session` is not used in the visible lines; it may be used in the
// hidden ones.
std::string processSymbol(IDiaSymbol* a_symbol, IDiaSession* a_session, const DWORD& a_rva, std::string_view& a_name, uintptr_t& a_offset, std::string& a_result)
{
// NOTE(review): `name` is uninitialised and the result of get_name is not checked;
// no release of the BSTR is visible in this view — confirm.
BSTR name;
a_symbol->get_name(&name);
auto convertedName = ConvertBSTRToMBS(name);

// Demangle the symbol name
std::string demangledName = demangle(name);

// NOTE(review): `rva` stays uninitialised if get_relativeVirtualAddress does not
// write it (its return value is ignored).
DWORD rva;
if (a_rva == 0)
a_symbol->get_relativeVirtualAddress(&rva); // find rva if not provided
else
rva = a_rva;

ULONGLONG length = 0;
if (a_symbol->get_length(&length) == S_OK) {
IDiaEnumLineNumbers* lineNums[100];
Expand All @@ -83,6 +224,7 @@ namespace Crash
ULONG fetched = 0;
bool found_source = false;
bool found_line = false;

// NOTE(review): the loop index `i` is passed as the first argument of Next; for
// IDiaEnumLineNumbers::Next that parameter is the number of elements requested —
// confirm this is intended.
for (uint8_t i = 0; i < 5; ++i) {
if (lineNumsPtr->Next(i, &lineNum, &fetched) == S_OK && fetched == 1) {
found_source = false;
Expand All @@ -91,36 +233,43 @@ namespace Crash
IDiaSourceFile* srcFile;
BSTR fileName = nullptr;
std::string convertedFileName;

if (lineNum->get_sourceFile(&srcFile) == S_OK) {
// This inner declaration shadows the `fileName` declared above.
BSTR fileName;
srcFile->get_fileName(&fileName);
convertedFileName = ConvertBSTRToMBS(fileName);
found_source = true;
}

if (lineNum->get_lineNumber(&sline) == S_OK)
found_line = true;
// NOTE(review): the next two lines look like the pre-change (convertedName) variant
// of the branch that follows them.
if (found_source && found_line) // this should always hit if hit at all
a_result += fmt::format(" {}:{} {}", convertedFileName, +sline ? (uint64_t)sline : 0, convertedName);

// Emit whichever of source file / line number was found, followed by the demangled name.
if (found_source && found_line)
a_result += fmt::format(" {}:{} {}", convertedFileName, +sline ? (uint64_t)sline : 0, demangledName);
else if (found_source)
a_result += fmt::format(" {} {}", convertedFileName, convertedName);
a_result += fmt::format(" {} {}", convertedFileName, demangledName);
else if (found_line)
a_result += fmt::format(" unk_:{} {}", +sline ? (uint64_t)sline : 0, convertedName);
a_result += fmt::format(" unk_:{} {}", +sline ? (uint64_t)sline : 0, demangledName);
}
}

// Fallback when no source/line information was found: name plus hex RVA suffix.
if (!found_source && !found_line) {
auto sRva = fmt::format("{:X}", rva);
if (convertedName.ends_with(sRva))
if (demangledName.ends_with(sRva))
sRva = "";
else
sRva = "_" + sRva;
a_result += fmt::format(" {}{}", convertedName, sRva);

a_result += fmt::format(" {}{}", demangledName, sRva);
}
}
}

// Log the outcome: either that nothing was produced, or the accumulated text.
if (a_result.empty())
logger::info("No symbol found for {}+{:07X}"sv, a_name, a_offset);
else
logger::info("Symbol returning: {}", a_result);

return a_result;
}

Expand Down

0 comments on commit 0d116c6

Please sign in to comment.