1
0
mirror of synced 2025-01-22 11:33:46 +01:00

595 lines
17 KiB
C++
Raw Normal View History

//===--- DLangDemangle.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines a demangler for the D programming language as specified
/// in the ABI specification, available at:
/// https://dlang.org/spec/abi.html#name_mangling
///
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
2023-11-10 20:47:08 +01:00
#include "llvm/Demangle/StringViewExtras.h"
#include "llvm/Demangle/Utility.h"
#include <cctype>
#include <cstring>
#include <limits>
2023-11-10 20:47:08 +01:00
#include <string_view>
using namespace llvm;
using llvm::itanium_demangle::OutputBuffer;
2023-11-10 20:47:08 +01:00
using llvm::itanium_demangle::starts_with;
namespace {
/// Demangle information structure.
///
/// The private parsers that can fail take the not-yet-consumed input as a
/// `std::string_view &Mangled`, advance it past whatever they consume, and
/// report failure by resetting it to a default-constructed (null, empty) view.
struct Demangler {
  /// Initialize the information structure we use to pass around information.
  ///
  /// \param Mangled String to demangle.
  explicit Demangler(std::string_view Mangled);

  /// Extract and demangle the mangled symbol and append it to the output
  /// string.
  ///
  /// \param Demangled Output buffer to write the demangled name.
  ///
  /// \return The remaining string on success or nullptr on failure.
  ///
  /// \see https://dlang.org/spec/abi.html#name_mangling .
  /// \see https://dlang.org/spec/abi.html#MangledName .
  const char *parseMangle(OutputBuffer *Demangled);

private:
  /// Extract and demangle a given mangled symbol and append it to the output
  /// string.
  ///
  /// \param Demangled output buffer to write the demangled name.
  /// \param Mangled mangled symbol to be demangled.
  ///
  /// \see https://dlang.org/spec/abi.html#name_mangling .
  /// \see https://dlang.org/spec/abi.html#MangledName .
  void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled);

  /// Extract the number from a given string.
  ///
  /// \param Mangled string to extract the number.
  /// \param Ret assigned result value; left unmodified on failure.
  ///
  /// \note Ret larger than UINT_MAX is considered a failure.
  /// \note A number that is not followed by any further input is also
  ///       considered a failure.
  ///
  /// \see https://dlang.org/spec/abi.html#Number .
  void decodeNumber(std::string_view &Mangled, unsigned long &Ret);

  /// Extract the back reference position from a given string.
  ///
  /// \param Mangled string to extract the back reference position.
  /// \param Ret assigned result value.
  ///
  /// \return true on success, false on error.
  ///
  /// \note Ret is always > 0 on success, and unspecified on failure.
  ///
  /// \see https://dlang.org/spec/abi.html#back_ref .
  /// \see https://dlang.org/spec/abi.html#NumberBackRef .
  bool decodeBackrefPos(std::string_view &Mangled, long &Ret);

  /// Extract the symbol pointed by the back reference from a given string.
  ///
  /// \param Mangled string to extract the back reference position.
  /// \param Ret assigned result value.
  ///
  /// \return true on success, false on error.
  ///
  /// \see https://dlang.org/spec/abi.html#back_ref .
  bool decodeBackref(std::string_view &Mangled, std::string_view &Ret);

  /// Extract and demangle backreferenced symbol from a given mangled symbol
  /// and append it to the output string.
  ///
  /// \param Demangled output buffer to write the demangled name.
  /// \param Mangled mangled symbol to be demangled.
  ///
  /// \see https://dlang.org/spec/abi.html#back_ref .
  /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
  void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled);

  /// Extract and parse a backreferenced type from a given mangled symbol.
  /// No output buffer is taken, so nothing is written for the type.
  ///
  /// \param Mangled mangled symbol to be demangled.
  ///
  /// \see https://dlang.org/spec/abi.html#back_ref .
  /// \see https://dlang.org/spec/abi.html#TypeBackRef .
  void parseTypeBackref(std::string_view &Mangled);

  /// Check whether it is the beginning of a symbol name.
  ///
  /// \param Mangled string to extract the symbol name.
  ///
  /// \return true on success, false otherwise.
  ///
  /// \see https://dlang.org/spec/abi.html#SymbolName .
  bool isSymbolName(std::string_view Mangled);

  /// Extract and demangle an identifier from a given mangled symbol append it
  /// to the output string.
  ///
  /// \param Demangled Output buffer to write the demangled name.
  /// \param Mangled Mangled symbol to be demangled.
  ///
  /// \see https://dlang.org/spec/abi.html#SymbolName .
  void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled);

  /// Extract and demangle the plain identifier from a given mangled symbol and
  /// prepend/append it to the output string, with a special treatment for some
  /// magic compiler generated symbols.
  ///
  /// \param Demangled Output buffer to write the demangled name.
  /// \param Mangled Mangled symbol to be demangled.
  /// \param Len Length of the mangled symbol name.
  ///
  /// \see https://dlang.org/spec/abi.html#LName .
  void parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
                  unsigned long Len);

  /// Extract and demangle the qualified symbol from a given mangled symbol
  /// append it to the output string.
  ///
  /// \param Demangled Output buffer to write the demangled name.
  /// \param Mangled Mangled symbol to be demangled.
  ///
  /// \see https://dlang.org/spec/abi.html#QualifiedName .
  void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled);

  /// Extract and parse a type from a given mangled symbol. No output buffer
  /// is taken, so the type is consumed without being written anywhere.
  ///
  /// \param Mangled mangled symbol to be demangled.
  ///
  /// \return true on success, false on error.
  ///
  /// \see https://dlang.org/spec/abi.html#Type .
  bool parseType(std::string_view &Mangled);

  /// An immutable view of the string we are demangling.
  const std::string_view Str;
  /// The index of the last back reference.
  int LastBackref;
};
} // namespace
2023-11-10 20:47:08 +01:00
/// Decode the decimal number at the front of \p Mangled.
///
/// On success the digits are consumed and the value is stored in \p Ret.
/// On failure \p Mangled is reset to a null view and \p Ret is left
/// untouched. Failure means: no leading digit, a value above UINT_MAX, or a
/// number that is the very last thing in the input.
void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) {
  // std::isdigit has undefined behavior for negative arguments other than
  // EOF, so every byte is widened through unsigned char before the call.
  const auto IsDigit = [](char C) {
    return std::isdigit(static_cast<unsigned char>(C)) != 0;
  };

  // Clear Mangled if trying to extract something that isn't a digit.
  if (Mangled.empty() || !IsDigit(Mangled.front())) {
    Mangled = {};
    return;
  }

  constexpr unsigned long Limit = std::numeric_limits<unsigned int>::max();
  unsigned long Val = 0;

  do {
    const unsigned long Digit = Mangled.front() - '0';

    // Check for overflow: reject the number if appending this digit would
    // push the value past UINT_MAX.
    if (Val > (Limit - Digit) / 10) {
      Mangled = {};
      return;
    }

    Val = Val * 10 + Digit;
    Mangled.remove_prefix(1);
  } while (!Mangled.empty() && IsDigit(Mangled.front()));

  // A number that consumes the rest of the input is treated as an error.
  if (Mangled.empty()) {
    Mangled = {};
    return;
  }

  Ret = Val;
}
2023-11-10 20:47:08 +01:00
/// Decode the base-26 distance encoded at the front of \p Mangled.
///
/// \return true and a strictly positive \p Ret on success; on failure
/// \p Mangled is reset to a null view and false is returned.
bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) {
  // Any identifier or non-basic type that has been emitted to the mangled
  // symbol before will not be emitted again, but is referenced by a special
  // sequence encoding the relative position of the original occurrence in the
  // mangled symbol name.
  // Numbers in back references are encoded with base 26 by upper case letters
  // A-Z for higher digits but lower case letters a-z for the last digit.
  //    NumberBackRef:
  //        [a-z]
  //        [A-Z] NumberBackRef
  //        ^

  // Largest accumulator value that can still take one more base-26 digit.
  constexpr unsigned long MaxBeforeShift =
      (std::numeric_limits<unsigned long>::max() - 25) / 26;

  unsigned long Val = 0;

  while (!Mangled.empty()) {
    const char C = Mangled.front();
    // The grammar only admits ASCII letters. Explicit range checks avoid
    // std::isalpha, which is locale dependent and undefined for negative
    // char values.
    const bool IsLastDigit = C >= 'a' && C <= 'z';
    const bool IsHigherDigit = C >= 'A' && C <= 'Z';
    if (!IsLastDigit && !IsHigherDigit)
      break;

    // Check for overflow
    if (Val > MaxBeforeShift)
      break;

    Val *= 26;

    if (IsLastDigit) {
      Val += C - 'a';
      // A distance of zero, or one that does not fit in long, is invalid.
      if (static_cast<long>(Val) <= 0)
        break;
      Ret = static_cast<long>(Val);
      Mangled.remove_prefix(1);
      return true;
    }

    Val += C - 'A';
    Mangled.remove_prefix(1);
  }

  // Empty input, a non-letter, overflow, or input that ended before the
  // terminating lower case digit.
  Mangled = {};
  return false;
}
2023-11-10 20:47:08 +01:00
bool Demangler::decodeBackref(std::string_view &Mangled,
std::string_view &Ret) {
assert(!Mangled.empty() && Mangled.front() == 'Q' &&
"Invalid back reference!");
Ret = {};
// Position of 'Q'
2023-11-10 20:47:08 +01:00
const char *Qpos = Mangled.data();
long RefPos;
2023-11-10 20:47:08 +01:00
Mangled.remove_prefix(1);
2023-11-10 20:47:08 +01:00
if (!decodeBackrefPos(Mangled, RefPos)) {
Mangled = {};
return false;
}
2023-11-10 20:47:08 +01:00
if (RefPos > Qpos - Str.data()) {
Mangled = {};
return false;
}
// Set the position of the back reference.
Ret = Qpos - RefPos;
2023-11-10 20:47:08 +01:00
return true;
}
2023-11-10 20:47:08 +01:00
/// Demangle the identifier that the back reference at the front of
/// \p Mangled points to and emit it into \p Demangled.
///
/// On any failure \p Mangled is reset to a null view.
void Demangler::parseSymbolBackref(OutputBuffer *Demangled,
                                   std::string_view &Mangled) {
  // An identifier back reference always points to a digit 0 to 9.
  //    IdentifierBackRef:
  //        Q NumberBackRef
  //        ^

  // Get position of the back reference
  std::string_view Target;
  if (!decodeBackref(Mangled, Target)) {
    Mangled = {};
    return;
  }

  // Must point to a simple identifier: a length followed by that many bytes.
  // decodeNumber leaves NameLen untouched on failure, hence the initializer.
  unsigned long NameLen = 0;
  decodeNumber(Target, NameLen);
  if (Target.empty() || Target.length() < NameLen) {
    Mangled = {};
    return;
  }

  parseLName(Demangled, Target, NameLen);
  // Reject the reference when nothing is left after the referenced name.
  if (Target.empty())
    Mangled = {};
}
2023-11-10 20:47:08 +01:00
void Demangler::parseTypeBackref(std::string_view &Mangled) {
// A type back reference always points to a letter.
// TypeBackRef:
// Q NumberBackRef
// ^
// If we appear to be moving backwards through the mangle string, then
// bail as this may be a recursive back reference.
2023-11-10 20:47:08 +01:00
if (Mangled.data() - Str.data() >= LastBackref) {
Mangled = {};
return;
}
int SaveRefPos = LastBackref;
2023-11-10 20:47:08 +01:00
LastBackref = Mangled.data() - Str.data();
// Get position of the back reference.
2023-11-10 20:47:08 +01:00
std::string_view Backref;
if (!decodeBackref(Mangled, Backref)) {
Mangled = {};
return;
}
// Can't decode back reference.
2023-11-10 20:47:08 +01:00
if (Backref.empty()) {
Mangled = {};
return;
}
// TODO: Add support for function type back references.
2023-11-10 20:47:08 +01:00
if (!parseType(Backref))
Mangled = {};
LastBackref = SaveRefPos;
2023-11-10 20:47:08 +01:00
if (Backref.empty())
Mangled = {};
}
2023-11-10 20:47:08 +01:00
/// Check whether \p Mangled starts a symbol name: either a length digit, or
/// a back reference whose target is a digit. \p Mangled is taken by value, so
/// the caller's view is never modified.
bool Demangler::isSymbolName(std::string_view Mangled) {
  // Nothing left means no symbol name; this also keeps front() well defined.
  if (Mangled.empty())
    return false;

  // std::isdigit has undefined behavior for negative arguments other than
  // EOF, so every byte is widened through unsigned char before the call.
  const auto IsDigit = [](char C) {
    return std::isdigit(static_cast<unsigned char>(C)) != 0;
  };

  if (IsDigit(Mangled.front()))
    return true;

  // TODO: Handle template instances.

  if (Mangled.front() != 'Q')
    return false;

  // Position of 'Q'; the decoded distance is measured backwards from here.
  const char *Qref = Mangled.data();
  Mangled.remove_prefix(1);

  long Distance = 0;
  if (!decodeBackrefPos(Mangled, Distance))
    return false;
  // The reference must not point before the start of the symbol.
  if (Distance > Qref - Str.data())
    return false;

  return IsDigit(Qref[-Distance]);
}
2023-11-10 20:47:08 +01:00
/// Demangle a complete `_D` symbol held in \p Mangled, writing the qualified
/// name into \p Demangled. On failure \p Mangled is reset to a null view.
void Demangler::parseMangle(OutputBuffer *Demangled,
                            std::string_view &Mangled) {
  // A D mangled symbol is comprised of both scope and type information.
  //    MangleName:
  //        _D QualifiedName Type
  //        _D QualifiedName Z
  //        ^
  // The caller should have guaranteed that the start pointer is at the
  // above location.
  // Note that type is never a function type, but only the return type of
  // a function or the type of a variable.

  // remove_prefix past the end is undefined, so refuse input that is too
  // short to even hold the "_D" prefix.
  if (Mangled.size() < 2) {
    Mangled = {};
    return;
  }
  Mangled.remove_prefix(2);

  parseQualified(Demangled, Mangled);

  // A qualified name must be followed by a type or by 'Z'.
  if (Mangled.empty()) {
    Mangled = {};
    return;
  }

  // Artificial symbols end with 'Z' and have no type.
  if (Mangled.front() == 'Z') {
    Mangled.remove_prefix(1);
    return;
  }

  if (!parseType(Mangled))
    Mangled = {};
}
2023-11-10 20:47:08 +01:00
/// Demangle a dot-separated qualified name from the front of \p Mangled into
/// \p Demangled. Failures of the identifier parser are propagated through
/// \p Mangled being reset to a null view.
void Demangler::parseQualified(OutputBuffer *Demangled,
                               std::string_view &Mangled) {
  // Qualified names are identifiers separated by their encoded length.
  // Nested functions also encode their argument types without specifying
  // what they return.
  //    QualifiedName:
  //        SymbolFunctionName
  //        SymbolFunctionName QualifiedName
  //        ^
  //    SymbolFunctionName:
  //        SymbolName
  //        SymbolName TypeFunctionNoReturn
  //        SymbolName M TypeFunctionNoReturn
  //        SymbolName M TypeModifiers TypeFunctionNoReturn
  // The start pointer should be at the above location.

  // Set once the first identifier has been emitted; every later identifier
  // is preceded by a '.' separator.
  bool NeedSeparator = false;

  do {
    // Skip over anonymous symbols, which are encoded as a run of '0'.
    if (!Mangled.empty() && Mangled.front() == '0') {
      const auto FirstNonZero = Mangled.find_first_not_of('0');
      Mangled.remove_prefix(FirstNonZero == std::string_view::npos
                                ? Mangled.size()
                                : FirstNonZero);
      continue;
    }

    if (NeedSeparator)
      *Demangled << '.';
    NeedSeparator = true;

    parseIdentifier(Demangled, Mangled);
  } while (!Mangled.empty() && isSymbolName(Mangled));
}
2023-11-10 20:47:08 +01:00
/// Demangle one identifier from the front of \p Mangled into \p Demangled.
///
/// Handles back references and silently skips compiler generated `__Sddd`
/// fake parents. On failure \p Mangled is reset to a null view.
void Demangler::parseIdentifier(OutputBuffer *Demangled,
                                std::string_view &Mangled) {
  // std::isdigit has undefined behavior for negative arguments other than
  // EOF, so every byte is widened through unsigned char before the call.
  const auto IsDigit = [](char C) {
    return std::isdigit(static_cast<unsigned char>(C)) != 0;
  };

  // Each iteration handles one length-prefixed name. The loop only repeats
  // after a fake parent has been skipped, so a long chain of them does not
  // grow the call stack.
  for (;;) {
    if (Mangled.empty()) {
      Mangled = {};
      return;
    }

    if (Mangled.front() == 'Q') {
      parseSymbolBackref(Demangled, Mangled);
      return;
    }

    // TODO: Parse lengthless template instances.

    // decodeNumber leaves Len untouched on failure, hence the initializer.
    unsigned long Len = 0;
    decodeNumber(Mangled, Len);

    if (Mangled.empty()) {
      Mangled = {};
      return;
    }
    // A zero length or one that exceeds the remaining input is malformed.
    if (!Len || Mangled.length() < Len) {
      Mangled = {};
      return;
    }

    // TODO: Parse template instances with a length prefix.

    // There can be multiple different declarations in the same function that
    // have the same mangled name.  To make the mangled names unique, a fake
    // parent in the form `__Sddd' is added to the symbol.
    if (Len >= 4 && starts_with(Mangled, "__S")) {
      // Everything between "__S" and the end of the name must be a digit.
      std::string_view Digits = Mangled.substr(3, Len - 3);
      while (!Digits.empty() && IsDigit(Digits.front()))
        Digits.remove_prefix(1);

      if (Digits.empty()) {
        // Skip over the fake parent and parse the identifier that follows.
        Mangled.remove_prefix(Len);
        continue;
      }

      // Else demangle it as a plain identifier.
    }

    parseLName(Demangled, Mangled, Len);
    return;
  }
}
2023-11-10 20:47:08 +01:00
bool Demangler::parseType(std::string_view &Mangled) {
if (Mangled.empty()) {
Mangled = {};
return false;
}
2023-11-10 20:47:08 +01:00
switch (Mangled.front()) {
// TODO: Parse type qualifiers.
// TODO: Parse function types.
// TODO: Parse compound types.
// TODO: Parse delegate types.
// TODO: Parse tuple types.
// Basic types.
case 'i':
2023-11-10 20:47:08 +01:00
Mangled.remove_prefix(1);
// TODO: Add type name dumping
2023-11-10 20:47:08 +01:00
return true;
// TODO: Add support for the rest of the basic types.
// Back referenced type.
2023-11-10 20:47:08 +01:00
case 'Q': {
parseTypeBackref(Mangled);
return true;
}
default: // unhandled.
2023-11-10 20:47:08 +01:00
Mangled = {};
return false;
}
}
2023-11-10 20:47:08 +01:00
/// Emit the \p Len byte identifier at the front of \p Mangled.
///
/// A handful of compiler generated names are recognised only when they are
/// immediately followed by 'Z'. For those a descriptive prefix is prepended
/// to \p Demangled instead of the name being appended. In both cases exactly
/// \p Len bytes are consumed from \p Mangled. The caller must ensure that
/// \p Mangled holds at least \p Len bytes.
void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
                           unsigned long Len) {
  // One entry per magic symbol: the length of the name itself, the name
  // followed by the terminating 'Z', and the text prepended to the output.
  struct MagicName {
    unsigned long Len;
    const char *Pattern;
    const char *Prefix;
  };
  static constexpr MagicName MagicNames[] = {
      // The static initializer for a given symbol.
      {6, "__initZ", "initializer for "},
      // The vtable symbol for a given class.
      {6, "__vtblZ", "vtable for "},
      // The classinfo symbol for a given class.
      {7, "__ClassZ", "ClassInfo for "},
      // The interface symbol for a given class.
      {11, "__InterfaceZ", "Interface for "},
      // The ModuleInfo symbol for a given module.
      {12, "__ModuleInfoZ", "ModuleInfo for "},
  };

  for (const MagicName &Magic : MagicNames) {
    if (Len != Magic.Len || !starts_with(Mangled, Magic.Pattern))
      continue;

    Demangled->prepend(Magic.Prefix);
    // Step the write position back by one character.
    // NOTE(review): presumably this drops the '.' separator emitted before
    // this name (or the prefix's trailing space when there was none) —
    // confirm against OutputBuffer.
    Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
    // Only the name is consumed; the 'Z' stays for the caller.
    Mangled.remove_prefix(Len);
    return;
  }

  // Plain identifier: copy it through verbatim.
  *Demangled << Mangled.substr(0, Len);
  Mangled.remove_prefix(Len);
}
2023-11-10 20:47:08 +01:00
/// Remember the symbol being demangled and start with the back reference
/// limit at the end of the string, so that any position inside it is
/// initially acceptable for a type back reference.
Demangler::Demangler(std::string_view Mangled)
    : Str(Mangled),
      // LastBackref is an int; spell out the conversion from size_t.
      LastBackref(static_cast<int>(Mangled.length())) {}
/// Demangle the stored symbol into \p Demangled.
///
/// \return Pointer to the input that was left unconsumed, or nullptr when
/// parsing failed (the parsers reset the view to a null one on failure).
const char *Demangler::parseMangle(OutputBuffer *Demangled) {
  // Work on a copy of the view so that Str itself stays intact for back
  // reference resolution.
  std::string_view Remaining = Str;
  parseMangle(Demangled, Remaining);
  return Remaining.data();
}
2023-11-10 20:47:08 +01:00
char *llvm::dlangDemangle(std::string_view MangledName) {
if (MangledName.empty() || !starts_with(MangledName, "_D"))
return nullptr;
OutputBuffer Demangled;
2023-11-10 20:47:08 +01:00
if (MangledName == "_Dmain") {
Demangled << "D main";
} else {
2023-11-10 20:47:08 +01:00
Demangler D(MangledName);
const char *M = D.parseMangle(&Demangled);
// Check that the entire symbol was successfully demangled.
2023-11-10 20:47:08 +01:00
if (M == nullptr || *M != '\0') {
std::free(Demangled.getBuffer());
return nullptr;
}
}
// OutputBuffer's internal buffer is not null terminated and therefore we need
// to add it to comply with C null terminated strings.
if (Demangled.getCurrentPosition() > 0) {
Demangled << '\0';
Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
return Demangled.getBuffer();
}
std::free(Demangled.getBuffer());
return nullptr;
}