579 lines
17 KiB
C++
579 lines
17 KiB
C++
|
//===--- DLangDemangle.cpp ------------------------------------------------===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
///
|
||
|
/// \file
|
||
|
/// This file defines a demangler for the D programming language as specified
|
||
|
/// in the ABI specification, available at:
|
||
|
/// https://dlang.org/spec/abi.html#name_mangling
|
||
|
///
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "llvm/Demangle/Demangle.h"
|
||
|
#include "llvm/Demangle/StringView.h"
|
||
|
#include "llvm/Demangle/Utility.h"
|
||
|
|
||
|
#include <cctype>
|
||
|
#include <cstring>
|
||
|
#include <limits>
|
||
|
|
||
|
using namespace llvm;
|
||
|
using llvm::itanium_demangle::OutputBuffer;
|
||
|
using llvm::itanium_demangle::StringView;
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
/// Demangle information structure.
|
||
|
struct Demangler {
|
||
|
/// Initialize the information structure we use to pass around information.
|
||
|
///
|
||
|
/// \param Mangled String to demangle.
|
||
|
Demangler(const char *Mangled);
|
||
|
|
||
|
/// Extract and demangle the mangled symbol and append it to the output
|
||
|
/// string.
|
||
|
///
|
||
|
/// \param Demangled Output buffer to write the demangled name.
|
||
|
///
|
||
|
/// \return The remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#name_mangling .
|
||
|
/// \see https://dlang.org/spec/abi.html#MangledName .
|
||
|
const char *parseMangle(OutputBuffer *Demangled);
|
||
|
|
||
|
private:
|
||
|
/// Extract and demangle a given mangled symbol and append it to the output
|
||
|
/// string.
|
||
|
///
|
||
|
/// \param Demangled output buffer to write the demangled name.
|
||
|
/// \param Mangled mangled symbol to be demangled.
|
||
|
///
|
||
|
/// \return The remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#name_mangling .
|
||
|
/// \see https://dlang.org/spec/abi.html#MangledName .
|
||
|
const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
|
||
|
|
||
|
/// Extract the number from a given string.
|
||
|
///
|
||
|
/// \param Mangled string to extract the number.
|
||
|
/// \param Ret assigned result value.
|
||
|
///
|
||
|
/// \return The remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \note A result larger than UINT_MAX is considered a failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#Number .
|
||
|
const char *decodeNumber(const char *Mangled, unsigned long &Ret);
|
||
|
|
||
|
/// Extract the back reference position from a given string.
|
||
|
///
|
||
|
/// \param Mangled string to extract the back reference position.
|
||
|
/// \param Ret assigned result value.
|
||
|
///
|
||
|
/// \return the remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \note Ret is always >= 0 on success, and unspecified on failure
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#back_ref .
|
||
|
/// \see https://dlang.org/spec/abi.html#NumberBackRef .
|
||
|
const char *decodeBackrefPos(const char *Mangled, long &Ret);
|
||
|
|
||
|
/// Extract the symbol pointed by the back reference form a given string.
|
||
|
///
|
||
|
/// \param Mangled string to extract the back reference position.
|
||
|
/// \param Ret assigned result value.
|
||
|
///
|
||
|
/// \return the remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#back_ref .
|
||
|
const char *decodeBackref(const char *Mangled, const char *&Ret);
|
||
|
|
||
|
/// Extract and demangle backreferenced symbol from a given mangled symbol
|
||
|
/// and append it to the output string.
|
||
|
///
|
||
|
/// \param Demangled output buffer to write the demangled name.
|
||
|
/// \param Mangled mangled symbol to be demangled.
|
||
|
///
|
||
|
/// \return the remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#back_ref .
|
||
|
/// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
|
||
|
const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
|
||
|
|
||
|
/// Extract and demangle backreferenced type from a given mangled symbol
|
||
|
/// and append it to the output string.
|
||
|
///
|
||
|
/// \param Mangled mangled symbol to be demangled.
|
||
|
///
|
||
|
/// \return the remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#back_ref .
|
||
|
/// \see https://dlang.org/spec/abi.html#TypeBackRef .
|
||
|
const char *parseTypeBackref(const char *Mangled);
|
||
|
|
||
|
/// Check whether it is the beginning of a symbol name.
|
||
|
///
|
||
|
/// \param Mangled string to extract the symbol name.
|
||
|
///
|
||
|
/// \return true on success, false otherwise.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#SymbolName .
|
||
|
bool isSymbolName(const char *Mangled);
|
||
|
|
||
|
/// Extract and demangle an identifier from a given mangled symbol append it
|
||
|
/// to the output string.
|
||
|
///
|
||
|
/// \param Demangled Output buffer to write the demangled name.
|
||
|
/// \param Mangled Mangled symbol to be demangled.
|
||
|
///
|
||
|
/// \return The remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#SymbolName .
|
||
|
const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
|
||
|
|
||
|
/// Extract and demangle the plain identifier from a given mangled symbol and
|
||
|
/// prepend/append it to the output string, with a special treatment for some
|
||
|
/// magic compiler generated symbols.
|
||
|
///
|
||
|
/// \param Demangled Output buffer to write the demangled name.
|
||
|
/// \param Mangled Mangled symbol to be demangled.
|
||
|
/// \param Len Length of the mangled symbol name.
|
||
|
///
|
||
|
/// \return The remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#LName .
|
||
|
const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
|
||
|
unsigned long Len);
|
||
|
|
||
|
/// Extract and demangle the qualified symbol from a given mangled symbol
|
||
|
/// append it to the output string.
|
||
|
///
|
||
|
/// \param Demangled Output buffer to write the demangled name.
|
||
|
/// \param Mangled Mangled symbol to be demangled.
|
||
|
///
|
||
|
/// \return The remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#QualifiedName .
|
||
|
const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
|
||
|
|
||
|
/// Extract and demangle a type from a given mangled symbol append it to
|
||
|
/// the output string.
|
||
|
///
|
||
|
/// \param Mangled mangled symbol to be demangled.
|
||
|
///
|
||
|
/// \return the remaining string on success or nullptr on failure.
|
||
|
///
|
||
|
/// \see https://dlang.org/spec/abi.html#Type .
|
||
|
const char *parseType(const char *Mangled);
|
||
|
|
||
|
/// The string we are demangling.
|
||
|
const char *Str;
|
||
|
/// The index of the last back reference.
|
||
|
int LastBackref;
|
||
|
};
|
||
|
|
||
|
} // namespace
|
||
|
|
||
|
/// Decode a decimal Number from the front of \p Mangled.
///
/// \param Mangled String to extract the number from; may be nullptr.
/// \param Ret Receives the decoded value; only written on success.
///
/// \return Pointer to the first character after the digits, or nullptr if
/// \p Mangled is null, does not start with a digit, the value would exceed
/// UINT_MAX, or the digits run up to the end of the string.
const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
  // std::isdigit has undefined behavior for negative arguments other than
  // EOF, and plain char may be signed, so bytes >= 0x80 must go through
  // unsigned char first.
  const auto IsDigit = [](char C) {
    return std::isdigit(static_cast<unsigned char>(C)) != 0;
  };

  // Return nullptr if trying to extract something that isn't a digit.
  if (Mangled == nullptr || !IsDigit(*Mangled))
    return nullptr;

  unsigned long Val = 0;

  do {
    unsigned long Digit = Mangled[0] - '0';

    // Check for overflow: reject before Val * 10 + Digit would exceed
    // UINT_MAX.
    if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
      return nullptr;

    Val = Val * 10 + Digit;
    ++Mangled;
  } while (IsDigit(*Mangled));

  // A number that ends the string is rejected.
  if (*Mangled == '\0')
    return nullptr;

  Ret = Val;
  return Mangled;
}
|
||
|
|
||
|
/// Decode the base-26 offset of a back reference from the front of
/// \p Mangled.
///
/// \param Mangled String positioned just after the 'Q'; may be nullptr.
/// \param Ret Receives the decoded offset; only written on success.
///
/// \return Pointer past the terminating lower case letter on success, or
/// nullptr if \p Mangled is null, does not start with a letter, the value
/// overflows, the decoded offset is not strictly positive, or the letters end
/// without a lower case terminator.
const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
  // Return nullptr if there is nothing to extract.
  if (Mangled == nullptr)
    return nullptr;

  // Any identifier or non-basic type that has been emitted to the mangled
  // symbol before will not be emitted again, but is referenced by a special
  // sequence encoding the relative position of the original occurrence in the
  // mangled symbol name.
  // Numbers in back references are encoded with base 26 by upper case letters
  // A-Z for higher digits but lower case letters a-z for the last digit.
  //    NumberBackRef:
  //        [a-z]
  //        [A-Z] NumberBackRef
  //        ^
  unsigned long Val = 0;

  for (;; ++Mangled) {
    // Explicit range checks are used instead of std::isalpha: that function
    // is undefined for negative char values and may accept letters outside
    // A-Z/a-z depending on the locale, which the arithmetic below does not
    // handle.
    const char C = *Mangled;
    const bool IsLower = C >= 'a' && C <= 'z';
    const bool IsUpper = C >= 'A' && C <= 'Z';
    if (!IsLower && !IsUpper)
      break;

    // Check for overflow before scaling by the base.
    if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
      break;

    Val *= 26;

    if (IsLower) {
      // A lower case letter is the last digit of the number.
      Val += C - 'a';
      // Reject zero and values that do not fit in a positive long.
      if (static_cast<long>(Val) <= 0)
        break;
      Ret = static_cast<long>(Val);
      return Mangled + 1;
    }

    Val += C - 'A';
  }

  return nullptr;
}
|
||
|
|
||
|
const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
|
||
|
assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
|
||
|
Ret = nullptr;
|
||
|
|
||
|
// Position of 'Q'
|
||
|
const char *Qpos = Mangled;
|
||
|
long RefPos;
|
||
|
++Mangled;
|
||
|
|
||
|
Mangled = decodeBackrefPos(Mangled, RefPos);
|
||
|
if (Mangled == nullptr)
|
||
|
return nullptr;
|
||
|
|
||
|
if (RefPos > Qpos - Str)
|
||
|
return nullptr;
|
||
|
|
||
|
// Set the position of the back reference.
|
||
|
Ret = Qpos - RefPos;
|
||
|
|
||
|
return Mangled;
|
||
|
}
|
||
|
|
||
|
/// Demangle the identifier named by a back reference and append it to the
/// output buffer.
///
/// \param Demangled Output buffer to write the demangled name.
/// \param Mangled String positioned at the 'Q' of the back reference.
///
/// \return Pointer past the back reference on success, or nullptr on failure.
const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
                                          const char *Mangled) {
  // An identifier back reference always points to a digit 0 to 9.
  //    IdentifierBackRef:
  //        Q NumberBackRef
  //        ^
  const char *Target;
  const char *const Rest = decodeBackref(Mangled, Target);

  // The target must be a length-prefixed simple identifier. A failed back
  // reference leaves Target null, which decodeNumber rejects.
  unsigned long NameLen;
  const char *const Name = decodeNumber(Target, NameLen);
  if (Name == nullptr)
    return nullptr;

  // The identifier must fit inside what is left of the string.
  if (strlen(Name) < NameLen)
    return nullptr;

  if (parseLName(Demangled, Name, NameLen) == nullptr)
    return nullptr;

  return Rest;
}
|
||
|
|
||
|
/// Parse the type named by a back reference.
///
/// \param Mangled String positioned at the 'Q' of the back reference.
///
/// \return Pointer past the back reference on success, or nullptr on failure.
const char *Demangler::parseTypeBackref(const char *Mangled) {
  // A type back reference always points to a letter.
  //    TypeBackRef:
  //        Q NumberBackRef
  //        ^
  const auto Here = Mangled - Str;

  // If we are not strictly before the last back reference seen, bail out as
  // this may be a recursive back reference.
  if (Here >= LastBackref)
    return nullptr;

  const int Saved = LastBackref;
  LastBackref = Here;

  // Find where the back reference points.
  const char *Target;
  const char *const Rest = decodeBackref(Mangled, Target);

  // Can't decode back reference. LastBackref is not restored on this path.
  if (Target == nullptr)
    return nullptr;

  // TODO: Add support for function type back references.
  const char *const Parsed = parseType(Target);

  LastBackref = Saved;

  return Parsed == nullptr ? nullptr : Rest;
}
|
||
|
|
||
|
bool Demangler::isSymbolName(const char *Mangled) {
|
||
|
long Ret;
|
||
|
const char *Qref = Mangled;
|
||
|
|
||
|
if (std::isdigit(*Mangled))
|
||
|
return true;
|
||
|
|
||
|
// TODO: Handle template instances.
|
||
|
|
||
|
if (*Mangled != 'Q')
|
||
|
return false;
|
||
|
|
||
|
Mangled = decodeBackrefPos(Mangled + 1, Ret);
|
||
|
if (Mangled == nullptr || Ret > Qref - Str)
|
||
|
return false;
|
||
|
|
||
|
return std::isdigit(Qref[-Ret]);
|
||
|
}
|
||
|
|
||
|
/// Demangle a complete symbol starting at its "_D" prefix.
///
/// \param Demangled Output buffer to write the demangled name.
/// \param Mangled String positioned at the "_D" prefix.
///
/// \return The remaining string on success or nullptr on failure.
const char *Demangler::parseMangle(OutputBuffer *Demangled,
                                   const char *Mangled) {
  // A D mangled symbol is comprised of both scope and type information.
  //    MangleName:
  //        _D QualifiedName Type
  //        _D QualifiedName Z
  //        ^
  // The caller should have guaranteed that the start pointer is at the
  // above location.
  // Note that type is never a function type, but only the return type of
  // a function or the type of a variable.

  // Step over the two-character "_D" prefix before the qualified name.
  const char *const Cursor = parseQualified(Demangled, Mangled + 2);
  if (Cursor == nullptr)
    return nullptr;

  // Artificial symbols end with 'Z' and have no type.
  if (*Cursor == 'Z')
    return Cursor + 1;

  return parseType(Cursor);
}
|
||
|
|
||
|
/// Demangle a dot-separated qualified name and append it to the output
/// buffer.
///
/// \param Demangled Output buffer to write the demangled name.
/// \param Mangled String positioned at the first symbol name.
///
/// \return The remaining string on success or nullptr on failure.
const char *Demangler::parseQualified(OutputBuffer *Demangled,
                                      const char *Mangled) {
  // Qualified names are identifiers separated by their encoded length.
  // Nested functions also encode their argument types without specifying
  // what they return.
  //    QualifiedName:
  //        SymbolFunctionName
  //        SymbolFunctionName QualifiedName
  //        ^
  //    SymbolFunctionName:
  //        SymbolName
  //        SymbolName TypeFunctionNoReturn
  //        SymbolName M TypeFunctionNoReturn
  //        SymbolName M TypeModifiers TypeFunctionNoReturn
  // The start pointer should be at the above location.

  // Whether at least one symbol has been written already, so that the next
  // one needs a '.' separator in front of it.
  bool NotFirst = false;
  do {
    // Skip over anonymous symbols.
    if (*Mangled == '0') {
      do
        ++Mangled;
      while (*Mangled == '0');

      // Re-evaluate the loop condition at the new position.
      continue;
    }

    if (NotFirst)
      *Demangled << '.';
    NotFirst = true;

    Mangled = parseIdentifier(Demangled, Mangled);

  } while (Mangled && isSymbolName(Mangled));

  return Mangled;
}
|
||
|
|
||
|
/// Demangle one identifier (length-prefixed name or back reference) and
/// append it to the output buffer.
///
/// \param Demangled Output buffer to write the demangled name.
/// \param Mangled String positioned at the identifier; may be nullptr.
///
/// \return The remaining string on success, or nullptr if the input is null
/// or empty, the length prefix is missing, zero or longer than the remaining
/// string, or a nested parse fails.
const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
                                       const char *Mangled) {
  if (Mangled == nullptr || *Mangled == '\0')
    return nullptr;

  if (*Mangled == 'Q')
    return parseSymbolBackref(Demangled, Mangled);

  // TODO: Parse lengthless template instances.

  unsigned long Len;
  const char *Endptr = decodeNumber(Mangled, Len);

  if (Endptr == nullptr || Len == 0)
    return nullptr;

  // The identifier must fit inside what is left of the string.
  if (strlen(Endptr) < Len)
    return nullptr;

  Mangled = Endptr;

  // TODO: Parse template instances with a length prefix.

  // There can be multiple different declarations in the same function that
  // have the same mangled name.  To make the mangled names unique, a fake
  // parent in the form `__Sddd' is added to the symbol.
  if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
    const char *NumPtr = Mangled + 3;
    // The identifier bytes are arbitrary input, so convert to unsigned char
    // before std::isdigit; a negative char value would be undefined
    // behavior.
    while (NumPtr < (Mangled + Len) &&
           std::isdigit(static_cast<unsigned char>(*NumPtr)))
      ++NumPtr;

    if (Mangled + Len == NumPtr) {
      // Skip over the fake parent.
      Mangled += Len;
      return parseIdentifier(Demangled, Mangled);
    }

    // Else demangle it as a plain identifier.
  }

  return parseLName(Demangled, Mangled, Len);
}
|
||
|
|
||
|
const char *Demangler::parseType(const char *Mangled) {
|
||
|
if (*Mangled == '\0')
|
||
|
return nullptr;
|
||
|
|
||
|
switch (*Mangled) {
|
||
|
// TODO: Parse type qualifiers.
|
||
|
// TODO: Parse function types.
|
||
|
// TODO: Parse compound types.
|
||
|
// TODO: Parse delegate types.
|
||
|
// TODO: Parse tuple types.
|
||
|
|
||
|
// Basic types.
|
||
|
case 'i':
|
||
|
++Mangled;
|
||
|
// TODO: Add type name dumping
|
||
|
return Mangled;
|
||
|
|
||
|
// TODO: Add support for the rest of the basic types.
|
||
|
|
||
|
// Back referenced type.
|
||
|
case 'Q':
|
||
|
return parseTypeBackref(Mangled);
|
||
|
|
||
|
default: // unhandled.
|
||
|
return nullptr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Demangle a plain identifier of \p Len characters. A handful of compiler
/// generated names, when immediately followed by 'Z', are turned into a
/// descriptive prefix on the output instead of being appended.
///
/// \param Demangled Output buffer to write the demangled name.
/// \param Mangled String positioned at the first character of the name.
/// \param Len Length of the mangled symbol name.
///
/// \return Pointer past the \p Len characters of the name.
const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
                                  unsigned long Len) {
  struct MagicSymbol {
    unsigned long NameLen; // Length of the name without the trailing 'Z'.
    const char *Pattern;   // Name followed by the terminating 'Z'.
    const char *Prefix;    // Text prepended to the demangled output.
  };
  static const MagicSymbol MagicSymbols[] = {
      // The static initializer for a given symbol.
      {6, "__initZ", "initializer for "},
      // The vtable symbol for a given class.
      {6, "__vtblZ", "vtable for "},
      // The classinfo symbol for a given class.
      {7, "__ClassZ", "ClassInfo for "},
      // The interface symbol for a given class.
      {11, "__InterfaceZ", "Interface for "},
      // The ModuleInfo symbol for a given module.
      {12, "__ModuleInfoZ", "ModuleInfo for "},
  };

  for (const MagicSymbol &Magic : MagicSymbols) {
    // Comparing Len + 1 characters also requires the 'Z' right after the
    // name.
    if (Len != Magic.NameLen || strncmp(Mangled, Magic.Pattern, Len + 1) != 0)
      continue;

    Demangled->prepend(Magic.Prefix);
    // Drop the last character of the output; presumably the '.' separator
    // written by parseQualified before this identifier -- TODO confirm.
    Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
    return Mangled + Len;
  }

  // Ordinary identifier: copy it through unchanged.
  *Demangled << StringView(Mangled, Len);
  return Mangled + Len;
}
|
||
|
|
||
|
/// Remember the string being demangled and start with the last back
/// reference index at the end of that string.
///
/// \param Mangled String to demangle; its length is measured with strlen.
Demangler::Demangler(const char *Mangled)
    : Str(Mangled), LastBackref(static_cast<int>(strlen(Mangled))) {}
|
||
|
|
||
|
/// Demangle the string this object was constructed with.
///
/// \param Demangled Output buffer to write the demangled name.
///
/// \return The remaining string on success or nullptr on failure.
const char *Demangler::parseMangle(OutputBuffer *Demangled) {
  const char *const Start = Str;
  return parseMangle(Demangled, Start);
}
|
||
|
|
||
|
char *llvm::dlangDemangle(const char *MangledName) {
|
||
|
if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
|
||
|
return nullptr;
|
||
|
|
||
|
OutputBuffer Demangled;
|
||
|
if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
|
||
|
return nullptr;
|
||
|
|
||
|
if (strcmp(MangledName, "_Dmain") == 0) {
|
||
|
Demangled << "D main";
|
||
|
} else {
|
||
|
|
||
|
Demangler D = Demangler(MangledName);
|
||
|
MangledName = D.parseMangle(&Demangled);
|
||
|
|
||
|
// Check that the entire symbol was successfully demangled.
|
||
|
if (MangledName == nullptr || *MangledName != '\0') {
|
||
|
std::free(Demangled.getBuffer());
|
||
|
return nullptr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// OutputBuffer's internal buffer is not null terminated and therefore we need
|
||
|
// to add it to comply with C null terminated strings.
|
||
|
if (Demangled.getCurrentPosition() > 0) {
|
||
|
Demangled << '\0';
|
||
|
Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
|
||
|
return Demangled.getBuffer();
|
||
|
}
|
||
|
|
||
|
std::free(Demangled.getBuffer());
|
||
|
return nullptr;
|
||
|
}
|