//===--- DLangDemangle.cpp ------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines a demangler for the D programming language as specified /// in the ABI specification, available at: /// https://dlang.org/spec/abi.html#name_mangling /// //===----------------------------------------------------------------------===// #include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/StringViewExtras.h" #include "llvm/Demangle/Utility.h" #include #include #include #include using namespace llvm; using llvm::itanium_demangle::OutputBuffer; using llvm::itanium_demangle::starts_with; namespace { /// Demangle information structure. struct Demangler { /// Initialize the information structure we use to pass around information. /// /// \param Mangled String to demangle. Demangler(std::string_view Mangled); /// Extract and demangle the mangled symbol and append it to the output /// string. /// /// \param Demangled Output buffer to write the demangled name. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#name_mangling . /// \see https://dlang.org/spec/abi.html#MangledName . const char *parseMangle(OutputBuffer *Demangled); private: /// Extract and demangle a given mangled symbol and append it to the output /// string. /// /// \param Demangled output buffer to write the demangled name. /// \param Mangled mangled symbol to be demangled. /// /// \see https://dlang.org/spec/abi.html#name_mangling . /// \see https://dlang.org/spec/abi.html#MangledName . void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled); /// Extract the number from a given string. /// /// \param Mangled string to extract the number. /// \param Ret assigned result value. /// /// \note Ret larger than UINT_MAX is considered a failure. /// /// \see https://dlang.org/spec/abi.html#Number . void decodeNumber(std::string_view &Mangled, unsigned long &Ret); /// Extract the back reference position from a given string. /// /// \param Mangled string to extract the back reference position. /// \param Ret assigned result value. /// /// \return true on success, false on error. /// /// \note Ret is always >= 0 on success, and unspecified on failure /// /// \see https://dlang.org/spec/abi.html#back_ref . /// \see https://dlang.org/spec/abi.html#NumberBackRef . bool decodeBackrefPos(std::string_view &Mangled, long &Ret); /// Extract the symbol pointed by the back reference form a given string. /// /// \param Mangled string to extract the back reference position. /// \param Ret assigned result value. /// /// \return true on success, false on error. /// /// \see https://dlang.org/spec/abi.html#back_ref . bool decodeBackref(std::string_view &Mangled, std::string_view &Ret); /// Extract and demangle backreferenced symbol from a given mangled symbol /// and append it to the output string. /// /// \param Demangled output buffer to write the demangled name. /// \param Mangled mangled symbol to be demangled. /// /// \see https://dlang.org/spec/abi.html#back_ref . /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled); /// Extract and demangle backreferenced type from a given mangled symbol /// and append it to the output string. /// /// \param Mangled mangled symbol to be demangled. /// /// \see https://dlang.org/spec/abi.html#back_ref . /// \see https://dlang.org/spec/abi.html#TypeBackRef . void parseTypeBackref(std::string_view &Mangled); /// Check whether it is the beginning of a symbol name. /// /// \param Mangled string to extract the symbol name. /// /// \return true on success, false otherwise. /// /// \see https://dlang.org/spec/abi.html#SymbolName . bool isSymbolName(std::string_view Mangled); /// Extract and demangle an identifier from a given mangled symbol append it /// to the output string. /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. /// /// \see https://dlang.org/spec/abi.html#SymbolName . void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled); /// Extract and demangle the plain identifier from a given mangled symbol and /// prepend/append it to the output string, with a special treatment for some /// magic compiler generated symbols. /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. /// \param Len Length of the mangled symbol name. /// /// \see https://dlang.org/spec/abi.html#LName . void parseLName(OutputBuffer *Demangled, std::string_view &Mangled, unsigned long Len); /// Extract and demangle the qualified symbol from a given mangled symbol /// append it to the output string. /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. /// /// \see https://dlang.org/spec/abi.html#QualifiedName . void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled); /// Extract and demangle a type from a given mangled symbol append it to /// the output string. /// /// \param Mangled mangled symbol to be demangled. /// /// \return true on success, false on error. /// /// \see https://dlang.org/spec/abi.html#Type . bool parseType(std::string_view &Mangled); /// An immutable view of the string we are demangling. const std::string_view Str; /// The index of the last back reference. int LastBackref; }; } // namespace void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) { // Clear Mangled if trying to extract something that isn't a digit. if (Mangled.empty()) { Mangled = {}; return; } if (!std::isdigit(Mangled.front())) { Mangled = {}; return; } unsigned long Val = 0; do { unsigned long Digit = Mangled[0] - '0'; // Check for overflow. if (Val > (std::numeric_limits::max() - Digit) / 10) { Mangled = {}; return; } Val = Val * 10 + Digit; Mangled.remove_prefix(1); } while (!Mangled.empty() && std::isdigit(Mangled.front())); if (Mangled.empty()) { Mangled = {}; return; } Ret = Val; } bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) { // Return nullptr if trying to extract something that isn't a digit if (Mangled.empty()) { Mangled = {}; return false; } // Any identifier or non-basic type that has been emitted to the mangled // symbol before will not be emitted again, but is referenced by a special // sequence encoding the relative position of the original occurrence in the // mangled symbol name. // Numbers in back references are encoded with base 26 by upper case letters // A-Z for higher digits but lower case letters a-z for the last digit. // NumberBackRef: // [a-z] // [A-Z] NumberBackRef // ^ unsigned long Val = 0; while (!Mangled.empty() && std::isalpha(Mangled.front())) { // Check for overflow if (Val > (std::numeric_limits::max() - 25) / 26) break; Val *= 26; if (Mangled[0] >= 'a' && Mangled[0] <= 'z') { Val += Mangled[0] - 'a'; if ((long)Val <= 0) break; Ret = Val; Mangled.remove_prefix(1); return true; } Val += Mangled[0] - 'A'; Mangled.remove_prefix(1); } Mangled = {}; return false; } bool Demangler::decodeBackref(std::string_view &Mangled, std::string_view &Ret) { assert(!Mangled.empty() && Mangled.front() == 'Q' && "Invalid back reference!"); Ret = {}; // Position of 'Q' const char *Qpos = Mangled.data(); long RefPos; Mangled.remove_prefix(1); if (!decodeBackrefPos(Mangled, RefPos)) { Mangled = {}; return false; } if (RefPos > Qpos - Str.data()) { Mangled = {}; return false; } // Set the position of the back reference. Ret = Qpos - RefPos; return true; } void Demangler::parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled) { // An identifier back reference always points to a digit 0 to 9. // IdentifierBackRef: // Q NumberBackRef // ^ unsigned long Len; // Get position of the back reference std::string_view Backref; if (!decodeBackref(Mangled, Backref)) { Mangled = {}; return; } // Must point to a simple identifier decodeNumber(Backref, Len); if (Backref.empty() || Backref.length() < Len) { Mangled = {}; return; } parseLName(Demangled, Backref, Len); if (Backref.empty()) Mangled = {}; } void Demangler::parseTypeBackref(std::string_view &Mangled) { // A type back reference always points to a letter. // TypeBackRef: // Q NumberBackRef // ^ // If we appear to be moving backwards through the mangle string, then // bail as this may be a recursive back reference. if (Mangled.data() - Str.data() >= LastBackref) { Mangled = {}; return; } int SaveRefPos = LastBackref; LastBackref = Mangled.data() - Str.data(); // Get position of the back reference. std::string_view Backref; if (!decodeBackref(Mangled, Backref)) { Mangled = {}; return; } // Can't decode back reference. if (Backref.empty()) { Mangled = {}; return; } // TODO: Add support for function type back references. if (!parseType(Backref)) Mangled = {}; LastBackref = SaveRefPos; if (Backref.empty()) Mangled = {}; } bool Demangler::isSymbolName(std::string_view Mangled) { long Ret; const char *Qref = Mangled.data(); if (std::isdigit(Mangled.front())) return true; // TODO: Handle template instances. if (Mangled.front() != 'Q') return false; Mangled.remove_prefix(1); bool Valid = decodeBackrefPos(Mangled, Ret); if (!Valid || Ret > Qref - Str.data()) return false; return std::isdigit(Qref[-Ret]); } void Demangler::parseMangle(OutputBuffer *Demangled, std::string_view &Mangled) { // A D mangled symbol is comprised of both scope and type information. // MangleName: // _D QualifiedName Type // _D QualifiedName Z // ^ // The caller should have guaranteed that the start pointer is at the // above location. // Note that type is never a function type, but only the return type of // a function or the type of a variable. Mangled.remove_prefix(2); parseQualified(Demangled, Mangled); if (Mangled.empty()) { Mangled = {}; return; } // Artificial symbols end with 'Z' and have no type. if (Mangled.front() == 'Z') { Mangled.remove_prefix(1); } else if (!parseType(Mangled)) Mangled = {}; } void Demangler::parseQualified(OutputBuffer *Demangled, std::string_view &Mangled) { // Qualified names are identifiers separated by their encoded length. // Nested functions also encode their argument types without specifying // what they return. // QualifiedName: // SymbolFunctionName // SymbolFunctionName QualifiedName // ^ // SymbolFunctionName: // SymbolName // SymbolName TypeFunctionNoReturn // SymbolName M TypeFunctionNoReturn // SymbolName M TypeModifiers TypeFunctionNoReturn // The start pointer should be at the above location. // Whether it has more than one symbol size_t NotFirst = false; do { // Skip over anonymous symbols. if (!Mangled.empty() && Mangled.front() == '0') { do Mangled.remove_prefix(1); while (!Mangled.empty() && Mangled.front() == '0'); continue; } if (NotFirst) *Demangled << '.'; NotFirst = true; parseIdentifier(Demangled, Mangled); } while (!Mangled.empty() && isSymbolName(Mangled)); } void Demangler::parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled) { if (Mangled.empty()) { Mangled = {}; return; } if (Mangled.front() == 'Q') return parseSymbolBackref(Demangled, Mangled); // TODO: Parse lengthless template instances. unsigned long Len; decodeNumber(Mangled, Len); if (Mangled.empty()) { Mangled = {}; return; } if (!Len || Mangled.length() < Len) { Mangled = {}; return; } // TODO: Parse template instances with a length prefix. // There can be multiple different declarations in the same function that // have the same mangled name. To make the mangled names unique, a fake // parent in the form `__Sddd' is added to the symbol. if (Len >= 4 && starts_with(Mangled, "__S")) { const size_t SuffixLen = Mangled.length() - Len; std::string_view P = Mangled.substr(3); while (P.length() > SuffixLen && std::isdigit(P.front())) P.remove_prefix(1); if (P.length() == SuffixLen) { // Skip over the fake parent. Mangled.remove_prefix(Len); return parseIdentifier(Demangled, Mangled); } // Else demangle it as a plain identifier. } parseLName(Demangled, Mangled, Len); } bool Demangler::parseType(std::string_view &Mangled) { if (Mangled.empty()) { Mangled = {}; return false; } switch (Mangled.front()) { // TODO: Parse type qualifiers. // TODO: Parse function types. // TODO: Parse compound types. // TODO: Parse delegate types. // TODO: Parse tuple types. // Basic types. case 'i': Mangled.remove_prefix(1); // TODO: Add type name dumping return true; // TODO: Add support for the rest of the basic types. // Back referenced type. case 'Q': { parseTypeBackref(Mangled); return true; } default: // unhandled. Mangled = {}; return false; } } void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled, unsigned long Len) { switch (Len) { case 6: if (starts_with(Mangled, "__initZ")) { // The static initializer for a given symbol. Demangled->prepend("initializer for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled.remove_prefix(Len); return; } if (starts_with(Mangled, "__vtblZ")) { // The vtable symbol for a given class. Demangled->prepend("vtable for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled.remove_prefix(Len); return; } break; case 7: if (starts_with(Mangled, "__ClassZ")) { // The classinfo symbol for a given class. Demangled->prepend("ClassInfo for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled.remove_prefix(Len); return; } break; case 11: if (starts_with(Mangled, "__InterfaceZ")) { // The interface symbol for a given class. Demangled->prepend("Interface for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled.remove_prefix(Len); return; } break; case 12: if (starts_with(Mangled, "__ModuleInfoZ")) { // The ModuleInfo symbol for a given module. Demangled->prepend("ModuleInfo for "); Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); Mangled.remove_prefix(Len); return; } break; } *Demangled << Mangled.substr(0, Len); Mangled.remove_prefix(Len); } Demangler::Demangler(std::string_view Mangled) : Str(Mangled), LastBackref(Mangled.length()) {} const char *Demangler::parseMangle(OutputBuffer *Demangled) { std::string_view M(this->Str); parseMangle(Demangled, M); return M.data(); } char *llvm::dlangDemangle(std::string_view MangledName) { if (MangledName.empty() || !starts_with(MangledName, "_D")) return nullptr; OutputBuffer Demangled; if (MangledName == "_Dmain") { Demangled << "D main"; } else { Demangler D(MangledName); const char *M = D.parseMangle(&Demangled); // Check that the entire symbol was successfully demangled. if (M == nullptr || *M != '\0') { std::free(Demangled.getBuffer()); return nullptr; } } // OutputBuffer's internal buffer is not null terminated and therefore we need // to add it to comply with C null terminated strings. if (Demangled.getCurrentPosition() > 0) { Demangled << '\0'; Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); return Demangled.getBuffer(); } std::free(Demangled.getBuffer()); return nullptr; }