|  | 
|  | 1 | +//===--- OxCamlDemangle.cpp -------------------------------------*- C++ -*-===// | 
|  | 2 | +// | 
|  | 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | 4 | +// See https://llvm.org/LICENSE.txt for license information. | 
|  | 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | 6 | +// | 
|  | 7 | +//===----------------------------------------------------------------------===// | 
|  | 8 | +// | 
|  | 9 | +// This file defines a demangler for the new mangling scheme devised for OxCaml | 
|  | 10 | +// | 
|  | 11 | +//===----------------------------------------------------------------------===// | 
|  | 12 | + | 
|  | 13 | +#include <cassert> | 
|  | 14 | + | 
|  | 15 | +#include "llvm/Demangle/Demangle.h" | 
|  | 16 | +#include "llvm/Demangle/StringView.h" | 
|  | 17 | +#include "llvm/Demangle/Utility.h" | 
|  | 18 | + | 
|  | 19 | +using llvm::itanium_demangle::OutputBuffer; | 
|  | 20 | +using llvm::itanium_demangle::StringView; | 
|  | 21 | + | 
// Sentinel value (an unsigned with every bit set) that the numeric parsers in
// this file return when they could not consume a single digit.
#define ERROR (~0U)
|  | 23 | + | 
|  | 24 | +static unsigned ConsumeUnsignedDecimal(StringView& sv) { | 
|  | 25 | +  unsigned res = 0, i = 0; | 
|  | 26 | +  while(sv[i] >= '0' && sv[i] <= '9') { | 
|  | 27 | +    res = res * 10 + (sv[i] - '0'); | 
|  | 28 | +    i++; | 
|  | 29 | +  } | 
|  | 30 | +  sv = sv.dropFront(i); | 
|  | 31 | +  if(i == 0) | 
|  | 32 | +    return ERROR; | 
|  | 33 | +  return res; | 
|  | 34 | +} | 
|  | 35 | + | 
|  | 36 | +static unsigned ConsumeUnsigned26(StringView& sv) { | 
|  | 37 | +  unsigned res = 0, i = 0; | 
|  | 38 | +  while(sv[i] >= 'A' && sv[i] <= 'Z') { | 
|  | 39 | +    res = res * 26 + (sv[i] - 'A'); | 
|  | 40 | +    i++; | 
|  | 41 | +  } | 
|  | 42 | +  sv = sv.dropFront(i); | 
|  | 43 | +  if(i == 0) | 
|  | 44 | +    return ERROR; | 
|  | 45 | +  return res; | 
|  | 46 | +} | 
|  | 47 | + | 
// Tells whether `c` is a lowercase hexadecimal digit, i.e. one of '0'..'9' or
// 'a'..'f'. Uppercase 'A'..'F' are not accepted.
static bool islowerhex(char c) {
  if ('0' <= c && c <= '9')
    return true;
  return 'a' <= c && c <= 'f';
}
|  | 51 | + | 
// Converts a lowercase hexadecimal digit to its numeric value (0..15).
// The caller must pass a character for which islowerhex() holds; anything
// else trips the assertion in builds where assertions are enabled.
static unsigned lowerhex(char c) {
  const bool isDecimalDigit = c >= '0' && c <= '9';
  if (!isDecimalDigit) {
    assert(c >= 'a' && c <= 'f');
    return c - 'a' + 10;
  }
  return c - '0';
}
|  | 60 | + | 
|  | 61 | +// Decode unicode-escaped identifier (format: u<len><coded>_<raw>) | 
|  | 62 | +// Returns true on success, false on error | 
|  | 63 | +static bool DecodeUnicodeEscaped(StringView& Mangled, OutputBuffer& Demangled) { | 
|  | 64 | +  unsigned len = ConsumeUnsignedDecimal(Mangled); | 
|  | 65 | +  if(len == ERROR || len <= 0 || len > Mangled.size()) | 
|  | 66 | +    return false; | 
|  | 67 | + | 
|  | 68 | +  size_t split = Mangled.find('_'); | 
|  | 69 | +  if(split >= len) | 
|  | 70 | +    return false; | 
|  | 71 | + | 
|  | 72 | +  StringView coded = Mangled.substr(0, split); | 
|  | 73 | +  StringView raw = Mangled.substr(split+1, len-split-1); | 
|  | 74 | + | 
|  | 75 | +  while(!coded.empty()) { | 
|  | 76 | +    unsigned chunklen = ConsumeUnsigned26(coded); | 
|  | 77 | +    if(chunklen == ERROR || chunklen > raw.size()) | 
|  | 78 | +      return false; | 
|  | 79 | +    Demangled << raw.substr(0, chunklen); | 
|  | 80 | +    raw = raw.dropFront(chunklen); | 
|  | 81 | + | 
|  | 82 | +    unsigned i; | 
|  | 83 | +    for(i = 0; i+1 < coded.size() && islowerhex(coded[i]); i+=2) { | 
|  | 84 | +      if(!islowerhex(coded[i+1])) | 
|  | 85 | +        return false; | 
|  | 86 | +      char c = (char)(lowerhex(coded[i]) << 4 | lowerhex(coded[i+1])); | 
|  | 87 | +      Demangled << c; | 
|  | 88 | +    } | 
|  | 89 | +    coded = coded.dropFront(i); | 
|  | 90 | +  } | 
|  | 91 | + | 
|  | 92 | +  if(!raw.empty()) | 
|  | 93 | +    Demangled << raw; | 
|  | 94 | + | 
|  | 95 | +  Mangled = Mangled.dropFront(len); | 
|  | 96 | +  return true; | 
|  | 97 | +} | 
|  | 98 | + | 
|  | 99 | +// Decode identifier (either plain or unicode-escaped) | 
|  | 100 | +// Handles: <len><text> or u<len><coded>_<raw> | 
|  | 101 | +// Returns true on success, false on error | 
|  | 102 | +static bool DecodeIdentifier(StringView& Mangled, OutputBuffer& Demangled) { | 
|  | 103 | +  if(Mangled.consumeFront('u')) { | 
|  | 104 | +    // Unicode-escaped identifier | 
|  | 105 | +    return DecodeUnicodeEscaped(Mangled, Demangled); | 
|  | 106 | +  } else { | 
|  | 107 | +    // Plain identifier with length prefix | 
|  | 108 | +    unsigned len = ConsumeUnsignedDecimal(Mangled); | 
|  | 109 | +    if(len == ERROR || len <= 0 || len > Mangled.size()) | 
|  | 110 | +      return false; | 
|  | 111 | +    Demangled << Mangled.substr(0, len); | 
|  | 112 | +    Mangled = Mangled.dropFront(len); | 
|  | 113 | +    return true; | 
|  | 114 | +  } | 
|  | 115 | +} | 
|  | 116 | + | 
|  | 117 | +// Decode anonymous location (format: filename_line_col) | 
|  | 118 | +// Anonymous functions/modules are encoded as: fn(filename:line:col) | 
|  | 119 | +// Returns true on success, false on error | 
|  | 120 | +static bool DecodeAnonymousLocation(StringView& Mangled, OutputBuffer& Demangled) { | 
|  | 121 | +  // Allocate temporary buffer based on remaining mangled string size | 
|  | 122 | +  // The decoded identifier will be at most the size of the remaining mangled string | 
|  | 123 | +  size_t buffer_size = Mangled.size(); | 
|  | 124 | +  if(buffer_size == 0) | 
|  | 125 | +    return false; | 
|  | 126 | + | 
|  | 127 | +  char *temp_buf = static_cast<char *>(std::malloc(buffer_size)); | 
|  | 128 | +  if(temp_buf == nullptr) | 
|  | 129 | +    std::terminate(); | 
|  | 130 | + | 
|  | 131 | +  OutputBuffer TempDemangled(temp_buf, buffer_size); | 
|  | 132 | + | 
|  | 133 | +  if(!DecodeIdentifier(Mangled, TempDemangled)) { | 
|  | 134 | +    std::free(temp_buf); | 
|  | 135 | +    return false; | 
|  | 136 | +  } | 
|  | 137 | + | 
|  | 138 | +  size_t temp_len = TempDemangled.getCurrentPosition(); | 
|  | 139 | + | 
|  | 140 | +  // Parse filename_line_col format by finding the last two underscores | 
|  | 141 | +  size_t first_underscore = 0, second_underscore = 0; | 
|  | 142 | +  int underscore_count = 0; | 
|  | 143 | + | 
|  | 144 | +  for(size_t j = temp_len; j > 0; j--) { | 
|  | 145 | +    if(temp_buf[j-1] == '_') { | 
|  | 146 | +      underscore_count++; | 
|  | 147 | +      if(underscore_count == 1) | 
|  | 148 | +        second_underscore = j - 1; | 
|  | 149 | +      else if(underscore_count == 2) { | 
|  | 150 | +        first_underscore = j - 1; | 
|  | 151 | +        break; | 
|  | 152 | +      } | 
|  | 153 | +    } | 
|  | 154 | +  } | 
|  | 155 | + | 
|  | 156 | +  // Output in format fn(filename:line:col) | 
|  | 157 | +  if(underscore_count >= 2) { | 
|  | 158 | +    Demangled << "fn("; | 
|  | 159 | +    for(size_t j = 0; j < first_underscore; j++) | 
|  | 160 | +      Demangled << temp_buf[j]; | 
|  | 161 | +    Demangled << ':'; | 
|  | 162 | +    for(size_t j = first_underscore + 1; j < second_underscore; j++) | 
|  | 163 | +      Demangled << temp_buf[j]; | 
|  | 164 | +    Demangled << ':'; | 
|  | 165 | +    for(size_t j = second_underscore + 1; j < temp_len; j++) | 
|  | 166 | +      Demangled << temp_buf[j]; | 
|  | 167 | +    Demangled << ')'; | 
|  | 168 | +  } else { | 
|  | 169 | +    // Fallback: just output the identifier as-is | 
|  | 170 | +    for(size_t j = 0; j < temp_len; j++) | 
|  | 171 | +      Demangled << temp_buf[j]; | 
|  | 172 | +  } | 
|  | 173 | + | 
|  | 174 | +  std::free(temp_buf); | 
|  | 175 | +  return true; | 
|  | 176 | +} | 
|  | 177 | + | 
|  | 178 | +char *llvm::oxcamlDemangle(const char *MangledName) { | 
|  | 179 | +  StringView Mangled(MangledName); | 
|  | 180 | +  if(!Mangled.consumeFront("_O")) | 
|  | 181 | +    return nullptr; | 
|  | 182 | + | 
|  | 183 | +  // Allocate the buffer at a reasonable size, as OutputBuffer allocates 992 | 
|  | 184 | +  // bytes when starting from an empty buffer | 
|  | 185 | +  char *DemangledBuffer; | 
|  | 186 | +  DemangledBuffer = static_cast<char *>(std::malloc(Mangled.size())); | 
|  | 187 | +  if (DemangledBuffer == nullptr) | 
|  | 188 | +    std::terminate(); | 
|  | 189 | +  OutputBuffer Demangled(DemangledBuffer, Mangled.size()); | 
|  | 190 | + | 
|  | 191 | +#define ENDONERROR() do {           \ | 
|  | 192 | +  std::free(Demangled.getBuffer()); \ | 
|  | 193 | +  return nullptr;                   \ | 
|  | 194 | +} while(0) | 
|  | 195 | + | 
|  | 196 | +  // Parse path items | 
|  | 197 | +  while(!Mangled.empty()) { | 
|  | 198 | +      // Check for terminating underscore | 
|  | 199 | +      if(Mangled[0] == '_') { | 
|  | 200 | +          // End of symbol path, rest is unique id | 
|  | 201 | +          break; | 
|  | 202 | +      } | 
|  | 203 | + | 
|  | 204 | +      // Handle each path_item type | 
|  | 205 | +      switch(Mangled[0]) { | 
|  | 206 | +          case 'M':  // Module | 
|  | 207 | +              if(!Demangled.empty()) | 
|  | 208 | +                  Demangled << '.'; | 
|  | 209 | +              Mangled = Mangled.dropFront(1); | 
|  | 210 | +              if(!DecodeIdentifier(Mangled, Demangled)) | 
|  | 211 | +                  ENDONERROR(); | 
|  | 212 | +              break; | 
|  | 213 | + | 
|  | 214 | +          case 'F':  // NamedFunction | 
|  | 215 | +              if(!Demangled.empty()) | 
|  | 216 | +                  Demangled << '.'; | 
|  | 217 | +              Mangled = Mangled.dropFront(1); | 
|  | 218 | +              if(!DecodeIdentifier(Mangled, Demangled)) | 
|  | 219 | +                  ENDONERROR(); | 
|  | 220 | +              break; | 
|  | 221 | + | 
|  | 222 | +          case 'L':  // AnonymousFunction | 
|  | 223 | +              if(!Demangled.empty()) | 
|  | 224 | +                  Demangled << '.'; | 
|  | 225 | +              Mangled = Mangled.dropFront(1); | 
|  | 226 | +              if(!DecodeAnonymousLocation(Mangled, Demangled)) | 
|  | 227 | +                  ENDONERROR(); | 
|  | 228 | +              break; | 
|  | 229 | + | 
|  | 230 | +          case 'S':  // AnonymousModule | 
|  | 231 | +              if(!Demangled.empty()) | 
|  | 232 | +                  Demangled << '.'; | 
|  | 233 | +              Mangled = Mangled.dropFront(1); | 
|  | 234 | +              if(!DecodeAnonymousLocation(Mangled, Demangled)) | 
|  | 235 | +                  ENDONERROR(); | 
|  | 236 | +              break; | 
|  | 237 | + | 
|  | 238 | +          case 'P':  // PartialFunction (no dot separator) | 
|  | 239 | +              Mangled = Mangled.dropFront(1); | 
|  | 240 | +              Demangled << "(partially_applied)"; | 
|  | 241 | +              break; | 
|  | 242 | + | 
|  | 243 | +          default: | 
|  | 244 | +              // No prefix means Module (legacy compatibility) | 
|  | 245 | +              if(!Demangled.empty()) | 
|  | 246 | +                  Demangled << '.'; | 
|  | 247 | +              if(!DecodeIdentifier(Mangled, Demangled)) | 
|  | 248 | +                  ENDONERROR(); | 
|  | 249 | +              break; | 
|  | 250 | +      } | 
|  | 251 | +  } | 
|  | 252 | + | 
|  | 253 | +  Demangled << '\0'; | 
|  | 254 | + | 
|  | 255 | +  return Demangled.getBuffer(); | 
|  | 256 | +} | 
0 commit comments