Skip to content

Commit 595ac27

Browse files
tmcgilchrist and shym authored
Add OxCaml runlength demangling (#28)
Co-authored-by: Samuel Hym <[email protected]>
1 parent 98ee309 commit 595ac27

File tree

10 files changed

+395
-0
lines changed

10 files changed

+395
-0
lines changed

lldb/include/lldb/Core/Mangled.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class Mangled {
4343
eManglingSchemeNone = 0,
4444
eManglingSchemeMSVC,
4545
eManglingSchemeItanium,
46+
eManglingSchemeOxCaml,
4647
eManglingSchemeRustV0,
4748
eManglingSchemeD
4849
};

lldb/source/Core/Mangled.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
4444
if (name.startswith("?"))
4545
return Mangled::eManglingSchemeMSVC;
4646

47+
if (name.startswith("_O"))
48+
return Mangled::eManglingSchemeOxCaml;
49+
4750
if (name.startswith("_R"))
4851
return Mangled::eManglingSchemeRustV0;
4952

@@ -167,6 +170,19 @@ static char *GetItaniumDemangledStr(const char *M) {
167170
return demangled_cstr;
168171
}
169172

173+
static char *GetOxCamlDemangledStr(const char *M) {
174+
char *demangled_cstr = llvm::oxcamlDemangle(M);
175+
176+
if (Log *log = GetLog(LLDBLog::Demangle)) {
177+
if (demangled_cstr && demangled_cstr[0])
178+
LLDB_LOG(log, "demangled oxcaml: {0} -> \"{1}\"", M, demangled_cstr);
179+
else
180+
LLDB_LOG(log, "demangled oxcaml: {0} -> error: failed to demangle", M);
181+
}
182+
183+
return demangled_cstr;
184+
}
185+
170186
static char *GetRustV0DemangledStr(const char *M) {
171187
char *demangled_cstr = llvm::rustDemangle(M);
172188

@@ -242,6 +258,7 @@ bool Mangled::GetRichManglingInfo(RichManglingContext &context,
242258
}
243259
}
244260

261+
case eManglingSchemeOxCaml:
245262
case eManglingSchemeRustV0:
246263
case eManglingSchemeD:
247264
// Rich demangling scheme is not supported
@@ -275,6 +292,9 @@ ConstString Mangled::GetDemangledName() const {
275292
demangled_name = GetItaniumDemangledStr(mangled_name);
276293
break;
277294
}
295+
case eManglingSchemeOxCaml:
296+
demangled_name = GetOxCamlDemangledStr(mangled_name);
297+
break;
278298
case eManglingSchemeRustV0:
279299
demangled_name = GetRustV0DemangledStr(mangled_name);
280300
break;

lldb/source/Symbol/Symtab.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ static bool lldb_skip_name(llvm::StringRef mangled,
255255

256256
// No filters for this scheme yet. Include all names in indexing.
257257
case Mangled::eManglingSchemeMSVC:
258+
case Mangled::eManglingSchemeOxCaml:
258259
case Mangled::eManglingSchemeRustV0:
259260
case Mangled::eManglingSchemeD:
260261
return false;

llvm/include/llvm/Demangle/Demangle.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf,
5757
size_t *n_buf, int *status,
5858
MSDemangleFlags Flags = MSDF_None);
5959

60+
// Demangles an OxCaml mangled symbol.
61+
char *oxcamlDemangle(const char *MangledName);
62+
6063
// Demangles a Rust v0 mangled symbol.
6164
char *rustDemangle(const char *MangledName);
6265

llvm/lib/Demangle/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ add_llvm_component_library(LLVMDemangle
33
ItaniumDemangle.cpp
44
MicrosoftDemangle.cpp
55
MicrosoftDemangleNodes.cpp
6+
OxCamlDemangle.cpp
67
RustDemangle.cpp
78
DLangDemangle.cpp
89

llvm/lib/Demangle/Demangle.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ static bool isItaniumEncoding(const char *S) {
1919
return std::strncmp(S, "_Z", 2) == 0 || std::strncmp(S, "___Z", 4) == 0;
2020
}
2121

22+
// An OxCaml mangled name is recognised by its two-character "_O" prefix.
static bool isOxCamlEncoding(const std::string &S) {
  if (S.size() < 2)
    return false;
  return S.compare(0, 2, "_O") == 0;
}
24+
2225
static bool isRustEncoding(const char *S) { return S[0] == '_' && S[1] == 'R'; }
2326

2427
static bool isDLangEncoding(const std::string &MangledName) {
@@ -54,6 +57,8 @@ bool llvm::nonMicrosoftDemangle(const char *MangledName, std::string &Result) {
5457
Demangled = rustDemangle(MangledName);
5558
else if (isDLangEncoding(MangledName))
5659
Demangled = dlangDemangle(MangledName);
60+
else if (isOxCamlEncoding(MangledName))
61+
Demangled = oxcamlDemangle(MangledName);
5762

5863
if (!Demangled)
5964
return false;
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
//===--- OxCamlDemangle.cpp -------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines a demangler for the new mangling scheme devised for OxCaml.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include <cassert>
14+
15+
#include "llvm/Demangle/Demangle.h"
16+
#include "llvm/Demangle/StringView.h"
17+
#include "llvm/Demangle/Utility.h"
18+
19+
using llvm::itanium_demangle::OutputBuffer;
20+
using llvm::itanium_demangle::StringView;
21+
22+
// Sentinel (all bits set) that the Consume* number parsers return to signal a
// parse failure; callers compare the parsed value against it.
#define ERROR (~((unsigned)0))
23+
24+
// Parse the run of ASCII decimal digits at the front of sv and drop that run
// from the view.
//
// Returns the parsed value, or ERROR when sv does not start with a digit or
// when the number is too large to be represented (a parsed value equal to the
// ERROR sentinel is, by construction, also reported as ERROR).
static unsigned ConsumeUnsignedDecimal(StringView& sv) {
  unsigned res = 0;
  size_t i = 0;
  bool overflow = false;
  // Bound the scan by size(): sv may be a sub-view, so we must not rely on a
  // non-digit byte happening to follow it in memory.
  while(i < sv.size() && sv[i] >= '0' && sv[i] <= '9') {
    unsigned digit = sv[i] - '0';
    // res * 10 + digit would wrap around; remember the failure but keep
    // consuming so the whole digit run is still removed from sv.
    if(res > (ERROR - digit) / 10)
      overflow = true;
    else
      res = res * 10 + digit;
    i++;
  }
  sv = sv.dropFront(i);
  if(i == 0 || overflow)
    return ERROR;
  return res;
}
35+
36+
// Parse the run of uppercase letters at the front of sv as a base-26 number
// ('A' = 0 ... 'Z' = 25, most significant digit first) and drop that run from
// the view.
//
// Returns the parsed value, or ERROR when sv does not start with an uppercase
// letter or when the number is too large to be represented (a parsed value
// equal to the ERROR sentinel is, by construction, also reported as ERROR).
static unsigned ConsumeUnsigned26(StringView& sv) {
  unsigned res = 0;
  size_t i = 0;
  bool overflow = false;
  // Bound the scan by size(): this is called on sub-views, so we must not rely
  // on a non-letter byte happening to follow the view in memory.
  while(i < sv.size() && sv[i] >= 'A' && sv[i] <= 'Z') {
    unsigned digit = sv[i] - 'A';
    // res * 26 + digit would wrap around; remember the failure but keep
    // consuming so the whole letter run is still removed from sv.
    if(res > (ERROR - digit) / 26)
      overflow = true;
    else
      res = res * 26 + digit;
    i++;
  }
  sv = sv.dropFront(i);
  if(i == 0 || overflow)
    return ERROR;
  return res;
}
47+
48+
// True when c is a lowercase hexadecimal digit: '0'-'9' or 'a'-'f'.
static bool islowerhex(char c) {
  const bool decimal = '0' <= c && c <= '9';
  const bool letter = 'a' <= c && c <= 'f';
  return decimal || letter;
}
51+
52+
// Numeric value (0-15) of the lowercase hexadecimal digit c.
// The caller must have validated c (see islowerhex); this is only asserted.
static unsigned lowerhex(char c) {
  const bool decimal = c >= '0' && c <= '9';
  if(!decimal)
    assert(c >= 'a' && c <= 'f');
  return decimal ? c - '0' : c - 'a' + 10;
}
60+
61+
// Decode unicode-escaped identifier (format: u<len><coded>_<raw>)
62+
// Returns true on success, false on error
63+
static bool DecodeUnicodeEscaped(StringView& Mangled, OutputBuffer& Demangled) {
64+
unsigned len = ConsumeUnsignedDecimal(Mangled);
65+
if(len == ERROR || len <= 0 || len > Mangled.size())
66+
return false;
67+
68+
size_t split = Mangled.find('_');
69+
if(split >= len)
70+
return false;
71+
72+
StringView coded = Mangled.substr(0, split);
73+
StringView raw = Mangled.substr(split+1, len-split-1);
74+
75+
while(!coded.empty()) {
76+
unsigned chunklen = ConsumeUnsigned26(coded);
77+
if(chunklen == ERROR || chunklen > raw.size())
78+
return false;
79+
Demangled << raw.substr(0, chunklen);
80+
raw = raw.dropFront(chunklen);
81+
82+
unsigned i;
83+
for(i = 0; i+1 < coded.size() && islowerhex(coded[i]); i+=2) {
84+
if(!islowerhex(coded[i+1]))
85+
return false;
86+
char c = (char)(lowerhex(coded[i]) << 4 | lowerhex(coded[i+1]));
87+
Demangled << c;
88+
}
89+
coded = coded.dropFront(i);
90+
}
91+
92+
if(!raw.empty())
93+
Demangled << raw;
94+
95+
Mangled = Mangled.dropFront(len);
96+
return true;
97+
}
98+
99+
// Decode identifier (either plain or unicode-escaped)
100+
// Handles: <len><text> or u<len><coded>_<raw>
101+
// Returns true on success, false on error
102+
static bool DecodeIdentifier(StringView& Mangled, OutputBuffer& Demangled) {
103+
if(Mangled.consumeFront('u')) {
104+
// Unicode-escaped identifier
105+
return DecodeUnicodeEscaped(Mangled, Demangled);
106+
} else {
107+
// Plain identifier with length prefix
108+
unsigned len = ConsumeUnsignedDecimal(Mangled);
109+
if(len == ERROR || len <= 0 || len > Mangled.size())
110+
return false;
111+
Demangled << Mangled.substr(0, len);
112+
Mangled = Mangled.dropFront(len);
113+
return true;
114+
}
115+
}
116+
117+
// Decode anonymous location (format: filename_line_col)
118+
// Anonymous functions/modules are encoded as: fn(filename:line:col)
119+
// Returns true on success, false on error
120+
static bool DecodeAnonymousLocation(StringView& Mangled, OutputBuffer& Demangled) {
121+
// Allocate temporary buffer based on remaining mangled string size
122+
// The decoded identifier will be at most the size of the remaining mangled string
123+
size_t buffer_size = Mangled.size();
124+
if(buffer_size == 0)
125+
return false;
126+
127+
char *temp_buf = static_cast<char *>(std::malloc(buffer_size));
128+
if(temp_buf == nullptr)
129+
std::terminate();
130+
131+
OutputBuffer TempDemangled(temp_buf, buffer_size);
132+
133+
if(!DecodeIdentifier(Mangled, TempDemangled)) {
134+
std::free(temp_buf);
135+
return false;
136+
}
137+
138+
size_t temp_len = TempDemangled.getCurrentPosition();
139+
140+
// Parse filename_line_col format by finding the last two underscores
141+
size_t first_underscore = 0, second_underscore = 0;
142+
int underscore_count = 0;
143+
144+
for(size_t j = temp_len; j > 0; j--) {
145+
if(temp_buf[j-1] == '_') {
146+
underscore_count++;
147+
if(underscore_count == 1)
148+
second_underscore = j - 1;
149+
else if(underscore_count == 2) {
150+
first_underscore = j - 1;
151+
break;
152+
}
153+
}
154+
}
155+
156+
// Output in format fn(filename:line:col)
157+
if(underscore_count >= 2) {
158+
Demangled << "fn(";
159+
for(size_t j = 0; j < first_underscore; j++)
160+
Demangled << temp_buf[j];
161+
Demangled << ':';
162+
for(size_t j = first_underscore + 1; j < second_underscore; j++)
163+
Demangled << temp_buf[j];
164+
Demangled << ':';
165+
for(size_t j = second_underscore + 1; j < temp_len; j++)
166+
Demangled << temp_buf[j];
167+
Demangled << ')';
168+
} else {
169+
// Fallback: just output the identifier as-is
170+
for(size_t j = 0; j < temp_len; j++)
171+
Demangled << temp_buf[j];
172+
}
173+
174+
std::free(temp_buf);
175+
return true;
176+
}
177+
178+
char *llvm::oxcamlDemangle(const char *MangledName) {
179+
StringView Mangled(MangledName);
180+
if(!Mangled.consumeFront("_O"))
181+
return nullptr;
182+
183+
// Allocate the buffer at a reasonable size, as OutputBuffer allocates 992
184+
// bytes when starting from an empty buffer
185+
char *DemangledBuffer;
186+
DemangledBuffer = static_cast<char *>(std::malloc(Mangled.size()));
187+
if (DemangledBuffer == nullptr)
188+
std::terminate();
189+
OutputBuffer Demangled(DemangledBuffer, Mangled.size());
190+
191+
#define ENDONERROR() do { \
192+
std::free(Demangled.getBuffer()); \
193+
return nullptr; \
194+
} while(0)
195+
196+
// Parse path items
197+
while(!Mangled.empty()) {
198+
// Check for terminating underscore
199+
if(Mangled[0] == '_') {
200+
// End of symbol path, rest is unique id
201+
break;
202+
}
203+
204+
// Handle each path_item type
205+
switch(Mangled[0]) {
206+
case 'M': // Module
207+
if(!Demangled.empty())
208+
Demangled << '.';
209+
Mangled = Mangled.dropFront(1);
210+
if(!DecodeIdentifier(Mangled, Demangled))
211+
ENDONERROR();
212+
break;
213+
214+
case 'F': // NamedFunction
215+
if(!Demangled.empty())
216+
Demangled << '.';
217+
Mangled = Mangled.dropFront(1);
218+
if(!DecodeIdentifier(Mangled, Demangled))
219+
ENDONERROR();
220+
break;
221+
222+
case 'L': // AnonymousFunction
223+
if(!Demangled.empty())
224+
Demangled << '.';
225+
Mangled = Mangled.dropFront(1);
226+
if(!DecodeAnonymousLocation(Mangled, Demangled))
227+
ENDONERROR();
228+
break;
229+
230+
case 'S': // AnonymousModule
231+
if(!Demangled.empty())
232+
Demangled << '.';
233+
Mangled = Mangled.dropFront(1);
234+
if(!DecodeAnonymousLocation(Mangled, Demangled))
235+
ENDONERROR();
236+
break;
237+
238+
case 'P': // PartialFunction (no dot separator)
239+
Mangled = Mangled.dropFront(1);
240+
Demangled << "(partially_applied)";
241+
break;
242+
243+
default:
244+
// No prefix means Module (legacy compatibility)
245+
if(!Demangled.empty())
246+
Demangled << '.';
247+
if(!DecodeIdentifier(Mangled, Demangled))
248+
ENDONERROR();
249+
break;
250+
}
251+
}
252+
253+
Demangled << '\0';
254+
255+
return Demangled.getBuffer();
256+
}

llvm/unittests/Demangle/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ add_llvm_unittest(DemangleTests
1010
OutputBufferTest.cpp
1111
PartialDemangleTest.cpp
1212
RustDemangleTest.cpp
13+
OxCamlDemangleTest.cpp
1314
StringViewTest.cpp
1415
)

0 commit comments

Comments
 (0)