12
12
13
13
#include " swift/Demangling/Punycode.h"
14
14
#include " swift/Demangling/ManglingUtils.h"
15
+ #include < limits>
15
16
#include < vector>
16
17
#include < cstdint>
17
18
@@ -49,7 +50,7 @@ static int digit_index(char value) {
49
50
/// Returns true if \p S is a code point value this Punycode implementation
/// accepts.
///
/// Two ranges are accepted:
///  - every value below 0xD880. That is the ordinary scalars 0x0000-0xD7FF
///    plus 0xD800-0xD87F, the slice of the surrogate block that
///    convertUTF8toUTF32 uses to carry non-symbol ASCII characters (it stores
///    such a byte as `first + 0xD800`, and `first` is below 0x80 there);
///  - 0xE000-0x10FFFF, the scalars above the surrogate block.
///
/// The upper bound is 0x10FFFF, the largest Unicode code point. The previous
/// bound of 0x1FFFFF let through values that are not Unicode scalars at all.
static bool isValidUnicodeScalar(uint32_t S) {
  // Below 0xD880: regular scalars and the 0xD800-0xD87F escape range.
  if (S < 0xD880)
    return true;
  // Remaining surrogates (0xD880-0xDFFF) are rejected; everything from
  // 0xE000 up to the Unicode maximum is accepted.
  return S >= 0xE000 && S <= 0x10FFFF;
}
54
55
55
56
// Section 6.1: Bias adaptation function
@@ -88,7 +89,7 @@ bool Punycode::decodePunycode(StringRef InputPunycode,
88
89
for (char c : InputPunycode.slice (0 , lastDelimiter)) {
89
90
// fail on any non-basic code point
90
91
if (static_cast <unsigned char >(c) > 0x7f )
91
- return true ;
92
+ return false ;
92
93
OutCodePoints.push_back (c);
93
94
}
94
95
// if more than zero code points were consumed then consume one more
@@ -103,28 +104,37 @@ bool Punycode::decodePunycode(StringRef InputPunycode,
103
104
for (int k = base; ; k += base) {
104
105
// consume a code point, or fail if there was none to consume
105
106
if (InputPunycode.empty ())
106
- return true ;
107
+ return false ;
107
108
char codePoint = InputPunycode.front ();
108
109
InputPunycode = InputPunycode.slice (1 , InputPunycode.size ());
109
110
// let digit = the code point's digit-value, fail if it has none
110
111
int digit = digit_index (codePoint);
111
112
if (digit < 0 )
112
- return true ;
113
+ return false ;
113
114
115
+ // Fail if i + (digit * w) would overflow
116
+ if (digit > (std::numeric_limits<int >::max () - i) / w)
117
+ return false ;
114
118
i = i + digit * w;
115
119
int t = k <= bias ? tmin
116
120
: k >= bias + tmax ? tmax
117
121
: k - bias;
118
122
if (digit < t)
119
123
break ;
124
+ // Fail if w * (base - t) would overflow
125
+ if (w > std::numeric_limits<int >::max () / (base - t))
126
+ return false ;
120
127
w = w * (base - t);
121
128
}
122
129
bias = adapt (i - oldi, OutCodePoints.size () + 1 , oldi == 0 );
130
+ // Fail if n + i / (OutCodePoints.size() + 1) would overflow
131
+ if (i / (OutCodePoints.size () + 1 ) > std::numeric_limits<int >::max () - n)
132
+ return false ;
123
133
n = n + i / (OutCodePoints.size () + 1 );
124
134
i = i % (OutCodePoints.size () + 1 );
125
135
// if n is a basic code point then fail
126
136
if (n < 0x80 )
127
- return true ;
137
+ return false ;
128
138
// insert n into output at position i
129
139
OutCodePoints.insert (OutCodePoints.begin () + i, n);
130
140
++i;
@@ -168,11 +178,17 @@ bool Punycode::encodePunycode(const std::vector<uint32_t> &InputCodePoints,
168
178
if (codePoint >= n && codePoint < m)
169
179
m = codePoint;
170
180
}
171
-
181
+
182
+ if ((m - n) > (std::numeric_limits<int >::max () - delta) / (h + 1 ))
183
+ return false ;
172
184
delta = delta + (m - n) * (h + 1 );
173
185
n = m;
174
186
for (auto c : InputCodePoints) {
175
- if (c < n) ++delta;
187
+ if (c < n) {
188
+ if (delta == std::numeric_limits<int >::max ())
189
+ return false ;
190
+ ++delta;
191
+ }
176
192
if (c == n) {
177
193
int q = delta;
178
194
for (int k = base; ; k += base) {
@@ -285,11 +301,12 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
285
301
auto end = InputUTF8.end ();
286
302
while (ptr < end) {
287
303
uint8_t first = *ptr++;
304
+ uint32_t code_point = 0 ;
288
305
if (first < 0x80 ) {
289
306
if (Mangle::isValidSymbolChar (first) || !mapNonSymbolChars) {
290
- OutUTF32. push_back ( first) ;
307
+ code_point = first;
291
308
} else {
292
- OutUTF32. push_back (( uint32_t )first + 0xD800 ) ;
309
+ code_point = ( uint32_t )first + 0xD800 ;
293
310
}
294
311
} else if (first < 0xC0 ) {
295
312
// Invalid continuation byte.
@@ -301,7 +318,7 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
301
318
uint8_t second = *ptr++;
302
319
if (!isContinuationByte (second))
303
320
return false ;
304
- OutUTF32. push_back ((( first & 0x1F ) << 6 ) | (second & 0x3F ) );
321
+ code_point = (( first & 0x1F ) << 6 ) | (second & 0x3F );
305
322
} else if (first < 0xF0 ) {
306
323
// Three-byte sequence.
307
324
if (end - ptr < 2 )
@@ -310,8 +327,9 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
310
327
uint8_t third = *ptr++;
311
328
if (!isContinuationByte (second) || !isContinuationByte (third))
312
329
return false ;
313
- OutUTF32.push_back (((first & 0xF ) << 12 ) | ((second & 0x3F ) << 6 )
314
- | ( third & 0x3F ));
330
+ code_point = ((first & 0xF ) << 12 )
331
+ | ((second & 0x3F ) << 6 )
332
+ | ( third & 0x3F );
315
333
} else if (first < 0xF8 ) {
316
334
// Four-byte sequence.
317
335
if (end - ptr < 3 )
@@ -322,13 +340,17 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
322
340
if (!isContinuationByte (second) || !isContinuationByte (third)
323
341
|| !isContinuationByte (fourth))
324
342
return false ;
325
- OutUTF32.push_back (((first & 0x7 ) << 18 ) | ((second & 0x3F ) << 12 )
326
- | ((third & 0x3F ) << 6 )
327
- | ( fourth & 0x3F ));
343
+ code_point = ((first & 0x7 ) << 18 )
344
+ | ((second & 0x3F ) << 12 )
345
+ | ((third & 0x3F ) << 6 )
346
+ | ( fourth & 0x3F );
328
347
} else {
329
348
// Unused sequence length.
330
349
return false ;
331
350
}
351
+ if (!isValidUnicodeScalar (code_point))
352
+ return false ;
353
+ OutUTF32.push_back (code_point);
332
354
}
333
355
return true ;
334
356
}
0 commit comments