Skip to content

Commit 05cc6bc

Browse files
erik-krogh authored and smowton committed
adjust regexp libraries to how unpaired surrogates are now parsed
1 parent f24d7c4 commit 05cc6bc

File tree

5 files changed

+5
-8
lines changed

5 files changed

+5
-8
lines changed

javascript/ql/lib/semmle/javascript/Regexp.qll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -258,8 +258,8 @@ class RegExpConstant extends RegExpTerm, @regexp_constant {
258258
class RegExpCharEscape extends RegExpEscape, RegExpConstant, @regexp_char_escape {
259259
override predicate isCharacter() {
260260
not (
261-
// unencodable characters are represented as '?' in the database
262-
getValue() = "?" and
261+
// unencodable characters are represented as '?' or \uFFFD in the database
262+
getValue() = ["?", 65533.toUnicode()] and
263263
exists(string s | s = toString().toLowerCase() |
264264
// only Unicode escapes give rise to unencodable characters
265265
s.matches("\\\\u%") and

javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,6 @@
145145
| tst.js:257:14:257:116 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
146146
| tst.js:260:14:260:77 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
147147
| tst.js:260:68:260:70 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of 'aquerythis'. |
148-
| tst.js:266:18:266:49 | ([\\uDC66\\uDC67]\|[\\uDC68\\uDC69])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
149-
| tst.js:269:18:269:51 | ((\\uDC66\|\\uDC67)\|(\\uDC68\|\\uDC69))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
150148
| tst.js:272:21:272:22 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
151149
| tst.js:275:38:275:40 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<a a=' and containing many repetitions of '"" a='. |
152150
| tst.js:281:16:281:17 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

javascript/ql/test/query-tests/Performance/ReDoS/tst.js

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -262,10 +262,10 @@ var bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
262262
// GOOD
263263
var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
264264

265-
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
265+
// GOOD
266266
var good28 = /foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo/
267267

268-
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
268+
// GOOD
269269
var good29 = /foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo/
270270

271271
// NOT GOOD (but cannot currently construct a prefix)

javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/DuplicateCharacterInCharacterClass.expected

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
| tst.js:1:4:1:4 | o | Character 'o' is repeated $@ in the same character class. | tst.js:1:5:1:5 | o | here |
2-
| tst.js:3:3:3:8 | \\uDC3A | Character '\\uDC3A' is repeated $@ in the same character class. | tst.js:3:9:3:14 | \\uDC3C | here |
32
| tst.js:4:3:4:3 | ? | Character '?' is repeated $@ in the same character class. | tst.js:4:4:4:4 | ? | here |
43
| tst.js:5:3:5:8 | \\u003F | Character '\\u003F' is repeated $@ in the same character class. | tst.js:5:9:5:14 | \\u003f | here |
54
| tst.js:6:3:6:8 | \\u003F | Character '\\u003F' is repeated $@ in the same character class. | tst.js:6:9:6:9 | ? | here |

javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/[foo]/;
22
/[a-zc]/;
3-
/[\uDC3A\uDC3C]/; // False positive caused by the extractor converting both unpaired surrogates to \uFFFD
3+
/[\uDC3A\uDC3C]/;
44
/[??]/;
55
/[\u003F\u003f]/;
66
/[\u003F?]/;

0 commit comments

Comments
 (0)