Skip to content

Commit 31dac9d

Browse files
authored
feat: add support v flag (#82)
1 parent 773e6f9 commit 31dac9d

File tree

62 files changed

+37303
-5330
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+37303
-5330
lines changed

README.md

+7-7
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,15 @@ Parse a regular expression literal.
7979
- **Return:**
8080
- The AST of the regular expression.
8181

82-
#### parser.parsePattern(source, start?, end?, uFlag?)
82+
#### parser.parsePattern(source, start?, end?, flags?)
8383

8484
Parse a regular expression pattern.
8585

8686
- **Parameters:**
8787
- `source` (`string`) The source code to parse. E.g. `"abc"`.
8888
- `start?` (`number`) The start index in the source code. Default is `0`.
8989
- `end?` (`number`) The end index in the source code. Default is `source.length`.
90-
- `uFlag?` (`boolean`) The flag to enable Unicode mode.
90+
- `flags?` (`{ unicode?: boolean, unicodeSets?: boolean }`) The flags to enable Unicode mode and Unicode Sets mode.
9191
- **Return:**
9292
- The AST of the regular expression pattern.
9393

@@ -118,15 +118,15 @@ Validate a regular expression literal.
118118
- `start?` (`number`) The start index in the source code. Default is `0`.
119119
- `end?` (`number`) The end index in the source code. Default is `source.length`.
120120

121-
#### validator.validatePattern(source, start, end, uFlag)
121+
#### validator.validatePattern(source, start, end, flags)
122122

123123
Validate a regular expression pattern.
124124

125125
- **Parameters:**
126126
- `source` (`string`) The source code to validate.
127127
- `start?` (`number`) The start index in the source code. Default is `0`.
128128
- `end?` (`number`) The end index in the source code. Default is `source.length`.
129-
- `uFlag?` (`boolean`) The flag to enable Unicode mode.
129+
- `flags?` (`{ unicode?: boolean, unicodeSets?: boolean }`) The flags to enable Unicode mode and Unicode Sets mode.
130130

131131
#### validator.validateFlags(source, start, end)
132132

@@ -172,6 +172,6 @@ Please use GitHub's Issues/PRs.
172172
- `npm run watch` runs tests with `--watch` option.
173173

174174
[`AST.Node`]: src/ast.ts#L4
175-
[`RegExpParser.Options`]: src/parser.ts#L539
176-
[`RegExpValidator.Options`]: src/validator.ts#L127
177-
[`RegExpVisitor.Handlers`]: src/visitor.ts#L204
175+
[`RegExpParser.Options`]: src/parser.ts#L743
176+
[`RegExpValidator.Options`]: src/validator.ts#L220
177+
[`RegExpVisitor.Handlers`]: src/visitor.ts#L291

scripts/update-fixtures.ts

+10
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,16 @@ for (const filename of Object.keys(Visitor.fixturesData)) {
4747
onCharacterClassEnter: enter,
4848
onCharacterClassRangeEnter: enter,
4949
onCharacterSetEnter: enter,
50+
onClassIntersectionEnter: enter,
51+
onClassStringDisjunctionEnter: enter,
52+
onClassSubtractionEnter: enter,
53+
onExpressionCharacterClassEnter: enter,
5054
onFlagsEnter: enter,
5155
onGroupEnter: enter,
5256
onPatternEnter: enter,
5357
onQuantifierEnter: enter,
5458
onRegExpLiteralEnter: enter,
59+
onStringAlternativeEnter: enter,
5560
onAlternativeLeave: leave,
5661
onAssertionLeave: leave,
5762
onBackreferenceLeave: leave,
@@ -60,11 +65,16 @@ for (const filename of Object.keys(Visitor.fixturesData)) {
6065
onCharacterClassLeave: leave,
6166
onCharacterClassRangeLeave: leave,
6267
onCharacterSetLeave: leave,
68+
onClassIntersectionLeave: leave,
69+
onClassStringDisjunctionLeave: leave,
70+
onClassSubtractionLeave: leave,
71+
onExpressionCharacterClassLeave: leave,
6372
onFlagsLeave: leave,
6473
onGroupLeave: leave,
6574
onPatternLeave: leave,
6675
onQuantifierLeave: leave,
6776
onRegExpLiteralLeave: leave,
77+
onStringAlternativeLeave: leave,
6878
})
6979

7080
fixture.patterns[pattern] = history

src/ast.ts

+143-6
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,16 @@ export type BranchNode =
1111
| CapturingGroup
1212
| CharacterClass
1313
| CharacterClassRange
14+
| ClassIntersection
15+
| ClassStringDisjunction
16+
| ClassSubtraction
17+
| ExpressionCharacterClass
1418
| Group
1519
| LookaroundAssertion
1620
| Pattern
1721
| Quantifier
1822
| RegExpLiteral
23+
| StringAlternative
1924

2025
/**
2126
* The type which includes all leaf nodes.
@@ -41,17 +46,29 @@ export type QuantifiableElement =
4146
| Character
4247
| CharacterClass
4348
| CharacterSet
49+
| ExpressionCharacterClass
4450
| Group
4551
| LookaheadAssertion
4652

4753
/**
4854
* The type which includes all character class atom nodes.
4955
*/
5056
export type CharacterClassElement =
57+
| ClassRangesCharacterClassElement
58+
| UnicodeSetsCharacterClassElement
59+
export type ClassRangesCharacterClassElement =
5160
| Character
5261
| CharacterClassRange
5362
| EscapeCharacterSet
5463
| UnicodePropertyCharacterSet
64+
export type UnicodeSetsCharacterClassElement =
65+
| Character
66+
| CharacterClassRange
67+
| ClassStringDisjunction
68+
| EscapeCharacterSet
69+
| ExpressionCharacterClass
70+
| UnicodePropertyCharacterSet
71+
| UnicodeSetsCharacterClass
5572

5673
/**
5774
* The type which defines common properties for all node types.
@@ -166,12 +183,35 @@ export interface Quantifier extends NodeBase {
166183
* The character class.
167184
* E.g. `[ab]`, `[^ab]`
168185
*/
169-
export interface CharacterClass extends NodeBase {
186+
export type CharacterClass =
187+
| ClassRangesCharacterClass
188+
| UnicodeSetsCharacterClass
189+
interface BaseCharacterClass extends NodeBase {
170190
type: "CharacterClass"
171-
parent: Alternative | Quantifier
191+
parent:
192+
| Alternative
193+
| ExpressionCharacterClass
194+
| Quantifier
195+
| UnicodeSetsCharacterClass
196+
unicodeSets: boolean
172197
negate: boolean
173198
elements: CharacterClassElement[]
174199
}
200+
export interface ClassRangesCharacterClass extends BaseCharacterClass {
201+
parent: Alternative | Quantifier
202+
unicodeSets: false
203+
elements: ClassRangesCharacterClassElement[]
204+
}
205+
/** UnicodeSetsCharacterClass is the CharacterClass used in Unicode sets mode, so it may contain strings. */
206+
export interface UnicodeSetsCharacterClass extends BaseCharacterClass {
207+
parent:
208+
| Alternative
209+
| ExpressionCharacterClass
210+
| Quantifier
211+
| UnicodeSetsCharacterClass
212+
unicodeSets: true
213+
elements: UnicodeSetsCharacterClassElement[]
214+
}
175215

176216
/**
177217
* The character class.
@@ -239,7 +279,12 @@ export interface AnyCharacterSet extends NodeBase {
239279
*/
240280
export interface EscapeCharacterSet extends NodeBase {
241281
type: "CharacterSet"
242-
parent: Alternative | CharacterClass | Quantifier
282+
parent:
283+
| Alternative
284+
| CharacterClass
285+
| ClassIntersection
286+
| ClassSubtraction
287+
| Quantifier
243288
kind: "digit" | "space" | "word"
244289
negate: boolean
245290
}
@@ -248,14 +293,98 @@ export interface EscapeCharacterSet extends NodeBase {
248293
* The unicode property escape.
249294
* E.g. `\p{ASCII}`, `\P{ASCII}`, `\p{Script=Hiragana}`
250295
*/
251-
export interface UnicodePropertyCharacterSet extends NodeBase {
296+
export type UnicodePropertyCharacterSet =
297+
| CharacterUnicodePropertyCharacterSet
298+
| StringsUnicodePropertyCharacterSet
299+
interface BaseUnicodePropertyCharacterSet extends NodeBase {
252300
type: "CharacterSet"
253-
parent: Alternative | CharacterClass | Quantifier
301+
parent:
302+
| Alternative
303+
| CharacterClass
304+
| ClassIntersection
305+
| ClassSubtraction
306+
| Quantifier
254307
kind: "property"
308+
strings: boolean
255309
key: string
256310
value: string | null
257311
negate: boolean
258312
}
313+
export interface CharacterUnicodePropertyCharacterSet
314+
extends BaseUnicodePropertyCharacterSet {
315+
strings: false
316+
value: string | null
317+
negate: boolean
318+
}
319+
/** StringsUnicodePropertyCharacterSet is a Unicode property escape that uses a property of strings. */
320+
export interface StringsUnicodePropertyCharacterSet
321+
extends BaseUnicodePropertyCharacterSet {
322+
strings: true
323+
value: null
324+
negate: false
325+
}
326+
327+
/**
328+
* The expression character class.
329+
* E.g. `[a--b]`, `[a&&b]`, `[^a--b]`, `[^a&&b]`
330+
*/
331+
export interface ExpressionCharacterClass extends NodeBase {
332+
type: "ExpressionCharacterClass"
333+
parent:
334+
| Alternative
335+
| ExpressionCharacterClass
336+
| Quantifier
337+
| UnicodeSetsCharacterClass
338+
negate: boolean
339+
expression: ClassIntersection | ClassSubtraction
340+
}
341+
342+
export type ClassSetOperand =
343+
| Character
344+
| ClassStringDisjunction
345+
| EscapeCharacterSet
346+
| ExpressionCharacterClass
347+
| UnicodePropertyCharacterSet
348+
| UnicodeSetsCharacterClass
349+
350+
/**
351+
* The character class intersection.
352+
* E.g. `a&&b`
353+
*/
354+
export interface ClassIntersection extends NodeBase {
355+
type: "ClassIntersection"
356+
parent: ClassIntersection | ExpressionCharacterClass
357+
left: ClassIntersection | ClassSetOperand
358+
right: ClassSetOperand
359+
}
360+
361+
/**
362+
* The character class subtraction.
363+
* E.g. `a--b`
364+
*/
365+
export interface ClassSubtraction extends NodeBase {
366+
type: "ClassSubtraction"
367+
parent: ClassSubtraction | ExpressionCharacterClass
368+
left: ClassSetOperand | ClassSubtraction
369+
right: ClassSetOperand
370+
}
371+
372+
/**
373+
* The character class string disjunction.
374+
* E.g. `\q{a|b}`
375+
*/
376+
export interface ClassStringDisjunction extends NodeBase {
377+
type: "ClassStringDisjunction"
378+
parent: ClassIntersection | ClassSubtraction | UnicodeSetsCharacterClass
379+
alternatives: StringAlternative[]
380+
}
381+
382+
/** StringAlternative is only used for `\q{alt}` ({@link ClassStringDisjunction}). */
383+
export interface StringAlternative extends NodeBase {
384+
type: "StringAlternative"
385+
parent: ClassStringDisjunction
386+
elements: Character[]
387+
}
259388

260389
/**
261390
* The character.
@@ -264,7 +393,14 @@ export interface UnicodePropertyCharacterSet extends NodeBase {
264393
*/
265394
export interface Character extends NodeBase {
266395
type: "Character"
267-
parent: Alternative | CharacterClass | CharacterClassRange | Quantifier
396+
parent:
397+
| Alternative
398+
| CharacterClass
399+
| CharacterClassRange
400+
| ClassIntersection
401+
| ClassSubtraction
402+
| Quantifier
403+
| StringAlternative
268404
value: number // a code point.
269405
}
270406

@@ -292,4 +428,5 @@ export interface Flags extends NodeBase {
292428
multiline: boolean
293429
sticky: boolean
294430
unicode: boolean
431+
unicodeSets: boolean
295432
}

src/ecma-versions.ts

+2
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ export type EcmaVersion =
99
| 2021
1010
| 2022
1111
| 2023
12+
| 2024
13+
export const latestEcmaVersion = 2024

0 commit comments

Comments
 (0)