Skip to content

Commit e5d9b9d

Browse files
committed
memoize partial parsers
1 parent 8c1cb74 commit e5d9b9d

File tree

2 files changed

+33
-24
lines changed

2 files changed

+33
-24
lines changed

src/parser/lexer.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { sumOf } from "@std/collections/sum-of";
2+
import { throwError } from "../../misc/misc.ts";
23
import { settings } from "../settings.ts";
34
import {
45
all,
@@ -39,7 +40,7 @@ import {
3940
UCSUR_CHARACTER_REGEX,
4041
UCSUR_TO_LATIN,
4142
} from "./ucsur.ts";
42-
import { throwError } from "../../misc/misc.ts";
43+
import { memoize } from "@std/cache/memoize";
4344

4445
const spacesWithoutNewline = match(/[^\S\n]*?(?=\S|\r?\n|$)/, "spaces");
4546
const newline = match(/\r?\n\s*/, "newline");
@@ -80,10 +81,10 @@ const multipleA = specificWord("a")
8081
.with(count(allAtLeastOnce(specificWord("a"))))
8182
.map((count) => ({ type: "multiple a", count: count + 1 }) as const);
8283
const repeatingLetter = match(/[a-zA-Z]/, "latin letter")
83-
.then((letter) =>
84+
.then(memoize((letter) =>
8485
count(all(matchString(letter)))
8586
.map((count) => [letter, count + 1] as const)
86-
);
87+
));
8788
const longWord = allAtLeastOnce(repeatingLetter)
8889
.skip(spaces)
8990
.map((letters) => {
@@ -95,9 +96,9 @@ const longWord = allAtLeastOnce(repeatingLetter)
9596
.filter(({ length }) => length > 1);
9697
const xAlaX = lazy(() =>
9798
settings.xAlaXPartialParsing ? empty : word
98-
.then((word) =>
99+
.then(memoize((word) =>
99100
sequence(specificWord("ala"), specificWord(word)).map(() => word)
100-
)
101+
))
101102
)
102103
.map((word) => ({ type: "x ala x", word }) as const);
103104
const punctuation = choiceOnlyOne(

src/parser/parser.ts

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
import { memoize } from "@std/cache/memoize";
2+
import {
3+
lazy as lazyEval,
4+
nullableAsArray,
5+
throwError,
6+
} from "../../misc/misc.ts";
27
import {
38
contentWordSet,
49
fillerSet,
@@ -7,7 +12,6 @@ import {
712
preverbSet,
813
tokiPonaWordSet,
914
} from "../dictionary.ts";
10-
import { nullableAsArray, throwError } from "../../misc/misc.ts";
1115
import {
1216
Clause,
1317
ContextClause,
@@ -58,7 +62,6 @@ import {
5862
UnrecognizedError,
5963
} from "./parser_lib.ts";
6064
import { describe, Token } from "./token.ts";
61-
import { lazy as lazyEval } from "../../misc/misc.ts";
6265

6366
const spaces = match(/\s*/, "spaces");
6467

@@ -125,6 +128,15 @@ const emphasis = choice<Emphasis>(
125128
specificWord("a").map((word) => ({ type: "word", word })),
126129
);
127130
const optionalEmphasis = optional(emphasis);
131+
const alaXLongGlyph = memoize((word: string) =>
132+
specificWord(word)
133+
.skip(specificToken("headless long glyph end"))
134+
.map(() => ({ type: "x ala x", word }) as const)
135+
);
136+
const alaX = memoize((word: string) =>
137+
sequence(specificWord("ala"), specificWord(word))
138+
.map(() => ({ type: "x ala x", word }) as const)
139+
);
128140
function xAlaX(
129141
useWord: Set<string>,
130142
description: string,
@@ -136,34 +148,30 @@ function xAlaX(
136148
specificToken("inside long glyph")
137149
.filter(({ words }) => filterCombinedGlyphs(words, "ala")),
138150
)
139-
.then((word) =>
140-
specificWord(word)
141-
.skip(specificToken("headless long glyph end"))
142-
.map(() => ({ type: "x ala x", word }))
143-
),
151+
.then(alaXLongGlyph),
144152
specificToken("x ala x")
145153
.map(({ word }) => ({ type: "x ala x", word })),
146154
word
147-
.then((word) =>
148-
sequence(specificWord("ala"), specificWord(word))
149-
.map(() => ({ type: "x ala x", word }))
150-
),
155+
.then(alaX),
151156
);
152157
}
158+
const reduplicateRest = memoize((word: string) =>
159+
count(manyAtLeastOnce(specificWord(word)))
160+
.map((count) =>
161+
({
162+
type: "reduplication",
163+
word,
164+
count: count + 1,
165+
}) as const
166+
)
167+
);
153168
function simpleWordUnit(
154169
word: Set<string>,
155170
description: string,
156171
): Parser<SimpleHeadedWordUnit> {
157172
return choice<SimpleHeadedWordUnit>(
158173
wordFrom(word, description)
159-
.then((word) =>
160-
count(manyAtLeastOnce(specificWord(word)))
161-
.map((count) => ({
162-
type: "reduplication",
163-
word,
164-
count: count + 1,
165-
}))
166-
),
174+
.then(reduplicateRest),
167175
xAlaX(word, description),
168176
wordFrom(word, description)
169177
.map((word) => ({ type: "default", word })),

0 commit comments

Comments
 (0)