@@ -86,7 +86,8 @@ const word = choiceOnlyOne(latinWord, singleUcsurWord);
86
86
const properWords = allAtLeastOnce(
  // One proper word: an uppercase latin letter followed by any number of
  // latin letters; the `spaces` parser after each word is skipped.
  match(/[A-Z][a-zA-Z]*/, "proper word").skip(spaces),
)
  // Consecutive proper words are merged into one space-separated string.
  .map((array) => array.join(" "))
  // The merged string is wrapped as a "proper word" token of kind "latin".
  .map<Token>((words) => ({ type: "proper word", words, kind: "latin" }));
90
91
/** Parses a specific word, either UCSUR or latin. */
91
92
function specificWord ( thatWord : string ) : Parser < string > {
92
93
return word . filter ( ( thisWord ) => {
@@ -98,13 +99,16 @@ function specificWord(thatWord: string): Parser<string> {
98
99
} ) ;
99
100
}
100
101
/**
 * Parses multiple a.
 *
 * Matches one "a" followed by at least one more "a" and produces a
 * "multiple a" token whose `count` covers every "a" that was matched.
 */
const multipleA = sequence(
  specificWord("a"),
  count(allAtLeastOnce(specificWord("a"))),
)
  // `rest` comes from the second parser only (presumably the number of
  // additional "a" words), so the leading "a" is added back with `+ 1`.
  // The parameter is not named `count` to avoid shadowing the combinator.
  .map<Token>(([_, rest]) => ({ type: "multiple a", count: rest + 1 }));
103
107
/** Parses lengthened words. */
104
108
const longWord = choiceOnlyOne ( matchString ( "a" ) , matchString ( "n" ) )
105
109
. then ( ( word ) =>
106
110
count ( allAtLeastOnce ( matchString ( word ) ) )
107
- . map < Token & { type : "long word" } > ( ( count ) => ( {
111
+ . map < Token > ( ( count ) => ( {
108
112
type : "long word" ,
109
113
word,
110
114
length : count + 1 ,
@@ -124,7 +128,8 @@ const xAlaX = lazy(() => {
124
128
sequence ( specificWord ( "ala" ) , specificWord ( word ) ) . map ( ( ) => word )
125
129
) ;
126
130
}
127
- } ) ;
131
+ } )
132
+ . map < Token > ( ( word ) => ( { type : "x ala x" , word } ) ) ;
128
133
129
134
Parser . endCache ( ) ;
130
135
@@ -139,7 +144,8 @@ const punctuation = choiceOnlyOne(
139
144
)
140
145
. skip ( spaces ) ,
141
146
newline . map ( ( ) => "." ) ,
142
- ) ;
147
+ )
148
+ . map < Token > ( ( punctuation ) => ( { type : "punctuation" , punctuation } ) ) ;
143
149
/**
144
150
* Parses cartouche element and returns the phonemes or letters it represents.
145
151
*/
@@ -181,7 +187,13 @@ const cartouche = sequence(
181
187
return `${ word [ 0 ] . toUpperCase ( ) } ${ word . slice ( 1 ) } ` ;
182
188
} ) ;
183
189
/**
 * Parses multiple cartouches.
 *
 * One or more consecutive cartouches are joined with single spaces and
 * produced as one "proper word" token of kind "cartouche".
 */
const cartouches = allAtLeastOnce(cartouche)
  .map((words) => words.join(" "))
  .map<Token>((words) => ({
    type: "proper word",
    words,
    kind: "cartouche",
  }));
185
197
/**
186
198
* Parses long glyph container.
187
199
*
@@ -243,32 +255,31 @@ const insideLongGlyph = specificSpecialUcsur(END_OF_REVERSE_LONG_GLYPH)
243
255
. skip ( specificSpecialUcsur ( START_OF_LONG_GLYPH ) )
244
256
. skip ( spaces )
245
257
. map < Token > ( ( words ) => ( { type : "inside long glyph" , words } ) ) ;
258
/**
 * Token form of `combinedGlyphs`: skips the `spaces` parser after the glyphs
 * and wraps the result as a "combined glyphs" token.
 */
const combinedGlyphsToken = combinedGlyphs
  .skip(spaces)
  .map<Token>((words) => ({ type: "combined glyphs", words }));
261
/** Token form of `word`: wraps the parsed word as a "word" token. */
const wordToken = word.map<Token>((parsed) => ({ type: "word", word: parsed }));
246
262
247
263
Parser . startCache ( cache ) ;
248
264
249
265
/**
 * Parses a token.
 *
 * Combines every token-level parser through `choiceOnlyOne`. Each
 * alternative already produces a `Token`, so no per-alternative mapping is
 * done here.
 */
export const token = choiceOnlyOne(
  longWord,
  xAlaX,
  multipleA,
  wordToken,
  properWords,
  // UCSUR only
  spaceLongGlyph,
  headedLongGlyphStart,
  combinedGlyphsToken,
  // starting with non-words:
  punctuation,
  headlessLongGlyphEnd,
  headedLongGlyphEnd,
  headlessLongGlyphStart,
  insideLongGlyph,
  cartouches,
);
273
284
274
285
Parser . endCache ( ) ;
0 commit comments