@@ -53,11 +53,12 @@ test("escape spaces with backslashes", () => {
53
53
expect ( tokenizeArgs ( `command space\\ ` ) ) . toEqual ( [ "command" , "space " ] ) ;
54
54
} ) ;
55
55
56
// A <backslash><newline> pair is a line continuation: it is removed when it
// appears unquoted or inside double quotes, and kept literally inside single
// quotes (where the backslash has no special meaning).
test("remove escaped newlines outside of single quotes", () => {
  // unquoted: the continuation vanishes
  expect(tokenizeArgs(`command \\\nargument`)).toEqual(["command", `argument`]);
  // double quoted: the continuation vanishes as well
  expect(tokenizeArgs(`command "\\\nargument"`)).toEqual(["command", `argument`]);
  // single quoted: backslash and newline are both preserved
  expect(tokenizeArgs(`command '\\\nargument'`)).toEqual([
    "command",
    `\\\nargument`,
  ]);
});
63
64
@@ -116,3 +117,247 @@ test("empty command", () => {
116
117
expect ( tokenizeArgs ( `` ) ) . toEqual ( [ ] ) ;
117
118
expect ( tokenizeArgs ( ` ` ) ) . toEqual ( [ ] ) ;
118
119
} ) ;
120
+
121
+ // --------------------------------------------------
122
+ // Characters and character codes
123
+ // --------------------------------------------------
124
+
125
+ // special characters
126
// the escape character itself
const CHR_BS = "\\";

// UTF-16 code units of the characters that need special treatment
const ASC_NL: number = 0x0a; // newline
const ASC_DQ: number = 0x22; // double quote "
const ASC_SQ: number = 0x27; // single quote '
const ASC_DOLLAR: number = 0x24; // dollar sign $
const ASC_AT: number = 0x40; // at sign @
const ASC_BS: number = 0x5c; // backslash (same character as CHR_BS)
const ASC_BQ: number = 0x60; // backquote `

// Codes that produce no output character when preceded by a backslash,
// listed per quoting context. <backslash><newline> is a line continuation
// and is dropped entirely; an escaped single quote inside single quotes
// leaves only the backslash behind because the quote ends the quoting.
const skip_escaped_unquoted = [ASC_NL];
const skip_escaped_double = [ASC_NL];
const skip_escaped_single = [ASC_SQ];

// Codes whose backslash is removed inside double quotes; every other
// escaped character keeps its backslash in that context.
const escaped_double = [ASC_DQ, ASC_DOLLAR, ASC_BQ, ASC_BS];
142
+
143
+ // --------------------------------------------------
144
+ // Expected unescaped result crafted according to POSIX standard
145
+ // --------------------------------------------------
146
+
147
+ // characters escaped with a backslash in 2 parts
148
// Backslash-escaped input characters. They are kept in two parts so that the
// single-quoted argument can be re-opened after the escaped single quote.
let chars_escaped_1: string[] = [],
  chars_escaped_2: string[] = [];

// Expected output characters for each quoting context.
let chars_unescaped_unquoted: string[] = [],
  chars_unescaped_double: string[] = [],
  chars_unescaped_single: string[] = [];

// Argument strings handed to the tokenizer, one per quoting context.
let arg_string_unquoted: string,
  arg_string_double: string,
  arg_string_single: string;

// Expected token lists, one per quoting context.
let arg_tokens_unquoted: string[] = [],
  arg_tokens_double: string[] = [],
  arg_tokens_single: string[] = [];
163
+
164
// Records one character code for the escape tests.
//
// The backslash-escaped form of the character is appended to `escaped_chars`,
// and the text expected after tokenizing is appended to the module-level
// per-context arrays (unquoted, double quoted, single quoted).
function add_char_to_unescaped_arrays(ascii_code: number, escaped_chars: string[]) {
  const plain = String.fromCharCode(ascii_code);
  const with_backslash = CHR_BS + plain;
  escaped_chars.push(with_backslash);

  // unquoted: the backslash is always dropped; skipped codes yield nothing
  if (!skip_escaped_unquoted.includes(ascii_code)) {
    chars_unescaped_unquoted.push(plain);
  }

  // double quoted: only the codes in escaped_double lose their backslash;
  // skipped codes yield nothing
  if (!skip_escaped_double.includes(ascii_code)) {
    chars_unescaped_double.push(
      escaped_double.includes(ascii_code) ? plain : with_backslash
    );
  }

  // single quoted: everything is literal, except that a single quote ends
  // the quoting and therefore leaves just the backslash behind
  chars_unescaped_single.push(
    skip_escaped_single.includes(ascii_code) ? CHR_BS : with_backslash
  );
}
188
+
189
// Optional replacements for the generated expected token lists, one entry
// per quoting context. A missing entry means "use the generated list".
type Overrides = Partial<
  Record<"tokens_unquoted" | "tokens_double" | "tokens_single", string[]>
>;
194
+
195
// Fills the module-level test fixtures for the character codes start..end
// (both inclusive):
//   - arg_string_*  : the argument text to tokenize in each quoting context
//   - arg_tokens_*  : the token list expected from the tokenizer
//
// `overrides` may replace any of the generated expected token lists, e.g.
// with output recorded from a real shell.
//
// Every call resets all fixtures first, so the values are only valid until
// the next call.
function chars_escaped_test_generate_strings(start: number, end: number, overrides?: Overrides) {
  chars_escaped_1 = [];
  chars_escaped_2 = [];
  chars_unescaped_unquoted = [];
  chars_unescaped_double = [];
  chars_unescaped_single = [];
  arg_tokens_unquoted = [];
  arg_tokens_double = [];
  arg_tokens_single = [];

  const requested: Overrides = overrides ?? {};

  // First part: codes from `start` up to and including the single quote.
  // The upper bound is clamped so that a range ending below the single
  // quote is honored instead of silently being extended.
  const first_end = Math.min(ASC_SQ, end);
  for (let ascii_code = start; ascii_code <= first_end; ascii_code++) {
    add_char_to_unescaped_arrays(ascii_code, chars_escaped_1);
  }
  // Second part: codes after the single quote up to `end`. The lower bound
  // is clamped so that a range starting above the single quote is honored.
  const second_start = Math.max(ASC_SQ + 1, start);
  for (let ascii_code = second_start; ascii_code <= end; ascii_code++) {
    add_char_to_unescaped_arrays(ascii_code, chars_escaped_2);
  }

  const part_1 = chars_escaped_1.join("");
  const part_2 = chars_escaped_2.join("");

  arg_string_unquoted = part_1 + part_2;
  arg_string_double = '"' + part_1 + part_2 + '"';

  // A single quote cannot be a member of a single-quoted string, so the
  // escaped single quote at the end of the first part terminates the
  // quoting. In that case a single quote must be prepended to the second
  // part to re-open it. When the range does not contain the single quote,
  // nothing closes the quoting and no extra quote may be inserted.
  const has_single_quote = start <= ASC_SQ && ASC_SQ <= end;
  arg_string_single = "'" + part_1 + (has_single_quote ? "'" : "") + part_2 + "'";

  arg_tokens_unquoted = requested.tokens_unquoted ?? [chars_unescaped_unquoted.join("")];
  arg_tokens_double = requested.tokens_double ?? [chars_unescaped_double.join("")];
  arg_tokens_single = requested.tokens_single ?? [chars_unescaped_single.join("")];
}
232
+
233
+ // --------------------------------------------------
234
+ // Expected unescaped result generated by /bin/sh
235
+ // --------------------------------------------------
236
+
237
// Reference results, stored base64-encoded and decoded with atob().
// The commented-out generator in this file produces these literals by running
// /bin/sh on the argument strings for codes 1..127 and base64-encoding stdout.
// NOTE(review): the literals contain line breaks; this presumably relies on
// atob() ignoring ASCII whitespace - confirm for the runtime in use.
// Unquoted context: every backslash is removed and the escaped newline is absent.
+ let shell_arg_token_unquoted = atob ( `
238
+ AQIDBAUGBwgJCwwNDg8QERITFBUWFxgZGhscHR4fICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6
239
+ Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJz
240
+ dHV2d3h5ent8fX5/
241
+ ` ) ;
242
+
243
// Double-quoted context: most characters keep their backslash.
+ let shell_arg_token_double = atob ( `
244
+ XAFcAlwDXARcBVwGXAdcCFwJXAtcDFwNXA5cD1wQXBFcElwTXBRcFVwWXBdcGFwZXBpcG1wcXB1c
245
+ HlwfXCBcISJcIyRcJVwmXCdcKFwpXCpcK1wsXC1cLlwvXDBcMVwyXDNcNFw1XDZcN1w4XDlcOlw7
246
+ XDxcPVw+XD9cQFxBXEJcQ1xEXEVcRlxHXEhcSVxKXEtcTFxNXE5cT1xQXFFcUlxTXFRcVVxWXFdc
247
+ WFxZXFpcW1xcXVxeXF9gXGFcYlxjXGRcZVxmXGdcaFxpXGpca1xsXG1cblxvXHBccVxyXHNcdFx1
248
+ XHZcd1x4XHlcelx7XHxcfVx+XH8=
249
+ ` ) ;
250
+
251
// Single-quoted context: backslashes are kept, including the one before the newline.
+ let shell_arg_token_single = atob ( `
252
+ XAFcAlwDXARcBVwGXAdcCFwJXApcC1wMXA1cDlwPXBBcEVwSXBNcFFwVXBZcF1wYXBlcGlwbXBxc
253
+ HVweXB9cIFwhXCJcI1wkXCVcJlxcKFwpXCpcK1wsXC1cLlwvXDBcMVwyXDNcNFw1XDZcN1w4XDlc
254
+ Olw7XDxcPVw+XD9cQFxBXEJcQ1xEXEVcRlxHXEhcSVxKXEtcTFxNXE5cT1xQXFFcUlxTXFRcVVxW
255
+ XFdcWFxZXFpcW1xcXF1cXlxfXGBcYVxiXGNcZFxlXGZcZ1xoXGlcalxrXGxcbVxuXG9ccFxxXHJc
256
+ c1x0XHVcdlx3XHhceVx6XHtcfFx9XH5cfw==
+ ` ) ;
258
+
259
+ // function fold_string (str: string, width?: number) {
260
+ // let lines = [];
261
+ // if (typeof(width) === 'undefined') {
262
+ // width = 76;
263
+ // }
264
+ // while (str) {
265
+ // lines.push(str.substring(0, width));
266
+ // str = str.substring(width);
267
+ // }
268
+ // return lines.join("\n");
269
+ // }
270
+ //
271
+ // async function generate_shell_arg_tokens () {
272
+ // // result.stdout - the stdout as a string
273
+ // // result.stderr - the stderr as a string
274
+ // // result.exitCode - the process exit code as a number
275
+ // let result = await x('/bin/sh', ['-c', `pecho () { printf "%s" "\${*}"; }; pecho ` + arg_string_unquoted]);
276
+ // console.log('let shell_arg_token_unquoted = atob(`' + "\n" + fold_string(btoa(result.stdout)) + '`)');
277
+ // result = await x('/bin/sh', ['-c', `pecho () { printf "%s" "\${*}"; }; pecho ` + arg_string_double]);
278
+ // console.log('let shell_arg_token_double = atob(`' + "\n" + fold_string(btoa(result.stdout)) + '`)');
279
+ // result = await x('/bin/sh', ['-c', `pecho () { printf "%s" "\${*}"; }; pecho ` + arg_string_single]);
280
+ // console.log('let shell_arg_token_single = atob(`' + "\n" + fold_string(btoa(result.stdout)) + '`)');
281
+ // }
282
+
283
+ // import { x } from 'tinyexec';
284
+ // // tinyexec does not handle NUL in argument strings, ASCII codes > 127 are messed up by UTF-8 output
285
+ // chars_escaped_test_generate_strings(1, 127);
286
+ // await generate_shell_arg_tokens();
287
+
288
// Renders a list of strings as a readable multi-line dump so that a failing
// comparison shows exactly which character differs.
//
// Each string starts with a separator line, followed by one line per
// character of the form "<char> <code>". A backslash is treated as a prefix
// and printed together with the character that follows it. Control codes
// (< 32) are shown in caret notation, codes >= 127 as \xHH. A trailing lone
// backslash gets a line of its own.
function pretty_print_character_string_array(char_string_array: string[]) {
  const lines: string[] = [];

  char_string_array.forEach((text) => {
    lines.push("--------------------------------------------------");

    // holds a backslash that is waiting for the character it escapes
    let pending = "";

    // split("") walks UTF-16 code units, exactly like indexing the string
    for (const unit of text.split("")) {
      if (pending === "" && unit === CHR_BS) {
        pending = unit;
        continue;
      }

      const code = unit.charCodeAt(0);
      let shown = unit;
      if (code < 32) {
        shown = "^" + String.fromCharCode(ASC_AT + code);
      } else if (code >= 127) {
        shown = "\\x" + code.toString(16).toUpperCase();
      }

      lines.push(pending + shown + " " + code.toString());
      pending = "";
    }

    if (pending) {
      lines.push(pending);
    }
  });

  return lines.join("\n");
}
316
+
317
// Registers three tests (unquoted, double-quoted and single-quoted context)
// that tokenize every backslash-escaped character in start..end and compare
// the result with the expected tokens.
//
// `suffix` is appended to the test names (separated by a space) to tell
// several invocations apart; `overrides` replaces generated expectations.
function chars_escaped_test(start: number, end: number, suffix: string, overrides?: Overrides) {
  chars_escaped_test_generate_strings(start, end, overrides);

  // The generator writes to module-level variables that are overwritten by
  // the next invocation, while test callbacks run only after all tests have
  // been registered. Capture the values now, so that every test checks the
  // fixtures of its own invocation instead of those of the last one.
  const cases = [
    {
      context: "outside quoting context",
      input: arg_string_unquoted,
      expected: arg_tokens_unquoted,
    },
    {
      context: "in double quoting context",
      input: arg_string_double,
      expected: arg_tokens_double,
    },
    {
      context: "in single quoting context",
      input: arg_string_single,
      expected: arg_tokens_single,
    },
  ];

  const label = suffix ? " " + suffix : suffix;

  for (const { context, input, expected } of cases) {
    test("all escaped characters " + context + label, () => {
      // compare the pretty-printed form to get a character-wise diff
      expect(
        pretty_print_character_string_array(tokenizeArgs(input))
      ).toEqual(
        pretty_print_character_string_array(expected)
      );
    });
  }
}
354
+
355
+ // Expected unescaped result generated according to POSIX
356
// Full range 0..255, expectations derived from the POSIX quoting rules.
chars_escaped_test(0, 255, "(POSIX)");

// Range 1..127, expectations taken from the recorded /bin/sh output.
const shell_recorded_tokens: Overrides = {
  tokens_unquoted: [shell_arg_token_unquoted],
  tokens_double: [shell_arg_token_double],
  tokens_single: [shell_arg_token_single],
};
chars_escaped_test(1, 127, "(/bin/sh)", shell_recorded_tokens);
0 commit comments