Skip to content

Commit a3fbce8

Browse files
committed
gccrs: fix tokenizing utf-8 whitespaces
gcc/rust/ChangeLog: * lex/rust-lex.cc (Lexer::build_token): add check for all kinds of whitespaces gcc/testsuite/ChangeLog: * rust/compile/torture/utf8_whitespaces.rs: New test. Signed-off-by: Raiki Tamura <[email protected]>
1 parent 760ed46 commit a3fbce8

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

gcc/rust/lex/rust-lex.cc

+11-2
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,10 @@ Lexer::build_token ()
420420
{
421421
/* ignore whitespace characters for tokens but continue updating
422422
* location */
423-
case '\n': // newline
423+
case '\n': // newline
424+
case 0x0085: // next line
425+
case 0x2028: // line separator
426+
case 0x2029: // paragraph separator
424427
current_line++;
425428
current_column = 1;
426429
// tell line_table that new line starts
@@ -432,10 +435,16 @@ Lexer::build_token ()
432435
case ' ': // space
433436
current_column++;
434437
continue;
435-
case '\t': // tab
438+
case '\t': // horizontal tab
436439
// width of a tab is not well-defined, assume 8 spaces
437440
current_column += 8;
438441
continue;
442+
case '\v': // vertical tab
443+
case 0x000c: // form feed
444+
case 0x200e: // left-to-right mark
445+
case 0x200f: // right-to-left mark
446+
// Ignored.
447+
continue;
439448

440449
// punctuation - actual tokens
441450
case '=':
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
fn main() {
2+
// FORM FEED
3+
4+
// LINE TABULATION (vt)
5+
6+
// NEXT LINE (nel)
7+
…
8+
// LEFT-TO-RIGHT MARK
9+
10+
// RIGHT-TO-LEFT MARK
11+
12+
// LINE SEPARATOR
13+
14+
// PARAGRAPH SEPARATOR
15+
16+
}

0 commit comments

Comments
 (0)