Skip to content

Add more aliases for Unicode confusable chars #33128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 5, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 53 additions & 6 deletions src/libsyntax/parse/lexer/unicode_chars.rs
Original file line number Diff line number Diff line change
@@ -16,6 +16,22 @@ use errors::DiagnosticBuilder;
use super::StringReader;

const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
(' ', "No-Break Space", ' '),
(' ', "Ogham Space Mark", ' '),
(' ', "En Quad", ' '),
(' ', "Em Quad", ' '),
(' ', "En Space", ' '),
(' ', "Em Space", ' '),
(' ', "Three-Per-Em Space", ' '),
(' ', "Four-Per-Em Space", ' '),
(' ', "Six-Per-Em Space", ' '),
(' ', "Figure Space", ' '),
(' ', "Punctuation Space", ' '),
(' ', "Thin Space", ' '),
(' ', "Hair Space", ' '),
(' ', "Narrow No-Break Space", ' '),
(' ', "Medium Mathematical Space", ' '),
('　', "Ideographic Space", ' '),
('ߺ', "Nko Lajanyalan", '_'),
('﹍', "Dashed Low Line", '_'),
('﹎', "Centreline Low Line", '_'),
@@ -24,14 +40,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('‑', "Non-Breaking Hyphen", '-'),
('‒', "Figure Dash", '-'),
('–', "En Dash", '-'),
('—', "Em Dash", '-'),
('﹘', "Small Em Dash", '-'),
('⁃', "Hyphen Bullet", '-'),
('˗', "Modifier Letter Minus Sign", '-'),
('−', "Minus Sign", '-'),
('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
('٫', "Arabic Decimal Separator", ','),
('‚', "Single Low-9 Quotation Mark", ','),
('ꓹ', "Lisu Letter Tone Na Po", ','),
('，', "Fullwidth Comma", ','),
(';', "Greek Question Mark", ';'),
('；', "Fullwidth Semicolon", ';'),
('ः', "Devanagari Sign Visarga", ':'),
('ઃ', "Gujarati Sign Visarga", ':'),
('：', "Fullwidth Colon", ':'),
@@ -53,16 +73,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('ʔ', "Latin Letter Glottal Stop", '?'),
('ॽ', "Devanagari Letter Glottal Stop", '?'),
('Ꭾ', "Cherokee Letter He", '?'),
('？', "Fullwidth Question Mark", '?'),
('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
('․', "One Dot Leader", '.'),
('۔', "Arabic Full Stop", '.'),
('܁', "Syriac Supralinear Full Stop", '.'),
('܂', "Syriac Sublinear Full Stop", '.'),
('꘎', "Vai Full Stop", '.'),
('𐩐', "Kharoshthi Punctuation Dot", '.'),
('·', "Middle Dot", '.'),
('٠', "Arabic-Indic Digit Zero", '.'),
('۰', "Extended Arabic-Indic Digit Zero", '.'),
('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
('。', "Ideographic Full Stop", '.'),
('・', "Katakana Middle Dot", '.'),
('՝', "Armenian Comma", '\''),
('＇', "Fullwidth Apostrophe", '\''),
('‘', "Left Single Quotation Mark", '\''),
@@ -108,16 +132,30 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
('［', "Fullwidth Left Square Bracket", '('),
('❨', "Medium Left Parenthesis Ornament", '('),
('❲', "Light Left Tortoise Shell Bracket Ornament", '('),
('〔', "Left Tortoise Shell Bracket", '('),
('﴾', "Ornate Left Parenthesis", '('),
('］', "Fullwidth Right Square Bracket", ')'),
('（', "Fullwidth Left Parenthesis", '('),
('❩', "Medium Right Parenthesis Ornament", ')'),
('❳', "Light Right Tortoise Shell Bracket Ornament", ')'),
('〕', "Right Tortoise Shell Bracket", ')'),
('﴿', "Ornate Right Parenthesis", ')'),
('）', "Fullwidth Right Parenthesis", ')'),
('［', "Fullwidth Left Square Bracket", '['),
('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
('「', "Left Corner Bracket", '['),
('『', "Left White Corner Bracket", '['),
('【', "Left Black Lenticular Bracket", '['),
('〔', "Left Tortoise Shell Bracket", '['),
('〖', "Left White Lenticular Bracket", '['),
('〘', "Left White Tortoise Shell Bracket", '['),
('〚', "Left White Square Bracket", '['),
('］', "Fullwidth Right Square Bracket", ']'),
('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
('」', "Right Corner Bracket", ']'),
('』', "Right White Corner Bracket", ']'),
('】', "Right Black Lenticular Bracket", ']'),
('〕', "Right Tortoise Shell Bracket", ']'),
('〗', "Right White Lenticular Bracket", ']'),
('〙', "Right White Tortoise Shell Bracket", ']'),
('〛', "Right White Square Bracket", ']'),
('❴', "Medium Left Curly Bracket Ornament", '{'),
('❵', "Medium Right Curly Bracket Ornament", '}'),
('⁎', "Low Asterisk", '*'),
@@ -140,6 +178,8 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('⟍', "Mathematical Falling Diagonal", '\\'),
('⧵', "Reverse Solidus Operator", '\\'),
('⧹', "Big Reverse Solidus", '\\'),
('、', "Ideographic Comma", '\\'),
('ヽ', "Katakana Iteration Mark", '\\'),
('㇔', "Cjk Stroke D", '\\'),
('丶', "Cjk Unified Ideograph-4E36", '\\'),
('⼂', "Kangxi Radical Dot", '\\'),
@@ -148,15 +188,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
('˂', "Modifier Letter Left Arrowhead", '<'),
('〈', "Left Angle Bracket", '<'),
('《', "Left Double Angle Bracket", '<'),
('꓿', "Lisu Punctuation Full Stop", '='),
('›', "Single Right-Pointing Angle Quotation Mark", '>'),
('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
('˃', "Modifier Letter Right Arrowhead", '>'),
('〉', "Right Angle Bracket", '>'),
('》', "Right Double Angle Bracket", '>'),
('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ];

const ASCII_ARRAY: &'static [(char, &'static str)] = &[
(' ', "Space"),
('_', "Underscore"),
('-', "Minus/Hyphen"),
(',', "Comma"),
@@ -169,6 +214,8 @@ const ASCII_ARRAY: &'static [(char, &'static str)] = &[
('"', "Quotation Mark"),
('(', "Left Parenthesis"),
(')', "Right Parenthesis"),
('[', "Left Square Bracket"),
(']', "Right Square Bracket"),
('{', "Left Curly Brace"),
('}', "Right Curly Brace"),
('*', "Asterisk"),