Skip to content

Commit 2733b56

Browse files
committed
Make type_name2type_identifier retain UTF-8 characters
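This is done in order to better support UTF-8: bytes in the range 0x80-0xFF are now kept in the identifier instead of being replaced with underscores.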
1 parent 05f85d2 commit 2733b56

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

src/ansi-c/type2name.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -305,11 +305,12 @@ std::string type_name2type_identifier(const std::string &name)
305305
};
306306
const auto replace_invalid_characters_with_underscore =
307307
[](const std::string &identifier) {
308-
static const std::regex non_alpha_numeric{"[^A-Za-z0-9]+"};
308+
static const std::regex non_alpha_numeric{"[^A-Za-z0-9\x80-\xff]+"};
309309
return std::regex_replace(identifier, non_alpha_numeric, "_");
310310
};
311311
const auto strip_leading_non_letters = [](const std::string &identifier) {
312-
static const std::regex identifier_regex{"[A-Za-z][A-Za-z0-9_]*"};
312+
static const std::regex identifier_regex{
313+
"[A-Za-z\x80-\xff][A-Za-z0-9_\x80-\xff]*"};
313314
std::smatch match_results;
314315
bool found = std::regex_search(identifier, match_results, identifier_regex);
315316
POSTCONDITION(found);

unit/ansi-c/type2name.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,11 @@ TEST_CASE(
6363
"0123456789_banana_0123456789_split_0123456789") ==
6464
"banana_0123456789_split_0123456789");
6565
}
66+
67+
TEST_CASE(
68+
"type_name2type_identifier UTF-8 characters",
69+
"[core][ansi-c][type_name2type_identifier]")
70+
{
71+
const std::string utf8_example = "\xF0\x9F\x8D\x8C\xF0\x9F\x8D\xA8";
72+
CHECK(type_name2type_identifier(utf8_example) == utf8_example);
73+
}

0 commit comments

Comments
 (0)