Skip to content

Commit 069db73

Browse files
Merge pull request #5420 from thomasspriggs/tas/type2identifier_v2
Robust type name to type identifier conversion for C harnesses
2 parents 2d02d6e + a9f7f01 commit 069db73

File tree

5 files changed

+132
-25
lines changed

5 files changed

+132
-25
lines changed

src/ansi-c/c_typecheck_expr.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1893,15 +1893,15 @@ void c_typecheck_baset::typecheck_side_effect_function_call(
18931893
/// XXX why are we only looking at the type of the first parameter?
18941894
if(parameters.front().type().id() == ID_pointer)
18951895
{
1896-
identifier_with_type =
1897-
id2string(identifier) + "_" +
1898-
type2identifier(parameters.front().type().subtype(), *this);
1896+
identifier_with_type = id2string(identifier) + "_" +
1897+
type_to_partial_identifier(
1898+
parameters.front().type().subtype(), *this);
18991899
}
19001900
else
19011901
{
19021902
identifier_with_type =
19031903
id2string(identifier) + "_" +
1904-
type2identifier(parameters.front().type(), *this);
1904+
type_to_partial_identifier(parameters.front().type(), *this);
19051905
}
19061906
gcc_polymorphic->set_identifier(identifier_with_type);
19071907

src/ansi-c/type2name.cpp

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ Author: Daniel Kroening, [email protected]
1919
#include <util/std_types.h>
2020
#include <util/symbol_table.h>
2121

22+
#include <regex>
23+
2224
typedef std::unordered_map<irep_idt, std::pair<size_t, bool>> symbol_numbert;
2325

2426
static std::string type2name(
@@ -277,27 +279,48 @@ std::string type2name(const typet &type, const namespacet &ns)
277279
/// If we want utilities like dump_c to work properly identifiers
278280
/// should ideally always be valid C identifiers
279281
/// This replaces some invalid characters that can appear in type2name output.
280-
std::string type2identifier(const typet &type, const namespacet &ns)
282+
std::string type_name2type_identifier(const std::string &name)
281283
{
282-
auto type2name_res = type2name(type, ns);
283-
std::string result{};
284-
for(char c : type2name_res)
285-
{
286-
switch(c)
284+
const auto replace_special_characters = [](const std::string &name) {
285+
std::string result{};
286+
for(char c : name)
287287
{
288-
case '*':
289-
result += "_ptr_";
290-
break;
291-
case '{':
292-
result += "_start_sub_";
293-
break;
294-
case '}':
295-
result += "_end_sub_";
296-
break;
297-
default:
298-
result += c;
299-
break;
288+
switch(c)
289+
{
290+
case '*':
291+
result += "_ptr_";
292+
break;
293+
case '{':
294+
result += "_start_sub_";
295+
break;
296+
case '}':
297+
result += "_end_sub_";
298+
break;
299+
default:
300+
result += c;
301+
break;
302+
}
300303
}
301-
}
302-
return result;
304+
return result;
305+
};
306+
const auto replace_invalid_characters_with_underscore =
307+
[](const std::string &identifier) {
308+
static const std::regex non_alpha_numeric{"[^A-Za-z0-9\x80-\xff]+"};
309+
return std::regex_replace(identifier, non_alpha_numeric, "_");
310+
};
311+
const auto strip_leading_digits = [](const std::string &identifier) {
312+
static const std::regex identifier_regex{
313+
"[A-Za-z\x80-\xff_][A-Za-z0-9_\x80-\xff]*"};
314+
std::smatch match_results;
315+
bool found = std::regex_search(identifier, match_results, identifier_regex);
316+
POSTCONDITION(found);
317+
return match_results.str(0);
318+
};
319+
return strip_leading_digits(replace_invalid_characters_with_underscore(
320+
replace_special_characters(name)));
321+
}
322+
323+
std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
324+
{
325+
return type_name2type_identifier(type2name(type, ns));
303326
}

src/ansi-c/type2name.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ class namespacet;
2020

2121
std::string type2name(const typet &type, const namespacet &ns);
2222

23-
std::string type2identifier(const typet &type, const namespacet &ns);
23+
/**
24+
* Constructs a string describing the given type, which can be used as part of
25+
* a `C` identifier. The resulting identifier is not guaranteed to uniquely
26+
* identify the type in all cases.
27+
* @param type Internal representation of the type to describe.
28+
* @param ns Namespace for looking up any types on which the `type` parameter
29+
* depends.
30+
* @return An identifying string which can be used as part of a `C` identifier.
31+
*/
32+
std::string type_to_partial_identifier(const typet &type, const namespacet &ns);
2433

2534
#endif // CPROVER_ANSI_C_TYPE2NAME_H

unit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ SRC += analyses/ai/ai.cpp \
1515
analyses/does_remove_const/does_type_preserve_const_correctness.cpp \
1616
analyses/does_remove_const/is_type_at_least_as_const_as.cpp \
1717
ansi-c/max_malloc_size.cpp \
18+
ansi-c/type2name.cpp \
1819
big-int/big-int.cpp \
1920
compound_block_locations.cpp \
2021
get_goto_model_from_c_test.cpp \

unit/ansi-c/type2name.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*******************************************************************\
2+
3+
Module: Unit tests for type_name2type_identifier
4+
5+
Author: Thomas Spriggs
6+
7+
\*******************************************************************/
8+
9+
#include <testing-utils/use_catch.h>
10+
11+
#include <numeric>
12+
13+
extern std::string type_name2type_identifier(const std::string &name);
14+
15+
TEST_CASE(
16+
"type_name2type_identifier sanity check",
17+
"[core][ansi-c][type_name2type_identifier]")
18+
{
19+
CHECK(type_name2type_identifier("char") == "char");
20+
}
21+
22+
TEST_CASE(
23+
"type_name2type_identifier special characters",
24+
"[core][ansi-c][type_name2type_identifier]")
25+
{
26+
CHECK(type_name2type_identifier("char*") == "char_ptr_");
27+
CHECK(type_name2type_identifier("foo{bar}") == "foo_start_sub_bar_end_sub_");
28+
}
29+
30+
/**
31+
* @return A string containing all 7-bit ascii printable characters.
32+
*/
33+
static std::string all_printable_characters()
34+
{
35+
const char first = 32;
36+
const char last = 127;
37+
std::string printable_characters(last - first, ' ');
38+
std::iota(printable_characters.begin(), printable_characters.end(), first);
39+
return printable_characters;
40+
}
41+
42+
TEST_CASE(
43+
"type_name2type_identifier invalid characters",
44+
"[core][ansi-c][type_name2type_identifier]")
45+
{
46+
const std::string printable_characters = all_printable_characters();
47+
CHECK(
48+
printable_characters ==
49+
R"( !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`)"
50+
R"(abcdefghijklmnopqrstuvwxyz{|}~)");
51+
CHECK(
52+
type_name2type_identifier(printable_characters) ==
53+
"_ptr_0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz_"
54+
"start_sub_end_sub_");
55+
}
56+
57+
TEST_CASE(
58+
"type_name2type_identifier leading characters",
59+
"[core][ansi-c][type_name2type_identifier]")
60+
{
61+
CHECK(
62+
type_name2type_identifier("0123456789banana_0123456789_split_0123456789") ==
63+
"banana_0123456789_split_0123456789");
64+
CHECK(type_name2type_identifier("0123456789_banana") == "_banana");
65+
CHECK(type_name2type_identifier("_0123456789") == "_0123456789");
66+
}
67+
68+
TEST_CASE(
69+
"type_name2type_identifier UTF-8 characters",
70+
"[core][ansi-c][type_name2type_identifier]")
71+
{
72+
const std::string utf8_example = "\xF0\x9F\x8D\x8C\xF0\x9F\x8D\xA8";
73+
CHECK(type_name2type_identifier(utf8_example) == utf8_example);
74+
}

0 commit comments

Comments
 (0)