Skip to content

Commit cfeacbf

Browse files
authored
allows any character in the variable name (#1086)
BIL and Core Theory variable literals can contain only a specific set of valid characters. This approach doesn't play nicely with various manglers and non-C languages, and it contradicts our own tradition, as we generally tend to allow any character in variables; see our Knowledge variables for example (which are used under the hood of CT and BIL variables). One of the real-world examples where bap fails is new C++ code with lambdas, which uses `#` for anonymous variables. In this proposal we allow any string (including the empty one) to be used as a variable name. First of all, we escape any non-printable or whitespace characters. We also escape '.' as we use it for variable versioning. Next, if a prospective variable name starts with a digit, is empty, or starts with `$` or `#` (which we use to encode de Bruijn-style variables), we prefix it with `_`. Note that escaping whitespace and non-printable characters is not strictly necessary, but it helps with the textual representation of the variable name in case we hit one.
1 parent d3b1400 commit cfeacbf

File tree

1 file changed

+18
-27
lines changed

1 file changed

+18
-27
lines changed

lib/bap_core_theory/bap_core_theory_var.ml

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,33 +31,25 @@ let valid_first_char = function
3131
| '0'..'9' | '#' | '$' -> false
3232
| _ -> true
3333

34-
let valid_char c =
35-
valid_first_char c || match c with
36-
| '0' .. '9' | '\'' | '.' -> true
37-
| _ -> false
38-
39-
let non_empty name =
40-
if String.length name = 0
41-
then invalid_arg "Invalid var literal: a variable can't be empty"
42-
43-
let all_chars_valid name =
44-
if not (valid_first_char name.[0])
45-
then invalid_argf
46-
"Invalid var literal: a variable can't start from %c" name.[0] ();
47-
match String.find name ~f:(Fn.non valid_char) with
48-
| None -> ()
49-
| Some c ->
50-
invalid_argf
51-
"Invalid var literal: a variable can't contain char %c" c ()
52-
53-
let validate_variable name =
54-
non_empty name;
55-
all_chars_valid name
34+
let escapeworthy = Char.all |> List.filter ~f:(function
35+
| '.' -> true
36+
| c -> Char.is_whitespace c || not (Char.is_print c))
37+
38+
let escape_char = '\\'
39+
40+
let escape =
41+
Staged.unstage @@
42+
String.Escaping.escape ~escapeworthy ~escape_char
43+
44+
let mangle_if_necessary name =
45+
let name = escape name in
46+
if String.is_empty name then "_"
47+
else if not (valid_first_char name.[0])
48+
then "_" ^ name
49+
else name
5650

5751
let define sort name : 'a var =
58-
validate_variable name;
59-
non_empty name;
60-
sort, Reg {name; ver=0}
52+
sort, Reg {name=mangle_if_necessary name; ver=0}
6153

6254
let create sort ident = sort,ident
6355

@@ -118,7 +110,7 @@ module Ident = struct
118110
failwithf "`%s' is not a valid temporary value" s ()
119111

120112
let split_version s =
121-
match String.rfindi s ~f:(fun _ c -> Char.(c = '.')) with
113+
match String.Escaping.rindex s ~escape_char '.' with
122114
| None -> s,0
123115
| Some n ->
124116
String.subo ~len:n s,
@@ -134,7 +126,6 @@ module Ident = struct
134126
let s,ver = split_version s in
135127
Var {num = num s; ver}
136128
| _ -> fun _ ->
137-
validate_variable x;
138129
let name,ver = split_version x in
139130
Reg {name; ver}
140131

0 commit comments

Comments
 (0)