Skip to content

Commit 15db9d0

Browse files
committed
XML output: ensure expressions yield valid XML 1.0
We previously did not perform sanity checks that expressions translate to valid XML 1.0 (which requires that all characters are printable). In most contexts we only handle printable strings, but expressions appearing in goto traces may include non-printable characters. In such cases, just output the underlying array of bytes. This problem was surfaced by SV-COMP benchmarks such as pthread-driver-races/char_generic_nvram_nvram_llseek_nvram_unlocked_ioctl.i (and some of the LDV examples), which use non-printable characters in printk invocations.
1 parent d613b01 commit 15db9d0

File tree

6 files changed

+101
-14
lines changed

6 files changed

+101
-14
lines changed

regression/cbmc/xml-trace2/main.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#include <assert.h>
2+
3+
int main()
4+
{
5+
char *c14 = "\x0E";
6+
assert(c14[0] == 14);
7+
assert(c14[1] == 0);
8+
assert(c14[0] != 14);
9+
}

regression/cbmc/xml-trace2/test.desc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
main.c
3+
--xml-ui
4+
<full_lhs_value( binary="0000001000000000000000000000000000000000000000000000000000000000")?>\{ 14, 0 \}</full_lhs_value>
5+
^EXIT=10$
6+
^SIGNAL=0$
7+
--
8+
<full_lhs_value( binary="0000001000000000000000000000000000000000000000000000000000000000")?>"\\&#14;"</full_lhs_value>

src/goto-programs/graphml_witness.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Author: Daniel Kroening
1818
#include <util/pointer_predicates.h>
1919
#include <util/prefix.h>
2020
#include <util/ssa_expr.h>
21+
#include <util/string_constant.h>
2122
#include <util/string_container.h>
2223

2324
#include <langapi/language_util.h>
@@ -57,6 +58,12 @@ void graphml_witnesst::remove_l0_l1(exprt &expr)
5758

5859
return;
5960
}
61+
else if(expr.id() == ID_string_constant)
62+
{
63+
std::string output_string = expr_to_string(ns, "", expr);
64+
if(!xmlt::is_printable_xml(output_string))
65+
expr = to_string_constant(expr).to_array_expr();
66+
}
6067

6168
Forall_operands(it, expr)
6269
remove_l0_l1(*it);

src/goto-programs/xml_goto_trace.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Author: Daniel Kroening
1313

1414
#include "xml_goto_trace.h"
1515

16+
#include <util/string_constant.h>
1617
#include <util/symbol.h>
1718
#include <util/xml_irep.h>
1819

@@ -23,6 +24,38 @@ Author: Daniel Kroening
2324
#include "structured_trace_util.h"
2425
#include "xml_expr.h"
2526

27+
/// Rewrite all string constants to their array counterparts
28+
static void replace_string_constants_rec(exprt &expr)
29+
{
30+
for(auto &op : expr.operands())
31+
replace_string_constants_rec(op);
32+
33+
if(expr.id() == ID_string_constant)
34+
expr = to_string_constant(expr).to_array_expr();
35+
}
36+
37+
/// Given an expression \p expr, produce a string representation that is
38+
/// printable in XML 1.0. Produces an empty string if no valid XML 1.0 string
39+
/// representing \p expr can be generated.
40+
static std::string
41+
get_printable_xml(const namespacet &ns, const irep_idt &id, const exprt &expr)
42+
{
43+
std::string result = from_expr(ns, id, expr);
44+
45+
if(!xmlt::is_printable_xml(result))
46+
{
47+
exprt new_expr = expr;
48+
replace_string_constants_rec(new_expr);
49+
result = from_expr(ns, id, new_expr);
50+
51+
// give up
52+
if(!xmlt::is_printable_xml(result))
53+
return {};
54+
}
55+
56+
return result;
57+
}
58+
2659
xmlt full_lhs_value(const goto_trace_stept &step, const namespacet &ns)
2760
{
2861
xmlt value_xml{"full_lhs_value"};
@@ -34,7 +67,7 @@ xmlt full_lhs_value(const goto_trace_stept &step, const namespacet &ns)
3467
const auto &lhs_object = step.get_lhs_object();
3568
const irep_idt identifier =
3669
lhs_object.has_value() ? lhs_object->get_identifier() : irep_idt();
37-
value_xml.data = from_expr(ns, identifier, value);
70+
value_xml.data = get_printable_xml(ns, identifier, value);
3871

3972
const auto &bv_type = type_try_dynamic_cast<bitvector_typet>(value.type());
4073
const auto &constant = expr_try_dynamic_cast<constant_exprt>(value);
@@ -121,7 +154,7 @@ void convert(
121154
std::string full_lhs_string;
122155

123156
if(step.full_lhs.is_not_nil())
124-
full_lhs_string = from_expr(ns, identifier, step.full_lhs);
157+
full_lhs_string = get_printable_xml(ns, identifier, step.full_lhs);
125158

126159
xml_assignment.new_element("full_lhs").data = full_lhs_string;
127160
xml_assignment.new_element(full_lhs_value(step, ns));
@@ -158,7 +191,7 @@ void convert(
158191
for(const auto &arg : step.io_args)
159192
{
160193
xml_output.new_element("value").data =
161-
from_expr(ns, step.function_id, arg);
194+
get_printable_xml(ns, step.function_id, arg);
162195
xml_output.new_element("value_expression").new_element(xml(arg, ns));
163196
}
164197
break;
@@ -176,7 +209,7 @@ void convert(
176209
for(const auto &arg : step.io_args)
177210
{
178211
xml_input.new_element("value").data =
179-
from_expr(ns, step.function_id, arg);
212+
get_printable_xml(ns, step.function_id, arg);
180213
xml_input.new_element("value_expression").new_element(xml(arg, ns));
181214
}
182215

src/util/xml.cpp

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,17 @@ void xmlt::escape(const std::string &s, std::ostream &out)
101101
out << '\n';
102102
break;
103103

104+
case 0x9: // TAB
105+
case 0x7F: // DEL
106+
out << "&#" << std::to_string((unsigned char)ch) << ';';
107+
break;
108+
104109
default:
105-
// &#0; isn't allowed, but what shall we do?
106-
if((ch>=0 && ch<' ') || ch==127)
107-
out << "&#"+std::to_string((unsigned char)ch)+";";
108-
else
109-
out << ch;
110+
DATA_INVARIANT(
111+
ch >= ' ',
112+
"XML does not support escaping non-printable character " +
113+
std::to_string((unsigned char)ch));
114+
out << ch;
110115
}
111116
}
112117
}
@@ -135,16 +140,34 @@ void xmlt::escape_attribute(const std::string &s, std::ostream &out)
135140
out << "&quot;";
136141
break;
137142

143+
case 0x9: // TAB
144+
case 0xA: // LF
145+
case 0xD: // CR
146+
case 0x7F: // DEL
147+
out << "&#" << std::to_string((unsigned char)ch) << ';';
148+
break;
149+
138150
default:
139-
// &#0; isn't allowed, but what shall we do?
140-
if((ch>=0 && ch<' ') || ch==127)
141-
out << "&#"+std::to_string((unsigned char)ch)+";";
142-
else
143-
out << ch;
151+
DATA_INVARIANT(
152+
ch >= ' ',
153+
"XML does not support escaping non-printable character " +
154+
std::to_string((unsigned char)ch));
155+
out << ch;
144156
}
145157
}
146158
}
147159

160+
bool xmlt::is_printable_xml(const std::string &s)
161+
{
162+
for(const auto ch : s)
163+
{
164+
if(ch < 0x20 && ch != 0x9 && ch != 0xA && ch != 0xD)
165+
return false;
166+
}
167+
168+
return true;
169+
}
170+
148171
void xmlt::do_indent(std::ostream &out, unsigned indent)
149172
{
150173
out << std::string(indent, ' ');

src/util/xml.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ class xmlt
123123

124124
static void escape_attribute(const std::string &s, std::ostream &out);
125125

126+
/// Determine whether \p s does not contain any characters that cannot be
127+
/// escaped in XML 1.0. See https://www.w3.org/TR/xml/#charsets for details.
128+
/// \param s: string to verify
129+
/// \return True if, and only if, all characters in \p s are taken from the
130+
/// charset permitted in XML 1.0.
131+
static bool is_printable_xml(const std::string &s);
132+
126133
protected:
127134
static void do_indent(
128135
std::ostream &out,

0 commit comments

Comments
 (0)