Skip to content

Commit 5b1f614

Browse files
authored
Merge pull request #5774 from tautschnig/xml-escaping
XML output: ensure expressions are valid XML 1.0
2 parents d613b01 + 15db9d0 commit 5b1f614

File tree

6 files changed

+101
-14
lines changed

6 files changed

+101
-14
lines changed

regression/cbmc/xml-trace2/main.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#include <assert.h>
2+
3+
int main()
4+
{
5+
char *c14 = "\x0E";
6+
assert(c14[0] == 14);
7+
assert(c14[1] == 0);
8+
assert(c14[0] != 14);
9+
}

regression/cbmc/xml-trace2/test.desc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
main.c
3+
--xml-ui
4+
<full_lhs_value( binary="0000001000000000000000000000000000000000000000000000000000000000")?>\{ 14, 0 \}</full_lhs_value>
5+
^EXIT=10$
6+
^SIGNAL=0$
7+
--
8+
<full_lhs_value( binary="0000001000000000000000000000000000000000000000000000000000000000")?>"\\&#14;"</full_lhs_value>

src/goto-programs/graphml_witness.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Author: Daniel Kroening
1818
#include <util/pointer_predicates.h>
1919
#include <util/prefix.h>
2020
#include <util/ssa_expr.h>
21+
#include <util/string_constant.h>
2122
#include <util/string_container.h>
2223

2324
#include <langapi/language_util.h>
@@ -57,6 +58,12 @@ void graphml_witnesst::remove_l0_l1(exprt &expr)
5758

5859
return;
5960
}
61+
else if(expr.id() == ID_string_constant)
62+
{
63+
std::string output_string = expr_to_string(ns, "", expr);
64+
if(!xmlt::is_printable_xml(output_string))
65+
expr = to_string_constant(expr).to_array_expr();
66+
}
6067

6168
Forall_operands(it, expr)
6269
remove_l0_l1(*it);

src/goto-programs/xml_goto_trace.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Author: Daniel Kroening
1313

1414
#include "xml_goto_trace.h"
1515

16+
#include <util/string_constant.h>
1617
#include <util/symbol.h>
1718
#include <util/xml_irep.h>
1819

@@ -23,6 +24,38 @@ Author: Daniel Kroening
2324
#include "structured_trace_util.h"
2425
#include "xml_expr.h"
2526

27+
/// Rewrite all string constants to their array counterparts
28+
static void replace_string_constants_rec(exprt &expr)
29+
{
30+
for(auto &op : expr.operands())
31+
replace_string_constants_rec(op);
32+
33+
if(expr.id() == ID_string_constant)
34+
expr = to_string_constant(expr).to_array_expr();
35+
}
36+
37+
/// Given an expression \p expr, produce a string representation that is
38+
/// printable in XML 1.0. Produces an empty string if no valid XML 1.0 string
39+
/// representing \p expr can be generated.
40+
static std::string
41+
get_printable_xml(const namespacet &ns, const irep_idt &id, const exprt &expr)
42+
{
43+
std::string result = from_expr(ns, id, expr);
44+
45+
if(!xmlt::is_printable_xml(result))
46+
{
47+
exprt new_expr = expr;
48+
replace_string_constants_rec(new_expr);
49+
result = from_expr(ns, id, new_expr);
50+
51+
// give up
52+
if(!xmlt::is_printable_xml(result))
53+
return {};
54+
}
55+
56+
return result;
57+
}
58+
2659
xmlt full_lhs_value(const goto_trace_stept &step, const namespacet &ns)
2760
{
2861
xmlt value_xml{"full_lhs_value"};
@@ -34,7 +67,7 @@ xmlt full_lhs_value(const goto_trace_stept &step, const namespacet &ns)
3467
const auto &lhs_object = step.get_lhs_object();
3568
const irep_idt identifier =
3669
lhs_object.has_value() ? lhs_object->get_identifier() : irep_idt();
37-
value_xml.data = from_expr(ns, identifier, value);
70+
value_xml.data = get_printable_xml(ns, identifier, value);
3871

3972
const auto &bv_type = type_try_dynamic_cast<bitvector_typet>(value.type());
4073
const auto &constant = expr_try_dynamic_cast<constant_exprt>(value);
@@ -121,7 +154,7 @@ void convert(
121154
std::string full_lhs_string;
122155

123156
if(step.full_lhs.is_not_nil())
124-
full_lhs_string = from_expr(ns, identifier, step.full_lhs);
157+
full_lhs_string = get_printable_xml(ns, identifier, step.full_lhs);
125158

126159
xml_assignment.new_element("full_lhs").data = full_lhs_string;
127160
xml_assignment.new_element(full_lhs_value(step, ns));
@@ -158,7 +191,7 @@ void convert(
158191
for(const auto &arg : step.io_args)
159192
{
160193
xml_output.new_element("value").data =
161-
from_expr(ns, step.function_id, arg);
194+
get_printable_xml(ns, step.function_id, arg);
162195
xml_output.new_element("value_expression").new_element(xml(arg, ns));
163196
}
164197
break;
@@ -176,7 +209,7 @@ void convert(
176209
for(const auto &arg : step.io_args)
177210
{
178211
xml_input.new_element("value").data =
179-
from_expr(ns, step.function_id, arg);
212+
get_printable_xml(ns, step.function_id, arg);
180213
xml_input.new_element("value_expression").new_element(xml(arg, ns));
181214
}
182215

src/util/xml.cpp

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,17 @@ void xmlt::escape(const std::string &s, std::ostream &out)
101101
out << '\n';
102102
break;
103103

104+
case 0x9: // TAB
105+
case 0x7F: // DEL
106+
out << "&#" << std::to_string((unsigned char)ch) << ';';
107+
break;
108+
104109
default:
105-
// &#0; isn't allowed, but what shall we do?
106-
if((ch>=0 && ch<' ') || ch==127)
107-
out << "&#"+std::to_string((unsigned char)ch)+";";
108-
else
109-
out << ch;
110+
DATA_INVARIANT(
111+
ch >= ' ',
112+
"XML does not support escaping non-printable character " +
113+
std::to_string((unsigned char)ch));
114+
out << ch;
110115
}
111116
}
112117
}
@@ -135,16 +140,34 @@ void xmlt::escape_attribute(const std::string &s, std::ostream &out)
135140
out << "&quot;";
136141
break;
137142

143+
case 0x9: // TAB
144+
case 0xA: // LF
145+
case 0xD: // CR
146+
case 0x7F: // DEL
147+
out << "&#" << std::to_string((unsigned char)ch) << ';';
148+
break;
149+
138150
default:
139-
// &#0; isn't allowed, but what shall we do?
140-
if((ch>=0 && ch<' ') || ch==127)
141-
out << "&#"+std::to_string((unsigned char)ch)+";";
142-
else
143-
out << ch;
151+
DATA_INVARIANT(
152+
ch >= ' ',
153+
"XML does not support escaping non-printable character " +
154+
std::to_string((unsigned char)ch));
155+
out << ch;
144156
}
145157
}
146158
}
147159

160+
bool xmlt::is_printable_xml(const std::string &s)
161+
{
162+
for(const auto ch : s)
163+
{
164+
if(ch < 0x20 && ch != 0x9 && ch != 0xA && ch != 0xD)
165+
return false;
166+
}
167+
168+
return true;
169+
}
170+
148171
void xmlt::do_indent(std::ostream &out, unsigned indent)
149172
{
150173
out << std::string(indent, ' ');

src/util/xml.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ class xmlt
123123

124124
static void escape_attribute(const std::string &s, std::ostream &out);
125125

126+
/// Determine whether \p s does not contain any characters that cannot be
127+
/// escaped in XML 1.0. See https://www.w3.org/TR/xml/#charsets for details.
128+
/// \param s: string to verify
129+
/// \return True if, and only if, all characters in \p s are taken from the
130+
/// charset permitted in XML 1.0.
131+
static bool is_printable_xml(const std::string &s);
132+
126133
protected:
127134
static void do_indent(
128135
std::ostream &out,

0 commit comments

Comments
 (0)