Skip to content

Commit 1ba8148

Browse files
committed
new pointer encoding
This changes the encoding of pointers in the flattening solver as follows: * Pointers with constant zero offset are encoded by numbering the object (0 is the NULL object). * Pointers that might have an offset are numbered, and encoded/decoded as needed. The numbering points into a table, which records an n-bit object and n-bit offset for an n-bit pointer. The key benefit of the encoding above are that a) offset arithmetic no longer overflows before the given width of the pointer is reached, b) the full width of the pointer (usually 32 or 64 bits) can be used to encode the object number, thereby eliminating the requirement to configure the number of 'object bits'.
1 parent ed2a9fa commit 1ba8148

File tree

2 files changed

+177
-91
lines changed

2 files changed

+177
-91
lines changed

src/solvers/flattening/bv_pointers.cpp

Lines changed: 167 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ Author: Daniel Kroening, [email protected]
1111
#include <util/arith_tools.h>
1212
#include <util/byte_operators.h>
1313
#include <util/c_types.h>
14-
#include <util/config.h>
1514
#include <util/exception_utils.h>
1615
#include <util/expr_util.h>
1716
#include <util/namespace.h>
@@ -71,53 +70,93 @@ bv_pointerst::endianness_map(const typet &type, bool little_endian) const
7170
return bv_endianness_mapt{type, little_endian, ns, bv_width};
7271
}
7372

74-
std::size_t bv_pointerst::get_object_width(const pointer_typet &) const
75-
{
76-
// not actually type-dependent for now
77-
return config.bv_encoding.object_bits;
78-
}
79-
80-
std::size_t bv_pointerst::get_offset_width(const pointer_typet &type) const
73+
bvt bv_pointerst::object_literals(const bvt &bv, const pointer_typet &type)
8174
{
82-
const std::size_t pointer_width = type.get_width();
83-
const std::size_t object_width = get_object_width(type);
84-
PRECONDITION(pointer_width >= object_width);
85-
return pointer_width - object_width;
86-
}
75+
const auto width = type.get_width();
76+
PRECONDITION(width == bv.size());
8777

88-
std::size_t bv_pointerst::get_address_width(const pointer_typet &type) const
89-
{
90-
return type.get_width();
91-
}
78+
const auto result = prop.new_variables(width);
79+
bvt match_found;
9280

93-
bvt bv_pointerst::object_literals(const bvt &bv, const pointer_typet &type)
94-
const
95-
{
96-
const std::size_t offset_width = get_offset_width(type);
97-
const std::size_t object_width = get_object_width(type);
98-
PRECONDITION(bv.size() >= offset_width + object_width);
81+
for(std::size_t i = 0; i < numbered_pointers.size(); i++)
82+
{
83+
auto cond = bv_utils.equal(
84+
bv,
85+
bv_utilst::concatenate(
86+
bv_utilst::build_constant(i, width - 1), {const_literal(true)}));
87+
match_found.push_back(cond);
88+
bv_utils.cond_implies_equal(
89+
cond,
90+
bv_utilst::zero_extension(numbered_pointers[i].first, width),
91+
result);
92+
}
9993

100-
return bvt(
101-
bv.begin() + offset_width, bv.begin() + offset_width + object_width);
94+
auto within_bounds = prop.lor(match_found);
95+
96+
// The top bit distinguishes 'object only' vs. 'table encoded'.
97+
// When outside of the table, return an invalid object.
98+
return bv_utils.select(
99+
bv.back(),
100+
bv_utils.select(
101+
within_bounds,
102+
result,
103+
bv_utilst::build_constant(pointer_logic.get_invalid_object(), width)),
104+
bv);
102105
}
103106

104107
bvt bv_pointerst::offset_literals(const bvt &bv, const pointer_typet &type)
105-
const
106108
{
107-
const std::size_t offset_width = get_offset_width(type);
108-
PRECONDITION(bv.size() >= offset_width);
109+
const auto width = type.get_width();
110+
PRECONDITION(width == bv.size());
111+
112+
const auto result = prop.new_variables(width);
109113

110-
return bvt(bv.begin(), bv.begin() + offset_width);
114+
for(std::size_t i = 0; i < numbered_pointers.size(); i++)
115+
{
116+
auto cond = bv_utils.equal(
117+
bv,
118+
bv_utilst::concatenate(
119+
bv_utilst::build_constant(i, width - 1), {const_literal(true)}));
120+
bv_utils.cond_implies_equal(
121+
cond,
122+
bv_utilst::sign_extension(numbered_pointers[i].second, width),
123+
result);
124+
}
125+
126+
// the top bit distinguishes 'object only' vs. 'table encoded'
127+
return bv_utils.select(bv.back(), result, bv_utilst::zeros(width));
111128
}
112129

113-
bvt bv_pointerst::object_offset_encoding(const bvt &object, const bvt &offset)
130+
bvt bv_pointerst::object_offset_encoding(
131+
const bvt &object_bv,
132+
const bvt &offset_bv,
133+
const pointer_typet &type)
114134
{
115-
bvt result;
116-
result.reserve(offset.size() + object.size());
117-
result.insert(result.end(), offset.begin(), offset.end());
118-
result.insert(result.end(), object.begin(), object.end());
119-
120-
return result;
135+
const auto width = type.get_width();
136+
PRECONDITION(object_bv.size() == width);
137+
PRECONDITION(offset_bv.size() == width);
138+
139+
// is the offset zero?
140+
if(std::find_if_not(offset_bv.begin(), offset_bv.end(), [](literalt l) {
141+
return l == const_literal(false);
142+
}) != offset_bv.end())
143+
{
144+
// offset is not zero, add to the pointer table
145+
auto number = numbered_pointers.size();
146+
numbered_pointers.emplace_back(object_bv, offset_bv);
147+
148+
// Encode the table index.
149+
// Also set top bit to distinguish from object-only pointers.
150+
return bv_utilst::concatenate(
151+
bv_utilst::build_constant(number, width - 1), {const_literal(true)});
152+
}
153+
else
154+
{
155+
// Offset is zero, just zero-extend object number.
156+
// Top bit is zero to indicate object-only pointer.
157+
return bv_utilst::concatenate(
158+
bv_utilst::zero_extension(object_bv, width - 1), {const_literal(false)});
159+
}
121160
}
122161

123162
literalt bv_pointerst::convert_rest(const exprt &expr)
@@ -141,17 +180,7 @@ literalt bv_pointerst::convert_rest(const exprt &expr)
141180
bvt invalid_bv = object_literals(
142181
encode(pointer_logic.get_invalid_object(), type), type);
143182

144-
const std::size_t object_bits = get_object_width(type);
145-
146-
bvt equal_invalid_bv;
147-
equal_invalid_bv.reserve(object_bits);
148-
149-
for(std::size_t i=0; i<object_bits; i++)
150-
{
151-
equal_invalid_bv.push_back(prop.lequal(object_bv[i], invalid_bv[i]));
152-
}
153-
154-
return prop.land(equal_invalid_bv);
183+
return bv_utils.equal(object_bv, invalid_bv);
155184
}
156185
}
157186
}
@@ -168,6 +197,32 @@ literalt bv_pointerst::convert_rest(const exprt &expr)
168197
return l;
169198
}
170199
}
200+
else if(expr.id() == ID_equal || expr.id() == ID_notequal)
201+
{
202+
if(
203+
operands.size() == 2 && operands[0].type().id() == ID_pointer &&
204+
operands[1].type().id() == ID_pointer)
205+
{
206+
// bit-wise comparison doesn't work because of numbered pointers
207+
const bvt &bv0 = convert_bv(operands[0]);
208+
const bvt &bv1 = convert_bv(operands[1]);
209+
210+
const pointer_typet &type0 = to_pointer_type(operands[0].type());
211+
bvt object_bv0 = object_literals(bv0, type0);
212+
bvt offset_bv0 = offset_literals(bv0, type0);
213+
214+
const pointer_typet &type1 = to_pointer_type(operands[1].type());
215+
bvt object_bv1 = object_literals(bv1, type1);
216+
bvt offset_bv1 = offset_literals(bv1, type1);
217+
218+
// object and offset need to match
219+
auto equal = prop.land(
220+
bv_utils.equal(object_bv0, object_bv1),
221+
bv_utils.equal(offset_bv0, offset_bv1));
222+
223+
return expr.id() == ID_equal ? equal : !equal;
224+
}
225+
}
171226
else if(expr.id()==ID_lt || expr.id()==ID_le ||
172227
expr.id()==ID_gt || expr.id()==ID_ge)
173228
{
@@ -463,8 +518,7 @@ bvt bv_pointerst::convert_pointer_type(const exprt &expr)
463518
count == 1,
464519
"there should be exactly one pointer-type operand in a pointer-type sum");
465520

466-
const std::size_t offset_bits = get_offset_width(type);
467-
bvt sum = bv_utils.build_constant(0, offset_bits);
521+
bvt sum = bv_utils.build_constant(0, bits);
468522

469523
forall_operands(it, plus_expr)
470524
{
@@ -484,7 +538,8 @@ bvt bv_pointerst::convert_pointer_type(const exprt &expr)
484538
bvt op=convert_bv(*it);
485539
CHECK_RETURN(!op.empty());
486540

487-
op = bv_utils.extension(op, offset_bits, rep);
541+
op = bv_utils.extension(op, bits, rep);
542+
CHECK_RETURN(op.size() == sum.size());
488543

489544
sum=bv_utils.add(sum, op);
490545
}
@@ -794,46 +849,66 @@ exprt bv_pointerst::bv_get_rec(
794849
bvt value_bv(bv.begin() + offset, bv.begin() + offset + bits);
795850

796851
std::string value = bits_to_string(prop, value_bv);
797-
std::string value_addr = bits_to_string(prop, object_literals(value_bv, pt));
798-
std::string value_offset =
799-
bits_to_string(prop, offset_literals(value_bv, pt));
800-
801852
// we treat these like bit-vector constants, but with
802853
// some additional annotation
803-
804854
const irep_idt bvrep = make_bvrep(bits, [&value](std::size_t i) {
805855
return value[value.size() - 1 - i] == '1';
806856
});
807857

808858
constant_exprt result(bvrep, type);
809859

810860
pointer_logict::pointert pointer;
811-
pointer.object =
812-
numeric_cast_v<std::size_t>(binary2integer(value_addr, false));
813-
pointer.offset=binary2integer(value_offset, true);
861+
862+
// Top bit set?
863+
if(value.front() == '1')
864+
{
865+
// It's a pointer into the table. Turn bits into number, but
866+
// first strip top bit.
867+
auto index = binary2integer(value.substr(1, std::string::npos), false);
868+
869+
if(index >= 0 && index < numbered_pointers.size())
870+
{
871+
// copy from table
872+
const auto &entry = numbered_pointers[numeric_cast_v<std::size_t>(index)];
873+
pointer.object = binary2integer(bits_to_string(prop, entry.first), false);
874+
pointer.offset = binary2integer(bits_to_string(prop, entry.second), true);
875+
}
876+
else
877+
{
878+
// out of bounds, we'll make it an 'invalid pointer'
879+
pointer.object = pointer_logic.get_invalid_object();
880+
pointer.offset = 0;
881+
}
882+
}
883+
else
884+
{
885+
// It's an object only, offset is zero.
886+
pointer.object = binary2integer(value, false);
887+
pointer.offset = 0;
888+
}
814889

815890
return annotated_pointer_constant_exprt{
816891
bvrep, pointer_logic.pointer_expr(pointer, pt)};
817892
}
818893

819-
bvt bv_pointerst::encode(const mp_integer &addr, const pointer_typet &type)
894+
bvt bv_pointerst::encode(const mp_integer &object, const pointer_typet &type)
820895
const
821896
{
822-
const std::size_t offset_bits = get_offset_width(type);
823-
const std::size_t object_bits = get_object_width(type);
897+
const auto width = type.get_width();
824898

825-
bvt zero_offset(offset_bits, const_literal(false));
826-
bvt object = bv_utils.build_constant(addr, object_bits);
899+
auto object_bv = bv_utilst::build_constant(object, width - 1);
827900

828-
return object_offset_encoding(object, zero_offset);
901+
// Offset is zero, just zero-extend object number.
902+
// Top bit is zero to indicate object-only pointer.
903+
return bv_utilst::concatenate(object_bv, {const_literal(false)});
829904
}
830905

831906
bvt bv_pointerst::offset_arithmetic(
832907
const pointer_typet &type,
833908
const bvt &bv,
834909
const mp_integer &x)
835910
{
836-
const std::size_t offset_bits = get_offset_width(type);
911+
const std::size_t offset_bits = type.get_width();
837912

838913
return offset_arithmetic(
839914
type, bv, 1, bv_utils.build_constant(x, offset_bits));
@@ -851,52 +926,60 @@ bvt bv_pointerst::offset_arithmetic(
851926
index.type().id()==ID_signedbv?bv_utilst::representationt::SIGNED:
852927
bv_utilst::representationt::UNSIGNED;
853928

854-
const std::size_t offset_bits = get_offset_width(type);
855-
bv_index=bv_utils.extension(bv_index, offset_bits, rep);
929+
PRECONDITION(bv.size() == type.get_width());
930+
931+
bv_index = bv_utils.extension(bv_index, bv.size(), rep);
856932

857933
return offset_arithmetic(type, bv, factor, bv_index);
858934
}
859935

860936
bvt bv_pointerst::offset_arithmetic(
861937
const pointer_typet &type,
862-
const bvt &bv,
938+
const bvt &pointer_bv,
863939
const mp_integer &factor,
864-
const bvt &index)
940+
const bvt &index_bv)
865941
{
866-
bvt bv_index;
942+
PRECONDITION(pointer_bv.size() == type.get_width());
943+
PRECONDITION(index_bv.size() == type.get_width());
944+
945+
bvt index_scaled_bv;
867946

868947
if(factor==1)
869-
bv_index=index;
948+
{
949+
index_scaled_bv = index_bv;
950+
}
870951
else
871952
{
872-
bvt bv_factor=bv_utils.build_constant(factor, index.size());
873-
bv_index = bv_utils.signed_multiplier(index, bv_factor);
953+
bvt factor_bv = bv_utils.build_constant(factor, pointer_bv.size());
954+
index_scaled_bv = bv_utils.signed_multiplier(
955+
bv_utils.sign_extension(index_bv, pointer_bv.size()), factor_bv);
874956
}
875957

876-
const std::size_t offset_bits = get_offset_width(type);
877-
bv_index = bv_utils.sign_extension(bv_index, offset_bits);
958+
bvt offset_bv = offset_literals(pointer_bv, type);
959+
CHECK_RETURN(offset_bv.size() == pointer_bv.size());
878960

879-
bvt offset_bv = offset_literals(bv, type);
961+
DATA_INVARIANT(
962+
offset_bv.size() == index_scaled_bv.size(),
963+
"pointer offset bitvector width");
880964

881-
bvt bv_tmp = bv_utils.add(offset_bv, bv_index);
965+
bvt sum = bv_utils.add(offset_bv, index_scaled_bv);
882966

883-
return object_offset_encoding(object_literals(bv, type), bv_tmp);
967+
return object_offset_encoding(object_literals(pointer_bv, type), sum, type);
884968
}
885969

886970
bvt bv_pointerst::add_addr(const exprt &expr)
887971
{
888972
const auto a = pointer_logic.add_object(expr);
889973

890974
const pointer_typet type = pointer_type(expr.type());
891-
const std::size_t object_bits = get_object_width(type);
892-
const std::size_t max_objects=std::size_t(1)<<object_bits;
975+
const mp_integer object_bits = type.get_width();
976+
const mp_integer max_objects = mp_integer(1) << object_bits;
893977

894978
if(a==max_objects)
895979
throw analysis_exceptiont(
896-
"too many addressed objects: maximum number of objects is set to 2^n=" +
897-
std::to_string(max_objects) + " (with n=" + std::to_string(object_bits) +
898-
"); " +
899-
"use the `--object-bits n` option to increase the maximum number");
980+
"too many addressed objects: maximum number of objects is 2^n=" +
981+
integer2string(max_objects) + " (with n=" + integer2string(object_bits) +
982+
")");
900983

901984
return encode(a, type);
902985
}

0 commit comments

Comments
 (0)