Skip to content

Commit 251c257

Browse files
committed
Add counterexample-guided loop assigns synthesis
Implement the functionality described below. Motivation --- This loop assigns synthesizer use the idea counter-example-guided synthesis (CEGIS) to synthesize loop assigns targets. It is based on the same CEGIS framework as the loop invariant synthesizer w ith the following difference. Use Cases --- Our loop invariants synthesizer will synthesize invariants clauses for all loops without loop invariant annotation. The loop assigns synthesizer will synthesize assigns clauses for all loops without loop invariant annotation **and** loop assigns annotation. Cause Loop ID --- In the loop invariant synthesizer, we say a loop is a cause loop if the violation is dependent on the loop's havoc instructions. Because invariant clauses affect the havoced values which could flow into variables outside the loop. In the loop assigns synthesizer, we say a loop is a cause loop if the violation is in the loop. Because assigns clauses only affect the CAR of its own loop body. Synthesizer --- For an assignable violation with checked target `T`, the new assigns target will be `T`. If `T` is a non-constant target, e.g., `ptr[i]` with `i` also an assigns target, we widen it to `__CPROVER_POINTER_OBJECT(T)`. We add the new target to cause loop one by one starting from the most-inner one. Until the assignable violation is resolved.
1 parent 50a795e commit 251c257

File tree

14 files changed

+364
-88
lines changed

14 files changed

+364
-88
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#include <assert.h>
2+
#include <stdlib.h>
3+
4+
#define SIZE 8
5+
6+
struct blob
7+
{
8+
char *data;
9+
};
10+
11+
void main()
12+
{
13+
struct blob *b = malloc(sizeof(struct blob));
14+
__CPROVER_assume(b != NULL);
15+
16+
b->data = malloc(SIZE);
17+
__CPROVER_assume(b->data != NULL);
18+
19+
b->data[5] = 0;
20+
unsigned result = 0;
21+
22+
unsigned arr[3] = {0, 0, 0};
23+
24+
for(unsigned int i = 0; i < SIZE; i++)
25+
// clang-format off
26+
// __CPROVER_assigns(i, result)
27+
// clang-format on
28+
{
29+
result += 1;
30+
}
31+
32+
for(unsigned int i = 0; i < SIZE; i++)
33+
// clang-format off
34+
// __CPROVER_assigns(i, arr[0])
35+
// clang-format on
36+
{
37+
arr[0] += 1;
38+
}
39+
40+
for(unsigned j = 0; j < SIZE; j++)
41+
// __CPROVER_assigns(j, __CPROVER_POINTER_OBJECT(b->data))
42+
{
43+
for(unsigned i = 0; i < SIZE; i++)
44+
// clang-format off
45+
// __CPROVER_assigns(i, j, __CPROVER_POINTER_OBJECT(b->data))
46+
// clang-format on
47+
{
48+
b->data[i] = 1;
49+
}
50+
}
51+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
CORE
2+
main.c
3+
--pointer-check
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
^\[main.\d+\] .* Check loop invariant before entry: SUCCESS$
7+
^\[main.assigns.\d+\] .* Check that i is assignable: SUCCESS$
8+
^\[main.assigns.\d+\] .* Check that j is assignable: SUCCESS$
9+
^\[main.assigns.\d+\] .* Check that result is assignable: SUCCESS$
10+
^\[main.assigns.\d+\] .* Check that b->data\[(.*)i\] is assignable: SUCCESS$
11+
^\[main.assigns.\d+\] .* Check that arr\[(.*)0\] is assignable: SUCCESS$
12+
^\[main.\d+\] .* Check that loop invariant is preserved: SUCCESS$
13+
^VERIFICATION SUCCESSFUL$
14+
--
15+
--
16+
This test is a variation of contracts/loop_assigns-01.
17+
It shows that we can synthesize loop assigns pointer_object(b->data) that
18+
cannot be inferred by goto-instrument.

src/goto-instrument/contracts/contracts.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,11 +1457,11 @@ void code_contractst::apply_loop_contracts(
14571457
if(original_loop_number_map.count(it_instr) != 0)
14581458
continue;
14591459

1460-
// Store loop number for
1461-
// ASSIGN ENTERED_LOOP = TRUE
1460+
// Store loop number for two type of instrumented instructions.
1461+
// ASSIGN ENTERED_LOOP = false --- head of transformed loops.
1462+
// ASSIGN ENTERED_LOOP = true --- end of transformed loops.
14621463
if(
1463-
is_assignment_to_instrumented_variable(it_instr, ENTERED_LOOP) &&
1464-
it_instr->assign_rhs() == true_exprt())
1464+
is_transformed_loop_end(it_instr) || is_transformed_loop_head(it_instr))
14651465
{
14661466
const auto &assign_lhs =
14671467
expr_try_dynamic_cast<symbol_exprt>(it_instr->assign_lhs());

src/goto-instrument/contracts/instrument_spec_assigns.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -708,9 +708,14 @@ void instrument_spec_assignst::inclusion_check_assertion(
708708
comment += " is assignable";
709709
source_location.set_comment(comment);
710710

711-
dest.add(goto_programt::make_assertion(
712-
inclusion_check_full(car, allow_null_lhs, include_stack_allocated),
713-
source_location));
711+
exprt inclusion_check =
712+
inclusion_check_full(car, allow_null_lhs, include_stack_allocated);
713+
// Record what target is checked.
714+
auto &checked_assigns =
715+
static_cast<exprt &>(inclusion_check.add(ID_checked_assigns));
716+
checked_assigns = car.target();
717+
718+
dest.add(goto_programt::make_assertion(inclusion_check, source_location));
714719
}
715720

716721
exprt instrument_spec_assignst::inclusion_check_single(

src/goto-instrument/contracts/utils.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -448,14 +448,20 @@ void generate_history_variables_initialization(
448448
program.destructive_append(history);
449449
}
450450

451+
bool is_transformed_loop_head(const goto_programt::const_targett &target)
452+
{
453+
// The head of a transformed loop is
454+
// ASSIGN entered_loop = false
455+
return is_assignment_to_instrumented_variable(target, ENTERED_LOOP) &&
456+
target->assign_rhs() == false_exprt();
457+
}
458+
451459
bool is_transformed_loop_end(const goto_programt::const_targett &target)
452460
{
453-
// The end of the loop end of transformed loop is
461+
// The end of a transformed loop is
454462
// ASSIGN entered_loop = true
455-
if(!is_assignment_to_instrumented_variable(target, ENTERED_LOOP))
456-
return false;
457-
458-
return target->assign_rhs() == true_exprt();
463+
return is_assignment_to_instrumented_variable(target, ENTERED_LOOP) &&
464+
target->assign_rhs() == true_exprt();
459465
}
460466

461467
bool is_assignment_to_instrumented_variable(

src/goto-instrument/contracts/utils.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,10 @@ void generate_history_variables_initialization(
210210
const irep_idt &mode,
211211
goto_programt &program);
212212

213-
/// Return true if `target` is the loop end of some transformed code.
213+
/// Return true if `target` is the head of some transformed loop.
214+
bool is_transformed_loop_head(const goto_programt::const_targett &target);
215+
216+
/// Return true if `target` is the end of some transformed loop.
214217
bool is_transformed_loop_end(const goto_programt::const_targett &target);
215218

216219
/// Return true if `target` is an assignment to an instrumented variable with

src/goto-synthesizer/cegis_verifier.cpp

Lines changed: 118 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Author: Qinheping Hu
3232
#include <goto-checker/all_properties_verifier_with_trace_storage.h>
3333
#include <goto-checker/multi_path_symex_checker.h>
3434
#include <goto-instrument/contracts/contracts.h>
35+
#include <goto-instrument/contracts/instrument_spec_assigns.h>
3536
#include <goto-instrument/contracts/utils.h>
3637
#include <goto-instrument/havoc_utils.h>
3738
#include <langapi/language_util.h>
@@ -93,11 +94,47 @@ optionst cegis_verifiert::get_options()
9394
return options;
9495
}
9596

96-
optionalt<loop_idt> cegis_verifiert::get_cause_loop_id(
97+
std::list<loop_idt>
98+
cegis_verifiert::get_cause_loop_id_for_assigns(const goto_tracet &goto_trace)
99+
{
100+
std::list<loop_idt> result;
101+
102+
// We say a loop is the cause loop of an assignable-violation if the inclusion
103+
// check is in the loop.
104+
105+
// So we check what loops the last step of the trace is in.
106+
// Transformed loop head:
107+
// ASSIGN entered_loop = false
108+
// Transformed loop end:
109+
// ASSIGN entered_loop = true
110+
for(const auto &step : goto_trace.steps)
111+
{
112+
// We are entering a loop.
113+
if(is_transformed_loop_head(step.pc))
114+
{
115+
result.push_front(
116+
loop_idt(step.function_id, original_loop_number_map[step.pc]));
117+
}
118+
// We are leaving a loop.
119+
else if(is_transformed_loop_end(step.pc))
120+
{
121+
const loop_idt loop_id(
122+
step.function_id, original_loop_number_map[step.pc]);
123+
INVARIANT(
124+
result.front() == loop_id, "Leaving a loop we haven't entered.");
125+
result.pop_front();
126+
}
127+
}
128+
129+
INVARIANT(!result.empty(), "The assignable violation is not in a loop.");
130+
return result;
131+
}
132+
133+
std::list<loop_idt> cegis_verifiert::get_cause_loop_id(
97134
const goto_tracet &goto_trace,
98135
const goto_programt::const_targett violation)
99136
{
100-
optionalt<loop_idt> result;
137+
std::list<loop_idt> result;
101138

102139
// build the dependence graph
103140
const namespacet ns(goto_model.symbol_table);
@@ -165,7 +202,8 @@ optionalt<loop_idt> cegis_verifiert::get_cause_loop_id(
165202
// if it is dependent on the loop havoc.
166203
if(reachable_set.count(dependence_graph[from].get_node_id()))
167204
{
168-
result = loop_idt(step.function_id, original_loop_number_map[step.pc]);
205+
result.push_back(
206+
loop_idt(step.function_id, original_loop_number_map[step.pc]));
169207
return result;
170208
}
171209
}
@@ -434,9 +472,12 @@ optionalt<cext> cegis_verifiert::verify()
434472
original_functions[fun_entry.first].copy_from(fun_entry.second.body);
435473
}
436474

437-
// Annotate the candidates tot the goto_model for checking.
475+
// Annotate the candidates to the goto_model for checking.
438476
annotate_invariants(invariant_candidates, goto_model);
439477

478+
// Annotate assigns
479+
annotate_assigns(assigns_map, goto_model);
480+
440481
// Control verbosity.
441482
// We allow non-error output message only when verbosity is set to at least 9.
442483
const unsigned original_verbosity = log.get_message_handler().get_verbosity();
@@ -495,15 +536,15 @@ optionalt<cext> cegis_verifiert::verify()
495536
if(property_it.second.status != property_statust::FAIL)
496537
continue;
497538

539+
// Store the violation that we want to fix with synthesized
540+
// assigns/invariant.
498541
first_violation = property_it.first;
499-
exprt violated_predicate = property_it.second.pc->condition();
500-
501-
// The pointer checked in the null-pointer-check violation.
502-
exprt checked_pointer = true_exprt();
503542

504543
// Type of the violation
505544
cext::violation_typet violation_type = cext::violation_typet::cex_other;
506545

546+
// Decide the violation type from the description of violation
547+
507548
// The violation is a pointer OOB check.
508549
if((property_it.second.description.find(
509550
"dereference failure: pointer outside object bounds in") !=
@@ -516,11 +557,6 @@ optionalt<cext> cegis_verifiert::verify()
516557
if(property_it.second.description.find("pointer NULL") != std::string::npos)
517558
{
518559
violation_type = cext::violation_typet::cex_null_pointer;
519-
checked_pointer = property_it.second.pc->condition()
520-
.operands()[0]
521-
.operands()[1]
522-
.operands()[0];
523-
INVARIANT(checked_pointer.id() == ID_symbol, "Checking pointer symbol");
524560
}
525561

526562
// The violation is a loop-invariant-preservation check.
@@ -537,51 +573,97 @@ optionalt<cext> cegis_verifiert::verify()
537573
violation_type = cext::violation_typet::cex_not_hold_upon_entry;
538574
}
539575

540-
// The loop which could be the cause of the violation.
541-
// We say a loop is the cause loop if the violated predicate is dependent
542-
// upon the write set of the loop.
543-
optionalt<loop_idt> cause_loop_id = get_cause_loop_id(
544-
checker->get_traces()[property_it.first], property_it.second.pc);
576+
// The violation is an assignable check.
577+
if(property_it.second.description.find("assignable") != std::string::npos)
578+
{
579+
violation_type = cext::violation_typet::cex_assignable;
580+
}
545581

546-
if(!cause_loop_id.has_value())
582+
// Compute the cause loop---the loop for which we synthesize loop contracts,
583+
// and the counterexample.
584+
585+
// If the violation is an assignable check, we synthesize assigns targets.
586+
// In the case, cause loops are all loops the violation is in. We keep
587+
// adding the new assigns target to the most-inner loop that does not
588+
// contain the new target until the assignable violation is resolved.
589+
590+
// For other cases, we synthesize loop invariant clauses. We synthesize
591+
// invariants for one loop at a time. So we return only the first cause loop
592+
// although there can be multiple ones.
593+
594+
log.debug() << "Start to compute cause loop ids." << messaget::eom;
595+
596+
const auto &trace = checker->get_traces()[property_it.first];
597+
598+
// Doing assigns-synthesis or invariant-synthesis
599+
if(violation_type == cext::violation_typet::cex_assignable)
600+
{
601+
cext result(violation_type);
602+
result.cause_loop_ids = get_cause_loop_id_for_assigns(trace);
603+
result.checked_pointer = static_cast<const exprt &>(
604+
property_it.second.pc->condition().find(ID_checked_assigns));
605+
restore_functions();
606+
return result;
607+
}
608+
609+
// We construct the full counterexample only for violations other than
610+
// assignable checks.
611+
612+
// Although there can be multiple cause loop ids. We only synthesize
613+
// loop invariants for the first cause loop.
614+
const std::list<loop_idt> cause_loop_ids =
615+
get_cause_loop_id(trace, property_it.second.pc);
616+
617+
if(cause_loop_ids.empty())
547618
{
548-
log.debug() << "No cause loop found!\n";
619+
log.debug() << "No cause loop found!" << messaget::eom;
549620
restore_functions();
550621

551622
return cext(violation_type);
552623
}
553624

554-
log.debug() << "Found cause loop with function id: "
555-
<< cause_loop_id.value().function_id
556-
<< ", and loop number: " << cause_loop_id.value().loop_number
557-
<< "\n";
558-
559625
// Decide whether the violation is in the cause loop.
560-
bool violation_in_loop = is_instruction_in_transfomed_loop(
561-
cause_loop_id.value(),
562-
goto_model.get_goto_function(cause_loop_id.value().function_id),
626+
bool is_violation_in_loop = is_instruction_in_transfomed_loop(
627+
cause_loop_ids.front(),
628+
goto_model.get_goto_function(cause_loop_ids.front().function_id),
563629
property_it.second.pc->location_number);
564630

631+
log.debug() << "Found cause loop with function id: "
632+
<< cause_loop_ids.front().function_id
633+
<< ", and loop number: " << cause_loop_ids.front().loop_number
634+
<< messaget::eom;
635+
565636
// We always strengthen in_clause if the violation is
566637
// invariant-not-preserved.
567638
if(violation_type == cext::violation_typet::cex_not_preserved)
568-
violation_in_loop = true;
639+
is_violation_in_loop = true;
569640

570641
restore_functions();
571642

572643
auto return_cex = build_cex(
573-
checker->get_traces()[property_it.first],
644+
trace,
574645
get_loop_head(
575-
cause_loop_id.value().loop_number,
646+
cause_loop_ids.front().loop_number,
576647
goto_model.goto_functions
577-
.function_map[cause_loop_id.value().function_id])
648+
.function_map[cause_loop_ids.front().function_id])
578649
->source_location());
579-
return_cex.violated_predicate = violated_predicate;
580-
return_cex.cause_loop_id = cause_loop_id;
581-
return_cex.checked_pointer = checked_pointer;
582-
return_cex.is_violation_in_loop = violation_in_loop;
650+
return_cex.violated_predicate = property_it.second.pc->condition();
651+
return_cex.cause_loop_ids = cause_loop_ids;
652+
return_cex.is_violation_in_loop = is_violation_in_loop;
583653
return_cex.violation_type = violation_type;
584654

655+
// The pointer checked in the null-pointer-check violation.
656+
if(violation_type == cext::violation_typet::cex_null_pointer)
657+
{
658+
return_cex.checked_pointer = property_it.second.pc->condition()
659+
.operands()[0]
660+
.operands()[1]
661+
.operands()[0];
662+
INVARIANT(
663+
return_cex.checked_pointer.id() == ID_symbol,
664+
"Checking pointer symbol");
665+
}
666+
585667
return return_cex;
586668
}
587669

0 commit comments

Comments
 (0)