Skip to content

Commit ec3df76

Browse files
committed
Added documentation
1 parent 13d0e37 commit ec3df76

File tree

2 files changed

+82
-46
lines changed

2 files changed

+82
-46
lines changed

wp/plugin/lib/equality.ml

Lines changed: 55 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@ open Base__Option.Monad_infix
99
module H = Bap.Std.Graphs.Ir
1010
module J = Graphlib.To_ocamlgraph (H)
1111
module BFS = Graph.Traverse.Bfs (J)
12-
module TidSet = Set.Make(Tid)
12+
module TidSet = Tid.Set
1313

14-
module TidMap = Map.Make(Tid)
15-
module VarMap = Map.Make(Var)
16-
module VarSet = Set.Make(Var)
14+
module TidMap = Tid.Map
15+
module VarMap = Var.Map
16+
module VarSet = Var.Set
1717
module TidTupleMap = Map.Make(struct type t = Tid.t * Tid.t [@@deriving sexp, compare] end)
1818

1919
type varToVarMap = Var.t VarMap.t
2020

21-
(* replaces the real variables with indices with only their base *)
21+
(* Replaces the real variables with indices with only their base *)
2222
let rec strip_indices (e : Exp.t) : Exp.t =
2323
let open Bap.Std.Bil.Types in
2424
match e with
@@ -55,9 +55,9 @@ let map_var (vmap : varToVarMap)
5555
(* if found in map, return what is found *)
5656
| Some v_found -> (vmap, v_found) |> Some
5757

58-
(* maps all virtual variables from e1 to their analagous variable within vmap;
59-
* if variable not in vmap, is added to vmap from e2
60-
* returns None if cannot map variables because subs do not match in structure *)
58+
(* Maps all virtual variables from e1 to their analagous variable within vmap;
59+
if a variable not in vmap, is added to vmap from e2
60+
returns None if cannot map variables because subs do not match in structure *)
6161
let rec sub_exp (vmap : varToVarMap)
6262
(e1 : Exp.t) (e2 : Exp.t) : (varToVarMap * Exp.t) option =
6363
let open Bap.Std.Bil.Types in
@@ -111,12 +111,12 @@ let rec sub_exp (vmap : varToVarMap)
111111
fun (vmap, exp2_bin1) -> vmap, Concat (exp1_bin1, exp2_bin1)
112112
| _, _ -> None
113113

114-
(* gets the block associated with a node in the graph *)
114+
(* Gets the block associated with a node in the graph *)
115115
let get_label (a: Bap.Std.Graphs.Ir.Node.t) : blk term =
116116
Bap.Std.Graphs.Ir.Node.label a
117117

118-
(* compares two defs; returns the updated varmap if they match; returns None
119-
* if do not match*)
118+
(* Compares two defs; returns the updated varmap if they match; returns None
119+
* if do not match. Only compares expressions and structure. *)
120120
let compare_def (def1 : def term) (def2 : def term) (vmap_orig : varToVarMap)
121121
: varToVarMap option =
122122
let var1_orig, var2 =
@@ -132,7 +132,7 @@ let compare_def (def1 : def term) (def2 : def term) (vmap_orig : varToVarMap)
132132
Some vmap
133133
else None
134134

135-
(* compares label based on only expression*)
135+
(* Compares label based on only expression and structure. *)
136136
let compare_lbl (lbl1 : label) (lbl2 : label) (map : varToVarMap) : varToVarMap option =
137137
match lbl1, lbl2 with
138138
| Direct _tid1, Direct _tid2 -> (* to be checked later on *) Some map
@@ -147,8 +147,7 @@ let compare_lbl (lbl1 : label) (lbl2 : label) (map : varToVarMap) : varToVarMap
147147

148148
(* Compares two things:
149149
- jmp1 and jmp2 match in structure
150-
- jmp1 and jmp2 match in all Exp.ts contained within
151-
*)
150+
- jmp1 and jmp2 match in all Exp.ts contained within *)
152151
let compare_jmp jmp1 jmp2 map =
153152
match Jmp.kind jmp1, Jmp.kind jmp2 with
154153
| Goto label1, Goto label2 -> compare_lbl label1 label2 map
@@ -172,7 +171,7 @@ let compare_phis phis1 phis2 map =
172171
if List.length phis1 > 0 || List.length phis2 > 0 then None
173172
else Some map
174173

175-
(* Check that all defs in two lists match. *)
174+
(* Check that all defs in two lists match in expression and structure but not TID. *)
176175
let compare_defs (defs1 : (def term) list)
177176
(defs2 : (def term) list) (map : varToVarMap) : varToVarMap option =
178177
match List.zip defs1 defs2 with
@@ -184,7 +183,7 @@ let compare_defs (defs1 : (def term) list)
184183
| None -> None
185184
| Some map -> compare_def d1 d2 map)
186185

187-
(* Check that all jmps in a list match in expression and structure. *)
186+
(* Check that all jmps in a list match in expression and structure but not TID. *)
188187
let compare_jmps jmps1 jmps2 map =
189188
match List.zip jmps1 jmps2 with
190189
| Core_kernel.List.Or_unequal_lengths.Unequal_lengths -> None
@@ -197,9 +196,12 @@ let compare_jmps jmps1 jmps2 map =
197196

198197
(* Compares everything about a block EXCEPT tids, which will be done later. *)
199198
let compare_block (blk1 : blk term) (blk2 : blk term) map =
200-
compare_defs (Term.enum def_t blk1 |> Sequence.to_list) (Term.enum def_t blk2 |> Sequence.to_list) map >>=
201-
fun map -> compare_jmps (Term.enum jmp_t blk1 |> Sequence.to_list) (Term.enum jmp_t blk2 |> Sequence.to_list) map >>=
202-
fun map -> compare_phis (Term.enum phi_t blk1 |> Sequence.to_list) (Term.enum phi_t blk2 |> Sequence.to_list) map
199+
compare_defs (Term.enum def_t blk1 |> Sequence.to_list)
200+
(Term.enum def_t blk2 |> Sequence.to_list) map >>=
201+
fun map -> compare_jmps (Term.enum jmp_t blk1 |> Sequence.to_list)
202+
(Term.enum jmp_t blk2 |> Sequence.to_list) map >>=
203+
fun map -> compare_phis (Term.enum phi_t blk1 |> Sequence.to_list)
204+
(Term.enum phi_t blk2 |> Sequence.to_list) map
203205

204206
(* Iterate through all reachable nodes in each cfg
205207
* and generate a map from tid to blk.*)
@@ -215,14 +217,17 @@ let get_node_maps
215217
BFS.fold acc TidMap.empty cfg2 in
216218
tid1_map, tid2_map
217219

218-
(* compares a sub to another sub; returns a map from index into sub 1
219-
* to the set of sub2 indices that it is syntactically equal to
220-
* and a map from (indx_sub1, indx_sub2) to var maps*)
221-
let compare_blocks (sub1: Sub.t) (sub2 : Sub.t) : (TidSet.t TidMap.t) * (varToVarMap TidTupleMap.t) =
220+
(* Compares a sub to another sub; returns a map from tid into sub1
221+
to the set of sub2 tids that it is syntactically equal to. Also returns
222+
a map from (tid_blk_sub1, tid_blk_sub2) to the mapping between variables
223+
used in those subs. *)
224+
let compare_blocks (sub1: Sub.t) (sub2 : Sub.t) :
225+
(TidSet.t TidMap.t) * (varToVarMap TidTupleMap.t) =
226+
222227
let cfg1, cfg2 = Sub.to_cfg sub1, Sub.to_cfg sub2 in
223-
(* blk1 indx -> set{blk2 indxs} *)
228+
(* blk1 tid -> set{blk2 tids} *)
224229
let blk_map = TidMap.empty in
225-
(* blk1 indx, blk2 indx -> varmap *)
230+
(* blk1 tid, blk2 tid -> varmap *)
226231
let blk_varmap = TidTupleMap.empty in
227232
let evaluator = (fun (node1 : Bap.Std.Graphs.Ir.Node.t) (blk_map, blk_varmap) ->
228233
let blk1 = get_label node1 in
@@ -246,8 +251,9 @@ let compare_blocks (sub1: Sub.t) (sub2 : Sub.t) : (TidSet.t TidMap.t) * (varToVa
246251
blk_map, blk_varmap) in
247252
BFS.fold evaluator (blk_map, blk_varmap) cfg1
248253

249-
(* compare the label but only draw comparisons with tid *)
250-
let compare_lbl_tid_only graph lbl1 lbl2 : bool=
254+
(* Compare a label.t with tid comparisons only *)
255+
let compare_lbl_tid_only (graph Tid.t TidMap.t) (lbl1 : Label.t)
256+
(lbl2 : Label.t) : bool=
251257
match lbl1, lbl2 with
252258
| Direct tid1, Direct tid2 ->
253259
let tid_mapped = TidMap.find_exn graph tid1 in
@@ -256,8 +262,8 @@ let compare_lbl_tid_only graph lbl1 lbl2 : bool=
256262
| _, _ -> false
257263

258264

259-
(* compare the jmp but with tid comparisons only *)
260-
let compare_jmp_tid_only graph jmp1 jmp2 =
265+
(* Compare a jmp with tid comparisons only *)
266+
let compare_jmp_tid_only (graph Tid.t TidMap.t) (jmp1 : jmp_t) (jmp2 : jmp_t) =
261267
match Jmp.kind jmp1, Jmp.kind jmp2 with
262268
| Goto label1, Goto label2 -> compare_lbl_tid_only graph label1 label2
263269
| Call call1, Call call2 ->
@@ -277,15 +283,17 @@ let compare_jmp_tid_only graph jmp1 jmp2 =
277283
Tid.equal tid_mapped tid2
278284
| _, _ -> false
279285

280-
let compare_blk_tid_only graph blk1 blk2 : bool =
286+
(* Check that the two blocks has matching TIDs in jumps. *)
287+
let compare_blk_tid_only (graph : Tid.t TidMap.t)
288+
(blk1 : blk term) (blk2 : blk term) bool =
281289
let jmps1 = Term.enum jmp_t blk1 |> Sequence.to_list in
282290
let jmps2 = Term.enum jmp_t blk2 |> Sequence.to_list in
283291
match List.zip jmps1 jmps2 with
284-
| Core_kernel.List.Or_unequal_lengths.Unequal_lengths -> (* TODO exception *) false
292+
| Core_kernel.List.Or_unequal_lengths.Unequal_lengths -> false
285293
| Core_kernel.List.Or_unequal_lengths.Ok z ->
286294
List.for_all z ~f:(fun (j1, j2) -> compare_jmp_tid_only graph j1 j2)
287295

288-
(* checks that the proposed mapping is actually isomorphic *)
296+
(* Check that the graph from tid to tid has matching TIDs in jumps. *)
289297
let check_isomorphism
290298
(graph : Tid.t TidMap.t)
291299
(tid_to_blk1 : (blk term) TidMap.t)
@@ -297,6 +305,9 @@ let check_isomorphism
297305
let blk2 = TidMap.find_exn tid_to_blk2 tid2 in
298306
compare_blk_tid_only graph blk1 blk2)
299307

308+
(* Checks that two maps from variable to variable can be merged.
309+
This means that they do not have contradictory definitions
310+
for the same key. *)
300311
let merge_maps (merged_map : varToVarMap) (new_map : varToVarMap) : varToVarMap option =
301312
let new_merged_map = VarMap.merge merged_map new_map
302313
~f:(fun ~key:_key v ->
@@ -312,6 +323,15 @@ let merge_maps (merged_map : varToVarMap) (new_map : varToVarMap) : varToVarMap
312323
false -> None
313324
| true -> Some new_merged_map
314325

326+
(* Given a [candidate_map] of sub1 block tids to a set of sub2 block tids that
327+
sub1 is syntactically equal to (barring TIDs in jmps), a [used_set] of already
328+
mapped to sub_1 TIDS, a [node_stack] of sub1 blks that have not yet been mapped to,
329+
- a partial mapping, denoted [graph], from sub1 tids to sub2 tids,
330+
- a mapping, [tid_to_blk1] from sub1 tids to sub1 blks,
331+
- a mapping, [tid_to_blk2] from sub1 tids to sub2 blks,
332+
- a map from (tid1, tid2) to a variable mapping [var_maps] generated by that pair of blocks
333+
- [merged_map], a map built out of block-wise maps corresponding
334+
to the current partial mapping *)
315335
let rec get_isomorphism
316336
(candidate_map : TidSet.t TidMap.t) (used_set : TidSet.t)
317337
(node_stack : (blk term) list) (graph : Tid.t TidMap.t)
@@ -344,28 +364,17 @@ let rec get_isomorphism
344364
graph tid_to_blk1 tid_to_blk2 var_maps merged_map)
345365
| None -> None
346366

347-
let get_length cfg : int =
348-
let f = ( fun node acc ->
349-
let lbl = get_label node in
350-
let def_len = Term.enum def_t lbl |> Sequence.to_list |> List.length in
351-
let jmp_len = Term.enum jmp_t lbl |> Sequence.to_list |> List.length in
352-
acc + def_len + jmp_len
353-
) in
354-
BFS.fold f 0 cfg
355-
367+
(* Gets all syntactically equal blocks between the two subs,
368+
Checks if it is possible to construct a mapping between
369+
the two sets of blocks that matches in control flow. *)
356370
let exist_isomorphism (sub1: Sub.t) (sub2 : Sub.t) : bool =
357371
let cfg1, cfg2 = Sub.to_cfg sub1, Sub.to_cfg sub2 in
358-
printf "\nLENGTH IS: %d\n" (get_length cfg1);
359372
let tid_to_blk1, tid_to_blk2 = get_node_maps cfg1 cfg2 in
360-
(* the variable mappings on a per-block basis are not actually used *)
361373
let tid_map, var_maps = compare_blocks sub1 sub2 in
362-
let _ = printf "PERCENTAGE_IDENTICAL_BLOCKS_IS: %.4f\n" (((List.length (TidMap.keys tid_map)) |> float_of_int) /. ((List.length (TidMap.keys tid_to_blk1)) |> float_of_int)) in
363-
(* List.iter (TidMap.keys tid_map) ~f:(fun key -> printf "TID MAP: %s\n" (Tid.to_string key)) ; *)
364374
let used_set = TidSet.empty in
365375
let node_stack = TidMap.data tid_to_blk1 in
366376
let node2_stack = TidMap.data tid_to_blk2 in
367377
let graph = TidMap.empty in
368-
printf "BLOCK LENGTH: %d\n" (List.length node_stack);
369378
(* short circuit if we already know that the length does not match *)
370379
if (List.length node_stack) = (List.length node2_stack) then
371380
match get_isomorphism tid_map used_set node_stack graph tid_to_blk1 tid_to_blk2 var_maps VarMap.empty with

wp/plugin/lib/equality.mli

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,30 @@
11
open Bap.Std
22

3+
module TidSet = Tid.Set
4+
module TidMap = Tid.Map
5+
6+
(* This function checks for exact syntactic equality between Sub.ts.
7+
This means that there must exist an block-level mapping between the blocks in sub 1 and
8+
in sub 2 such that syntax matches. Syntax includes several things:
9+
- the base of physical variable names
10+
- structure including control flow and phi terms (of which there should be none)
11+
- the usage of variables in the subs must match. E.g. if I have in sub 1:
12+
#45 := RBP
13+
mem := mem with [RSP, el]:u64 <- #45
14+
and
15+
#88 := RBP
16+
mem := mem with [RSP, el]:u64 <- #88
17+
Then these two blocks (and the entire sub.ts if they were to consist only of this)
18+
would match. However, if instead we had something like:
19+
#45 := RBP
20+
mem := mem with [RSP, el]:u64 <- #45
21+
and
22+
#88 := RBP
23+
mem := mem with [RSP, el]:u64 <- #90
24+
then the blocks would not be syntactically equal since #45 would be expected
25+
in the spot that #90 is taking.
26+
- The block TIDs mentioned within jmp_ts in sub1 when run through the
27+
constructed block mapping must match the corresponding TIDS
28+
in the same respective jmp_t in sub2.
29+
*)
330
val exist_isomorphism : Sub.t -> Sub.t -> bool

0 commit comments

Comments
 (0)