@@ -9,16 +9,16 @@ open Base__Option.Monad_infix
9
9
module H = Bap.Std.Graphs. Ir
10
10
module J = Graphlib. To_ocamlgraph (H )
11
11
module BFS = Graph.Traverse. Bfs (J )
12
- module TidSet = Set. Make ( Tid )
12
+ module TidSet = Tid. Set
13
13
14
- module TidMap = Map. Make ( Tid )
15
- module VarMap = Map. Make ( Var )
16
- module VarSet = Set. Make ( Var )
14
+ module TidMap = Tid. Map
15
+ module VarMap = Var. Map
16
+ module VarSet = Var. Set
17
17
module TidTupleMap = Map. Make (struct type t = Tid. t * Tid. t [@@ deriving sexp, compare] end )
18
18
19
19
type varToVarMap = Var .t VarMap .t
20
20
21
- (* replaces the real variables with indices with only their base *)
21
+ (* Replaces the real variables with indices with only their base *)
22
22
let rec strip_indices (e : Exp.t ) : Exp.t =
23
23
let open Bap.Std.Bil.Types in
24
24
match e with
@@ -55,9 +55,9 @@ let map_var (vmap : varToVarMap)
55
55
(* if found in map, return what is found *)
56
56
| Some v_found -> (vmap, v_found) |> Some
57
57
58
- (* maps all virtual variables from e1 to their analagous variable within vmap;
59
- * if variable not in vmap, is added to vmap from e2
60
- * returns None if cannot map variables because subs do not match in structure *)
58
+ (* Maps all virtual variables from e1 to their analagous variable within vmap;
59
+ if a variable not in vmap, is added to vmap from e2
60
+ returns None if cannot map variables because subs do not match in structure *)
61
61
let rec sub_exp (vmap : varToVarMap )
62
62
(e1 : Exp.t ) (e2 : Exp.t ) : (varToVarMap * Exp.t) option =
63
63
let open Bap.Std.Bil.Types in
@@ -111,12 +111,12 @@ let rec sub_exp (vmap : varToVarMap)
111
111
fun (vmap , exp2_bin1 ) -> vmap, Concat (exp1_bin1, exp2_bin1)
112
112
| _ , _ -> None
113
113
114
- (* gets the block associated with a node in the graph *)
114
+ (* Gets the block associated with a node in the graph *)
115
115
let get_label (a : Bap.Std.Graphs.Ir.Node.t ) : blk term =
116
116
Bap.Std.Graphs.Ir.Node. label a
117
117
118
- (* compares two defs; returns the updated varmap if they match; returns None
119
- * if do not match*)
118
+ (* Compares two defs; returns the updated varmap if they match; returns None
119
+ * if do not match. Only compares expressions and structure. *)
120
120
let compare_def (def1 : def term ) (def2 : def term ) (vmap_orig : varToVarMap )
121
121
: varToVarMap option =
122
122
let var1_orig, var2 =
@@ -132,7 +132,7 @@ let compare_def (def1 : def term) (def2 : def term) (vmap_orig : varToVarMap)
132
132
Some vmap
133
133
else None
134
134
135
- (* compares label based on only expression*)
135
+ (* Compares label based on only expression and structure. *)
136
136
let compare_lbl (lbl1 : label ) (lbl2 : label ) (map : varToVarMap ) : varToVarMap option =
137
137
match lbl1, lbl2 with
138
138
| Direct _tid1 , Direct _tid2 -> (* to be checked later on *) Some map
@@ -147,8 +147,7 @@ let compare_lbl (lbl1 : label) (lbl2 : label) (map : varToVarMap) : varToVarMap
147
147
148
148
(* Compares two things:
149
149
- jmp1 and jmp2 match in structure
150
- - jmp1 and jmp2 match in all Exp.ts contained within
151
- *)
150
+ - jmp1 and jmp2 match in all Exp.ts contained within *)
152
151
let compare_jmp jmp1 jmp2 map =
153
152
match Jmp. kind jmp1, Jmp. kind jmp2 with
154
153
| Goto label1 , Goto label2 -> compare_lbl label1 label2 map
@@ -172,7 +171,7 @@ let compare_phis phis1 phis2 map =
172
171
if List. length phis1 > 0 || List. length phis2 > 0 then None
173
172
else Some map
174
173
175
- (* Check that all defs in two lists match. *)
174
+ (* Check that all defs in two lists match in expression and structure but not TID . *)
176
175
let compare_defs (defs1 : (def term) list )
177
176
(defs2 : (def term) list ) (map : varToVarMap ) : varToVarMap option =
178
177
match List. zip defs1 defs2 with
@@ -184,7 +183,7 @@ let compare_defs (defs1 : (def term) list)
184
183
| None -> None
185
184
| Some map -> compare_def d1 d2 map)
186
185
187
- (* Check that all jmps in a list match in expression and structure. *)
186
+ (* Check that all jmps in a list match in expression and structure but not TID . *)
188
187
let compare_jmps jmps1 jmps2 map =
189
188
match List. zip jmps1 jmps2 with
190
189
| Core_kernel.List.Or_unequal_lengths. Unequal_lengths -> None
@@ -197,9 +196,12 @@ let compare_jmps jmps1 jmps2 map =
197
196
198
197
(* Compares everything about a block EXCEPT tids, which will be done later. *)
199
198
let compare_block (blk1 : blk term ) (blk2 : blk term ) map =
200
- compare_defs (Term. enum def_t blk1 |> Sequence. to_list) (Term. enum def_t blk2 |> Sequence. to_list) map >> =
201
- fun map -> compare_jmps (Term. enum jmp_t blk1 |> Sequence. to_list) (Term. enum jmp_t blk2 |> Sequence. to_list) map >> =
202
- fun map -> compare_phis (Term. enum phi_t blk1 |> Sequence. to_list) (Term. enum phi_t blk2 |> Sequence. to_list) map
199
+ compare_defs (Term. enum def_t blk1 |> Sequence. to_list)
200
+ (Term. enum def_t blk2 |> Sequence. to_list) map >> =
201
+ fun map -> compare_jmps (Term. enum jmp_t blk1 |> Sequence. to_list)
202
+ (Term. enum jmp_t blk2 |> Sequence. to_list) map >> =
203
+ fun map -> compare_phis (Term. enum phi_t blk1 |> Sequence. to_list)
204
+ (Term. enum phi_t blk2 |> Sequence. to_list) map
203
205
204
206
(* Iterate through all reachable nodes in each cfg
205
207
* and generate a map from tid to blk.*)
@@ -215,14 +217,17 @@ let get_node_maps
215
217
BFS. fold acc TidMap. empty cfg2 in
216
218
tid1_map, tid2_map
217
219
218
- (* compares a sub to another sub; returns a map from index into sub 1
219
- * to the set of sub2 indices that it is syntactically equal to
220
- * and a map from (indx_sub1, indx_sub2) to var maps*)
221
- let compare_blocks (sub1 : Sub.t ) (sub2 : Sub.t ) : (TidSet.t TidMap.t) * (varToVarMap TidTupleMap.t) =
220
+ (* Compares a sub to another sub; returns a map from tid into sub1
221
+ to the set of sub2 tids that it is syntactically equal to. Also returns
222
+ a map from (tid_blk_sub1, tid_blk_sub2) to the mapping between variables
223
+ used in those subs. *)
224
+ let compare_blocks (sub1 : Sub.t ) (sub2 : Sub.t ) :
225
+ (TidSet. t TidMap. t ) * (varToVarMap TidTupleMap. t ) =
226
+
222
227
let cfg1, cfg2 = Sub. to_cfg sub1, Sub. to_cfg sub2 in
223
- (* blk1 indx -> set{blk2 indxs } *)
228
+ (* blk1 tid -> set{blk2 tids } *)
224
229
let blk_map = TidMap. empty in
225
- (* blk1 indx , blk2 indx -> varmap *)
230
+ (* blk1 tid , blk2 tid -> varmap *)
226
231
let blk_varmap = TidTupleMap. empty in
227
232
let evaluator = (fun (node1 : Bap.Std.Graphs.Ir.Node.t ) (blk_map , blk_varmap ) ->
228
233
let blk1 = get_label node1 in
@@ -246,8 +251,9 @@ let compare_blocks (sub1: Sub.t) (sub2 : Sub.t) : (TidSet.t TidMap.t) * (varToVa
246
251
blk_map, blk_varmap) in
247
252
BFS. fold evaluator (blk_map, blk_varmap) cfg1
248
253
249
- (* compare the label but only draw comparisons with tid *)
250
- let compare_lbl_tid_only graph lbl1 lbl2 : bool =
254
+ (* Compare a label.t with tid comparisons only *)
255
+ let compare_lbl_tid_only (graph Tid. t TidMap. t ) (lbl1 : Label.t )
256
+ (lbl2 : Label.t ) : bool =
251
257
match lbl1, lbl2 with
252
258
| Direct tid1 , Direct tid2 ->
253
259
let tid_mapped = TidMap. find_exn graph tid1 in
@@ -256,8 +262,8 @@ let compare_lbl_tid_only graph lbl1 lbl2 : bool=
256
262
| _ , _ -> false
257
263
258
264
259
- (* compare the jmp but with tid comparisons only *)
260
- let compare_jmp_tid_only graph jmp1 jmp2 =
265
+ (* Compare a jmp with tid comparisons only *)
266
+ let compare_jmp_tid_only ( graph Tid. t TidMap. t ) ( jmp1 : jmp_t ) ( jmp2 : jmp_t ) =
261
267
match Jmp. kind jmp1, Jmp. kind jmp2 with
262
268
| Goto label1 , Goto label2 -> compare_lbl_tid_only graph label1 label2
263
269
| Call call1 , Call call2 ->
@@ -277,15 +283,17 @@ let compare_jmp_tid_only graph jmp1 jmp2 =
277
283
Tid. equal tid_mapped tid2
278
284
| _ , _ -> false
279
285
280
- let compare_blk_tid_only graph blk1 blk2 : bool =
286
+ (* Check that the two blocks has matching TIDs in jumps. *)
287
+ let compare_blk_tid_only (graph : Tid.t TidMap.t )
288
+ (blk1 : blk term ) (blk2 : blk term ) bool =
281
289
let jmps1 = Term. enum jmp_t blk1 |> Sequence. to_list in
282
290
let jmps2 = Term. enum jmp_t blk2 |> Sequence. to_list in
283
291
match List. zip jmps1 jmps2 with
284
- | Core_kernel.List.Or_unequal_lengths. Unequal_lengths -> (* TODO exception *) false
292
+ | Core_kernel.List.Or_unequal_lengths. Unequal_lengths -> false
285
293
| Core_kernel.List.Or_unequal_lengths. Ok z ->
286
294
List. for_all z ~f: (fun (j1 , j2 ) -> compare_jmp_tid_only graph j1 j2)
287
295
288
- (* checks that the proposed mapping is actually isomorphic *)
296
+ (* Check that the graph from tid to tid has matching TIDs in jumps. *)
289
297
let check_isomorphism
290
298
(graph : Tid.t TidMap.t )
291
299
(tid_to_blk1 : (blk term) TidMap.t )
@@ -297,6 +305,9 @@ let check_isomorphism
297
305
let blk2 = TidMap. find_exn tid_to_blk2 tid2 in
298
306
compare_blk_tid_only graph blk1 blk2)
299
307
308
+ (* Checks that two maps from variable to variable can be merged.
309
+ This means that they do not have contradictory definitions
310
+ for the same key. *)
300
311
let merge_maps (merged_map : varToVarMap ) (new_map : varToVarMap ) : varToVarMap option =
301
312
let new_merged_map = VarMap. merge merged_map new_map
302
313
~f: (fun ~key :_key v ->
@@ -312,6 +323,15 @@ let merge_maps (merged_map : varToVarMap) (new_map : varToVarMap) : varToVarMap
312
323
false -> None
313
324
| true -> Some new_merged_map
314
325
326
+ (* Given a [candidate_map] of sub1 block tids to a set of sub2 block tids that
327
+ sub1 is syntactically equal to (barring TIDs in jmps), a [used_set] of already
328
+ mapped to sub_1 TIDS, a [node_stack] of sub1 blks that have not yet been mapped to,
329
+ - a partial mapping, denoted [graph], from sub1 tids to sub2 tids,
330
+ - a mapping, [tid_to_blk1] from sub1 tids to sub1 blks,
331
+ - a mapping, [tid_to_blk2] from sub1 tids to sub2 blks,
332
+ - a map from (tid1, tid2) to a variable mapping [var_maps] generated by that pair of blocks
333
+ - [merged_map], a map built out of block-wise maps corresponding
334
+ to the current partial mapping *)
315
335
let rec get_isomorphism
316
336
(candidate_map : TidSet.t TidMap.t ) (used_set : TidSet.t )
317
337
(node_stack : (blk term) list ) (graph : Tid.t TidMap.t )
@@ -344,28 +364,17 @@ let rec get_isomorphism
344
364
graph tid_to_blk1 tid_to_blk2 var_maps merged_map)
345
365
| None -> None
346
366
347
- let get_length cfg : int =
348
- let f = ( fun node acc ->
349
- let lbl = get_label node in
350
- let def_len = Term. enum def_t lbl |> Sequence. to_list |> List. length in
351
- let jmp_len = Term. enum jmp_t lbl |> Sequence. to_list |> List. length in
352
- acc + def_len + jmp_len
353
- ) in
354
- BFS. fold f 0 cfg
355
-
367
+ (* Gets all syntactically equal blocks between the two subs,
368
+ Checks if it is possible to construct a mapping between
369
+ the two sets of blocks that matches in control flow. *)
356
370
let exist_isomorphism (sub1 : Sub.t ) (sub2 : Sub.t ) : bool =
357
371
let cfg1, cfg2 = Sub. to_cfg sub1, Sub. to_cfg sub2 in
358
- printf " \n LENGTH IS: %d\n " (get_length cfg1);
359
372
let tid_to_blk1, tid_to_blk2 = get_node_maps cfg1 cfg2 in
360
- (* the variable mappings on a per-block basis are not actually used *)
361
373
let tid_map, var_maps = compare_blocks sub1 sub2 in
362
- let _ = printf " PERCENTAGE_IDENTICAL_BLOCKS_IS: %.4f\n " (((List. length (TidMap. keys tid_map)) |> float_of_int) /. ((List. length (TidMap. keys tid_to_blk1)) |> float_of_int)) in
363
- (* List.iter (TidMap.keys tid_map) ~f:(fun key -> printf "TID MAP: %s\n" (Tid.to_string key)) ; *)
364
374
let used_set = TidSet. empty in
365
375
let node_stack = TidMap. data tid_to_blk1 in
366
376
let node2_stack = TidMap. data tid_to_blk2 in
367
377
let graph = TidMap. empty in
368
- printf " BLOCK LENGTH: %d\n " (List. length node_stack);
369
378
(* short circuit if we already know that the length does not match *)
370
379
if (List. length node_stack) = (List. length node2_stack) then
371
380
match get_isomorphism tid_map used_set node_stack graph tid_to_blk1 tid_to_blk2 var_maps VarMap. empty with
0 commit comments