diff --git a/.gitignore b/.gitignore index 1f3d4a3..55abb29 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ *.*~ bisect*.out +syncer.native +syncer.byte _build/ coverage/ diff --git a/algo.ml b/algo.ml index 8961c38..0308fe4 100644 --- a/algo.ml +++ b/algo.ml @@ -1,13 +1,10 @@ -module Weak = Adler -module Strong = Digest - type action = | Match of int * int | Miss of Buffer.t | Start of int | Stop - + let output_action oc = function | Match (b,e) -> Io.write_byte oc 1; @@ -20,7 +17,7 @@ let output_action oc = function Io.write_byte oc 3; Io.write_int oc s | Stop -> Io.write_byte oc 4 - + let input_action ic = match input_byte ic with | 1 -> @@ -37,144 +34,147 @@ let input_action ic = Start s | 4 -> Stop | _ -> failwith "unknown action" - + +open Hash open Signature +module Rsync = functor (W:WEAK) -> functor (S:STRONG) -> struct + module MySig = Signature(W)(S) - -class delta_emitter signature new_fn handler = - let bs = signature.bs in - let buffer_size = 8 * bs in - let buffer = String.create buffer_size in -object(self) - val mutable _read = 0 - val mutable _first_free = 0 - val mutable _n_free = buffer_size - val mutable _first_todo = 0 - val mutable _previous_action = Start bs - val mutable _finished = false - val mutable _weak_ok = false - val _weak = Weak.make () - - method _examine_block buffer offset length = - let wd = Weak.digest _weak in - match lookup_weak signature wd with - | None -> None - | Some block -> - let strong = Strong.substring buffer offset length in - if strong = block.strong - then Some block.index - else None - - method _miss char = - match _previous_action with - | Miss b when Buffer.length b < bs -> Buffer.add_char b char - | other -> - let () = handler _previous_action in - let b = Buffer.create bs in - let () = Buffer.add_char b char in - _previous_action <- Miss b - - method _match index = - match _previous_action with - | Match (b,e) when e + 1 = index -> _previous_action <- Match(b,index) - | other -> let () = handler _previous_action in - _previous_action <- Match(index,index) - - method run () = - let ic = open_in new_fn in - while not _finished do - begin - let read = input ic buffer _first_free _n_free in - if read = 0 then - _finished <- true - else - let () = _first_free <- _first_free + read in - let () = _n_free <- _n_free - read in - () - end; - while _first_todo + bs < _first_free do - if not _weak_ok then + class delta_emitter signature new_fn handler = + let bs = MySig.block_size signature in + let buffer_size = 8 * bs in + let buffer = String.create buffer_size in + object(self) + val mutable _read = 0 + val mutable _first_free = 0 + val mutable _n_free = buffer_size + val mutable _first_todo = 0 + val mutable _previous_action = Start bs + val mutable _finished = false + val mutable _weak_ok = false + val _weak = W.make () + + method _examine_block buffer offset length = + let wd = W.digest _weak in + match MySig.lookup_weak signature wd with + | None -> None + | Some bs -> + let strong = S.substring buffer offset length in + if strong = MySig.bs_strong bs + then Some (MySig.bs_index bs) + else None + + method _miss char = + match _previous_action with + | Miss b when Buffer.length b < bs -> Buffer.add_char b char + | other -> + let () = handler _previous_action in + let b = Buffer.create bs in + let () = Buffer.add_char b char in + _previous_action <- Miss b + + method _match index = + match _previous_action with + | Match (b,e) when e + 1 = index -> _previous_action <- Match(b,index) + | other -> let () = handler _previous_action in + _previous_action <- Match(index,index) + + method run () = + let ic = open_in new_fn in + while not _finished do + begin + let read = input ic buffer _first_free _n_free in + if read = 0 then + _finished <- true + else + let () = _first_free <- _first_free + read in + let () = _n_free <- _n_free - read in + () + end; + while _first_todo + bs < _first_free do + if not _weak_ok then + begin + W.update _weak buffer _first_todo bs; + _weak_ok <- true + end; begin - Weak.update _weak buffer _first_todo bs; - _weak_ok <- true - end; - begin - match self # _examine_block buffer _first_todo bs with - | None -> - self # _miss buffer.[_first_todo]; - Weak.rotate _weak buffer.[_first_todo] buffer.[_first_todo + bs]; - _first_todo <- _first_todo + 1 - | Some i -> - self # _match i; - _first_todo <- _first_todo + bs; - _weak_ok <- false; - Weak.reset _weak - end + match self # _examine_block buffer _first_todo bs with + | None -> + self # _miss buffer.[_first_todo]; + W.rotate _weak buffer.[_first_todo] buffer.[_first_todo + bs]; + _first_todo <- _first_todo + 1 + | Some i -> + self # _match i; + _first_todo <- _first_todo + bs; + _weak_ok <- false; + W.reset _weak + end + done; + if _first_todo + bs >= _first_free + then + begin + let length = _first_free - _first_todo in + String.blit buffer _first_todo buffer 0 length; + _first_todo <- 0; + _first_free <- length; + _n_free <- buffer_size - length + end done; - if _first_todo + bs >= _first_free - then - begin - let length = _first_free - _first_todo in - String.blit buffer _first_todo buffer 0 length; - _first_todo <- 0; - _first_free <- length; - _n_free <- buffer_size - length - end - done; - let rec loop i = - if i = _first_free - then () - else - let () = self # _miss (buffer.[i]) in - loop (i+1) - in - loop _first_todo; - handler _previous_action; - handler Stop; - close_in ic -end - - -class delta_applier ic (old_fn:string) oc = - let fd = Unix.openfile old_fn [Unix.O_RDONLY] 0o640 in - let really_read buffer bs = - let rec loop pos todo = - if todo = 0 then () - else - let read = Unix.read fd buffer pos todo in - loop (pos+ read) (todo - read) - in - loop 0 bs - in -object(self) - method run () = - let bs = match input_action ic with - | Start s -> s - | _ -> failwith "start @ beginning" - in - let rec loop () = - let action = input_action ic in - match action with - | Match(b,e) -> self # apply_match bs b e ; loop () - | Miss b -> self # apply_miss b ; loop () - | Start s -> failwith "can't restart" - | Stop -> () - in - loop (); - Unix.close fd - - method apply_match bs b e = - let buffer = String.create bs in - let n = e + 1 - b in - let rec loop i = - if i = 0 then () - else - let () = really_read buffer bs in - let () = output_string oc buffer in - loop (i-1) + let rec loop i = + if i = _first_free + then () + else + let () = self # _miss (buffer.[i]) in + loop (i+1) + in + loop _first_todo; + handler _previous_action; + handler Stop; + close_in ic + end + + + class delta_applier ic (old_fn:string) oc = + let fd = Unix.openfile old_fn [Unix.O_RDONLY] 0o640 in + let really_read buffer bs = + let rec loop pos todo = + if todo = 0 then () + else + let read = Unix.read fd buffer pos todo in + loop (pos+ read) (todo - read) + in + loop 0 bs in - let _ = Unix.lseek fd (bs * b) Unix.SEEK_SET in - loop n - - method apply_miss b = output_string oc (Buffer.contents b) + object(self) + method run () = + let bs = match input_action ic with + | Start s -> s + | _ -> failwith "start @ beginning" + in + let rec loop () = + let action = input_action ic in + match action with + | Match(b,e) -> self # apply_match bs b e ; loop () + | Miss b -> self # apply_miss b ; loop () + | Start s -> failwith "can't restart" + | Stop -> () + in + loop (); + Unix.close fd + + method apply_match bs b e = + let buffer = String.create bs in + let n = e + 1 - b in + let rec loop i = + if i = 0 then () + else + let () = really_read buffer bs in + let () = output_string oc buffer in + loop (i-1) + in + let _ = Unix.lseek fd (bs * b) Unix.SEEK_SET in + loop n + + method apply_miss b = output_string oc (Buffer.contents b) + end end diff --git a/compile.sh b/compile.sh new file mode 100755 index 0000000..626253d --- /dev/null +++ b/compile.sh @@ -0,0 +1,12 @@ +#ocamlbuild -use-ocamlfind test.native + +ocamlbuild -use-ocamlfind \ + -tag 'package(bisect)' \ + -tag 'syntax(camlp4o)' \ + -tag 'syntax(bisect_pp)' \ + syncer.byte + +#ocamlbuild -use-ocamlfind -tag 'package(ocamlviz)' \ +# -tag 'pp(camlp4 pa_o.cmo str.cma pa_ocamlviz.cmo pr_o.cmo)' \ +# profile_test.native + diff --git a/hash.ml b/hash.ml new file mode 100644 index 0000000..1e7576a --- /dev/null +++ b/hash.ml @@ -0,0 +1,25 @@ +module type WEAK = sig + type t + val make : unit -> t + val from : string -> int -> int -> t + val reset: t -> unit + val digest: t -> int + val rotate: t -> char -> char -> unit + val update: t -> string -> int -> int -> unit +end + +module type STRONG = sig + type t + val to_hex : t -> string + val file : string -> t + val substring: string -> int -> int -> t + val write : out_channel -> t -> unit + val read : in_channel -> t +end + +module SDigest = (struct + include Digest + let read ic = Io.read_string ic + let write oc t = Io.write_string oc t + +end : STRONG) diff --git a/report.sh b/report.sh new file mode 100755 index 0000000..c41573c --- /dev/null +++ b/report.sh @@ -0,0 +1 @@ +bisect-report -I _build -html coverage $1 diff --git a/signature.ml b/signature.ml index 91387bc..e9c829e 100644 --- a/signature.ml +++ b/signature.ml @@ -1,118 +1,129 @@ -module Weak = Adler -module Strong = Digest +open Hash +open Unix -type block_signature = {weak:int; strong:string; index:int} +module Signature = functor (W:WEAK) -> functor (S:STRONG) -> struct + type block_signature = {weak:int; strong:S.t; index:int} + + let compare_weak ba bb = compare ba.weak bb.weak + let compare_index ba bb = compare ba.index bb.index + let bs_strong bs = bs.strong + let bs_index bs = bs.index -let compare_weak ba bb = compare ba.weak bb.weak -let compare_index ba bb = compare ba.index bb.index + type t = {len:int; bs: int; blocks: block_signature array;} -type t = {len:int; bs: int; blocks: block_signature array;} + let block_size t = t.bs + let length t = Array.length t.blocks -let length t = Array.length t.blocks - -let lookup_weak t w = - let rec find min max = - let mid = (min + max) / 2 in - let block = t.blocks.(mid) in - let weak = block.weak in - if w = weak then Some block - else if min > max then None - else if w > weak then find (mid+1) max - else find min (mid -1) - in - let len = length t in - find 0 (len -1) - -let create fn bs = - let ic = open_in fn in - let len = in_channel_length ic in - let buf = String.create bs in - let read_block size index = - let () = really_input ic buf 0 size in - let a = Weak.from buf 0 size in - let weak = Weak.digest a in - let strong = Strong.substring buf 0 size in - {weak;strong;index} - in - let rec read_blocks acc todo i = - if todo >= bs then - let block = read_block bs i in - read_blocks (block :: acc) (todo - bs) (i+1) - else - let block = read_block todo i in - List.rev (block :: acc) - in - let blocks_l = read_blocks [] len 0 in - let blocks = Array.of_list blocks_l in - let () = close_in ic in - Array.sort compare_weak blocks; - {len;bs;blocks;} + let optimal fn = + let stat = Unix.stat fn in + let size = stat.st_size in + let rc = int_of_float (2. *. sqrt (float size)) in (* thumb *) + let r = if rc < 32 then 32 else rc in + r -let output_signature oc t = - Io.write_int oc t.len; - Io.write_int oc t.bs; - Io.write_int oc (length t); - let i = ref 0 in - let one block = - Io.write_int oc block.weak; - Io.write_string oc block.strong; - assert (block.index = !i); + let lookup_weak t w = + let rec find min max = + let mid = (min + max) / 2 in + let block = t.blocks.(mid) in + let weak = block.weak in + if w = weak then Some block + else if min > max then None + else if w > weak then find (mid+1) max + else find min (mid -1) + in + let len = length t in + find 0 (len -1) + + let create fn bs = + let ic = open_in fn in + let len = in_channel_length ic in + let buf = String.create bs in + let read_block size index = + let () = really_input ic buf 0 size in + let a = W.from buf 0 size in + let weak = W.digest a in + let strong = S.substring buf 0 size in + {weak;strong;index} + in + let rec read_blocks acc todo i = + if todo >= bs then + let block = read_block bs i in + read_blocks (block :: acc) (todo - bs) (i+1) + else + let block = read_block todo i in + List.rev (block :: acc) + in + let blocks_l = read_blocks [] len 0 in + let blocks = Array.of_list blocks_l in + let () = close_in ic in + Array.sort compare_weak blocks; + {len;bs;blocks;} + + let output_signature oc t = + Io.write_int oc t.len; + Io.write_int oc t.bs; + Io.write_int oc (length t); + let i = ref 0 in + let one block = + Io.write_int oc block.weak; + S.write oc block.strong; + assert (block.index = !i); (* i is skipped: in order because sorted *) - incr i - in - Array.sort compare_index t.blocks; - Array.iter one t.blocks; - Array.sort compare_weak t.blocks + incr i + in + Array.sort compare_index t.blocks; + Array.iter one t.blocks; + Array.sort compare_weak t.blocks + + let input_signature ic = + let len = Io.read_int ic in + let bs = Io.read_int ic in + let nblocks = Io.read_int ic in + let rec loop acc index = + if index = nblocks then List.rev acc + else + let weak = Io.read_int ic in + let strong = S.read ic in + let b = {weak;strong;index} in + loop (b :: acc) (index + 1) in -let input_signature ic = - let len = Io.read_int ic in - let bs = Io.read_int ic in - let nblocks = Io.read_int ic in - let rec loop acc index = - if index = nblocks then List.rev acc - else - let weak = Io.read_int ic in - let strong = Io.read_string ic in - let b = {weak;strong;index} in - loop (b :: acc) (index + 1) in - - let blocks_l = loop [] 0 in - let blocks = Array.of_list blocks_l in - let r = {len;bs;blocks} in - Array.sort compare_weak r.blocks; - r - + let blocks_l = loop [] 0 in + let blocks = Array.of_list blocks_l in + let r = {len;bs;blocks} in + Array.sort compare_weak r.blocks; + r + -let to_file t fn = - let oc = open_out fn in - let () = output_signature oc t in - close_out oc - -let from_file fn = - let ic = open_in fn in - let s = input_signature ic in - close_in ic; - s - - -let equals t1 t2 = - let so_far = - t1.len = t2.len && - t1.bs = t2.bs in - let size1 = Array.length t1.blocks in - let size2 = Array.length t2.blocks in - let rec loop i acc = - if i = size1 then acc - else - begin - let bs1 = t1.blocks.(i) - and bs2 = t2.blocks.(i) in - let acc' = acc && bs1 = bs2 - and i' = i+1 in - loop i' acc' - end - in - let r = so_far && size1 = size2 && loop 0 true in - r + let to_file t fn = + let oc = open_out fn in + let () = output_signature oc t in + close_out oc + + let from_file fn = + let ic = open_in fn in + let s = input_signature ic in + close_in ic; + s + - + let equals t1 t2 = + let so_far = + t1.len = t2.len && + t1.bs = t2.bs in + let size1 = Array.length t1.blocks in + let size2 = Array.length t2.blocks in + let rec loop i acc = + if i = size1 then acc + else + begin + let bs1 = t1.blocks.(i) + and bs2 = t2.blocks.(i) in + let acc' = acc && bs1 = bs2 + and i' = i+1 in + loop i' acc' + end + in + let r = so_far && size1 = size2 && loop 0 true in + r + +end diff --git a/syncer.ml b/syncer.ml index 4c10238..d9da81e 100644 --- a/syncer.ml +++ b/syncer.ml @@ -3,11 +3,14 @@ type command = | Signature | Test +open Signature +module MySig = Signature(Adler) (Hash.SDigest) + let calculate_signature fn sig_fn = - let bs = (*... calculate this ... *) 512 in - let s = Signature.create fn bs in + let bs = MySig.optimal fn in + let s = MySig.create fn bs in let oc = open_out sig_fn in - Signature.output_signature oc s; + MySig.output_signature oc s; close_out oc diff --git a/test.ml b/test.ml index 1b78185..58f7a12 100644 --- a/test.ml +++ b/test.ml @@ -1,5 +1,10 @@ open Algo open Signature +open Hash + +module S = SDigest +module AA = Rsync(Adler)(S) +module MySig = Signature(Adler)(S) let old_dir = "tests/old/" let new_dir = "tests/new/" @@ -23,35 +28,36 @@ let test_io () = let test_signature fn bs = Printf.printf "SIGNATURE:%s\n" fn; let old_fn = old_dir ^ fn in - let signature = Signature.create old_fn bs in + let signature = MySig.create old_fn bs in let signature_fn = sync_dir ^ fn ^ ".signature" in - let () = Signature.to_file signature signature_fn in - let signature2 = Signature.from_file signature_fn in - Printf.printf "%s: %b \n%!" fn (Signature.equals signature2 signature) + let () = MySig.to_file signature signature_fn in + let signature2 = MySig.from_file signature_fn in + Printf.printf "%s: %b \n%!" fn (MySig.equals signature2 signature) + let test_one fn bs = let old_fn = old_dir ^ fn in - let signature = Signature.create old_fn bs in + let signature = MySig.create old_fn bs in let signature_fn = sync_dir ^ fn ^ ".signature" in let delta_fn = sync_dir ^ fn ^ ".delta" in - let () = Signature.to_file signature signature_fn in + let () = MySig.to_file signature signature_fn in let oc = open_out delta_fn in let handle_action action = output_action oc action in let new_fn = new_dir ^ fn in - let signature2 = Signature.from_file signature_fn in + let signature2 = MySig.from_file signature_fn in (* let signature2 = signature in *) assert (signature2 = signature); - let emitter = new delta_emitter signature2 new_fn handle_action in + let emitter = new AA.delta_emitter signature2 new_fn handle_action in let () = emitter # run () in let () = close_out oc in let ic = open_in delta_fn in let sync_fn = sync_dir ^ fn in let sync_oc = open_out sync_fn in - let applier = new delta_applier ic old_fn sync_oc in + let applier = new AA.delta_applier ic old_fn sync_oc in applier # run () ; close_out sync_oc; - let d_new = Digest.to_hex(Digest.file new_fn) in - let d_synced = Digest.to_hex(Digest.file sync_fn) in + let d_new = S.to_hex(S.file new_fn) in + let d_synced = S.to_hex(S.file sync_fn) in Printf.printf "%s: %s =?= %s\n%!" fn d_new d_synced; ()