Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

encode backward and use C stubs #223

Merged
merged 30 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d131534
avoid warning
c-cube Nov 25, 2023
c17bd5d
add bench binary, to be used with hyperfine
c-cube Nov 23, 2023
278e2d6
refactor
c-cube Nov 23, 2023
d157234
remove closure in `nested`
c-cube Nov 23, 2023
af10cbf
add C stubs for pbrt
c-cube Nov 23, 2023
1143773
remove dead code
c-cube Nov 23, 2023
efbe479
perf pbrt: encode backward
c-cube Nov 23, 2023
e18931a
benchs: avoid C stubs collisions
c-cube Nov 23, 2023
e2c8183
test: more precise failures in unit test for wrapper encoding
c-cube Nov 23, 2023
c84c4c5
gitignore
c-cube Nov 23, 2023
78c3b29
docs
c-cube Nov 23, 2023
f46d8b4
add optional `?size` param to Pbrt.Encoder.create
c-cube Nov 23, 2023
f51ec33
fix benches
c-cube Nov 24, 2023
abb79c5
fix pbrt: bug in nested
c-cube Nov 24, 2023
152dd3b
add current algo to benchmarks for encoding nested structs
c-cube Nov 24, 2023
8e983b7
chore: update bench
c-cube Nov 24, 2023
31f1318
perf pbrt: avoid allocating tuples when writing keys
c-cube Nov 24, 2023
16f41e7
perf pbrt: more inlining for varint; avoid going to int64 too early
c-cube Nov 24, 2023
4c5d2a3
perf pbrt: make `nested` inline
c-cube Nov 24, 2023
fecb35b
perf pbrt: make `bool` and `string` inline
c-cube Nov 24, 2023
f544b52
perf pbrt: manual unrolling for `List_util.rev_iter`
c-cube Nov 24, 2023
2fa2486
bench: factor out the allocation of encoder
c-cube Nov 24, 2023
325d0ec
CI: check all PRs, not just against master
c-cube Nov 24, 2023
97591c5
simplify `caml_pbrt_varint` stub
c-cube Nov 25, 2023
34161a8
fix codegen: for packed arrays, do make sure key comes first
c-cube Nov 27, 2023
5ad1cc9
fix pbrt: in repeated field, rev_iter was missing some fields
c-cube Nov 27, 2023
c4dec4c
fix codegen: proper encoding for maps
c-cube Nov 27, 2023
122d5f9
inlining annotations
c-cube Nov 27, 2023
fa9ac42
fix warning in generated code
c-cube Nov 27, 2023
6e968ff
add an example extracted from a bench
c-cube Nov 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ on:
branches:
- master
pull_request:
branches:
- master
jobs:
run:
name: Build
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ ocaml_protoc.native
*.orig
_opam
*.data
*.exe
2 changes: 2 additions & 0 deletions b_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Build and run the benchmark binary (benchs/bin/run.exe) with dune's release
# profile, forwarding every command-line argument to it.
# "$@" is quoted so that arguments containing whitespace or glob characters
# reach the binary unchanged instead of being re-split by the shell.
exec dune exec --profile=release -- benchs/bin/run.exe "$@"
98 changes: 67 additions & 31 deletions benchs/benchs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ module Dec = struct
(* make a buffer with the integers from [0] to [n] inside *)
let mk_buf_n n : string =
let enc = Pbrt.Encoder.create () in
for i = 0 to n do
for i = n downto 0 do
Pbrt.Encoder.int_as_varint i enc
done;
Pbrt.Encoder.to_string enc
Expand Down Expand Up @@ -270,7 +270,7 @@ module Dec_bits64 = struct
(* put the int64 integers from 0 to n in a dec *)
let mk_buf_n n : string =
let enc = Pbrt.Encoder.create () in
for i = 0 to n do
for i = n downto 0 do
Pbrt.Encoder.int_as_bits64 i enc
done;
Pbrt.Encoder.to_string enc
Expand Down Expand Up @@ -493,7 +493,7 @@ module Varint_size = struct

module C_while = struct
external varint_size : (int64[@unboxed]) -> int
= "caml_pbrt_varint_size_byte" "caml_pbrt_varint_size"
= "b_caml_pbrt_varint_size_byte" "b_caml_pbrt_varint_size"
[@@noalloc]

let loop ~n =
Expand Down Expand Up @@ -596,10 +596,13 @@ module Nested = struct
val nested : (t -> unit) -> t -> unit
end

(* ZigZag-map a signed 64-bit integer: the value shifted left by one bit is
   xor-ed with its sign bit replicated over all 64 bits (arithmetic shift
   right by 63). E.g. 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3. *)
let[@inline] zigzag i : int64 =
  let doubled = Int64.shift_left i 1 in
  let sign_mask = Int64.shift_right i 63 in
  Int64.logxor doubled sign_mask

module Make_enc (E : ENC) = struct
let enc_person (p : person) (e : E.t) : unit =
E.key 1 Bytes (E.string p.name) e;
E.key 2 Varint (E.int64_as_varint p.age) e;
E.key 2 Varint (E.int64_as_varint @@ zigzag p.age) e;
()

let enc_store (st : store) (e : E.t) : unit =
Expand Down Expand Up @@ -649,26 +652,49 @@ module Nested = struct
});
}

module Make_bench (E : ENC) = struct
include Make_enc (E)
(* What the nested-encoding benchmark needs from an encoder implementation:
   a way to create/reset an encoder, encode a [company] into it, and read
   the encoded bytes back as a string. *)
module type MK_COMPANY = sig
type t
(** Encoder state. *)

val name_of_enc : string
(** Label identifying this encoder in benchmark output. *)

val create : unit -> t
(** Make a fresh encoder. *)

val clear : t -> unit
(** Reset an encoder so that it can be reused for another encoding. *)

val enc_company : company -> t -> unit
(** Encode a company into the given encoder. *)

val to_string : t -> string
(** Contents of the encoder, as a string. *)
end

module Make_bench_of_mk_company (E : MK_COMPANY) = struct
let bench company =
mk_t (spf "nenc-%s" E.name_of_enc) @@ fun () ->
for _i = 1 to 10 do
let enc = E.create () in
for _j = 1 to 10 do
Sys.opaque_identity
(E.clear enc;
enc_company company enc)
done
done
(* create an encoder that we'll reuse every time, so we can measure
the allocations caused purely by the encoding itself *)
let enc = E.create () in
mk_t E.name_of_enc @@ fun () ->
E.clear enc;
Sys.opaque_identity (E.enc_company company enc)

let string_of_company c =
let e = E.create () in
enc_company c e;
E.enc_company c e;
E.to_string e
end

(* Build the benchmark functions for a low-level encoder [E]: the
   company encoder is derived from [E] with [Make_enc], then handed to
   [Make_bench_of_mk_company]. *)
module Make_bench (E : ENC) = struct
(* [E]'s own items plus the encoders generated by [Make_enc (E)].
   NOTE(review): inside this functor the name [Arg] shadows any other
   module called [Arg] that is in scope. *)
module Arg = struct
include E
include Make_enc (E)
end

include Make_bench_of_mk_company (Arg)
end

(* Benchmark of the encoder currently shipped in [Pbrt.Encoder], driving the
   generated [Foo.encode_pb_company]. *)
module Cur = Make_bench_of_mk_company (struct
let name_of_enc = "current"

include Pbrt.Encoder

(* Re-bind [create] as a plain [unit -> t] function.
   NOTE(review): presumably needed because [Pbrt.Encoder.create] takes an
   optional [?size] argument, which would not match [MK_COMPANY.create]
   directly — confirm. *)
let create () = create ()
let enc_company = Foo.encode_pb_company
end)

module Basic = Make_bench (struct
let name_of_enc = "basic-buffer"

Expand Down Expand Up @@ -1062,7 +1088,7 @@ module Nested = struct

(*
external varint_size : (int64[@unboxed]) -> int
= "caml_pbrt_varint_size_byte" "caml_pbrt_varint_size"
= "b_caml_pbrt_varint_size_byte" "b_caml_pbrt_varint_size"
[@@noalloc]
*)

Expand All @@ -1083,7 +1109,7 @@ module Nested = struct

external varint_slice :
bytes -> (int[@untagged]) -> (int64[@unboxed]) -> unit
= "caml_pbrt_varint_byte" "caml_pbrt_varint"
= "b_caml_pbrt_varint_byte" "b_caml_pbrt_varint"
[@@noalloc]

let[@inline] varint (i : int64) (e : t) : unit =
Expand Down Expand Up @@ -1132,6 +1158,7 @@ module Nested = struct
int_as_varint size e
end)

let bench_cur = Cur.bench
let bench_basic = Basic.bench
let bench_buffers_nested = Buffers_nested.bench
let bench_from_back = From_back.bench
Expand All @@ -1141,19 +1168,23 @@ module Nested = struct
let pp_size ~n ~depth =
Printf.printf "bench nested enc: length for n=%d, depth=%d is %d B\n" n
depth
(String.length (Basic.string_of_company @@ mk_company ~n ~depth))
(String.length (Cur.string_of_company @@ mk_company ~n ~depth))

(* sanity check *)
let check ~n ~depth () =
let s_basic = Basic.string_of_company (mk_company ~n ~depth) in
let s_buffers_nested =
Buffers_nested.string_of_company (mk_company ~n ~depth)
in
let s_from_back = From_back.string_of_company (mk_company ~n ~depth) in
let s_from_back2 =
From_back_noinline.string_of_company (mk_company ~n ~depth)
in
let s_from_backc = From_back_c.string_of_company (mk_company ~n ~depth) in
let comp = mk_company ~n ~depth in
let s_cur = Cur.string_of_company comp in
let s_basic = Basic.string_of_company comp in

(*
Printf.printf "###### n=%d, depth=%d\n" n depth;
Printf.printf "s_cur[%d]=%S\n" (String.length s_cur) s_cur;
Printf.printf "s_basic[%d]=%S\n" (String.length s_basic) s_basic;
*)
let s_buffers_nested = Buffers_nested.string_of_company comp in
let s_from_back = From_back.string_of_company comp in
let s_from_back2 = From_back_noinline.string_of_company comp in
let s_from_backc = From_back_c.string_of_company comp in
(*
Printf.printf "basic:\n(len=%d) %S\n" (String.length s_basic) s_basic;
Printf.printf "from_back:\n(len=%d) %S\n"
Expand All @@ -1169,14 +1200,18 @@ module Nested = struct
Foo.decode_pb_company dec)
in
let c_basic = dec_s s_basic in
let c_cur = dec_s s_cur in
let c_buffers_nested = dec_s s_buffers_nested in
let c_from_back = dec_s s_from_back in
let c_from_back2 = dec_s s_from_back2 in
let c_from_backc = dec_s s_from_backc in
(*
Format.printf "c_basic=%a@." Foo_pp.pp_company c_basic;
Format.printf "c_from_back=%a@." Foo_pp.pp_company c_from_back;
Format.printf "comp=%a@." Foo.pp_company comp;
Format.printf "c_basic=%a@." Foo.pp_company c_basic;
Format.printf "c_cur=%a@." Foo.pp_company c_cur;
*)
assert (c_basic = comp);
assert (c_basic = c_cur);
assert (c_basic = c_buffers_nested);
assert (c_basic = c_from_back);
assert (c_basic = c_from_back2);
Expand All @@ -1186,7 +1221,7 @@ module Nested = struct
let () =
List.iter
(fun (n, depth) -> check ~n ~depth ())
[ 1, 3; 2, 4; 10, 1; 20, 2 ]
[ 1, 3; 2, 4; 10, 1; 20, 2; 1, 10 ]
end

let test_nested_enc ~n ~depth =
Expand All @@ -1198,6 +1233,7 @@ let test_nested_enc ~n ~depth =
B.throughputN ~repeat:4 3
[
Nested.bench_basic company;
Nested.bench_cur company;
Nested.bench_buffers_nested company;
Nested.bench_from_back company;
Nested.bench_from_back_noinline company;
Expand Down
11 changes: 11 additions & 0 deletions benchs/bin/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

; Benchmark executable [run], linked against the pbrt library.
(executable
(name run)
; pass [-inline 100] to ocamlopt in addition to the standard flags
(ocamlopt_flags :standard -inline 100)
(libraries pbrt))

; Generate foo.ml and foo.mli from foo.proto by running ocaml-protoc,
; writing the output into the current directory.
(rule
(targets foo.ml foo.mli)
(deps foo.proto)
(action
(run ocaml-protoc %{deps} --binary --pp --ml_out .)))
19 changes: 19 additions & 0 deletions benchs/bin/foo.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

syntax = "proto3";

// A person: a name and an age stored as a signed 64-bit integer (sint64).
message Person {
string name = 1;
sint64 age = 2;
}

// A store: an address plus two lists of persons (employees and clients).
message Store {
string address = 1;
repeated Person employees = 2;
repeated Person clients = 3;
}

// A company: a name, its stores, and its subsidiaries. The type is
// recursive, since each subsidiary is itself a Company.
message Company {
string name = 1;
repeated Store stores = 2;
repeated Company subsidiaries = 3;
}
64 changes: 64 additions & 0 deletions benchs/bin/run.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
open Foo

let spf = Printf.sprintf

(* Build a synthetic company named "bigcorp" to use as benchmark input.
   It owns [n] stores, each with 2 employees and 2 clients. When [depth] is
   0 it has no subsidiaries; otherwise it has two, both being the same
   company built recursively with [depth - 1]. *)
let rec mk_company ~n ~depth : company =
  let subsidiaries =
    match depth with
    | 0 -> []
    | _ ->
      (* build the child once and list it twice *)
      let sub = mk_company ~n ~depth:(depth - 1) in
      [ sub; sub ]
  in
  let mk_store idx =
    let employees =
      List.init 2 (fun k ->
          {
            name = spf "employee_%d_%d" idx k;
            age = Int64.of_int ((k mod 30) + 18);
          })
    in
    let clients =
      List.init 2 (fun k ->
          {
            name = spf "client_%d_%d" idx k;
            age = Int64.of_int ((k mod 30) + 15);
          })
    in
    { address = spf "%d foobar street" idx; clients; employees }
  in
  { name = "bigcorp"; subsidiaries; stores = List.init n mk_store }

(* NOTE(review): the entry point below binds its own [comp] from the
   command-line parameters, shadowing this one; this value looks unused
   within this file — confirm before removing. *)
let comp = mk_company ~n:3 ~depth:2

(* Benchmark entry point: read [-n], [--depth] and [--iters] from the
   command line, build a company of the requested shape, encode it once to
   report the encoded size, then encode it [iters] more times. *)
let () =
  let n = ref 3 in
  let depth = ref 2 in
  let iters = ref 100 in

  let opts =
    [
      "-n", Arg.Set_int n, " size for data";
      "--depth", Arg.Set_int depth, " nesting depth for data";
      "--iters", Arg.Set_int iters, " number of iterations";
    ]
    |> Arg.align
  in
  (* anonymous (positional) arguments are ignored *)
  Arg.parse opts ignore "";

  (* [iters] is an [int], so it is printed with [%d]; the [n] flag is only
     documented as the prefix of [nativeint] conversions ([%nd], [%nx], ...). *)
  Printf.printf "n=%d, depth=%d, iters=%d\n%!" !n !depth !iters;
  let comp = mk_company ~n:!n ~depth:!depth in

  (* a single encoder, created with [~size:(64 * 1024)], is reused for
     every encoding below *)
  let enc = Pbrt.Encoder.create ~size:(64 * 1024) () in

  (* first encoding: only used to report the size of the encoded message *)
  Sys.opaque_identity (Foo.encode_pb_company comp enc);
  let size = String.length @@ Pbrt.Encoder.to_string enc in
  Printf.printf "size=%d B\n%!" size;

  (* the loop being measured: clear the encoder, then encode again *)
  for _i = 1 to !iters do
    Pbrt.Encoder.clear enc;
    Sys.opaque_identity (Foo.encode_pb_company comp enc)
  done
12 changes: 6 additions & 6 deletions benchs/stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <stdbool.h>
#include <stdint.h>

inline int pbrt_varint_size(int64_t i) {
static inline int pbrt_varint_size(int64_t i) {
int n = 0;
while (1) {
n++;
Expand All @@ -18,12 +18,12 @@ inline int pbrt_varint_size(int64_t i) {
}

// number of bytes for i
CAMLprim value caml_pbrt_varint_size(int64_t i) {
CAMLprim value b_caml_pbrt_varint_size(int64_t i) {
int res = pbrt_varint_size(i);
return Val_int(res);
}

CAMLprim value caml_pbrt_varint_size_byte(value v_i) {
CAMLprim value b_caml_pbrt_varint_size_byte(value v_i) {
CAMLparam1(v_i);

int64_t i = Int64_val(v_i);
Expand All @@ -32,7 +32,7 @@ CAMLprim value caml_pbrt_varint_size_byte(value v_i) {
}

// write i at str[idx…]
inline void pbrt_varint(unsigned char *str, int64_t i) {
static inline void pbrt_varint(unsigned char *str, int64_t i) {
while (true) {
int64_t cur = i & 0x7f;
if (cur == i) {
Expand Down Expand Up @@ -63,14 +63,14 @@ inline void pbrt_varint(unsigned char *str, int64_t i) {
// done

// write `i` starting at `idx`
CAMLprim value caml_pbrt_varint(value _str, intnat idx, int64_t i) {
CAMLprim value b_caml_pbrt_varint(value _str, intnat idx, int64_t i) {
CAMLparam1(_str);
char *str = Bytes_val(_str);
pbrt_varint(str + idx, i);
CAMLreturn(Val_unit);
}

CAMLprim value caml_pbrt_varint_bytes(value _str, value _idx, value _i) {
CAMLprim value b_caml_pbrt_varint_bytes(value _str, value _idx, value _i) {
CAMLparam3(_str, _idx, _i);
char *str = Bytes_val(_str);
int idx = Int_val(_idx);
Expand Down
Loading