Skip to content

Commit cd21775

Browse files
committed
Optimize work-stealing deque
- Add padding to avoid false sharing - Use a GADT to express desired result type - Use various tweaks to improve performance - Remove negative test that uses the WS deque in an invalid unsafe way - Implement caching of the thief side index
1 parent c6729d9 commit cd21775

File tree

3 files changed

+114
-142
lines changed

3 files changed

+114
-142
lines changed

src_lockfree/ws_deque.ml

Lines changed: 109 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2015, KC Sivaramakrishnan <[email protected]>
44
* Copyright (c) 2017, Nicolas ASSOUAD <[email protected]>
55
* Copyright (c) 2021, Tom Kelly <[email protected]>
6+
* Copyright (c) 2024, Vesa Karvonen <[email protected]>
67
*
78
* Permission to use, copy, modify, and/or distribute this software for any
89
* purpose with or without fee is hereby granted, provided that the above
@@ -27,6 +28,8 @@
2728
* https://dl.acm.org/doi/abs/10.1145/2442516.2442524
2829
*)
2930

31+
module Atomic = Multicore_magic.Transparent_atomic
32+
3033
module type S = sig
3134
type 'a t
3235

@@ -38,152 +41,123 @@ module type S = sig
3841
val steal_opt : 'a t -> 'a option
3942
end
4043

41-
module CArray = struct
42-
type 'a t = 'a array
43-
44-
let rec log2 n = if n <= 1 then 0 else 1 + log2 (n asr 1)
45-
46-
let create sz v =
47-
(* [sz] must be a power of two. *)
48-
assert (0 < sz && sz = Int.shift_left 1 (log2 sz));
49-
assert (Int.logand sz (sz - 1) == 0);
50-
Array.make sz v
51-
52-
let size t = Array.length t [@@inline]
53-
let mask t = size t - 1 [@@inline]
54-
55-
let index i t =
56-
(* Because [size t] is a power of two, [i mod (size t)] is the same as
57-
[i land (size t - 1)], that is, [i land (mask t)]. *)
58-
Int.logand i (mask t)
59-
[@@inline]
60-
61-
let get t i = Array.unsafe_get t (index i t) [@@inline]
62-
let put t i v = Array.unsafe_set t (index i t) v [@@inline]
63-
64-
let transfer src dst top num =
65-
ArrayExtra.blit_circularly (* source array and index: *)
66-
src
67-
(index top src) (* target array and index: *)
68-
dst
69-
(index top dst) (* number of elements: *)
70-
num
71-
[@@inline]
72-
73-
let grow t top bottom =
74-
let sz = size t in
75-
assert (bottom - top = sz);
76-
let dst = create (2 * sz) (Obj.magic ()) in
77-
transfer t dst top sz;
78-
dst
79-
80-
let shrink t top bottom =
81-
let sz = size t in
82-
assert (bottom - top <= sz / 2);
83-
let dst = create (sz / 2) (Obj.magic ()) in
84-
transfer t dst top (bottom - top);
85-
dst
86-
end
87-
8844
module M : S = struct
89-
let min_size = 32
90-
let shrink_const = 3
45+
(** This must be a power of two. *)
46+
let min_capacity = 16
9147

9248
type 'a t = {
9349
top : int Atomic.t;
9450
bottom : int Atomic.t;
95-
tab : 'a ref CArray.t Atomic.t;
96-
mutable next_shrink : int;
51+
top_cache : int ref;
52+
mutable tab : 'a ref array;
9753
}
9854

9955
let create () =
100-
{
101-
top = Atomic.make 1;
102-
bottom = Atomic.make 1;
103-
tab = Atomic.make (CArray.create min_size (Obj.magic ()));
104-
next_shrink = 0;
105-
}
106-
107-
let set_next_shrink q =
108-
let sz = CArray.size (Atomic.get q.tab) in
109-
if sz <= min_size then q.next_shrink <- 0
110-
else q.next_shrink <- sz / shrink_const
111-
112-
let grow q t b =
113-
Atomic.set q.tab (CArray.grow (Atomic.get q.tab) t b);
114-
set_next_shrink q
115-
116-
let size q =
117-
let b = Atomic.get q.bottom in
118-
let t = Atomic.get q.top in
119-
b - t
56+
let top = Atomic.make 0 |> Multicore_magic.copy_as_padded in
57+
let tab = Array.make min_capacity (Obj.magic ()) in
58+
let bottom = Atomic.make 0 |> Multicore_magic.copy_as_padded in
59+
let top_cache = ref 0 |> Multicore_magic.copy_as_padded in
60+
{ top; bottom; top_cache; tab } |> Multicore_magic.copy_as_padded
61+
62+
let realloc a t b sz new_sz =
63+
let new_a = Array.make new_sz (Obj.magic ()) in
64+
ArrayExtra.blit_circularly a
65+
(t land (sz - 1))
66+
new_a
67+
(t land (new_sz - 1))
68+
(b - t);
69+
new_a
12070

12171
let push q v =
122-
let v' = ref v in
123-
let b = Atomic.get q.bottom in
124-
let t = Atomic.get q.top in
125-
let a = Atomic.get q.tab in
126-
let size = b - t in
127-
let a =
128-
if size = CArray.size a then (
129-
grow q t b;
130-
Atomic.get q.tab)
131-
else a
132-
in
133-
CArray.put a b v';
134-
Atomic.set q.bottom (b + 1)
135-
136-
let release ptr =
137-
let res = !ptr in
138-
(* we know this ptr will never be dereferenced, but want to
139-
break the reference to ensure that the contents of the
140-
deque array get garbage collected *)
141-
ptr := Obj.magic ();
142-
res
143-
[@@inline]
144-
145-
let pop q =
146-
if size q = 0 then raise Exit
147-
else
148-
let b = Atomic.get q.bottom - 1 in
149-
Atomic.set q.bottom b;
72+
let v = ref v in
73+
(* Read of [bottom] by the owner simply does not require a fence as the
74+
[bottom] is only mutated by the owner. *)
75+
let b = Atomic.fenceless_get q.bottom in
76+
let t_cache = !(q.top_cache) in
77+
let a = q.tab in
78+
let size = b - t_cache in
79+
let capacity = Array.length a in
80+
if
81+
size < capacity
82+
||
15083
let t = Atomic.get q.top in
151-
let a = Atomic.get q.tab in
152-
let size = b - t in
153-
if size < 0 then (
154-
(* empty queue *)
155-
Atomic.set q.bottom (b + 1);
156-
raise Exit)
157-
else
158-
let out = CArray.get a b in
159-
if b = t then
160-
(* single last element *)
161-
if Atomic.compare_and_set q.top t (t + 1) then (
162-
Atomic.set q.bottom (b + 1);
163-
release out)
164-
else (
165-
Atomic.set q.bottom (b + 1);
166-
raise Exit)
167-
else (
168-
(* non-empty queue *)
169-
if q.next_shrink > size then (
170-
Atomic.set q.tab (CArray.shrink a t b);
171-
set_next_shrink q);
172-
release out)
173-
174-
let pop_opt q = try Some (pop q) with Exit -> None
175-
176-
let rec steal backoff q =
177-
let t = Atomic.get q.top in
84+
q.top_cache := t;
85+
t != t_cache
86+
then begin
87+
Array.unsafe_set a (b land (capacity - 1)) v;
88+
Atomic.incr q.bottom
89+
end
90+
else
91+
let a = realloc a t_cache b capacity (capacity lsl 1) in
92+
Array.unsafe_set a (b land (Array.length a - 1)) v;
93+
q.tab <- a;
94+
Atomic.incr q.bottom
95+
96+
type ('a, _) poly = Option : ('a, 'a option) poly | Value : ('a, 'a) poly
97+
98+
let pop_as : type a r. a t -> (a, r) poly -> r =
99+
fun q poly ->
100+
let b = Atomic.fetch_and_add q.bottom (-1) - 1 in
101+
(* Read of [top] at this point requires no fence as we simply need to ensure
102+
that the read happens after updating [bottom]. *)
103+
let t = Atomic.fenceless_get q.top in
104+
let size = b - t in
105+
if 0 < size then begin
106+
let a = q.tab in
107+
let capacity = Array.length a in
108+
let out = Array.unsafe_get a (b land (capacity - 1)) in
109+
let res = !out in
110+
out := Obj.magic ();
111+
if size + size + size <= capacity - min_capacity then
112+
q.tab <- realloc a t b capacity (capacity lsr 1);
113+
match poly with Option -> Some res | Value -> res
114+
end
115+
else if b = t then begin
116+
(* Whether or not the [compare_and_set] below succeeds, [top_cache] can be
117+
updated, because in either case [top] has been incremented. *)
118+
q.top_cache := t + 1;
119+
let got = Atomic.compare_and_set q.top t (t + 1) in
120+
(* This write of [bottom] requires no fence. The deque is empty and
121+
remains so until the next [push]. *)
122+
Atomic.fenceless_set q.bottom (b + 1);
123+
if got then begin
124+
let a = q.tab in
125+
let out = Array.unsafe_get a (b land (Array.length a - 1)) in
126+
let res = !out in
127+
out := Obj.magic ();
128+
match poly with Option -> Some res | Value -> res
129+
end
130+
else match poly with Option -> None | Value -> raise_notrace Exit
131+
end
132+
else begin
133+
(* This write of [bottom] requires no fence. The deque is empty and
134+
remains so until the next [push]. *)
135+
Atomic.fenceless_set q.bottom (b + 1);
136+
match poly with Option -> None | Value -> raise_notrace Exit
137+
end
138+
139+
let pop q = pop_as q Value
140+
let pop_opt q = pop_as q Option
141+
142+
let rec steal_as : type a r. a t -> Backoff.t -> (a, r) poly -> r =
143+
fun q backoff poly ->
144+
(* Read of [top] does not require a fence at this point, but the read of
145+
[top] must happen before the read of [bottom]. The write of [top] later
146+
has no effect in case we happened to read an old value of [top]. *)
147+
let t = Atomic.fenceless_get q.top in
178148
let b = Atomic.get q.bottom in
179149
let size = b - t in
180-
if size <= 0 then raise Exit
181-
else
182-
let a = Atomic.get q.tab in
183-
let out = CArray.get a t in
184-
if Atomic.compare_and_set q.top t (t + 1) then release out
185-
else steal (Backoff.once backoff) q
186-
187-
let steal q = steal Backoff.default q
188-
let steal_opt q = try Some (steal q) with Exit -> None
150+
if 0 < size then
151+
let a = q.tab in
152+
let out = Array.unsafe_get a (t land (Array.length a - 1)) in
153+
if Atomic.compare_and_set q.top t (t + 1) then begin
154+
let res = !out in
155+
out := Obj.magic ();
156+
match poly with Option -> Some res | Value -> res
157+
end
158+
else steal_as q (Backoff.once backoff) poly
159+
else match poly with Option -> None | Value -> raise_notrace Exit
160+
161+
let steal q = steal_as q Backoff.default Value
162+
let steal_opt q = steal_as q Backoff.default Option
189163
end

test/ws_deque/dune

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
(test
1212
(package saturn_lockfree)
1313
(name ws_deque_dscheck)
14-
(libraries atomic dscheck alcotest backoff)
14+
(libraries atomic dscheck alcotest backoff multicore-magic-dscheck)
1515
(build_if
1616
(>= %{ocaml_version} 5))
17-
(modules ArrayExtra ws_deque ws_deque_dscheck))
17+
(modules ArrayExtra ws_deque ws_deque_dscheck)
18+
(flags
19+
(:standard -open Multicore_magic_dscheck)))
1820

1921
(test
2022
(package saturn_lockfree)

test/ws_deque/stm_ws_deque.ml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,7 @@ let () =
7777
assume (Dom.all_interleavings_ok triple);
7878
repeat rep_count Dom.agree_prop_par_asym triple)
7979
in
80-
[
81-
agree_test_par_asym ~count ~name:(name ^ " parallel");
82-
(* Note: this can generate, e.g., pop commands/actions in different threads, thus violating the spec. *)
83-
Dom.neg_agree_test_par ~count ~name:(name ^ " parallel, negative");
84-
]
80+
[ agree_test_par_asym ~count ~name:(name ^ " parallel") ]
8581
in
8682
Stm_run.run ~count:1000 ~name:"Saturn_lockfree.Ws_deque" ~verbose:true
8783
~make_domain

0 commit comments

Comments
 (0)