Skip to content

Commit 49ebb15

Browse files
committed
Chaos
1 parent 211fc5b commit 49ebb15

File tree

2 files changed

+178
-0
lines changed

2 files changed

+178
-0
lines changed

deps/rabbit/src/rabbit.erl

+7
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,13 @@
162162
{requires, kernel_ready},
163163
{enables, core_initialized}]}).
164164

165+
-rabbit_boot_step({rabbit_chaos,
166+
[{description, "rabbit node chaos server"},
167+
{mfa, {rabbit_sup, start_restartable_child,
168+
[rabbit_chaos]}},
169+
{requires, [database]},
170+
{enables, core_initialized}]}).
171+
165172
-rabbit_boot_step({rabbit_node_monitor,
166173
[{description, "node monitor"},
167174
{mfa, {rabbit_sup, start_restartable_child,

deps/rabbit/src/rabbit_chaos.erl

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
%% This Source Code Form is subject to the terms of the Mozilla Public
2+
%% License, v. 2.0. If a copy of the MPL was not distributed with this
3+
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
%%
5+
%% Copyright (c) 2007-2023 Broadcom. All Rights Reserved. The term “Broadcom”
6+
%% refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
7+
%%
8+
9+
-module(rabbit_chaos).
10+
11+
-behaviour(gen_server).
12+
13+
-export([start_link/0]).
14+
-export([
15+
begin_default/0,
16+
begin_default/1,
17+
begin_chaos/1
18+
]).
19+
20+
21+
-export([init/1,
22+
handle_call/3,
23+
handle_cast/2,
24+
handle_info/2,
25+
terminate/2,
26+
code_change/3]).
27+
28+
-type chaos_event() :: {Name :: atom(),
29+
{kill_named_proc, Process :: atom()} |
30+
kill_quorum_queue_member,
31+
flood_node}.
32+
33+
-type chaos_cfg() :: #{interval := non_neg_integer(),
34+
events := [chaos_event()]}.
35+
-define(SERVER, ?MODULE).
36+
37+
-record(?MODULE, {cfg :: chaos_cfg()}).
38+
39+
-export_type([chaos_cfg/0,
40+
chaos_event/0]).
41+
42+
%%----------------------------------------------------------------------------
43+
%% A chaos server that can be enabled to create periodic configurable chaos
44+
%% inside the broker.
45+
%%----------------------------------------------------------------------------
46+
47+
begin_default() ->
48+
begin_default(20000).
49+
50+
begin_default(Interval) ->
51+
Events = [
52+
{kill_qq_wal, 1, {kill_named_proc, ra_log_wal}},
53+
{kill_qq_seq_writer, 1, {kill_named_proc, ra_log_segment_writer}},
54+
{kill_qq_member, 2, kill_ra_member},
55+
{kill_qq_member, 2, restart_ra_member},
56+
{flood_a_node, 2, flood_node}
57+
],
58+
begin_chaos(#{interval => Interval,
59+
events => Events}).
60+
61+
begin_chaos(Cfg) ->
62+
gen_server:call(?SERVER, {begin_chaos, Cfg}).
63+
64+
-spec start_link() -> rabbit_types:ok_pid_or_error().
65+
start_link() ->
66+
gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
67+
68+
init([]) ->
69+
process_flag(trap_exit, true),
70+
Cfg = #{interval => 20000,
71+
events => []},
72+
{ok, #?MODULE{cfg = Cfg}}.
73+
74+
handle_call({begin_chaos, #{interval := Interval} = Cfg}, _From, State) ->
75+
_ = erlang:send_after(Interval, self(), do_chaos),
76+
{reply, ok, State#?MODULE{cfg = Cfg}}.
77+
78+
handle_cast(_Request, State) ->
79+
{noreply, State}.
80+
81+
handle_info(do_chaos, #?MODULE{cfg = #{interval := Interval} = Cfg} = State) ->
82+
Events = maps:get(events, Cfg),
83+
{Name, _, Event} = pick_event(Events),
84+
do_event(Name, Event),
85+
_ = erlang:send_after(Interval, self(), do_chaos),
86+
{noreply, State};
87+
handle_info(_, #?MODULE{} = State) ->
88+
{noreply, State}.
89+
90+
terminate(_Reason, #?MODULE{}) ->
91+
ok.
92+
93+
code_change(_OldVsn, State, _Extra) ->
94+
{ok, State}.
95+
96+
%% internal
97+
98+
do_event(Name, {kill_named_proc, ProcName}) ->
99+
rabbit_log:info("~s: doing event ~s...", [?MODULE, Name]),
100+
catch exit(whereis(ProcName), chaos),
101+
ok;
102+
do_event(Name, flood_node) ->
103+
rabbit_log:info("~s: doing event ~s...", [?MODULE, Name]),
104+
%% TODO: avoid if nodes() == []
105+
Nodes = nodes(),
106+
At = rand:uniform(length(Nodes)),
107+
Selected = lists:nth(At, Nodes),
108+
109+
Pid = erpc:call(Selected, erlang, spawn, [fun() -> ok end]),
110+
111+
Data = crypto:strong_rand_bytes(100_000),
112+
Loop = fun F(0) -> ok;
113+
F(N) ->
114+
case erlang:send(Pid, Data, [nosuspend]) of
115+
nosuspend ->
116+
Pid ! Data,
117+
rabbit_log:info("~s: flood of node ~s competed ~s...",
118+
[?MODULE, Selected, Name]),
119+
%% flood complete
120+
ok;
121+
ok ->
122+
F(N-1)
123+
end
124+
end,
125+
126+
Loop(10000),
127+
ok;
128+
do_event(Name, kill_ra_member) ->
129+
rabbit_log:info("~s: doing event ~s...", [?MODULE, Name]),
130+
Procs = ets:tab2list(ra_leaderboard),
131+
At = rand:uniform(length(Procs)),
132+
Selected = lists:nth(At, Procs),
133+
catch exit(whereis(element(1, Selected)), kill),
134+
ok;
135+
do_event(Name, restart_ra_member = Type) ->
136+
rabbit_log:info("~s: doing event ~s of type ~s", [?MODULE, Name, Type]),
137+
Queues = rabbit_amqqueue:list_local_quorum_queues(),
138+
At = rand:uniform(length(Queues)),
139+
Selected = lists:nth(At, Queues),
140+
{ServerName, _} = amqqueue:get_pid(Selected),
141+
ServerId = {ServerName, node()},
142+
ra:stop_server(quorum_queues, ServerId),
143+
Sleep = rand:uniform(10000) + 1000,
144+
timer:sleep(Sleep),
145+
ra:restart_server(quorum_queues, ServerId),
146+
ok;
147+
do_event(Name, {multi, Num, Interval, Event}) ->
148+
rabbit_log:info("~s: doing multi event ~s...",
149+
[?MODULE, Name]),
150+
catch [begin
151+
do_event(Name, Event),
152+
timer:sleep(Interval)
153+
end || _ <- lists:seq(1, Num)],
154+
ok.
155+
156+
pick_event(Events) ->
157+
TotalWeight = lists:sum([element(2, E) || E <- Events]),
158+
Pick = rand:uniform(TotalWeight),
159+
event_at_weight_point(Pick, 0, Events).
160+
161+
162+
event_at_weight_point(_Pick, _Cur, []) ->
163+
undefined;
164+
event_at_weight_point(Pick, Cur0, [{_, W, _} = E | Events]) ->
165+
Cur = Cur0 + W,
166+
case Pick =< Cur of
167+
true ->
168+
E;
169+
false ->
170+
event_at_weight_point(Pick, Cur, Events)
171+
end.

0 commit comments

Comments
 (0)