8
8
-module (rabbit_quorum_queue ).
9
9
10
10
-behaviour (rabbit_queue_type ).
11
+ -behaviour (rabbit_policy_validator ).
12
+ -behaviour (rabbit_policy_merge_strategy ).
11
13
12
14
-export ([init /1 ,
13
15
close /1 ,
30
32
-export ([cluster_state /1 , status /2 ]).
31
33
-export ([update_consumer_handler /8 , update_consumer /9 ]).
32
34
-export ([cancel_consumer_handler /2 , cancel_consumer /3 ]).
33
- -export ([become_leader /2 , handle_tick /3 , spawn_deleter /1 ]).
35
+ -export ([become_leader /2 , handle_tick /3 , spawn_deleter /1 , eval_members / 3 ]).
34
36
-export ([rpc_delete_metrics /1 ]).
35
37
-export ([format /1 ]).
36
38
-export ([open_files /1 ]).
65
67
is_compatible /3 ,
66
68
declare /2 ,
67
69
is_stateful /0 ]).
70
+ -export ([validate_policy /1 , merge_policy_value /3 ]).
68
71
69
72
-import (rabbit_queue_type_util , [args_policy_lookup /3 ,
70
73
qname_to_internal_name /1 ]).
111
114
-define (ADD_MEMBER_TIMEOUT , 5000 ).
112
115
-define (SNAPSHOT_INTERVAL , 8192 ). % % the ra default is 4096
113
116
117
+ -define (EVAL_MEMBERS_TIMEOUT , 60000 ).
118
+ -define (EVAL_MEMBERS_EVENT_TIMEOUT , 30000 ).
119
+
120
+
121
+ % %----------- QQ policies ---------------------------------------------------
122
+
123
+ -rabbit_boot_step (
124
+ {? MODULE ,
125
+ [{description , " QQ policy validation" },
126
+ {mfa , {rabbit_registry , register ,
127
+ [policy_validator , <<" target-group-size" >>, ? MODULE ]}},
128
+ {mfa , {rabbit_registry , register ,
129
+ [operator_policy_validator , <<" target-group-size" >>, ? MODULE ]}},
130
+ {mfa , {rabbit_registry , register ,
131
+ [policy_merge_strategy , <<" target-group-size" >>, ? MODULE ]}},
132
+ {requires , rabbit_registry },
133
+ {enables , recovery }]}).
134
+
135
+ validate_policy (Args ) ->
136
+ Count = proplists :get_value (<<" target-group-size" >>, Args , none ),
137
+ case is_integer (Count ) andalso Count > 0 of
138
+ true -> ok ;
139
+ false -> {error , " ~tp is not a valid qq target count value" , [Count ]}
140
+ end .
141
+
142
+ merge_policy_value (<<" target-group-size" >>, _Val , OpVal ) ->
143
+ OpVal .
144
+
114
145
% %----------- rabbit_queue_type ---------------------------------------------
115
146
116
147
-spec is_enabled () -> boolean ().
@@ -177,7 +208,7 @@ start_cluster(Q) ->
177
208
Arguments = amqqueue :get_arguments (Q ),
178
209
Opts = amqqueue :get_options (Q ),
179
210
ActingUser = maps :get (user , Opts , ? UNKNOWN_USER ),
180
- QuorumSize = get_default_quorum_initial_group_size (Arguments ),
211
+ QuorumSize = get_default_quorum_initial_group_size (Arguments , Q ),
181
212
RaName = case qname_to_internal_name (QName ) of
182
213
{ok , A } ->
183
214
A ;
@@ -193,11 +224,7 @@ start_cluster(Q) ->
193
224
[QuorumSize , rabbit_misc :rs (QName ), Leader ]),
194
225
case rabbit_amqqueue :internal_declare (NewQ1 , false ) of
195
226
{created , NewQ } ->
196
- TickTimeout = application :get_env (rabbit , quorum_tick_interval ,
197
- ? TICK_TIMEOUT ),
198
- SnapshotInterval = application :get_env (rabbit , quorum_snapshot_interval ,
199
- ? SNAPSHOT_INTERVAL ),
200
- RaConfs = [make_ra_conf (NewQ , ServerId , TickTimeout , SnapshotInterval )
227
+ RaConfs = [make_ra_conf (NewQ , ServerId )
201
228
|| ServerId <- members (NewQ )],
202
229
try erpc_call (Leader , ra , start_cluster ,
203
230
[? RA_SYSTEM , RaConfs , ? START_CLUSTER_TIMEOUT ],
@@ -550,6 +577,90 @@ reductions(Name) ->
550
577
0
551
578
end .
552
579
580
+ eval_members (ClusterName , Cluster , QName ) ->
581
+ MemberNodes = [N || {_ , N } <- Cluster ],
582
+ ExpectedNodes = rabbit_nodes :list_members (),
583
+ Remove = MemberNodes -- ExpectedNodes ,
584
+ case Remove of
585
+ [] ->
586
+ add_member_effects (ClusterName , Cluster , QName , MemberNodes );
587
+ _ ->
588
+ remove_member_effects (ClusterName , Cluster , QName , Remove )
589
+ end .
590
+
591
+ add_member_effects (ClusterName , Cluster , QName , MemberNodes ) ->
592
+ Running = rabbit_nodes :list_running (),
593
+ case lists :sort (Running ) == lists :sort ([node () | nodes ()]) of
594
+ true ->
595
+ {ok , Q } = rabbit_amqqueue :lookup (QName ),
596
+ New = Running -- MemberNodes ,
597
+ Arguments = amqqueue :get_arguments (Q ),
598
+ Size = get_default_quorum_initial_group_size (Arguments , Q ),
599
+ CurrentSize = length (MemberNodes ),
600
+ case {CurrentSize < Size , New } of
601
+ {true , NewNodes } when NewNodes =/= [] ->
602
+ NodesToAdd = lists :sublist (grow_order_sort (NewNodes ),
603
+ Size - CurrentSize ),
604
+ create_add_member_effects (ClusterName , Cluster ,
605
+ Q , QName , NodesToAdd );
606
+ {_ ,_ } ->
607
+ rabbit_log :debug (" CALLED: NOOP ~n " ,[]),
608
+ undefined
609
+ end ;
610
+ false ->
611
+ rabbit_log :debug (" CALLED: BACKOFF~n " ,[]),
612
+ eval_members_backoff
613
+ end .
614
+
615
+ create_add_member_effects (ClusterName , Cluster , Q , QName , New ) ->
616
+ rabbit_log :debug (" CALLED: WILL ADD ~p~n " ,[New ]),
617
+ NewMembers = [make_add_member_effect (Q , QName , {ClusterName , N }) || N <- New ],
618
+ {add_member , NewMembers , Cluster }.
619
+
620
+ make_add_member_effect (Q , QName , {_ClusterName , Node } = ServerId ) ->
621
+ Conf = make_ra_conf (Q , ServerId ),
622
+ ResultFun = fun ({ok , _ , Leader }) ->
623
+ Fun = fun (Q1 ) ->
624
+ Q2 = update_type_state (
625
+ Q1 , fun (#{nodes := Nodes } = Ts ) ->
626
+ Ts #{nodes => [Node | Nodes ]}
627
+ end ),
628
+ amqqueue :set_pid (Q2 , Leader )
629
+ end ,
630
+ _ = rabbit_amqqueue :update (QName , Fun )
631
+ end ,
632
+ {{ServerId , Conf }, ResultFun }.
633
+
634
+ grow_order_sort (Nodes ) ->
635
+ QueueLenFun =
636
+ fun (Node ) ->
637
+ length ([Q || Q <- rabbit_amqqueue :list_by_type (quorum ),
638
+ amqqueue :get_state (Q ) =/= crashed ,
639
+ lists :member (Node , rabbit_amqqueue :get_quorum_nodes (Q ))])
640
+ end ,
641
+ NodeWithQLen = lists :keysort (
642
+ 2 ,
643
+ [{Node , QueueLenFun (Node )} || Node <- Nodes ]),
644
+ [N || {N ,_ } <- NodeWithQLen ].
645
+
646
+ remove_member_effects (ClusterName , Cluster , QName , RemovedFromCluster ) ->
647
+ rabbit_log :debug (" CALLED: WILL REMOVE ~p~n " ,[RemovedFromCluster ]),
648
+ RemoveMembers = [make_remove_member_effect (QName , {ClusterName , N }) || N <- RemovedFromCluster ],
649
+ {remove_member , RemoveMembers , Cluster }.
650
+
651
+ make_remove_member_effect (QName , {_ClusterName , Node } = ServerId ) ->
652
+ ResultFun = fun ({ok , _ , _ }) ->
653
+ Fun = fun (Q1 ) ->
654
+ update_type_state (
655
+ Q1 ,
656
+ fun (#{nodes := Nodes } = Ts ) ->
657
+ Ts #{nodes => lists :delete (Node , Nodes )}
658
+ end )
659
+ end ,
660
+ _ = rabbit_amqqueue :update (QName , Fun )
661
+ end ,
662
+ {ServerId , ResultFun }.
663
+
553
664
is_recoverable (Q ) ->
554
665
Node = node (),
555
666
Nodes = get_nodes (Q ),
@@ -1089,11 +1200,7 @@ add_member(Q, Node, Timeout) when ?amqqueue_is_quorum(Q) ->
1089
1200
% % TODO parallel calls might crash this, or add a duplicate in quorum_nodes
1090
1201
ServerId = {RaName , Node },
1091
1202
Members = members (Q ),
1092
- TickTimeout = application :get_env (rabbit , quorum_tick_interval ,
1093
- ? TICK_TIMEOUT ),
1094
- SnapshotInterval = application :get_env (rabbit , quorum_snapshot_interval ,
1095
- ? SNAPSHOT_INTERVAL ),
1096
- Conf = make_ra_conf (Q , ServerId , TickTimeout , SnapshotInterval ),
1203
+ Conf = make_ra_conf (Q , ServerId ),
1097
1204
case ra :start_server (? RA_SYSTEM , Conf ) of
1098
1205
ok ->
1099
1206
case ra :add_member (Members , ServerId , Timeout ) of
@@ -1573,12 +1680,18 @@ quorum_ctag(Other) ->
1573
1680
maybe_send_reply (_ChPid , undefined ) -> ok ;
1574
1681
maybe_send_reply (ChPid , Msg ) -> ok = rabbit_channel :send_command (ChPid , Msg ).
1575
1682
1576
- get_default_quorum_initial_group_size (Arguments ) ->
1577
- case rabbit_misc :table_lookup (Arguments , <<" x-quorum-initial-group-size" >>) of
1578
- undefined ->
1683
+ get_default_quorum_initial_group_size (Arguments , Q ) ->
1684
+ PolicyValue = rabbit_policy :get (<<" target-group-size" >>, Q ),
1685
+ ArgValue = rabbit_misc :table_lookup (Arguments , <<" x-quorum-initial-group-size" >>),
1686
+ case {ArgValue , PolicyValue } of
1687
+ {undefined , undefined } ->
1579
1688
application :get_env (rabbit , quorum_cluster_size , 3 );
1580
- {_Type , Val } ->
1581
- Val
1689
+ {undefined , V } ->
1690
+ V ;
1691
+ {{_Type , V }, undefined } ->
1692
+ V ;
1693
+ {{_Type , ArgV }, PolV } ->
1694
+ max (ArgV , PolV )
1582
1695
end .
1583
1696
1584
1697
% % member with the current leader first
@@ -1590,7 +1703,16 @@ members(Q) when ?amqqueue_is_quorum(Q) ->
1590
1703
format_ra_event (ServerId , Evt , QRef ) ->
1591
1704
{'$gen_cast' , {queue_event , QRef , {ServerId , Evt }}}.
1592
1705
1593
- make_ra_conf (Q , ServerId , TickTimeout , SnapshotInterval ) ->
1706
+ make_ra_conf (Q , ServerId ) ->
1707
+ TickTimeout = application :get_env (rabbit , quorum_tick_interval ,
1708
+ ? TICK_TIMEOUT ),
1709
+ SnapshotInterval = application :get_env (rabbit , quorum_snapshot_interval ,
1710
+ ? SNAPSHOT_INTERVAL ),
1711
+ % % Do we want these values to be configurable?
1712
+ MemberEvalTimeout = application :get_env (rabbit , quorum_eval_members_timeout ,
1713
+ ? EVAL_MEMBERS_TIMEOUT ),
1714
+ MemberEvalEventTimeout = application :get_env (rabbit , quorum_eval_members_event_timeout ,
1715
+ ? EVAL_MEMBERS_EVENT_TIMEOUT ),
1594
1716
QName = amqqueue :get_name (Q ),
1595
1717
RaMachine = ra_machine (Q ),
1596
1718
[{ClusterName , _ } | _ ] = Members = members (Q ),
@@ -1606,6 +1728,8 @@ make_ra_conf(Q, ServerId, TickTimeout, SnapshotInterval) ->
1606
1728
log_init_args => #{uid => UId ,
1607
1729
snapshot_interval => SnapshotInterval },
1608
1730
tick_timeout => TickTimeout ,
1731
+ eval_members_timeout => MemberEvalTimeout ,
1732
+ eval_members_event_timeout => MemberEvalEventTimeout ,
1609
1733
machine => RaMachine ,
1610
1734
ra_event_formatter => Formatter }.
1611
1735
0 commit comments