Skip to content

Commit 8e998c4

Browse files
Merge pull request #13669 from rabbitmq/mergify/bp/v4.0.x/pr-13667
Allow a previously reset node to rejoin its original cluster (backport #13643) (backport #13667)
2 parents 100c16a + 6d45ee8 commit 8e998c4

File tree

2 files changed

+32
-13
lines changed

2 files changed

+32
-13
lines changed

deps/rabbit/src/rabbit_db_cluster.erl

+18-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin) ->
5050
RemoteNode :: node(),
5151
Ret :: Ok | Error,
5252
Ok :: {ok, [node()]} | {ok, already_member},
53-
Error :: {error, {inconsistent_cluster, string()}}.
53+
Error :: {error, {inconsistent_cluster, string()} | {error, {erpc, noconnection}}}.
5454

5555
can_join(RemoteNode) ->
5656
?LOG_INFO(
@@ -82,7 +82,7 @@ can_join_using_khepri(RemoteNode) ->
8282
NodeType :: node_type(),
8383
Ret :: Ok | Error,
8484
Ok :: ok | {ok, already_member},
85-
Error :: {error, {inconsistent_cluster, string()}}.
85+
Error :: {error, {inconsistent_cluster, string()} | {error, {erpc, noconnection}}}.
8686
%% @doc Adds this node to a cluster using `RemoteNode' to reach it.
8787

8888
join(ThisNode, _NodeType) when ThisNode =:= node() ->
@@ -214,6 +214,22 @@ join(RemoteNode, NodeType)
214214
end;
215215
{ok, already_member} ->
216216
{ok, already_member};
217+
{error, {inconsistent_cluster, _Msg}} = Error ->
218+
case rabbit_khepri:is_enabled() of
219+
true ->
220+
Error;
221+
false ->
222+
%% rabbit_mnesia:can_join_cluster/1 reported inconsistent_cluster
223+
%% because RemoteNode thinks this node is already in its cluster.
224+
%% Ask to leave RemoteNode's cluster (the discovery cluster),
225+
%% then simply retry the join.
226+
rabbit_log:info("Mnesia: node ~tp thinks it's clustered "
227+
"with node ~tp, but ~tp disagrees. ~tp will ask "
228+
"to leave the cluster and try again.",
229+
[RemoteNode, node(), node(), node()]),
230+
ok = rabbit_mnesia:leave_then_rediscover_cluster(RemoteNode),
231+
join(RemoteNode, NodeType)
232+
end;
217233
{error, _} = Error ->
218234
Error
219235
end.

deps/rabbit/src/rabbit_mnesia.erl

+14-11
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
-export([node_info/0, remove_node_if_mnesia_running/1]).
7474

7575
%% Used internally in `rabbit_db_cluster'.
76-
-export([members/0]).
76+
-export([members/0, leave_then_rediscover_cluster/1]).
7777

7878
%% Used internally in `rabbit_khepri'.
7979
-export([mnesia_and_msg_store_files/0]).
@@ -155,7 +155,7 @@ init() ->
155155
%% we cluster to its cluster.
156156

157157
-spec can_join_cluster(node())
158-
-> {ok, [node()]} | {ok, already_member} | {error, {inconsistent_cluster, string()}}.
158+
-> {ok, [node()]} | {ok, already_member} | {error, {inconsistent_cluster, string()} | {error, {erpc, noconnection}}}.
159159

160160
can_join_cluster(DiscoveryNode) ->
161161
ensure_mnesia_dir(),
@@ -179,7 +179,6 @@ can_join_cluster(DiscoveryNode) ->
179179
{ok, already_member};
180180
false ->
181181
Msg = format_inconsistent_cluster_message(DiscoveryNode, node()),
182-
rabbit_log:error(Msg),
183182
{error, {inconsistent_cluster, Msg}}
184183
end
185184
end.
@@ -894,15 +893,19 @@ remove_node_if_mnesia_running(Node) ->
894893
end
895894
end.
896895

897-
leave_cluster() ->
898-
case rabbit_nodes:nodes_excl_me(cluster_nodes(all)) of
899-
[] -> ok;
900-
AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of
901-
true -> ok;
902-
false -> e(no_running_cluster_nodes)
903-
end
904-
end.
896+
leave_then_rediscover_cluster(DiscoveryNode) ->
897+
{ClusterNodes, _, _} = discover_cluster([DiscoveryNode]),
898+
leave_cluster(rabbit_nodes:nodes_excl_me(ClusterNodes)).
905899

900+
leave_cluster() ->
901+
leave_cluster(rabbit_nodes:nodes_excl_me(cluster_nodes(all))).
902+
leave_cluster([]) ->
903+
ok;
904+
leave_cluster(Nodes) when is_list(Nodes) ->
905+
case lists:any(fun leave_cluster/1, Nodes) of
906+
true -> ok;
907+
false -> e(no_running_cluster_nodes)
908+
end;
906909
leave_cluster(Node) ->
907910
case rpc:call(Node,
908911
rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of

0 commit comments

Comments
 (0)