Skip to content

Commit 83b5e41

Browse files
committed
cluster: expel instances removed from config in cluster.sync
Currently, cluster:sync() leaves instances removed from the config in the instance list. This makes cluster:reload() fail because it tries to reload the config for all instances including those that are not in the config anymore. To fix that, let's make cluster:sync() move instances that were removed from the config to the special expelled instance list so that they become inaccessible via cluster but are still dropped by cluster:drop(). Also, let's add the new boolean option start_stop for cluster:sync(). If set, cluster:sync() will stop old servers and start new ones. Closes #423
1 parent e03b037 commit 83b5e41

File tree

3 files changed

+171
-44
lines changed

3 files changed

+171
-44
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
- Fixed a bug when `server:grep_log()` failed to find a string logged in
66
`server:exec()` called immediately before it (gh-421).
7+
- Fixed a bug when it wasn't possible to reload the cluster config with
8+
`cluster:reload()` after removing an instance with `cluster:sync()`.
9+
Also added an option to `cluster:sync()` to start/stop added/removed
10+
instances (gh-423).
711

812
## 1.1.0
913

luatest/cluster.lua

Lines changed: 97 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,46 @@ end
9292

9393
-- {{{ Helpers
9494

95+
-- Start all instances in the list.
96+
--
97+
-- @tab[opt] opts Options.
98+
-- @bool[opt] opts.wait_until_ready Wait until servers are ready
99+
-- (default: true).
100+
-- @bool[opt] opts.wait_until_running Wait until servers are running
101+
-- (default: wait_until_ready).
102+
local function start_instances(servers, opts)
103+
for _, iserver in ipairs(servers) do
104+
iserver:start({wait_until_ready = false})
105+
end
106+
107+
-- wait_until_ready is true by default.
108+
local wait_until_ready = true
109+
if opts ~= nil and opts.wait_until_ready ~= nil then
110+
wait_until_ready = opts.wait_until_ready
111+
end
112+
113+
if wait_until_ready then
114+
for _, iserver in ipairs(servers) do
115+
iserver:wait_until_ready()
116+
end
117+
end
118+
119+
-- wait_until_running is equal to wait_until_ready by default.
120+
local wait_until_running = wait_until_ready
121+
if opts ~= nil and opts.wait_until_running ~= nil then
122+
wait_until_running = opts.wait_until_running
123+
end
124+
125+
if wait_until_running then
126+
for _, iserver in ipairs(servers) do
127+
helpers.retrying({timeout = 60}, function()
128+
assertions.assert_equals(iserver:eval('return box.info.status'),
129+
'running')
130+
end)
131+
end
132+
end
133+
end
134+
95135
-- Collect names of all the instances defined in the config
96136
-- in the alphabetical order.
97137
local function instance_names_from_config(config)
@@ -131,39 +171,11 @@ end
131171
--
132172
-- @tab[opt] opts Cluster startup options.
133173
-- @bool[opt] opts.wait_until_ready Wait until servers are ready
134-
-- (default: false).
174+
-- (default: true).
175+
-- @bool[opt] opts.wait_until_running Wait until servers are running
176+
-- (default: wait_until_ready).
135177
function Cluster:start(opts)
136-
self:each(function(iserver)
137-
iserver:start({wait_until_ready = false})
138-
end)
139-
140-
-- wait_until_ready is true by default.
141-
local wait_until_ready = true
142-
if opts ~= nil and opts.wait_until_ready ~= nil then
143-
wait_until_ready = opts.wait_until_ready
144-
end
145-
146-
if wait_until_ready then
147-
self:each(function(iserver)
148-
iserver:wait_until_ready()
149-
end)
150-
end
151-
152-
-- wait_until_running is equal to wait_until_ready by default.
153-
local wait_until_running = wait_until_ready
154-
if opts ~= nil and opts.wait_until_running ~= nil then
155-
wait_until_running = opts.wait_until_running
156-
end
157-
158-
if wait_until_running then
159-
self:each(function(iserver)
160-
helpers.retrying({timeout = 60}, function()
161-
assertions.assert_equals(iserver:eval('return box.info.status'),
162-
'running')
163-
end)
164-
165-
end)
166-
end
178+
start_instances(self._servers, opts)
167179
end
168180

169181
--- Start the given instance.
@@ -187,8 +199,12 @@ function Cluster:drop()
187199
for _, iserver in ipairs(self._servers or {}) do
188200
iserver:drop()
189201
end
202+
for _, iserver in ipairs(self._expelled_servers or {}) do
203+
iserver:drop()
204+
end
190205
self._servers = nil
191206
self._server_map = nil
207+
self._expelled_servers = nil
192208
end
193209

194210
--- Sync the cluster object with the new config.
@@ -197,25 +213,67 @@ end
197213
--
198214
-- * Write the new config into the config file.
199215
-- * Update the internal list of instances.
216+
-- * Optionally starts instances added to the config and stops instances
217+
-- removed from the config.
200218
--
201219
-- @tab config New config.
202-
function Cluster:sync(config)
220+
-- @tab[opt] opts Options.
221+
-- @bool[opt] opts.start_stop Start/stop added/removed servers
222+
-- (default: false).
223+
-- @bool[opt] opts.wait_until_ready Wait until servers are ready
224+
-- (default: true; used only if start_stop is set).
225+
-- @bool[opt] opts.wait_until_running Wait until servers are running
226+
-- (default: wait_until_ready; used only if start_stop is set).
227+
function Cluster:sync(config, opts)
203228
assert(type(config) == 'table')
204229

205230
local instance_names = instance_names_from_config(config)
206231

207232
treegen.write_file(self._dir, self._config_file_rel, yaml.encode(config))
208233

209-
for i, name in ipairs(instance_names) do
210-
if self._server_map[name] == nil then
211-
local iserver = server:new(fun.chain(self._server_opts, {
234+
local server_map = self._server_map
235+
self._server_map = {}
236+
self._servers = {}
237+
local new_servers = {}
238+
239+
for _, name in ipairs(instance_names) do
240+
local iserver = server_map[name]
241+
if iserver == nil then
242+
iserver = server:new(fun.chain(self._server_opts, {
212243
alias = name,
213244
}):tomap())
214-
table.insert(self._servers, i, iserver)
215-
self._server_map[name] = iserver
245+
table.insert(new_servers, iserver)
246+
else
247+
server_map[name] = nil
216248
end
249+
self._server_map[name] = iserver
250+
table.insert(self._servers, iserver)
217251
end
218252

253+
local expelled_servers = {}
254+
for _, iserver in pairs(server_map) do
255+
table.insert(expelled_servers, iserver)
256+
end
257+
258+
-- Sort expelled servers by name for reproducibility.
259+
table.sort(expelled_servers, function(a, b) return a.alias < b.alias end)
260+
261+
-- Add expelled servers to the list to be dropped with the cluster.
262+
for _, iserver in pairs(expelled_servers) do
263+
table.insert(self._expelled_servers, iserver)
264+
end
265+
266+
local start_stop = false
267+
if opts ~= nil and opts.start_stop ~= nil then
268+
start_stop = opts.start_stop
269+
end
270+
271+
if start_stop then
272+
start_instances(new_servers, opts)
273+
for _, iserver in ipairs(expelled_servers) do
274+
iserver:stop()
275+
end
276+
end
219277
end
220278

221279
--- Reload configuration on all the instances.
@@ -297,6 +355,7 @@ function Cluster:new(config, server_opts, opts)
297355
-- Store a cluster object in 'g'.
298356
self._servers = servers
299357
self._server_map = server_map
358+
self._expelled_servers = {}
300359
self._dir = dir
301360
self._config_file_rel = config_file_rel
302361
self._server_opts = server_opts

test/cluster_test.lua

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,6 @@ g.test_sync = function()
142142
c:start()
143143
assert_instance_running(c, 'i-001')
144144

145-
c:stop()
146-
assert_instance_stopped(c, 'i-001')
147-
148145
local config2 = cbuilder:new()
149146
:use_group('g-001')
150147
:use_replicaset('r-001')
@@ -156,18 +153,85 @@ g.test_sync = function()
156153

157154
:config()
158155

156+
local server1 = c['i-001']
157+
159158
c:sync(config2)
160159

161-
t.assert_equals(c:size(), 3)
160+
-- Check that the server that was removed from the config is expelled
161+
-- from the cluster but not stopped.
162+
t.assert_equals(c:size(), 2)
163+
t.assert_is(c['i-001'], nil)
164+
t.assert_is_not(server1.process, nil)
162165

163-
c:start_instance('i-002')
164-
c:start_instance('i-003')
166+
-- Check that the new servers are not started.
167+
assert_instance_stopped(c, 'i-002')
168+
assert_instance_stopped(c, 'i-003')
169+
170+
c:start()
165171
assert_instance_running(c, 'i-002')
166172
assert_instance_running(c, 'i-003')
167173

174+
-- Check config reload works after sync.
175+
c:reload()
176+
168177
c:stop()
169178
assert_instance_stopped(c, 'i-002')
170179
assert_instance_stopped(c, 'i-003')
180+
181+
-- Starting/stopping the cluster shouldn't affect the expelled server.
182+
-- However, dropping the cluster should also drop the expelled server.
183+
t.assert_is_not(server1.process, nil)
184+
c:drop()
185+
t.assert_is(server1.process, nil)
186+
end
187+
188+
g.test_sync_start_stop = function()
189+
t.run_only_if(utils.version_current_ge_than(3, 0, 0),
190+
[[Declarative configuration works on Tarantool 3.0.0+.
191+
See tarantool/tarantool@13149d65bc9d for details]])
192+
193+
t.assert_equals(g._cluster, nil)
194+
195+
local config = cbuilder:new()
196+
:use_group('g-001')
197+
:use_replicaset('r-001')
198+
:add_instance('i-001', {})
199+
:config()
200+
201+
local c = cluster:new(config, server_opts)
202+
203+
t.assert_equals(c:size(), 1)
204+
205+
c:start()
206+
assert_instance_running(c, 'i-001')
207+
208+
local config2 = cbuilder:new()
209+
:use_group('g-001')
210+
:use_replicaset('r-001')
211+
:add_instance('i-002', {})
212+
213+
:use_group('g-002')
214+
:use_replicaset('r-002')
215+
:add_instance('i-003', {})
216+
217+
:config()
218+
219+
local server1 = c['i-001']
220+
221+
c:sync(config2, {start_stop = true})
222+
223+
-- Check that the server that was removed from the config is expelled
224+
-- from the cluster and stopped.
225+
t.assert_equals(c:size(), 2)
226+
t.assert_is(c['i-001'], nil)
227+
t.assert_is(server1.process, nil)
228+
229+
-- Check that the new servers are started.
230+
assert_instance_running(c, 'i-002')
231+
assert_instance_running(c, 'i-003')
232+
233+
-- Check config reload works after sync.
234+
c:reload()
171235
end
172236

173237
g.test_reload = function()

0 commit comments

Comments
 (0)