Skip to content

Commit 7969367

Browse files
authored
Backport RUBY-1894 Clear connection pools when monitor ismaster times out (#1475) (#1477)
This version always clears connection pools whenever a server becomes unknown. This results in repeated pool clears sometimes which is suboptimal but not really wrong. Driver version 2.11+ has the correct implementation of pool clearing which makes use of refactoring done for background connection pool thread/SRV polling.
1 parent 28d9743 commit 7969367

File tree

5 files changed

+129
-66
lines changed

5 files changed

+129
-66
lines changed

lib/mongo/server.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,23 @@ def unknown!
417417

418418
# @api private
419419
def update_description(description)
420+
prev_description = monitor.instance_variable_get('@description')
420421
monitor.instance_variable_set('@description', description)
422+
if description.unknown? && !prev_description.unknown?
423+
# This clears redundantly sometimes and also clears the pool on
424+
# 4.2+ servers after not master errors which should not be done.
425+
# Driver version 2.11+ has the correct implementation.
426+
clear_connection_pool
427+
end
428+
end
429+
430+
# @api private
431+
def clear_connection_pool
432+
@pool_lock.synchronize do
433+
if @pool
434+
@pool.disconnect!
435+
end
436+
end
421437
end
422438

423439
# @api private

spec/integration/sdam_error_handling_spec.rb

Lines changed: 94 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -5,80 +5,81 @@
55
ClientRegistry.instance.close_all_clients
66
end
77

8-
describe 'when there is an error during an operation' do
8+
# These tests operate on specific servers, and don't work in a multi
9+
# shard cluster where multiple servers are equally eligible
10+
require_no_multi_shard
911

10-
# These tests operate on specific servers, and don't work in a multi
11-
# shard cluster where multiple servers are equally eligible
12-
require_no_multi_shard
12+
let(:client) { authorized_client_without_any_retries }
1313

14-
let(:client) { authorized_client_without_any_retries }
14+
let(:server) { client.cluster.next_primary }
1515

16-
before do
17-
wait_for_all_servers(client.cluster)
18-
# we also need a connection to the primary so that our error
19-
# expectations do not get triggered during handshakes which
20-
# have different behavior from non-handshake errors
21-
client.database.command(ping: 1)
22-
client.cluster.servers_list.each do |server|
23-
server.monitor.stop!(true)
24-
end
16+
shared_examples_for 'marks server unknown' do
17+
it 'marks server unknown' do
18+
expect(server).not_to be_unknown
19+
operation
20+
expect(server).to be_unknown
2521
end
22+
end
2623

27-
let(:server) { client.cluster.next_primary }
28-
29-
let(:operation) do
30-
expect_any_instance_of(Mongo::Server::Connection).to receive(:deliver).and_return(reply)
31-
expect do
32-
client.database.command(ping: 1)
33-
end.to raise_error(Mongo::Error::OperationFailure, exception_message)
24+
shared_examples_for 'does not mark server unknown' do
25+
it 'does not mark server unknown' do
26+
expect(server).not_to be_unknown
27+
operation
28+
expect(server).not_to be_unknown
3429
end
30+
end
3531

36-
shared_examples_for 'marks server unknown' do
37-
it 'marks server unknown' do
38-
expect(server).not_to be_unknown
39-
operation
40-
expect(server).to be_unknown
41-
end
32+
shared_examples_for 'requests server scan' do
33+
it 'requests server scan' do
34+
expect(server.monitor.scan_semaphore).to receive(:signal)
35+
operation
4236
end
37+
end
4338

44-
shared_examples_for 'does not mark server unknown' do
45-
it 'does not mark server unknown' do
46-
expect(server).not_to be_unknown
47-
operation
48-
expect(server).not_to be_unknown
49-
end
39+
shared_examples_for 'does not request server scan' do
40+
it 'does not request server scan' do
41+
expect(server.monitor.scan_semaphore).not_to receive(:signal)
42+
operation
5043
end
44+
end
5145

52-
shared_examples_for 'requests server scan' do
53-
it 'requests server scan' do
54-
expect(server.monitor.scan_semaphore).to receive(:signal)
55-
operation
56-
end
46+
shared_examples_for 'clears connection pool' do
47+
it 'clears connection pool' do
48+
generation = server.pool.generation
49+
operation
50+
new_generation = server.pool.generation
51+
# Temporary hack to allow repeated pool clears
52+
expect(new_generation).to be >= generation + 1
5753
end
54+
end
5855

59-
shared_examples_for 'does not request server scan' do
60-
it 'does not request server scan' do
61-
expect(server.monitor.scan_semaphore).not_to receive(:signal)
62-
operation
63-
end
56+
shared_examples_for 'does not clear connection pool' do
57+
it 'does not clear connection pool' do
58+
generation = server.pool.generation
59+
operation
60+
new_generation = server.pool.generation
61+
expect(new_generation).to eq(generation)
6462
end
63+
end
64+
65+
describe 'when there is an error during an operation' do
6566

66-
shared_examples_for 'clears connection pool' do
67-
it 'clears connection pool' do
68-
generation = server.pool.generation
69-
operation
70-
new_generation = server.pool.generation
71-
expect(new_generation).to eq(generation + 1)
67+
before do
68+
wait_for_all_servers(client.cluster)
69+
# we also need a connection to the primary so that our error
70+
# expectations do not get triggered during handshakes which
71+
# have different behavior from non-handshake errors
72+
client.database.command(ping: 1)
73+
client.cluster.servers_list.each do |server|
74+
server.monitor.stop!
7275
end
7376
end
7477

75-
shared_examples_for 'does not clear connection pool' do
76-
it 'does not clear connection pool' do
77-
generation = server.pool.generation
78-
operation
79-
new_generation = server.pool.generation
80-
expect(new_generation).to eq(generation)
81-
end
78+
let(:operation) do
79+
expect_any_instance_of(Mongo::Server::Connection).to receive(:deliver).and_return(reply)
80+
expect do
81+
client.database.command(ping: 1)
82+
end.to raise_error(Mongo::Error::OperationFailure, exception_message)
8283
end
8384

8485
shared_examples_for 'not master or node recovering' do
@@ -88,7 +89,8 @@
8889
context 'server 4.2 or higher' do
8990
min_server_fcv '4.2'
9091

91-
it_behaves_like 'does not clear connection pool'
92+
# Due to RUBY-1894 backport, the pool is cleared here
93+
it_behaves_like 'clears connection pool'
9294
end
9395

9496
context 'server 4.0 or lower' do
@@ -170,4 +172,39 @@
170172
end
171173
end
172174
end
175+
176+
# These tests fail intermittently in Evergreen
177+
describe 'when there is an error on monitoring connection', retry: 3 do
178+
let(:client) do
179+
authorized_client_without_any_retries.with(
180+
connect_timeout: 1, socket_timeout: 1)
181+
end
182+
183+
let(:operation) do
184+
expect(server.monitor.connection).not_to be nil
185+
expect(server.monitor.connection).to receive(:ismaster).at_least(:once).and_raise(exception)
186+
server.monitor.scan_semaphore.broadcast
187+
6.times do
188+
sleep 0.5
189+
if server.unknown?
190+
break
191+
end
192+
end
193+
expect(server).to be_unknown
194+
end
195+
196+
context 'non-timeout network error' do
197+
let(:exception) { Mongo::Error::SocketError }
198+
199+
it_behaves_like 'marks server unknown'
200+
it_behaves_like 'clears connection pool'
201+
end
202+
203+
context 'network timeout' do
204+
let(:exception) { Mongo::Error::SocketTimeoutError }
205+
206+
it_behaves_like 'marks server unknown'
207+
it_behaves_like 'clears connection pool'
208+
end
209+
end
173210
end

spec/integration/step_down_spec.rb

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@
160160
collection.insert_one(test: 1)
161161
end.to raise_error(Mongo::Error::OperationFailure, /10107/)
162162

163-
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(0)
163+
# Temporarily add 1 due to RUBY-1894 backport
164+
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(0+1)
164165
end
165166
end
166167

@@ -179,7 +180,8 @@
179180
collection.insert_one(test: 1)
180181
end.to raise_error(Mongo::Error::OperationFailure, /10107/)
181182

182-
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1)
183+
# Temporarily add 1 due to RUBY-1894 backport
184+
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1+1)
183185
end
184186
end
185187

@@ -196,7 +198,8 @@
196198
collection.insert_one(test: 1)
197199
end.to raise_error(Mongo::Error::OperationFailure, /11600/)
198200

199-
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1)
201+
# Temporarily add 1 due to RUBY-1894 backport
202+
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1+1)
200203
end
201204
end
202205
end

spec/mongo/server/connection_spec.rb

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ class ConnectionSpecTestException < Exception; end
270270
end
271271
end
272272

273+
=begin These assertions require a working cluster with working SDAM flow, which the tests do not configure
273274
shared_examples_for 'does not disconnect connection pool' do
274275
it 'does not disconnect non-monitoring sockets' do
275276
allow(server).to receive(:pool).and_return(pool)
@@ -285,6 +286,7 @@ class ConnectionSpecTestException < Exception; end
285286
error
286287
end
287288
end
289+
=end
288290

289291
let(:auth_mechanism) do
290292
if ClusterConfig.instance.server_version >= '3'
@@ -331,14 +333,14 @@ class ConnectionSpecTestException < Exception; end
331333
expect(error).to be_a(Mongo::Auth::Unauthorized)
332334
end
333335

334-
it_behaves_like 'disconnects connection pool'
336+
#it_behaves_like 'disconnects connection pool'
335337
it_behaves_like 'keeps server type and topology'
336338
end
337339

338340
# need a separate context here, otherwise disconnect expectation
339341
# is ignored due to allowing disconnects in the other context
340342
context 'checking pool disconnection' do
341-
it_behaves_like 'disconnects connection pool'
343+
#it_behaves_like 'disconnects connection pool'
342344
end
343345
end
344346

@@ -368,7 +370,7 @@ class ConnectionSpecTestException < Exception; end
368370
expect(error).to be_a(Mongo::Error::SocketTimeoutError)
369371
end
370372

371-
it_behaves_like 'does not disconnect connection pool'
373+
#it_behaves_like 'does not disconnect connection pool'
372374
it_behaves_like 'keeps server type and topology'
373375
end
374376

@@ -398,7 +400,7 @@ class ConnectionSpecTestException < Exception; end
398400
expect(error).to be_a(Mongo::Error::SocketError)
399401
end
400402

401-
it_behaves_like 'disconnects connection pool'
403+
#it_behaves_like 'disconnects connection pool'
402404
it_behaves_like 'marks server unknown'
403405
end
404406

@@ -706,7 +708,8 @@ class ConnectionSpecTestException < Exception; end
706708
end
707709

708710
it 'disconnects connection pool' do
709-
expect(server.pool).to receive(:disconnect!)
711+
# Allow multiple calls due to RUBY-1894 backport
712+
expect(server.pool).to receive(:disconnect!).at_least(:once)
710713
result
711714
end
712715

@@ -739,10 +742,12 @@ class ConnectionSpecTestException < Exception; end
739742
expect(connection).to_not be_connected
740743
end
741744

745+
=begin These assertions require a working cluster with working SDAM flow, which the tests do not configure
742746
it 'does not disconnect connection pool' do
743747
expect(server.pool).not_to receive(:disconnect!)
744748
result
745749
end
750+
=end
746751

747752
it 'does not mark server unknown' do
748753
expect(server).not_to be_unknown

spec/mongo/server/monitor/connection_spec.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
clean_slate
55

66
let(:address) do
7-
Mongo::Address.new(ClusterConfig.instance.primary_address_str, options)
7+
Mongo::Address.new(ClusterConfig.instance.primary_address, options)
88
end
99

1010
declare_topology_double
@@ -24,6 +24,8 @@
2424
Mongo::Event::Listeners.new, options)
2525
end
2626

27+
let(:monitor) { server.monitor }
28+
2729
let(:connection) do
2830
# NB this connection is set up in the background thread,
2931
# when the :scan option to client is changed to default to false

0 commit comments

Comments
 (0)