Skip to content

Commit af89d22

Browse files
authored
Backport RUBY-1894 Clear connection pools when monitor ismaster times out (#1475) (#1477) (#1479)
This version always clears connection pools whenever a server becomes unknown. This results in repeated pool clears sometimes which is suboptimal but not really wrong. Driver version 2.11+ has the correct implementation of pool clearing which makes use of refactoring done for background connection pool thread/SRV polling.
1 parent c1f3902 commit af89d22

File tree

5 files changed

+130
-62
lines changed

5 files changed

+130
-62
lines changed

lib/mongo/server.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,23 @@ def unknown!
417417

418418
# @api private
419419
def update_description(description)
420+
prev_description = monitor.instance_variable_get('@description')
420421
monitor.instance_variable_set('@description', description)
422+
if description.unknown? && !prev_description.unknown?
423+
# This clears redundantly sometimes and also clears the pool on
424+
# 4.2+ servers after not master errors which should not be done.
425+
# Driver version 2.11+ has the correct implementation.
426+
clear_connection_pool
427+
end
428+
end
429+
430+
# @api private
431+
def clear_connection_pool
432+
@pool_lock.synchronize do
433+
if @pool
434+
@pool.disconnect!
435+
end
436+
end
421437
end
422438

423439
# @api private

spec/integration/sdam_error_handling_spec.rb

Lines changed: 95 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,64 @@
55
ClientRegistry.instance.close_all_clients
66
end
77

8+
# These tests operate on specific servers, and don't work in a multi
9+
# shard cluster where multiple servers are equally eligible
10+
require_topology :replica_set
11+
12+
let(:client) { authorized_client_without_any_retries }
13+
14+
let(:server) { client.cluster.next_primary }
15+
16+
shared_examples_for 'marks server unknown' do
17+
it 'marks server unknown' do
18+
expect(server).not_to be_unknown
19+
operation
20+
expect(server).to be_unknown
21+
end
22+
end
23+
24+
shared_examples_for 'does not mark server unknown' do
25+
it 'does not mark server unknown' do
26+
expect(server).not_to be_unknown
27+
operation
28+
expect(server).not_to be_unknown
29+
end
30+
end
31+
32+
shared_examples_for 'requests server scan' do
33+
it 'requests server scan' do
34+
expect(server.monitor.scan_semaphore).to receive(:signal)
35+
operation
36+
end
37+
end
38+
39+
shared_examples_for 'does not request server scan' do
40+
it 'does not request server scan' do
41+
expect(server.monitor.scan_semaphore).not_to receive(:signal)
42+
operation
43+
end
44+
end
45+
46+
shared_examples_for 'clears connection pool' do
47+
it 'clears connection pool' do
48+
generation = server.pool.generation
49+
operation
50+
new_generation = server.pool.generation
51+
# Temporary hack to allow repeated pool clears
52+
expect(new_generation).to be >= generation + 1
53+
end
54+
end
55+
56+
shared_examples_for 'does not clear connection pool' do
57+
it 'does not clear connection pool' do
58+
generation = server.pool.generation
59+
operation
60+
new_generation = server.pool.generation
61+
expect(new_generation).to eq(generation)
62+
end
63+
end
64+
865
describe 'when there is an error during an operation' do
9-
let(:client) { authorized_client_without_any_retries }
1066

1167
before do
1268
wait_for_all_servers(client.cluster)
@@ -15,75 +71,26 @@
1571
# have different behavior from non-handshake errors
1672
client.database.command(ping: 1)
1773
client.cluster.servers_list.each do |server|
18-
server.monitor.stop!(true)
74+
server.monitor.stop!
1975
end
2076
end
2177

22-
let(:server) { client.cluster.next_primary }
23-
2478
let(:operation) do
2579
expect_any_instance_of(Mongo::Server::Connection).to receive(:deliver).and_return(reply)
2680
expect do
2781
client.database.command(ping: 1)
2882
end.to raise_error(Mongo::Error::OperationFailure, exception_message)
2983
end
3084

31-
shared_examples_for 'marks server unknown' do
32-
it 'marks server unknown' do
33-
expect(server).not_to be_unknown
34-
operation
35-
expect(server).to be_unknown
36-
end
37-
end
38-
39-
shared_examples_for 'does not mark server unknown' do
40-
it 'does not mark server unknown' do
41-
expect(server).not_to be_unknown
42-
operation
43-
expect(server).not_to be_unknown
44-
end
45-
end
46-
47-
shared_examples_for 'requests server scan' do
48-
it 'requests server scan' do
49-
expect(server.monitor.scan_semaphore).to receive(:signal)
50-
operation
51-
end
52-
end
53-
54-
shared_examples_for 'does not request server scan' do
55-
it 'does not request server scan' do
56-
expect(server.monitor.scan_semaphore).not_to receive(:signal)
57-
operation
58-
end
59-
end
60-
61-
shared_examples_for 'clears connection pool' do
62-
it 'clears connection pool' do
63-
generation = server.pool.generation
64-
operation
65-
new_generation = server.pool.generation
66-
expect(new_generation).to eq(generation + 1)
67-
end
68-
end
69-
70-
shared_examples_for 'does not clear connection pool' do
71-
it 'does not clear connection pool' do
72-
generation = server.pool.generation
73-
operation
74-
new_generation = server.pool.generation
75-
expect(new_generation).to eq(generation)
76-
end
77-
end
78-
7985
shared_examples_for 'not master or node recovering' do
8086
it_behaves_like 'marks server unknown'
8187
it_behaves_like 'requests server scan'
8288

8389
context 'server 4.2 or higher' do
8490
min_server_fcv '4.2'
8591

86-
it_behaves_like 'does not clear connection pool'
92+
# Due to RUBY-1894 backport, the pool is cleared here
93+
it_behaves_like 'clears connection pool'
8794
end
8895

8996
context 'server 4.0 or lower' do
@@ -165,4 +172,39 @@
165172
end
166173
end
167174
end
175+
176+
# These tests fail intermittently in Evergreen
177+
describe 'when there is an error on monitoring connection', retry: 3 do
178+
let(:client) do
179+
authorized_client_without_any_retries.with(
180+
connect_timeout: 1, socket_timeout: 1)
181+
end
182+
183+
let(:operation) do
184+
expect(server.monitor.connection).not_to be nil
185+
expect(server.monitor.connection).to receive(:ismaster).at_least(:once).and_raise(exception)
186+
server.monitor.scan_semaphore.broadcast
187+
6.times do
188+
sleep 0.5
189+
if server.unknown?
190+
break
191+
end
192+
end
193+
expect(server).to be_unknown
194+
end
195+
196+
context 'non-timeout network error' do
197+
let(:exception) { Mongo::Error::SocketError }
198+
199+
it_behaves_like 'marks server unknown'
200+
it_behaves_like 'clears connection pool'
201+
end
202+
203+
context 'network timeout' do
204+
let(:exception) { Mongo::Error::SocketTimeoutError }
205+
206+
it_behaves_like 'marks server unknown'
207+
it_behaves_like 'clears connection pool'
208+
end
209+
end
168210
end

spec/integration/step_down_spec.rb

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,8 @@
154154
collection.insert_one(test: 1)
155155
end.to raise_error(Mongo::Error::OperationFailure, /10107/)
156156

157-
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(0)
157+
# Temporarily add 1 due to RUBY-1894 backport
158+
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(0+1)
158159
end
159160
end
160161

@@ -173,7 +174,8 @@
173174
collection.insert_one(test: 1)
174175
end.to raise_error(Mongo::Error::OperationFailure, /10107/)
175176

176-
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1)
177+
# Temporarily add 1 due to RUBY-1894 backport
178+
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1+1)
177179
end
178180
end
179181

@@ -190,7 +192,8 @@
190192
collection.insert_one(test: 1)
191193
end.to raise_error(Mongo::Error::OperationFailure, /11600/)
192194

193-
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1)
195+
# Temporarily add 1 due to RUBY-1894 backport
196+
expect(event_subscriber.select_published_events(Mongo::Monitoring::Event::Cmap::PoolCleared).count).to eq(1+1)
194197
end
195198
end
196199
end

spec/mongo/server/connection_spec.rb

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ class ConnectionSpecTestException < Exception; end
270270
end
271271
end
272272

273+
=begin These assertions require a working cluster with working SDAM flow, which the tests do not configure
273274
shared_examples_for 'does not disconnect connection pool' do
274275
it 'does not disconnect non-monitoring sockets' do
275276
allow(server).to receive(:pool).and_return(pool)
@@ -285,6 +286,7 @@ class ConnectionSpecTestException < Exception; end
285286
error
286287
end
287288
end
289+
=end
288290

289291
let(:auth_mechanism) do
290292
if ClusterConfig.instance.server_version >= '3'
@@ -331,14 +333,14 @@ class ConnectionSpecTestException < Exception; end
331333
expect(error).to be_a(Mongo::Auth::Unauthorized)
332334
end
333335

334-
it_behaves_like 'disconnects connection pool'
336+
#it_behaves_like 'disconnects connection pool'
335337
it_behaves_like 'keeps server type and topology'
336338
end
337339

338340
# need a separate context here, otherwise disconnect expectation
339341
# is ignored due to allowing disconnects in the other context
340342
context 'checking pool disconnection' do
341-
it_behaves_like 'disconnects connection pool'
343+
#it_behaves_like 'disconnects connection pool'
342344
end
343345
end
344346

@@ -368,7 +370,7 @@ class ConnectionSpecTestException < Exception; end
368370
expect(error).to be_a(Mongo::Error::SocketTimeoutError)
369371
end
370372

371-
it_behaves_like 'does not disconnect connection pool'
373+
#it_behaves_like 'does not disconnect connection pool'
372374
it_behaves_like 'keeps server type and topology'
373375
end
374376

@@ -398,7 +400,7 @@ class ConnectionSpecTestException < Exception; end
398400
expect(error).to be_a(Mongo::Error::SocketError)
399401
end
400402

401-
it_behaves_like 'disconnects connection pool'
403+
#it_behaves_like 'disconnects connection pool'
402404
it_behaves_like 'marks server unknown'
403405
end
404406

@@ -706,7 +708,8 @@ class ConnectionSpecTestException < Exception; end
706708
end
707709

708710
it 'disconnects connection pool' do
709-
expect(server.pool).to receive(:disconnect!)
711+
# Allow multiple calls due to RUBY-1894 backport
712+
expect(server.pool).to receive(:disconnect!).at_least(:once)
710713
result
711714
end
712715

@@ -739,10 +742,12 @@ class ConnectionSpecTestException < Exception; end
739742
expect(connection).to_not be_connected
740743
end
741744

745+
=begin These assertions require a working cluster with working SDAM flow, which the tests do not configure
742746
it 'does not disconnect connection pool' do
743747
expect(server.pool).not_to receive(:disconnect!)
744748
result
745749
end
750+
=end
746751

747752
it 'does not mark server unknown' do
748753
expect(server).not_to be_unknown

spec/mongo/server/monitor/connection_spec.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
end
1212

1313
let(:address) do
14-
client.cluster.next_primary.address
14+
Mongo::Address.new(ClusterConfig.instance.primary_address, options)
1515
end
1616

1717
declare_topology_double
@@ -31,6 +31,8 @@
3131
Mongo::Event::Listeners.new, options)
3232
end
3333

34+
let(:monitor) { server.monitor }
35+
3436
let(:connection) do
3537
# NB this connection is set up in the background thread,
3638
# when the :scan option to client is changed to default to false

0 commit comments

Comments
 (0)