43
43
WrapperProperties )
44
44
from aws_advanced_python_wrapper .utils .rds_url_type import RdsUrlType
45
45
from aws_advanced_python_wrapper .utils .rdsutils import RdsUtils
46
+ from aws_advanced_python_wrapper .utils .telemetry .telemetry import \
47
+ TelemetryTraceLevel
46
48
from aws_advanced_python_wrapper .writer_failover_handler import (
47
49
WriterFailoverHandler , WriterFailoverHandlerImpl )
48
50
@@ -84,7 +86,10 @@ def __init__(self, plugin_service: PluginService, props: Properties):
84
86
self ._properties )
85
87
self ._failover_reader_connect_timeout_sec = WrapperProperties .FAILOVER_READER_CONNECT_TIMEOUT_SEC .get_float (
86
88
self ._properties )
87
- self ._keep_session_state_on_failover = WrapperProperties .KEEP_SESSION_STATE_ON_FAILOVER .get_bool (self ._properties )
89
+ self ._keep_session_state_on_failover = WrapperProperties .KEEP_SESSION_STATE_ON_FAILOVER .get_bool (
90
+ self ._properties )
91
+ self ._telemetry_failover_additional_top_trace_setting = (
92
+ WrapperProperties .TELEMETRY_FAILOVER_ADDITIONAL_TOP_TRACE .get_bool (self ._properties ))
88
93
self ._failover_mode : FailoverMode
89
94
self ._is_in_transaction : bool = False
90
95
self ._is_closed : bool = False
@@ -96,6 +101,18 @@ def __init__(self, plugin_service: PluginService, props: Properties):
96
101
self ._saved_read_only_status : bool = False
97
102
self ._saved_auto_commit_status : bool = False
98
103
104
+ telemetry_factory = self ._plugin_service .get_telemetry_factory ()
105
+ self ._failover_writer_triggered_counter = telemetry_factory .create_counter ("writer_failover.triggered.count" )
106
+ self ._failover_writer_success_counter = telemetry_factory .create_counter (
107
+ "writer_failover.completed.success.count" )
108
+ self ._failover_writer_failed_counter = telemetry_factory .create_counter (
109
+ "writer_failover.completed.failed.count" )
110
+ self ._failover_reader_triggered_counter = telemetry_factory .create_counter ("reader_failover.triggered.count" )
111
+ self ._failover_reader_success_counter = telemetry_factory .create_counter (
112
+ "reader_failover.completed.success.count" )
113
+ self ._failover_reader_failed_counter = telemetry_factory .create_counter (
114
+ "reader_failover.completed.failed.count" )
115
+
99
116
FailoverPlugin ._SUBSCRIBED_METHODS .update (self ._plugin_service .network_bound_methods )
100
117
101
118
def init_host_provider (
@@ -213,10 +230,13 @@ def _connect(
213
230
properties : Properties ,
214
231
is_initial_connection : bool ,
215
232
connect_func : Callable ) -> Connection :
216
- conn : Connection = self ._stale_dns_helper .get_verified_connection (is_initial_connection , self ._host_list_provider_service , host , properties ,
233
+ conn : Connection = self ._stale_dns_helper .get_verified_connection (is_initial_connection ,
234
+ self ._host_list_provider_service , host ,
235
+ properties ,
217
236
connect_func )
218
237
if self ._keep_session_state_on_failover :
219
- self ._saved_read_only_status = False if self ._saved_read_only_status == self ._plugin_service .driver_dialect .is_read_only (conn ) \
238
+ self ._saved_read_only_status = False if self ._saved_read_only_status == self ._plugin_service .driver_dialect .is_read_only (
239
+ conn ) \
220
240
else self ._saved_read_only_status
221
241
self ._saved_auto_commit_status = False \
222
242
if self ._saved_read_only_status == self ._plugin_service .driver_dialect .get_autocommit (conn ) \
@@ -270,53 +290,96 @@ def _failover(self, failed_host: Optional[HostInfo]):
270
290
raise FailoverSuccessError (Messages .get (error_msg ))
271
291
272
292
def _failover_reader(self, failed_host: Optional[HostInfo]):
    """
    Run a reader failover inside a nested telemetry context.

    The "triggered" counter is incremented up front. The reader failover handler is then
    asked for a new connection; when it yields a connected result, the session state is
    optionally restored, the new connection becomes the current one, the aliases captured
    from the previous host are removed from the new current host info, and the topology
    is updated.

    :param failed_host: the host that failed, if known. It is only forwarded to the
        failover handler when its raw availability is ``HostAvailability.AVAILABLE``;
        otherwise ``None`` is forwarded instead.
    :raises FailoverFailedError: if the handler returns no result or a result that is not connected.
    :raises Exception: any exception carried by a connected failover result is re-raised as is.
    """
    telemetry_factory = self._plugin_service.get_telemetry_factory()
    telemetry_context = telemetry_factory.open_telemetry_context(
        "failover to replica", TelemetryTraceLevel.NESTED)
    self._failover_reader_triggered_counter.inc()

    try:
        logger.debug("FailoverPlugin.StartReaderFailover")

        # Remember the aliases of the host we are leaving so they can be dropped from
        # the new current host once the connection has been switched.
        previous_aliases = (
            self._plugin_service.current_host_info.aliases
            if self._plugin_service.current_host_info is not None
            else None)

        # Only a failed host whose raw availability is still AVAILABLE is handed to the handler.
        handler_failed_host = failed_host
        if handler_failed_host is not None \
                and handler_failed_host.get_raw_availability() != HostAvailability.AVAILABLE:
            handler_failed_host = None

        failover_result: ReaderFailoverResult = self._reader_failover_handler.failover(
            self._plugin_service.hosts, handler_failed_host)

        if failover_result is None or not failover_result.is_connected:
            raise FailoverFailedError(Messages.get("FailoverPlugin.UnableToConnectToReader"))

        if failover_result.exception is not None:
            raise failover_result.exception

        if self._keep_session_state_on_failover:
            self.restore_session_state(failover_result.connection)

        if failover_result.connection is not None and failover_result.new_host is not None:
            self._plugin_service.set_current_connection(
                failover_result.connection, failover_result.new_host)

        if self._plugin_service.current_host_info is not None \
                and previous_aliases is not None \
                and len(previous_aliases) > 0:
            self._plugin_service.current_host_info.remove_alias(previous_aliases)

        self._update_topology(True)

        logger.debug("FailoverPlugin.EstablishedConnection", self._plugin_service.current_host_info)

        self._failover_reader_success_counter.inc()
    except FailoverSuccessError as success_signal:
        # A FailoverSuccessError is recorded on the trace as a successful outcome.
        telemetry_context.set_success(True)
        telemetry_context.set_exception(success_signal)
        self._failover_reader_success_counter.inc()
        raise success_signal
    except Exception as error:
        telemetry_context.set_success(False)
        telemetry_context.set_exception(error)
        self._failover_reader_failed_counter.inc()
        raise error
    finally:
        # The context is closed before the optional top-level copy is posted.
        telemetry_context.close_context()
        if self._telemetry_failover_additional_top_trace_setting:
            telemetry_factory.post_copy(telemetry_context, TelemetryTraceLevel.FORCE_TOP_LEVEL)
296
342
297
- self ._update_topology (True )
343
def _failover_writer(self):
    """
    Run a writer failover inside a nested telemetry context.

    The "triggered" counter is incremented up front. The writer failover handler is then
    asked for a new connection; on a connected result the writer host is looked up in the
    returned topology, the session state is optionally restored, the new connection becomes
    the current one, and the host list is refreshed.

    :raises FailoverFailedError: if the handler returns no result or a result that is not connected.
    :raises Exception: any exception carried by the failover result is re-raised as is; this is
        checked before the connected state.
    """
    telemetry_factory = self._plugin_service.get_telemetry_factory()
    telemetry_context = telemetry_factory.open_telemetry_context(
        "failover to writer node", TelemetryTraceLevel.NESTED)
    self._failover_writer_triggered_counter.inc()

    try:
        logger.debug("FailoverPlugin.StartWriterFailover")

        failover_result: WriterFailoverResult = self._writer_failover_handler.failover(
            self._plugin_service.hosts)

        # An exception carried by the result takes precedence over the generic failure below.
        if failover_result is not None and failover_result.exception is not None:
            raise failover_result.exception

        if failover_result is None or not failover_result.is_connected:
            raise FailoverFailedError(Messages.get("FailoverPlugin.UnableToConnectToWriter"))

        new_writer = self._get_writer(failover_result.topology)

        if self._keep_session_state_on_failover:
            self.restore_session_state(failover_result.new_connection)

        self._plugin_service.set_current_connection(failover_result.new_connection, new_writer)

        logger.debug("FailoverPlugin.EstablishedConnection", self._plugin_service.current_host_info)

        self._plugin_service.refresh_host_list()

        self._failover_writer_success_counter.inc()
    except FailoverSuccessError as success_signal:
        # A FailoverSuccessError is recorded on the trace as a successful outcome.
        telemetry_context.set_success(True)
        telemetry_context.set_exception(success_signal)
        self._failover_writer_success_counter.inc()
        raise success_signal
    except Exception as error:
        telemetry_context.set_success(False)
        telemetry_context.set_exception(error)
        self._failover_writer_failed_counter.inc()
        raise error
    finally:
        # The context is closed before the optional top-level copy is posted.
        telemetry_context.close_context()
        if self._telemetry_failover_additional_top_trace_setting:
            telemetry_factory.post_copy(telemetry_context, TelemetryTraceLevel.FORCE_TOP_LEVEL)
320
383
321
384
def restore_session_state (self , conn : Optional [Connection ]):
322
385
"""
@@ -401,7 +464,8 @@ def _connect_to(self, host: HostInfo):
401
464
logger .debug ("FailoverPlugin.EstablishedConnection" , host )
402
465
except Exception as ex :
403
466
if self ._plugin_service is not None :
404
- logger .debug ("FailoverPlugin.ConnectionToHostFailed" , 'writer' if host .role == HostRole .WRITER else 'reader' , host .url )
467
+ logger .debug ("FailoverPlugin.ConnectionToHostFailed" ,
468
+ 'writer' if host .role == HostRole .WRITER else 'reader' , host .url )
405
469
raise ex
406
470
407
471
def _should_attempt_reader_connection (self ) -> bool :
0 commit comments