Skip to content

Commit a47b429

Browse files
lalitbcijothomasutpilla
authored
Avoid redundant shutdown in TracerProvider::drop when already shut down (#2197)
Co-authored-by: Cijo Thomas <[email protected]> Co-authored-by: Utkarsh Umesan Pillai <[email protected]> Co-authored-by: Cijo Thomas <[email protected]>
1 parent 2bb53b4 commit a47b429

File tree

2 files changed

+230
-36
lines changed

2 files changed

+230
-36
lines changed

opentelemetry-sdk/src/trace/provider.rs

Lines changed: 226 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,67 @@
1-
//! # Trace Provider SDK
2-
//!
3-
//! ## Tracer Creation
4-
//!
5-
//! New [`Tracer`] instances are always created through a [`TracerProvider`].
6-
//!
7-
//! All configuration objects and extension points (span processors,
8-
//! propagators) are provided by the [`TracerProvider`]. [`Tracer`] instances do
9-
//! not duplicate this data to avoid that different [`Tracer`] instances
10-
//! of the [`TracerProvider`] have different versions of these data.
1+
/// # Trace Provider SDK
2+
///
3+
/// The `TracerProvider` handles the creation and management of [`Tracer`] instances and coordinates
4+
/// span processing. It serves as the central configuration point for tracing, ensuring consistency
5+
/// across all [`Tracer`] instances it creates.
6+
///
7+
/// ## Tracer Creation
8+
///
9+
/// New [`Tracer`] instances are always created through a `TracerProvider`. These `Tracer`s share
10+
/// a common configuration, which includes the [`Resource`], span processors, sampling strategies,
11+
/// and span limits. This avoids the need for each `Tracer` to maintain its own version of these
12+
/// configurations, ensuring uniform behavior across all instances.
13+
///
14+
/// ## Cloning and Shutdown
15+
///
16+
/// The `TracerProvider` is designed to be clonable. Cloning a `TracerProvider` creates a
17+
/// new reference to the same provider, not a new instance. Dropping the last reference
18+
/// to the `TracerProvider` will automatically trigger its shutdown. During shutdown, the provider
19+
/// will flush all remaining spans, ensuring they are passed to the configured processors.
20+
/// Users can also manually trigger shutdown using the [`shutdown`](TracerProvider::shutdown)
21+
/// method, which will ensure the same behavior.
22+
///
23+
/// Once shut down, the `TracerProvider` transitions into a disabled state. In this state, further
24+
/// operations on its associated `Tracer` instances will result in no-ops, ensuring that no spans
25+
/// are processed or exported after shutdown.
26+
///
27+
/// ## Span Processing and Force Flush
28+
///
29+
/// The `TracerProvider` manages the lifecycle of span processors, which are responsible for
30+
/// collecting, processing, and exporting spans. The [`force_flush`](TracerProvider::force_flush) method
31+
/// invoked at any time will trigger an immediate flush of all pending spans (if any) to the exporters.
32+
/// This will block the user thread till all the spans are passed to exporters.
33+
///
34+
/// # Examples
35+
///
36+
/// ```
37+
/// use opentelemetry::global;
38+
/// use opentelemetry_sdk::trace::TracerProvider;
39+
/// use opentelemetry::trace::Tracer;
40+
///
41+
/// fn init_tracing() -> TracerProvider {
42+
/// let provider = TracerProvider::default();
43+
///
44+
/// // Set the provider to be used globally
45+
/// let _ = global::set_tracer_provider(provider.clone());
46+
///
47+
/// provider
48+
/// }
49+
///
50+
/// fn main() {
51+
/// let provider = init_tracing();
52+
///
53+
/// // create tracer..
54+
/// let tracer = global::tracer("example/client");
55+
///
56+
/// // create span...
57+
/// let span = tracer
58+
/// .span_builder("test_span")
59+
/// .start(&tracer);
60+
///
61+
/// // Explicitly shut down the provider
62+
/// provider.shutdown();
63+
/// }
64+
/// ```
1165
use crate::runtime::RuntimeChannel;
1266
use crate::trace::{
1367
BatchSpanProcessor, Config, RandomIdGenerator, Sampler, SimpleSpanProcessor, SpanLimits, Tracer,
@@ -16,7 +70,7 @@ use crate::{export::trace::SpanExporter, trace::SpanProcessor};
1670
use crate::{InstrumentationLibrary, Resource};
1771
use once_cell::sync::{Lazy, OnceCell};
1872
use opentelemetry::trace::TraceError;
19-
use opentelemetry::{global, trace::TraceResult};
73+
use opentelemetry::{otel_debug, trace::TraceResult};
2074
use std::borrow::Cow;
2175
use std::sync::atomic::{AtomicBool, Ordering};
2276
use std::sync::Arc;
@@ -36,36 +90,60 @@ static NOOP_TRACER_PROVIDER: Lazy<TracerProvider> = Lazy::new(|| TracerProvider
3690
span_limits: SpanLimits::default(),
3791
resource: Cow::Owned(Resource::empty()),
3892
},
93+
is_shutdown: AtomicBool::new(true),
3994
}),
40-
is_shutdown: Arc::new(AtomicBool::new(true)),
4195
});
4296

4397
/// TracerProvider inner type
4498
#[derive(Debug)]
4599
pub(crate) struct TracerProviderInner {
46100
processors: Vec<Box<dyn SpanProcessor>>,
47101
config: crate::trace::Config,
102+
is_shutdown: AtomicBool,
48103
}
49104

50-
impl Drop for TracerProviderInner {
51-
fn drop(&mut self) {
52-
for processor in &mut self.processors {
105+
impl TracerProviderInner {
106+
/// Crate-private shutdown method to be called both from explicit shutdown
107+
/// and from Drop when the last reference is released.
108+
pub(crate) fn shutdown(&self) -> Vec<TraceError> {
109+
let mut errs = vec![];
110+
for processor in &self.processors {
53111
if let Err(err) = processor.shutdown() {
54-
global::handle_error(err);
112+
// Log at debug level because:
113+
// - The error is also returned to the user for handling (if applicable)
114+
// - Or the error occurs during `TracerProviderInner::Drop` as part of telemetry shutdown,
115+
// which is non-actionable by the user
116+
otel_debug!(name: "TracerProvider.Drop.ShutdownError",
117+
error = format!("{err}"));
118+
errs.push(err);
55119
}
56120
}
121+
errs
122+
}
123+
}
124+
125+
impl Drop for TracerProviderInner {
126+
fn drop(&mut self) {
127+
if !self.is_shutdown.load(Ordering::Relaxed) {
128+
let _ = self.shutdown(); // errors are handled within shutdown
129+
} else {
130+
otel_debug!(
131+
name: "TracerProvider.Drop.AlreadyShutdown"
132+
);
133+
}
57134
}
58135
}
59136

60137
/// Creator and registry of named [`Tracer`] instances.
61138
///
62-
/// `TracerProvider` is lightweight container holding pointers to `SpanProcessor` and other components.
63-
/// Cloning and dropping them will not stop the span processing. To stop span processing, users
64-
/// must either call `shutdown` method explicitly, or drop every clone of `TracerProvider`.
139+
/// `TracerProvider` is a container holding pointers to `SpanProcessor` and other components.
140+
/// Cloning a `TracerProvider` instance and dropping it will not stop span processing. To stop span processing, users
141+
/// must either call the `shutdown` method explicitly or allow the last reference to the `TracerProvider`
142+
/// to be dropped. When the last reference is dropped, the shutdown process will be automatically triggered
143+
/// to ensure proper cleanup.
65144
#[derive(Clone, Debug)]
66145
pub struct TracerProvider {
67146
inner: Arc<TracerProviderInner>,
68-
is_shutdown: Arc<AtomicBool>,
69147
}
70148

71149
impl Default for TracerProvider {
@@ -79,7 +157,6 @@ impl TracerProvider {
79157
pub(crate) fn new(inner: TracerProviderInner) -> Self {
80158
TracerProvider {
81159
inner: Arc::new(inner),
82-
is_shutdown: Arc::new(AtomicBool::new(false)),
83160
}
84161
}
85162

@@ -101,7 +178,7 @@ impl TracerProvider {
101178
/// true if the provider has been shutdown
102179
/// Don't start span or export spans when provider is shutdown
103180
pub(crate) fn is_shutdown(&self) -> bool {
104-
self.is_shutdown.load(Ordering::Relaxed)
181+
self.inner.is_shutdown.load(Ordering::Relaxed)
105182
}
106183

107184
/// Force flush all remaining spans in span processors and return results.
@@ -153,28 +230,20 @@ impl TracerProvider {
153230
/// Note that shut down doesn't means the TracerProvider has dropped
154231
pub fn shutdown(&self) -> TraceResult<()> {
155232
if self
233+
.inner
156234
.is_shutdown
157235
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
158236
.is_ok()
159237
{
160238
// propagate the shutdown signal to processors
161-
// it's up to the processor to properly block new spans after shutdown
162-
let mut errs = vec![];
163-
for processor in &self.inner.processors {
164-
if let Err(err) = processor.shutdown() {
165-
errs.push(err);
166-
}
167-
}
168-
239+
let errs = self.inner.shutdown();
169240
if errs.is_empty() {
170241
Ok(())
171242
} else {
172243
Err(TraceError::Other(format!("{errs:?}").into()))
173244
}
174245
} else {
175-
Err(TraceError::Other(
176-
"tracer provider already shut down".into(),
177-
))
246+
Err(TraceError::TracerProviderAlreadyShutdown)
178247
}
179248
}
180249
}
@@ -215,7 +284,7 @@ impl opentelemetry::trace::TracerProvider for TracerProvider {
215284
}
216285

217286
fn library_tracer(&self, library: Arc<InstrumentationLibrary>) -> Self::Tracer {
218-
if self.is_shutdown.load(Ordering::Relaxed) {
287+
if self.inner.is_shutdown.load(Ordering::Relaxed) {
219288
return Tracer::new(library, NOOP_TRACER_PROVIDER.clone());
220289
}
221290
Tracer::new(library, self.clone())
@@ -292,7 +361,12 @@ impl Builder {
292361
p.set_resource(config.resource.as_ref());
293362
}
294363

295-
TracerProvider::new(TracerProviderInner { processors, config })
364+
let is_shutdown = AtomicBool::new(false);
365+
TracerProvider::new(TracerProviderInner {
366+
processors,
367+
config,
368+
is_shutdown,
369+
})
296370
}
297371
}
298372

@@ -391,6 +465,7 @@ mod tests {
391465
Box::from(TestSpanProcessor::new(false)),
392466
],
393467
config: Default::default(),
468+
is_shutdown: AtomicBool::new(false),
394469
});
395470

396471
let results = tracer_provider.force_flush();
@@ -534,6 +609,7 @@ mod tests {
534609
let tracer_provider = super::TracerProvider::new(TracerProviderInner {
535610
processors: vec![Box::from(processor)],
536611
config: Default::default(),
612+
is_shutdown: AtomicBool::new(false),
537613
});
538614

539615
let test_tracer_1 = tracer_provider.tracer("test1");
@@ -554,14 +630,128 @@ mod tests {
554630

555631
// after shutdown we should get noop tracer
556632
let noop_tracer = tracer_provider.tracer("noop");
633+
557634
// noop tracer cannot start anything
558635
let _ = noop_tracer.start("test");
559636
assert!(assert_handle.started_span_count(2));
560637
// noop tracer's tracer provider should be shutdown
561-
assert!(noop_tracer.provider().is_shutdown.load(Ordering::SeqCst));
638+
assert!(noop_tracer.provider().is_shutdown());
562639

563640
// existing tracer becomes noops after shutdown
564641
let _ = test_tracer_1.start("test");
565642
assert!(assert_handle.started_span_count(2));
643+
644+
// also existing tracer's tracer provider are in shutdown state
645+
assert!(test_tracer_1.provider().is_shutdown());
646+
}
647+
648+
#[derive(Debug)]
649+
struct CountingShutdownProcessor {
650+
shutdown_count: Arc<AtomicU32>,
651+
}
652+
653+
impl CountingShutdownProcessor {
654+
fn new(shutdown_count: Arc<AtomicU32>) -> Self {
655+
CountingShutdownProcessor { shutdown_count }
656+
}
657+
}
658+
659+
impl SpanProcessor for CountingShutdownProcessor {
660+
fn on_start(&self, _span: &mut Span, _cx: &Context) {
661+
// No operation needed for this processor
662+
}
663+
664+
fn on_end(&self, _span: SpanData) {
665+
// No operation needed for this processor
666+
}
667+
668+
fn force_flush(&self) -> TraceResult<()> {
669+
Ok(())
670+
}
671+
672+
fn shutdown(&self) -> TraceResult<()> {
673+
self.shutdown_count.fetch_add(1, Ordering::SeqCst);
674+
Ok(())
675+
}
676+
}
677+
678+
#[test]
679+
fn drop_test_with_multiple_providers() {
680+
let shutdown_count = Arc::new(AtomicU32::new(0));
681+
682+
{
683+
// Create a shared TracerProviderInner and use it across multiple providers
684+
let shared_inner = Arc::new(TracerProviderInner {
685+
processors: vec![Box::new(CountingShutdownProcessor::new(
686+
shutdown_count.clone(),
687+
))],
688+
config: Config::default(),
689+
is_shutdown: AtomicBool::new(false),
690+
});
691+
692+
{
693+
let tracer_provider1 = super::TracerProvider {
694+
inner: shared_inner.clone(),
695+
};
696+
let tracer_provider2 = super::TracerProvider {
697+
inner: shared_inner.clone(),
698+
};
699+
700+
let tracer1 = tracer_provider1.tracer("test-tracer1");
701+
let tracer2 = tracer_provider2.tracer("test-tracer2");
702+
703+
let _span1 = tracer1.start("span1");
704+
let _span2 = tracer2.start("span2");
705+
706+
// TracerProviderInner should not be dropped yet, since both providers and `shared_inner`
707+
// are still holding a reference.
708+
}
709+
// At this point, both `tracer_provider1` and `tracer_provider2` are dropped,
710+
// but `shared_inner` still holds a reference, so `TracerProviderInner` is NOT dropped yet.
711+
assert_eq!(shutdown_count.load(Ordering::SeqCst), 0);
712+
}
713+
// Verify shutdown was called during the drop of the shared TracerProviderInner
714+
assert_eq!(shutdown_count.load(Ordering::SeqCst), 1);
715+
}
716+
717+
#[test]
718+
fn drop_after_shutdown_test_with_multiple_providers() {
719+
let shutdown_count = Arc::new(AtomicU32::new(0));
720+
721+
// Create a shared TracerProviderInner and use it across multiple providers
722+
let shared_inner = Arc::new(TracerProviderInner {
723+
processors: vec![Box::new(CountingShutdownProcessor::new(
724+
shutdown_count.clone(),
725+
))],
726+
config: Config::default(),
727+
is_shutdown: AtomicBool::new(false),
728+
});
729+
730+
// Create a scope to test behavior when providers are dropped
731+
{
732+
let tracer_provider1 = super::TracerProvider {
733+
inner: shared_inner.clone(),
734+
};
735+
let tracer_provider2 = super::TracerProvider {
736+
inner: shared_inner.clone(),
737+
};
738+
739+
// Explicitly shut down the tracer provider
740+
let shutdown_result = tracer_provider1.shutdown();
741+
assert!(shutdown_result.is_ok());
742+
743+
// Verify that shutdown was called exactly once
744+
assert_eq!(shutdown_count.load(Ordering::SeqCst), 1);
745+
746+
// TracerProvider2 should observe the shutdown state but not trigger another shutdown
747+
let shutdown_result2 = tracer_provider2.shutdown();
748+
assert!(shutdown_result2.is_err());
749+
assert_eq!(shutdown_count.load(Ordering::SeqCst), 1);
750+
751+
// Both tracer providers will be dropped at the end of this scope
752+
}
753+
754+
// Verify that shutdown was only called once, even after drop
755+
assert_eq!(shutdown_count.load(Ordering::SeqCst), 1);
566756
}
567757
}

opentelemetry/src/trace/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,10 @@ pub enum TraceError {
200200
#[error("Exporting timed out after {} seconds", .0.as_secs())]
201201
ExportTimedOut(time::Duration),
202202

203+
/// already shutdown error
204+
#[error("TracerProvider already shutdown")]
205+
TracerProviderAlreadyShutdown,
206+
203207
/// Other errors propagated from trace SDK that weren't covered above
204208
#[error(transparent)]
205209
Other(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),

0 commit comments

Comments
 (0)