Skip to content

Commit 9dfcff1

Browse files
authored
Use dedicated ShutdownResult for Metric SDK shutdown (#2573)
1 parent b50da91 commit 9dfcff1

File tree

16 files changed

+164
-60
lines changed

16 files changed

+164
-60
lines changed

examples/metrics-basic/src/main.rs

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use opentelemetry::{global, KeyValue};
2+
use opentelemetry_sdk::error::ShutdownError;
23
use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider};
34
use opentelemetry_sdk::Resource;
45
use std::error::Error;
@@ -23,7 +24,7 @@ fn init_meter_provider() -> opentelemetry_sdk::metrics::SdkMeterProvider {
2324
}
2425

2526
#[tokio::main]
26-
async fn main() -> Result<(), Box<dyn Error + Send + Sync + 'static>> {
27+
async fn main() -> Result<(), Box<dyn Error>> {
2728
// Initialize the MeterProvider with the stdout Exporter.
2829
let meter_provider = init_meter_provider();
2930

@@ -137,9 +138,41 @@ async fn main() -> Result<(), Box<dyn Error + Send + Sync + 'static>> {
137138
})
138139
.build();
139140

140-
// Metrics are exported by default every 30 seconds when using stdout exporter,
141-
// however shutting down the MeterProvider here instantly flushes
142-
// the metrics, instead of waiting for the 30 sec interval.
141+
// Metrics are exported by default every 30 seconds when using stdout
142+
// exporter, however shutting down the MeterProvider here instantly flushes
143+
// the metrics, instead of waiting for the 30 sec interval. Shutdown returns
144+
// a result, which is bubbled up to the caller The commented code below
145+
// demonstrates handling the shutdown result, instead of bubbling up the
146+
// error.
143147
meter_provider.shutdown()?;
148+
149+
// let shutdown_result = meter_provider.shutdown();
150+
151+
// Handle the shutdown result.
152+
// match shutdown_result {
153+
// Ok(_) => println!("MeterProvider shutdown successfully"),
154+
// Err(e) => {
155+
// match e {
156+
// opentelemetry_sdk::error::ShutdownError::InternalFailure(message) => {
157+
// // This indicates some internal failure during shutdown. The
158+
// // error message is intended for logging purposes only and
159+
// // should not be used to make programmatic decisions.
160+
// println!("MeterProvider shutdown failed: {}", message)
161+
// }
162+
// opentelemetry_sdk::error::ShutdownError::AlreadyShutdown => {
163+
// // This indicates some user code tried to shutdown
164+
// // elsewhere. user need to review their code to ensure
165+
// // shutdown is called only once.
166+
// println!("MeterProvider already shutdown")
167+
// }
168+
// opentelemetry_sdk::error::ShutdownError::Timeout(e) => {
169+
// // This indicates the shutdown timed out, and a good hint to
170+
// // user to increase the timeout. (Shutdown method does not
171+
// // allow custom timeout today, but that is temporary)
172+
// println!("MeterProvider shutdown timed out after {:?}", e)
173+
// }
174+
// }
175+
// }
176+
// }
144177
Ok(())
145178
}

opentelemetry-otlp/src/exporter/http/metrics.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::sync::Arc;
33
use async_trait::async_trait;
44
use http::{header::CONTENT_TYPE, Method};
55
use opentelemetry::otel_debug;
6+
use opentelemetry_sdk::error::{ShutdownError, ShutdownResult};
67
use opentelemetry_sdk::metrics::data::ResourceMetrics;
78
use opentelemetry_sdk::metrics::{MetricError, MetricResult};
89

@@ -43,8 +44,11 @@ impl MetricsClient for OtlpHttpClient {
4344
Ok(())
4445
}
4546

46-
fn shutdown(&self) -> MetricResult<()> {
47-
let _ = self.client.lock()?.take();
47+
fn shutdown(&self) -> ShutdownResult {
48+
self.client
49+
.lock()
50+
.map_err(|e| ShutdownError::InternalFailure(format!("Failed to acquire lock: {}", e)))?
51+
.take();
4852

4953
Ok(())
5054
}

opentelemetry-otlp/src/exporter/tonic/metrics.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use opentelemetry::otel_debug;
66
use opentelemetry_proto::tonic::collector::metrics::v1::{
77
metrics_service_client::MetricsServiceClient, ExportMetricsServiceRequest,
88
};
9+
use opentelemetry_sdk::error::{ShutdownError, ShutdownResult};
910
use opentelemetry_sdk::metrics::data::ResourceMetrics;
1011
use opentelemetry_sdk::metrics::{MetricError, MetricResult};
1112
use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Channel, Request};
@@ -89,8 +90,11 @@ impl MetricsClient for TonicMetricsClient {
8990
Ok(())
9091
}
9192

92-
fn shutdown(&self) -> MetricResult<()> {
93-
let _ = self.inner.lock()?.take();
93+
fn shutdown(&self) -> ShutdownResult {
94+
self.inner
95+
.lock()
96+
.map_err(|e| ShutdownError::InternalFailure(format!("Failed to acquire lock: {}", e)))?
97+
.take();
9498

9599
Ok(())
96100
}

opentelemetry-otlp/src/metric.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use crate::NoExporterBuilderSet;
1616

1717
use async_trait::async_trait;
1818
use core::fmt;
19+
use opentelemetry_sdk::error::ShutdownResult;
1920
use opentelemetry_sdk::metrics::MetricResult;
2021

2122
use opentelemetry_sdk::metrics::{
@@ -123,7 +124,7 @@ impl HasHttpConfig for MetricExporterBuilder<HttpExporterBuilderSet> {
123124
#[async_trait]
124125
pub(crate) trait MetricsClient: fmt::Debug + Send + Sync + 'static {
125126
async fn export(&self, metrics: &mut ResourceMetrics) -> MetricResult<()>;
126-
fn shutdown(&self) -> MetricResult<()>;
127+
fn shutdown(&self) -> ShutdownResult;
127128
}
128129

129130
/// Export metrics in OTEL format.
@@ -149,7 +150,7 @@ impl PushMetricExporter for MetricExporter {
149150
Ok(())
150151
}
151152

152-
fn shutdown(&self) -> MetricResult<()> {
153+
fn shutdown(&self) -> ShutdownResult {
153154
self.client.shutdown()
154155
}
155156

opentelemetry-sdk/benches/metric.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use opentelemetry::{
77
Key, KeyValue,
88
};
99
use opentelemetry_sdk::{
10+
error::ShutdownResult,
1011
metrics::{
1112
data::ResourceMetrics, new_view, reader::MetricReader, Aggregation, Instrument,
1213
InstrumentKind, ManualReader, MetricResult, Pipeline, SdkMeterProvider, Stream,
@@ -31,7 +32,7 @@ impl MetricReader for SharedReader {
3132
self.0.force_flush()
3233
}
3334

34-
fn shutdown(&self) -> MetricResult<()> {
35+
fn shutdown(&self) -> ShutdownResult {
3536
self.0.shutdown()
3637
}
3738

opentelemetry-sdk/src/error.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,45 @@
11
//! Wrapper for error from trace, logs and metrics part of open telemetry.
22
3+
use std::{result::Result, time::Duration};
4+
5+
use thiserror::Error;
6+
37
/// Trait for errors returned by exporters
48
pub trait ExportError: std::error::Error + Send + Sync + 'static {
59
/// The name of exporter that returned this error
610
fn exporter_name(&self) -> &'static str;
711
}
12+
13+
#[derive(Error, Debug)]
14+
/// Errors that can occur during shutdown.
15+
pub enum ShutdownError {
16+
/// Shutdown has already been invoked.
17+
///
18+
/// While shutdown is idempotent and calling it multiple times has no
19+
/// impact, this error suggests that another part of the application is
20+
/// invoking `shutdown` earlier than intended. Users should review their
21+
/// code to identify unintended or duplicate shutdown calls and ensure it is
22+
/// only triggered once at the correct place.
23+
#[error("Shutdown already invoked")]
24+
AlreadyShutdown,
25+
26+
/// Shutdown timed out before completing.
27+
///
28+
/// This does not necessarily indicate a failure—shutdown may still be
29+
/// complete. If this occurs frequently, consider increasing the timeout
30+
/// duration to allow more time for completion.
31+
#[error("Shutdown timed out after {0:?}")]
32+
Timeout(Duration),
33+
34+
/// Shutdown failed due to an internal error.
35+
///
36+
/// The error message is intended for logging purposes only and should not
37+
/// be used to make programmatic decisions. It is implementation-specific
38+
/// and subject to change without notice. Consumers of this error should not
39+
/// rely on its content beyond logging.
40+
#[error("Shutdown failed: {0}")]
41+
InternalFailure(String),
42+
}
43+
44+
/// A specialized `Result` type for Shutdown operations.
45+
pub type ShutdownResult = Result<(), ShutdownError>;

opentelemetry-sdk/src/metrics/exporter.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Interfaces for exporting metrics
22
use async_trait::async_trait;
33

4+
use crate::error::ShutdownResult;
45
use crate::metrics::MetricResult;
56

67
use crate::metrics::data::ResourceMetrics;
@@ -27,7 +28,7 @@ pub trait PushMetricExporter: Send + Sync + 'static {
2728
///
2829
/// After Shutdown is called, calls to Export will perform no operation and
2930
/// instead will return an error indicating the shutdown state.
30-
fn shutdown(&self) -> MetricResult<()>;
31+
fn shutdown(&self) -> ShutdownResult;
3132

3233
/// Access the [Temporality] of the MetricExporter.
3334
fn temporality(&self) -> Temporality;

opentelemetry-sdk/src/metrics/in_memory_exporter.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::error::ShutdownResult;
12
use crate::metrics::data::{self, Gauge, Sum};
23
use crate::metrics::data::{Histogram, Metric, ResourceMetrics, ScopeMetrics};
34
use crate::metrics::exporter::PushMetricExporter;
@@ -277,7 +278,7 @@ impl PushMetricExporter for InMemoryMetricExporter {
277278
Ok(()) // In this implementation, flush does nothing
278279
}
279280

280-
fn shutdown(&self) -> MetricResult<()> {
281+
fn shutdown(&self) -> ShutdownResult {
281282
Ok(())
282283
}
283284

opentelemetry-sdk/src/metrics/manual_reader.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ use std::{
55

66
use opentelemetry::otel_debug;
77

8-
use crate::metrics::{MetricError, MetricResult, Temporality};
8+
use crate::{
9+
error::{ShutdownError, ShutdownResult},
10+
metrics::{MetricError, MetricResult, Temporality},
11+
};
912

1013
use super::{
1114
data::ResourceMetrics,
@@ -107,8 +110,10 @@ impl MetricReader for ManualReader {
107110
}
108111

109112
/// Closes any connections and frees any resources used by the reader.
110-
fn shutdown(&self) -> MetricResult<()> {
111-
let mut inner = self.inner.lock()?;
113+
fn shutdown(&self) -> ShutdownResult {
114+
let mut inner = self.inner.lock().map_err(|e| {
115+
ShutdownError::InternalFailure(format!("Failed to acquire lock: {}", e))
116+
})?;
112117

113118
// Any future call to collect will now return an error.
114119
inner.sdk_producer = None;

opentelemetry-sdk/src/metrics/meter_provider.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ use opentelemetry::{
1212
otel_debug, otel_error, otel_info, InstrumentationScope,
1313
};
1414

15-
use crate::metrics::{MetricError, MetricResult};
1615
use crate::Resource;
16+
use crate::{
17+
error::ShutdownResult,
18+
metrics::{MetricError, MetricResult},
19+
};
1720

1821
use super::{
1922
meter::SdkMeter, noop::NoopMeter, pipeline::Pipelines, reader::MetricReader, view::View,
@@ -108,7 +111,7 @@ impl SdkMeterProvider {
108111
///
109112
/// There is no guaranteed that all telemetry be flushed or all resources have
110113
/// been released on error.
111-
pub fn shutdown(&self) -> MetricResult<()> {
114+
pub fn shutdown(&self) -> ShutdownResult {
112115
otel_info!(
113116
name: "MeterProvider.Shutdown",
114117
message = "User initiated shutdown of MeterProvider."
@@ -131,15 +134,13 @@ impl SdkMeterProviderInner {
131134
}
132135
}
133136

134-
fn shutdown(&self) -> MetricResult<()> {
137+
fn shutdown(&self) -> ShutdownResult {
135138
if self
136139
.shutdown_invoked
137140
.swap(true, std::sync::atomic::Ordering::SeqCst)
138141
{
139142
// If the previous value was true, shutdown was already invoked.
140-
Err(MetricError::Other(
141-
"MeterProvider shutdown already invoked.".into(),
142-
))
143+
Err(crate::error::ShutdownError::AlreadyShutdown)
143144
} else {
144145
self.pipes.shutdown()
145146
}

opentelemetry-sdk/src/metrics/periodic_reader.rs

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use std::{
1111
use opentelemetry::{otel_debug, otel_error, otel_info, otel_warn};
1212

1313
use crate::{
14+
error::{ShutdownError, ShutdownResult},
1415
metrics::{exporter::PushMetricExporter, reader::SdkProducer, MetricError, MetricResult},
1516
Resource,
1617
};
@@ -402,27 +403,27 @@ impl PeriodicReaderInner {
402403
}
403404
}
404405

405-
fn shutdown(&self) -> MetricResult<()> {
406+
fn shutdown(&self) -> ShutdownResult {
406407
// TODO: See if this is better to be created upfront.
407408
let (response_tx, response_rx) = mpsc::channel();
408409
self.message_sender
409410
.send(Message::Shutdown(response_tx))
410-
.map_err(|e| MetricError::Other(e.to_string()))?;
411+
.map_err(|e| ShutdownError::InternalFailure(e.to_string()))?;
411412

412413
// TODO: Make this timeout configurable.
413414
match response_rx.recv_timeout(Duration::from_secs(5)) {
414415
Ok(response) => {
415416
if response {
416417
Ok(())
417418
} else {
418-
Err(MetricError::Other("Failed to shutdown".into()))
419+
Err(ShutdownError::InternalFailure("Failed to shutdown".into()))
419420
}
420421
}
421-
Err(mpsc::RecvTimeoutError::Timeout) => Err(MetricError::Other(
422-
"Failed to shutdown due to Timeout".into(),
423-
)),
422+
Err(mpsc::RecvTimeoutError::Timeout) => {
423+
Err(ShutdownError::Timeout(Duration::from_secs(5)))
424+
}
424425
Err(mpsc::RecvTimeoutError::Disconnected) => {
425-
Err(MetricError::Other("Failed to shutdown".into()))
426+
Err(ShutdownError::InternalFailure("Failed to shutdown".into()))
426427
}
427428
}
428429
}
@@ -451,7 +452,7 @@ impl MetricReader for PeriodicReader {
451452
// completion, and avoid blocking the thread. The default shutdown on drop
452453
// can still use blocking call. If user already explicitly called shutdown,
453454
// drop won't call shutdown again.
454-
fn shutdown(&self) -> MetricResult<()> {
455+
fn shutdown(&self) -> ShutdownResult {
455456
self.inner.shutdown()
456457
}
457458

@@ -471,10 +472,10 @@ impl MetricReader for PeriodicReader {
471472
mod tests {
472473
use super::PeriodicReader;
473474
use crate::{
474-
metrics::InMemoryMetricExporter,
475+
error::ShutdownResult,
475476
metrics::{
476-
data::ResourceMetrics, exporter::PushMetricExporter, reader::MetricReader, MetricError,
477-
MetricResult, SdkMeterProvider, Temporality,
477+
data::ResourceMetrics, exporter::PushMetricExporter, reader::MetricReader,
478+
InMemoryMetricExporter, MetricError, MetricResult, SdkMeterProvider, Temporality,
478479
},
479480
Resource,
480481
};
@@ -524,7 +525,7 @@ mod tests {
524525
Ok(())
525526
}
526527

527-
fn shutdown(&self) -> MetricResult<()> {
528+
fn shutdown(&self) -> ShutdownResult {
528529
Ok(())
529530
}
530531

@@ -548,7 +549,7 @@ mod tests {
548549
Ok(())
549550
}
550551

551-
fn shutdown(&self) -> MetricResult<()> {
552+
fn shutdown(&self) -> ShutdownResult {
552553
self.is_shutdown.store(true, Ordering::Relaxed);
553554
Ok(())
554555
}

0 commit comments

Comments
 (0)