Skip to content

Commit 7d6ba93

Browse files
litianningdatadog authored and duncanista committed
feat: No longer launch Go-based agent for compatibility/OTLP/AAP config (#788)
https://datadoghq.atlassian.net/browse/SVLS-7398 - As part of the upcoming release, the bottlecap agent no longer launches the Go-based agent when compatibility/AAP/OTLP features are active - Emit the same metric when detecting any of the above configurations - Update the corresponding unit tests Manifests: - [Test lambda function](https://us-east-1.console.aws.amazon.com/lambda/home?region=us-east-1#/functions/ltn1-fullinstrument-bn-cold-python310-lambda?code=&subtab=envVars&tab=testing) with [logs](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:log-groups/log-group/$252Faws$252Flambda$252Fltn1-fullinstrument-bn-cold-python310-lambda/log-events/2025$252F08$252F21$252F$255B$2524LATEST$255Df3788d359677452dad162488ff15456f$3FfilterPattern$3Dotel) showing that compatibility/AAP/OTLP are enabled <img width="2260" height="454" alt="image" src="https://github.com/user-attachments/assets/5dfd4954-5191-4390-83f5-a8eb3bffb9d3" /> - [Logging](https://app.datadoghq.com/logs/livetail?query=functionname%3Altn1-fullinstrument-bn-cold-python310-lambda%20Metric&agg_m=count&agg_m_source=base&agg_t=count&cols=host%2Cservice&fromUser=true&messageDisplay=inline&refresh_mode=paused&storage=driveline&stream_sort=desc&viz=stream&from_ts=1755787655569&to_ts=1755787689060&live=false) <img width="1058" height="911" alt="image" src="https://github.com/user-attachments/assets/629f75d1-e115-4478-afac-ad16d9369fa7" /> - 
[Metric](https://app.datadoghq.com/screen/integration/aws_lambda_enhanced_metrics?fromUser=false&fullscreen_end_ts=1755788220000&fullscreen_paused=true&fullscreen_refresh_mode=paused&fullscreen_section=overview&fullscreen_start_ts=1755787200000&fullscreen_widget=2&graph-explorer__tile_def=N4IgbglgXiBcIBcD2AHANhAzgkAaEAxgK7ZIC2A%2BhgHYDWmcA2gLr4BOApgI5EfYOxGoTphRJqmDhQBmSNmQCGOeJgIK0CtnhA8ObCHyagAJkoUVMSImwIc4IMhwT6CDfNQWP7utgE8AjNo%2BvvaYRGSwpggKxkgA5gB0kmxgemh8mAkcAB4IHBIQ4gnSChBoSKlswAAkCgDumBQKBARW1Ai41ZxxhdSd0kTUBAi9AL4ABABGvuPAA0Mj4h6OowkKja2DCAAUAJTaCnFx3UpyoeEgo6wgsvJEGgJCN3Jk9wrevH6BV-iWbMqgTbtOAAJgADPg5MY9BRpkZEL4UHZ4LdXhptBBqNDsnAISAoXp7NDVJdmKMfiBsL50nBgOSgA&refresh_mode=sliding&from_ts=1755783890661&to_ts=1755787490661&live=true) <img width="1227" height="1196" alt="image" src="https://github.com/user-attachments/assets/2922eb54-9853-4512-a902-dfa97916b643" />
1 parent 0e134ff commit 7d6ba93

File tree

5 files changed

+186
-105
lines changed

5 files changed

+186
-105
lines changed

bottlecap/src/bin/bottlecap/main.rs

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use bottlecap::{
3737
},
3838
logger,
3939
logs::{agent::LogsAgent, flusher::LogsFlusher},
40+
metrics::enhanced::lambda::Lambda as enhanced_metrics,
4041
otlp::{agent::Agent as OtlpAgent, should_enable_otlp_agent},
4142
proxy::{interceptor, should_start_proxy},
4243
secrets::decrypt,
@@ -84,9 +85,7 @@ use std::{
8485
collections::{HashMap, hash_map},
8586
env,
8687
io::{Error, Result},
87-
os::unix::process::CommandExt,
8888
path::Path,
89-
process::Command,
9089
sync::{Arc, Mutex},
9190
time::{Duration, Instant},
9291
};
@@ -402,14 +401,7 @@ fn load_configs(start_time: Instant) -> (AwsConfig, AwsCredentials, Arc<Config>)
402401
let aws_credentials = AwsCredentials::from_env();
403402
let lambda_directory: String =
404403
env::var("LAMBDA_TASK_ROOT").unwrap_or_else(|_| "/var/task".to_string());
405-
let config = match config::get_config(Path::new(&lambda_directory)) {
406-
Ok(config) => Arc::new(config),
407-
Err(_e) => {
408-
let err = Command::new("/opt/datadog-agent-go").exec();
409-
panic!("Error starting the extension: {err:?}");
410-
}
411-
};
412-
404+
let config = Arc::new(config::get_config(Path::new(&lambda_directory)));
413405
(aws_config, aws_credentials, config)
414406
}
415407

@@ -508,12 +500,22 @@ async fn extension_loop_active(
508500
.as_micros()
509501
.to_string()
510502
);
511-
503+
let metrics_intake_url = create_metrics_intake_url_prefix(config);
512504
let metrics_flushers = Arc::new(TokioMutex::new(start_metrics_flushers(
513505
Arc::clone(&api_key_factory),
514506
&metrics_aggr,
507+
&metrics_intake_url,
515508
config,
516509
)));
510+
511+
// Create lambda enhanced metrics instance once
512+
let lambda_enhanced_metrics =
513+
enhanced_metrics::new(Arc::clone(&metrics_aggr), Arc::clone(config));
514+
515+
// Send config issue metrics
516+
let config_issues = config::fallback(config);
517+
send_config_issue_metric(&config_issues, &lambda_enhanced_metrics);
518+
517519
// Lifecycle Invocation Processor
518520
let invocation_processor = Arc::new(TokioMutex::new(InvocationProcessor::new(
519521
Arc::clone(&tags_provider),
@@ -1006,33 +1008,33 @@ fn start_logs_agent(
10061008
(logs_agent_channel, logs_flusher)
10071009
}
10081010

1009-
fn start_metrics_flushers(
1010-
api_key_factory: Arc<ApiKeyFactory>,
1011-
metrics_aggr: &Arc<Mutex<MetricsAggregator>>,
1012-
config: &Arc<Config>,
1013-
) -> Vec<MetricsFlusher> {
1014-
let mut flushers = Vec::new();
1015-
1016-
let metrics_intake_url = if !config.dd_url.is_empty() {
1011+
fn create_metrics_intake_url_prefix(config: &Config) -> MetricsIntakeUrlPrefix {
1012+
if !config.dd_url.is_empty() {
10171013
let dd_dd_url = DdDdUrl::new(config.dd_url.clone()).expect("can't parse DD_DD_URL");
1018-
10191014
let prefix_override = MetricsIntakeUrlPrefixOverride::maybe_new(None, Some(dd_dd_url));
1020-
MetricsIntakeUrlPrefix::new(None, prefix_override)
1015+
MetricsIntakeUrlPrefix::new(None, prefix_override).expect("can't parse DD_DD_URL prefix")
10211016
} else if !config.url.is_empty() {
10221017
let dd_url = DdUrl::new(config.url.clone()).expect("can't parse DD_URL");
1023-
10241018
let prefix_override = MetricsIntakeUrlPrefixOverride::maybe_new(Some(dd_url), None);
1025-
MetricsIntakeUrlPrefix::new(None, prefix_override)
1019+
MetricsIntakeUrlPrefix::new(None, prefix_override).expect("can't parse DD_URL prefix")
10261020
} else {
1027-
// use site
10281021
let metrics_site = MetricsSite::new(config.site.clone()).expect("can't parse site");
1029-
MetricsIntakeUrlPrefix::new(Some(metrics_site), None)
1030-
};
1022+
MetricsIntakeUrlPrefix::new(Some(metrics_site), None).expect("can't parse site prefix")
1023+
}
1024+
}
1025+
1026+
fn start_metrics_flushers(
1027+
api_key_factory: Arc<ApiKeyFactory>,
1028+
metrics_aggr: &Arc<Mutex<MetricsAggregator>>,
1029+
metrics_intake_url: &MetricsIntakeUrlPrefix,
1030+
config: &Arc<Config>,
1031+
) -> Vec<MetricsFlusher> {
1032+
let mut flushers = Vec::new();
10311033

10321034
let flusher_config = MetricsFlusherConfig {
10331035
api_key_factory,
10341036
aggregator: Arc::clone(metrics_aggr),
1035-
metrics_intake_url_prefix: metrics_intake_url.expect("can't parse site or override"),
1037+
metrics_intake_url_prefix: metrics_intake_url.clone(),
10361038
https_proxy: config.proxy_https.clone(),
10371039
timeout: Duration::from_secs(config.flush_timeout),
10381040
retry_strategy: DsdRetryStrategy::Immediate(3),
@@ -1157,6 +1159,28 @@ fn start_trace_agent(
11571159
)
11581160
}
11591161

1162+
/// Sends metrics indicating issue with configuration.
1163+
///
1164+
/// # Arguments
1165+
/// * `issue_reasons` - Vector of messages describing the issue with the configurations
1166+
/// * `lambda_enhanced_metrics` - The lambda enhanced metrics instance
1167+
fn send_config_issue_metric(issue_reasons: &[String], lambda_enhanced_metrics: &enhanced_metrics) {
1168+
if issue_reasons.is_empty() {
1169+
return;
1170+
}
1171+
let now = std::time::UNIX_EPOCH
1172+
.elapsed()
1173+
.expect("can't poll clock")
1174+
.as_secs()
1175+
.try_into()
1176+
.unwrap_or_default();
1177+
1178+
// Setup a separate metric for each config issue reason
1179+
for issue_reason in issue_reasons {
1180+
lambda_enhanced_metrics.set_config_load_issue_metric(now, issue_reason);
1181+
}
1182+
}
1183+
11601184
async fn start_dogstatsd(metrics_aggr: &Arc<Mutex<MetricsAggregator>>) -> CancellationToken {
11611185
let dogstatsd_config = DogStatsDConfig {
11621186
host: EXTENSION_HOST.to_string(),

0 commit comments

Comments
 (0)