Skip to content

Commit f646401

Browse files
jordanhunt22Convex, Inc.
authored and
Convex, Inc.
committed
[Tracing] Initial Honeycomb implementation (#23836)
This PR adds Honeycomb tracing to mutations called from the sync worker. This will be expanded over time to include more things, like queries and actions, but mutations were the easiest place to start. Sampling is off by default, and we can turn it on in staging when this lands. Current lifecycle of tracing a request: - a collector is initialized on backend + funrun startup - when a request comes in, we decide at the beginning whether or not to trace the call, controlled by a knob (head sampling) - enter the `run_mutation` loop with the actual span or a no-op, depending on whether we sample this request - use the `#[minitrace::trace]` macro to trace all relevant functions on this machine - encode the current trace when we make a request to funrun - funrun decodes the request and initializes a span with that root as its parent - the parent is passed into isolate in the request - isolate initializes its own root - all this data is then sent to Honeycomb and is aggregated there GitOrigin-RevId: d220f0682b6defc30ee2ef77e16d4d9239ae4f71
1 parent 6ec3433 commit f646401

28 files changed

+304
-15
lines changed

Cargo.lock

Lines changed: 74 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ lru = "0.12.0"
6262
maplit = "1"
6363
mime = "0.3"
6464
mime2ext = "0.1.52"
65+
minitrace = { version = "0.6", features = [ "enable" ] }
6566
must-let = { git = "https://github.com/sujayakar/must-let", rev = "5b487d78db235e396e61dd03ce261ced0eafff9d" }
6667
num_cpus = "1.16.0"
6768
oauth2 = "4.4.2"
@@ -116,7 +117,7 @@ tokio-metrics-collector = { version = "0.2.0" }
116117
tokio-process-stream = { version = "0.4.0" }
117118
tokio-stream = { version = "^0.1.8", features = [ "io-util", "sync" ] }
118119
tokio-tungstenite = "0.20.0"
119-
tonic = { version = "0.10.0", features = [ "gzip" ] }
120+
tonic = { version = "0.10.2", features = [ "gzip" ] }
120121
tonic-build = "0.10.0"
121122
tonic-health = "0.10.0"
122123
tower = { version = "0.4", features = [ "limit", "timeout" ] }

crates/application/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ maplit = { workspace = true }
3939
metrics = { path = "../metrics" }
4040
mime = { workspace = true }
4141
mime2ext = { workspace = true }
42+
minitrace = { workspace = true }
4243
model = { path = "../model" }
4344
node_executor = { path = "../../crates/node_executor" }
4445
num_cpus = { workspace = true }

crates/application/src/application_function_runner/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ impl<RT: Runtime> FunctionRouter<RT> {
250250
}
251251

252252
impl<RT: Runtime> FunctionRouter<RT> {
253+
#[minitrace::trace]
253254
pub(crate) async fn execute_query_or_mutation(
254255
&self,
255256
tx: Transaction<RT>,
@@ -298,6 +299,7 @@ impl<RT: Runtime> FunctionRouter<RT> {
298299

299300
// Execute using the function runner. Can be used for v8 udfs other than http
300301
// actions.
302+
#[minitrace::trace]
301303
async fn function_runner_execute(
302304
&self,
303305
mut tx: Transaction<RT>,
@@ -644,6 +646,7 @@ impl<RT: Runtime> ApplicationFunctionRunner<RT> {
644646
}
645647

646648
/// Runs a mutation and retries on OCC errors.
649+
#[minitrace::trace]
647650
pub async fn retry_mutation(
648651
&self,
649652
request_id: RequestId,
@@ -678,6 +681,7 @@ impl<RT: Runtime> ApplicationFunctionRunner<RT> {
678681
}
679682

680683
/// Runs a mutation and retries on OCC errors.
684+
#[minitrace::trace]
681685
async fn _retry_mutation(
682686
&self,
683687
request_id: RequestId,
@@ -898,6 +902,7 @@ impl<RT: Runtime> ApplicationFunctionRunner<RT> {
898902
}
899903

900904
/// Runs the mutation once without any logging.
905+
#[minitrace::trace]
901906
async fn run_mutation_inner(
902907
&self,
903908
mut tx: Transaction<RT>,
@@ -1566,6 +1571,7 @@ impl<RT: Runtime> ApplicationFunctionRunner<RT> {
15661571
Ok(result)
15671572
}
15681573

1574+
#[minitrace::trace]
15691575
async fn check_mutation_status(
15701576
&self,
15711577
tx: &mut Transaction<RT>,
@@ -1627,6 +1633,7 @@ impl<RT: Runtime> ApplicationFunctionRunner<RT> {
16271633
Ok(())
16281634
}
16291635

1636+
#[minitrace::trace]
16301637
async fn write_mutation_status(
16311638
&self,
16321639
tx: &mut Transaction<RT>,

crates/application/src/lib.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,11 @@ impl<RT: Runtime> Application<RT> {
691691
self.database.now_ts_for_reads()
692692
}
693693

694+
pub fn instance_name(&self) -> String {
695+
self.instance_name.clone()
696+
}
697+
698+
#[minitrace::trace]
694699
pub async fn begin(&self, identity: Identity) -> anyhow::Result<Transaction<RT>> {
695700
self.database.begin(identity).await
696701
}
@@ -700,6 +705,7 @@ impl<RT: Runtime> Application<RT> {
700705
self.commit(transaction, "test").await
701706
}
702707

708+
#[minitrace::trace]
703709
pub async fn commit(
704710
&self,
705711
transaction: Transaction<RT>,
@@ -908,6 +914,7 @@ impl<RT: Runtime> Application<RT> {
908914
}
909915
}
910916

917+
#[minitrace::trace]
911918
pub async fn mutation_udf(
912919
&self,
913920
request_id: RequestId,
@@ -1677,6 +1684,7 @@ impl<RT: Runtime> Application<RT> {
16771684
Ok(auth_config)
16781685
}
16791686

1687+
#[minitrace::trace]
16801688
pub async fn apply_config_with_retries(
16811689
&self,
16821690
identity: Identity,

crates/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ hyper = { workspace = true }
3737
itertools = { workspace = true }
3838
maplit = { workspace = true }
3939
metrics = { path = "../metrics" }
40+
minitrace = { workspace = true }
4041
openidconnect = { workspace = true }
4142
packed_value = { path = "../packed_value" }
4243
parking_lot = { workspace = true }

crates/common/src/http/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ use serde::{
7777
Serialize,
7878
};
7979
use tokio::net::TcpSocket;
80-
use tonic::transport::NamedService;
80+
use tonic::server::NamedService;
8181
use tonic_health::server::health_reporter;
8282
use tower::{
8383
timeout::TimeoutLayer,

crates/common/src/knobs.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,3 +989,8 @@ pub static HTTP_SERVER_TIMEOUT_DURATION: LazyLock<Duration> =
989989
// Schema and code bundle pushes must be less than this.
990990
pub static MAX_PUSH_BYTES: LazyLock<usize> =
991991
LazyLock::new(|| env_config("MAX_PUSH_BYTES", 100_000_000));
992+
993+
/// Percentage of request traces that should sampled
994+
pub static REQUEST_TRACE_SAMPLE_PERCENT: LazyLock<f64> = LazyLock::new(|| {
995+
env_config("REQUEST_TRACE_SAMPLE_PERCENT", 0.0) // 0% by default
996+
});

crates/common/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ pub mod knobs;
4949
pub mod log_lines;
5050
pub mod log_streaming;
5151
pub mod metrics;
52+
pub mod minitrace_helpers;
5253
pub mod numeric;
5354
pub mod paths;
5455
pub mod pause;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
use std::collections::BTreeMap;
2+
3+
use minitrace::{
4+
collector::SpanContext,
5+
Span,
6+
};
7+
use rand::Rng;
8+
9+
use crate::{
10+
knobs::REQUEST_TRACE_SAMPLE_PERCENT,
11+
runtime::Runtime,
12+
};
13+
14+
/// A span context in its encoded string form, or `None` when there is no
/// parent span to propagate.
///
/// The `Default` value carries no parent, matching `EncodedSpan::empty()`.
#[derive(Clone, Debug, Default)]
pub struct EncodedSpan(pub Option<String>);
16+
17+
impl EncodedSpan {
18+
pub fn empty() -> Self {
19+
Self(None)
20+
}
21+
22+
/// Encodes the passed in `SpanContext`
23+
pub fn from_parent(parent: Option<SpanContext>) -> Self {
24+
Self(parent.map(|ctx| ctx.encode_w3c_traceparent()))
25+
}
26+
}
27+
28+
/// Given an instance name returns a span with the sample percentage specified
29+
/// in `knobs.rs`
30+
pub fn get_sampled_span<RT: Runtime>(
31+
request_name: String,
32+
rt: RT,
33+
properties: BTreeMap<String, String>,
34+
) -> Span {
35+
let should_sample = rt
36+
.clone()
37+
.with_rng(|rng| rng.gen_bool(*REQUEST_TRACE_SAMPLE_PERCENT));
38+
match should_sample {
39+
true => Span::root(request_name, SpanContext::random()).with_properties(|| properties),
40+
false => Span::noop(),
41+
}
42+
}
43+
44+
/// Creates a root span from an encoded parent trace
45+
pub fn initialize_root_from_parent(span_name: &'static str, encoded_parent: EncodedSpan) -> Span {
46+
if let Some(p) = encoded_parent.0 {
47+
if let Some(ctx) = SpanContext::decode_w3c_traceparent(p.as_str()) {
48+
return Span::root(span_name, ctx);
49+
}
50+
}
51+
Span::noop()
52+
}

crates/database/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ itertools = { workspace = true }
2929
keybroker = { path = "../keybroker" }
3030
maplit = { workspace = true }
3131
metrics = { path = "../metrics" }
32+
minitrace = { workspace = true }
3233
parking_lot = { workspace = true, features = ["hardware-lock-elision"] }
3334
pb = { path = "../pb" }
3435
prometheus = { workspace = true }

0 commit comments

Comments
 (0)