Skip to content

Commit c003b5b

Browse files
committed
Add Otel collector to our tracing exports to Tempo
1 parent ad6f9ec commit c003b5b

11 files changed

+422
-2
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{{- if .Values.otelCollector.enabled }}
2+
apiVersion: v1
3+
kind: ConfigMap
4+
metadata:
5+
name: otel-collector-router-config
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
app: otel-collector-router
9+
data:
10+
otel-collector-config.yaml: |
11+
extensions:
12+
health_check:
13+
endpoint: 0.0.0.0:13133
14+
15+
receivers:
16+
otlp:
17+
protocols:
18+
grpc:
19+
endpoint: 0.0.0.0:4317
20+
http:
21+
endpoint: 0.0.0.0:4318
22+
23+
processors:
24+
batch:
25+
timeout: 200ms
26+
send_batch_size: 512
27+
28+
memory_limiter:
29+
check_interval: 1s
30+
limit_mib: 400
31+
spike_limit_mib: 100
32+
33+
exporters:
34+
loadbalancing:
35+
routing_key: traceID
36+
protocol:
37+
otlp:
38+
tls:
39+
insecure: true
40+
resolver:
41+
dns:
42+
hostname: otel-collector-sampler-headless.{{ .Release.Namespace }}.svc.cluster.local
43+
port: "4317"
44+
interval: 30s
45+
timeout: 10s
46+
47+
service:
48+
extensions: [health_check]
49+
pipelines:
50+
traces:
51+
receivers: [otlp]
52+
processors: [memory_limiter, batch]
53+
exporters: [loadbalancing]
54+
{{- end }}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
{{- if .Values.otelCollector.enabled }}
2+
apiVersion: apps/v1
3+
kind: Deployment
4+
metadata:
5+
name: otel-collector-router
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
app: otel-collector-router
9+
spec:
10+
replicas: {{ .Values.otelCollector.router.replicas }}
11+
selector:
12+
matchLabels:
13+
app: otel-collector-router
14+
template:
15+
metadata:
16+
labels:
17+
app: otel-collector-router
18+
annotations:
19+
checksum/config: {{ include (print $.Template.BasePath "/otel-collector-router-config.yaml") . | sha256sum }}
20+
spec:
21+
{{- if .Values.otelCollector.router.nodeSelector }}
22+
nodeSelector:
23+
{{- toYaml .Values.otelCollector.router.nodeSelector | nindent 8 }}
24+
{{- end }}
25+
{{- if .Values.otelCollector.router.tolerations }}
26+
tolerations:
27+
{{- toYaml .Values.otelCollector.router.tolerations | nindent 8 }}
28+
{{- end }}
29+
containers:
30+
- name: otel-collector-router
31+
image: {{ .Values.otelCollector.router.image }}:{{ .Values.otelCollector.router.imageTag }}
32+
imagePullPolicy: {{ .Values.otelCollector.router.imagePullPolicy | default "IfNotPresent" }}
33+
args:
34+
- "--config=/conf/otel-collector-config.yaml"
35+
ports:
36+
- name: otlp-grpc
37+
containerPort: 4317
38+
protocol: TCP
39+
- name: otlp-http
40+
containerPort: 4318
41+
protocol: TCP
42+
- name: health
43+
containerPort: 13133
44+
protocol: TCP
45+
volumeMounts:
46+
- name: config
47+
mountPath: /conf
48+
resources:
49+
requests:
50+
memory: {{ .Values.otelCollector.router.resources.requests.memory }}
51+
cpu: {{ .Values.otelCollector.router.resources.requests.cpu }}
52+
limits:
53+
memory: {{ .Values.otelCollector.router.resources.limits.memory }}
54+
cpu: {{ .Values.otelCollector.router.resources.limits.cpu }}
55+
livenessProbe:
56+
httpGet:
57+
path: /
58+
port: health
59+
initialDelaySeconds: 15
60+
periodSeconds: 10
61+
readinessProbe:
62+
httpGet:
63+
path: /
64+
port: health
65+
initialDelaySeconds: 5
66+
periodSeconds: 5
67+
volumes:
68+
- name: config
69+
configMap:
70+
name: otel-collector-router-config
71+
{{- end }}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{{- if .Values.otelCollector.enabled }}
2+
apiVersion: v1
3+
kind: Service
4+
metadata:
5+
name: otel-collector-router
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
app: otel-collector-router
9+
spec:
10+
type: ClusterIP
11+
ports:
12+
- name: otlp-grpc
13+
port: 4317
14+
targetPort: 4317
15+
protocol: TCP
16+
- name: otlp-http
17+
port: 4318
18+
targetPort: 4318
19+
protocol: TCP
20+
- name: health
21+
port: 13133
22+
targetPort: 13133
23+
protocol: TCP
24+
selector:
25+
app: otel-collector-router
26+
{{- end }}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
{{- if .Values.otelCollector.enabled }}
2+
apiVersion: v1
3+
kind: ConfigMap
4+
metadata:
5+
name: otel-collector-sampler-config
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
app: otel-collector-sampler
9+
data:
10+
otel-collector-config.yaml: |
11+
extensions:
12+
health_check:
13+
endpoint: 0.0.0.0:13133
14+
15+
receivers:
16+
otlp:
17+
protocols:
18+
grpc:
19+
endpoint: 0.0.0.0:4317
20+
http:
21+
endpoint: 0.0.0.0:4318
22+
23+
processors:
24+
memory_limiter:
25+
check_interval: 1s
26+
limit_mib: 3000
27+
spike_limit_mib: 500
28+
29+
batch:
30+
timeout: 1s
31+
send_batch_size: 1024
32+
33+
tail_sampling:
34+
decision_wait: {{ .Values.otelCollector.sampler.decisionWait }}
35+
num_traces: {{ .Values.otelCollector.sampler.numTraces }}
36+
expected_new_traces_per_sec: {{ .Values.otelCollector.sampler.expectedTracesPerSec }}
37+
policies:
38+
- name: errors
39+
type: status_code
40+
status_code:
41+
status_codes: [ERROR]
42+
- name: slow-requests
43+
type: latency
44+
latency:
45+
threshold_ms: {{ .Values.otelCollector.sampler.latencyThresholdMs }}
46+
- name: baseline
47+
type: probabilistic
48+
probabilistic:
49+
sampling_percentage: {{ .Values.otelCollector.sampler.baselineSamplingPercent }}
50+
51+
exporters:
52+
otlp:
53+
endpoint: {{ .Values.otelCollector.tempoEndpoint | quote }}
54+
tls:
55+
insecure: {{ .Values.otelCollector.tempoInsecure }}
56+
57+
service:
58+
extensions: [health_check]
59+
pipelines:
60+
traces:
61+
receivers: [otlp]
62+
processors: [memory_limiter, batch, tail_sampling]
63+
exporters: [otlp]
64+
{{- end }}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{{- if .Values.otelCollector.enabled }}
2+
apiVersion: v1
3+
kind: Service
4+
metadata:
5+
name: otel-collector-sampler-headless
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
app: otel-collector-sampler
9+
spec:
10+
type: ClusterIP
11+
clusterIP: None
12+
ports:
13+
- name: otlp-grpc
14+
port: 4317
15+
targetPort: 4317
16+
protocol: TCP
17+
- name: otlp-http
18+
port: 4318
19+
targetPort: 4318
20+
protocol: TCP
21+
- name: health
22+
port: 13133
23+
targetPort: 13133
24+
protocol: TCP
25+
selector:
26+
app: otel-collector-sampler
27+
{{- end }}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
{{- if .Values.otelCollector.enabled }}
2+
apiVersion: apps/v1
3+
kind: StatefulSet
4+
metadata:
5+
name: otel-collector-sampler
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
app: otel-collector-sampler
9+
spec:
10+
serviceName: otel-collector-sampler-headless
11+
replicas: {{ .Values.otelCollector.sampler.replicas }}
12+
selector:
13+
matchLabels:
14+
app: otel-collector-sampler
15+
template:
16+
metadata:
17+
labels:
18+
app: otel-collector-sampler
19+
annotations:
20+
checksum/config: {{ include (print $.Template.BasePath "/otel-collector-sampler-config.yaml") . | sha256sum }}
21+
spec:
22+
{{- if .Values.otelCollector.sampler.nodeSelector }}
23+
nodeSelector:
24+
{{- toYaml .Values.otelCollector.sampler.nodeSelector | nindent 8 }}
25+
{{- end }}
26+
{{- if .Values.otelCollector.sampler.tolerations }}
27+
tolerations:
28+
{{- toYaml .Values.otelCollector.sampler.tolerations | nindent 8 }}
29+
{{- end }}
30+
containers:
31+
- name: otel-collector-sampler
32+
image: {{ .Values.otelCollector.sampler.image }}:{{ .Values.otelCollector.sampler.imageTag }}
33+
imagePullPolicy: {{ .Values.otelCollector.sampler.imagePullPolicy | default "IfNotPresent" }}
34+
args:
35+
- "--config=/conf/otel-collector-config.yaml"
36+
ports:
37+
- name: otlp-grpc
38+
containerPort: 4317
39+
protocol: TCP
40+
- name: otlp-http
41+
containerPort: 4318
42+
protocol: TCP
43+
- name: health
44+
containerPort: 13133
45+
protocol: TCP
46+
volumeMounts:
47+
- name: config
48+
mountPath: /conf
49+
resources:
50+
requests:
51+
memory: {{ .Values.otelCollector.sampler.resources.requests.memory }}
52+
cpu: {{ .Values.otelCollector.sampler.resources.requests.cpu }}
53+
limits:
54+
memory: {{ .Values.otelCollector.sampler.resources.limits.memory }}
55+
cpu: {{ .Values.otelCollector.sampler.resources.limits.cpu }}
56+
livenessProbe:
57+
httpGet:
58+
path: /
59+
port: health
60+
initialDelaySeconds: 30
61+
periodSeconds: 10
62+
readinessProbe:
63+
httpGet:
64+
path: /
65+
port: health
66+
initialDelaySeconds: 10
67+
periodSeconds: 5
68+
volumes:
69+
- name: config
70+
configMap:
71+
name: otel-collector-sampler-config
72+
{{- end }}

kubernetes/linera-validator/templates/proxy.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,10 @@ spec:
8787
value: {{ .Values.logLevel }}
8888
- name: RUST_BACKTRACE
8989
value: "1"
90+
{{- if .Values.otlpExporterEndpoint }}
9091
- name: LINERA_OTLP_EXPORTER_ENDPOINT
9192
value: {{ .Values.otlpExporterEndpoint }}
93+
{{- end }}
9294
containers:
9395
- name: linera-proxy
9496
imagePullPolicy: {{ .Values.lineraImagePullPolicy }}
@@ -111,8 +113,10 @@ spec:
111113
env:
112114
- name: RUST_LOG
113115
value: {{ .Values.logLevel }}
116+
{{- if .Values.otlpExporterEndpoint }}
114117
- name: LINERA_OTLP_EXPORTER_ENDPOINT
115118
value: {{ .Values.otlpExporterEndpoint }}
119+
{{- end }}
116120
volumeMounts:
117121
- name: config
118122
mountPath: "/config"

kubernetes/linera-validator/templates/shards.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,10 @@ spec:
7575
value: {{ .Values.logLevel }}
7676
- name: RUST_BACKTRACE
7777
value: "1"
78+
{{- if .Values.otlpExporterEndpoint }}
7879
- name: LINERA_OTLP_EXPORTER_ENDPOINT
7980
value: {{ .Values.otlpExporterEndpoint }}
81+
{{- end }}
8082
volumeMounts:
8183
- name: config
8284
mountPath: "/config"
@@ -102,8 +104,10 @@ spec:
102104
env:
103105
- name: RUST_LOG
104106
value: {{ .Values.logLevel }}
107+
{{- if .Values.otlpExporterEndpoint }}
105108
- name: LINERA_OTLP_EXPORTER_ENDPOINT
106109
value: {{ .Values.otlpExporterEndpoint }}
110+
{{- end }}
107111
{{- if .Values.serverTokioThreads }}
108112
- name: LINERA_SERVER_TOKIO_THREADS
109113
value: "{{ .Values.serverTokioThreads }}"

kubernetes/linera-validator/values-local.yaml.gotmpl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,20 @@ environment: "kind"
179179
validator:
180180
serverConfig: {{ env "LINERA_HELMFILE_SET_SERVER_CONFIG" | default "working/server_1.json" }}
181181
genesisConfig: {{ env "LINERA_HELMFILE_SET_GENESIS_CONFIG" | default "working/genesis.json" }}
182+
183+
# OpenTelemetry Collector - Tail-Based Sampling
184+
otelCollector:
185+
enabled: {{ env "LINERA_HELMFILE_SET_OTEL_COLLECTOR_ENABLED" | default (env "LINERA_HELMFILE_SET_TEMPO_ENABLED" | default "false") }}
186+
tempoEndpoint: {{ env "LINERA_HELMFILE_SET_TEMPO_ENDPOINT" | default "tempo.tempo.svc.cluster.local:4317" }}
187+
latencyThresholdMs: {{ env "LINERA_HELMFILE_SET_OTEL_LATENCY_THRESHOLD" | default "500" }}
188+
baselineSamplingPercent: {{ env "LINERA_HELMFILE_SET_OTEL_BASELINE_SAMPLING" | default "5.0" }}
189+
decisionWait: {{ env "LINERA_HELMFILE_SET_OTEL_DECISION_WAIT" | default "200ms" }}
190+
numTraces: {{ env "LINERA_HELMFILE_SET_OTEL_NUM_TRACES" | default "10000" }}
191+
expectedTracesPerSec: {{ env "LINERA_HELMFILE_SET_OTEL_EXPECTED_TRACES_PER_SEC" | default "500" }}
192+
resources:
193+
requests:
194+
memory: {{ env "LINERA_HELMFILE_SET_OTEL_MEMORY_REQUEST" | default "512Mi" }}
195+
cpu: {{ env "LINERA_HELMFILE_SET_OTEL_CPU_REQUEST" | default "500m" }}
196+
limits:
197+
memory: {{ env "LINERA_HELMFILE_SET_OTEL_MEMORY_LIMIT" | default "4Gi" }}
198+
cpu: {{ env "LINERA_HELMFILE_SET_OTEL_CPU_LIMIT" | default "4000m" }}

0 commit comments

Comments
 (0)