Skip to content

Commit 2bcf7b5

Browse files
[9.1] (backport #8785) add logs only edot kube-stack configuration (#9089)
* add logs only edot kube-stack configuration (#8785) * add logs only edot kube-stack configuration * rename logs values file * disable default pipelines * remove resource/k8s processor and use k8sattributes processor for service attributes (cherry picked from commit 502cd5c) * fix: downgrade elastic-agent tag to 9.1.0 --------- Co-authored-by: Roger Coll <[email protected]>
1 parent 883034c commit 2bcf7b5

File tree

4 files changed

+316
-0
lines changed

4 files changed

+316
-0
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: feature
12+
13+
# Change summary; a description of the change, roughly 80 characters long.
14+
summary: Add file logs only mOTEL kube-stack configuration
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodates a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: elastic-agent
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment has been added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
#pr: https://github.com/owner/repo/1234
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
32+
#issue: https://github.com/owner/repo/1234
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
# For installation and configuration options, refer to the [installation instructions](https://github.com/elastic/opentelemetry/blob/main/docs/kubernetes/operator/README.md)
2+
3+
# For advanced configuration options, refer to the [official OpenTelemetry Helm chart](https://github.com/open-telemetry/opentelemetry-helm-charts/blob/main/charts/opentelemetry-kube-stack/values.yaml)
4+
# This file has been tested together with opentelemetry-kube-stack helm chart version: 0.3.9
5+
opentelemetry-operator:
6+
manager:
7+
extraArgs:
8+
- --enable-go-instrumentation
9+
admissionWebhooks:
10+
certManager:
11+
enabled: false # For production environments, it is [recommended to use cert-manager for better security and scalability](https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-operator#tls-certificate-requirement).
12+
autoGenerateCert:
13+
enabled: true # Enable/disable automatic certificate generation. Set to false if manually managing certificates.
14+
recreate: true # Force certificate regeneration on updates. Only applicable if autoGenerateCert.enabled is true.
15+
crds:
16+
create: true # Install the OpenTelemetry Operator CRDs.
17+
defaultCRConfig:
18+
image:
19+
repository: "docker.elastic.co/elastic-agent/elastic-agent"
20+
tag: "9.1.0"
21+
targetAllocator:
22+
enabled: false # Enable/disable the Operator's Target allocator.
23+
# Refer to: https://github.com/open-telemetry/opentelemetry-operator/tree/main/cmd/otel-allocator
24+
clusterRole:
25+
rules:
26+
- apiGroups: [""]
27+
resources: ["configmaps"]
28+
verbs: ["get"]
29+
# `clusterName` specifies the name of the Kubernetes cluster. It sets the 'k8s.cluster.name' field.
30+
# Cluster Name is automatically detected for EKS/GKE/AKS. Add the below value in environments where cluster name cannot be detected.
31+
# clusterName: myClusterName
32+
collectors:
33+
cluster:
34+
enabled: false
35+
# Daemon is a K8s daemonset EDOT collector focused on gathering telemetry at
36+
# node level and exposing an OTLP endpoint for data ingestion.
37+
# Auto-instrumentation SDKs will use this endpoint.
38+
daemon:
39+
fullnameOverride: "opentelemetry-kube-stack-daemon"
40+
env:
41+
- name: ELASTIC_AGENT_OTEL
42+
value: '"true"'
43+
presets:
44+
kubeletMetrics:
45+
enabled: false
46+
hostMetrics:
47+
enabled: false
48+
logsCollection:
49+
enabled: true # Enable/disable the collection of node's logs.
50+
storeCheckpoints: true # Store checkpoints for log collection, allowing for resumption from the last processed log.
51+
scrape_configs_file: "" # [Prometheus metrics](https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-kube-stack#scrape_configs_file-details)
52+
config:
53+
exporters:
54+
# [Debug exporter](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/debugexporter/README.md)
55+
debug:
56+
verbosity: basic
57+
otlp/gateway:
58+
endpoint: "http://opentelemetry-kube-stack-gateway-collector-headless:4317"
59+
tls:
60+
insecure: true
61+
processors:
62+
# [Batch Processor](https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/batchprocessor)
63+
batch: {}
64+
# [Resource Detection Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor)
65+
resourcedetection/eks:
66+
detectors: [env, eks] # Detects resources from environment variables and EKS (Elastic Kubernetes Service).
67+
timeout: 15s
68+
override: true
69+
eks:
70+
resource_attributes:
71+
k8s.cluster.name:
72+
enabled: true
73+
resourcedetection/gcp:
74+
detectors: [env, gcp] # Detects resources from environment variables and GCP (Google Cloud Platform).
75+
timeout: 2s
76+
override: true
77+
resourcedetection/aks:
78+
detectors: [env, aks] # Detects resources from environment variables and AKS (Azure Kubernetes Service).
79+
timeout: 2s
80+
override: true
81+
aks:
82+
resource_attributes:
83+
k8s.cluster.name:
84+
enabled: true
85+
resource/hostname:
86+
attributes:
87+
- key: host.name
88+
from_attribute: k8s.node.name
89+
action: upsert
90+
resourcedetection/system:
91+
detectors: ["system", "ec2"] # Detects resources from the system and EC2 instances.
92+
system:
93+
hostname_sources: ["os"]
94+
resource_attributes:
95+
host.name:
96+
enabled: true
97+
host.id:
98+
enabled: false
99+
host.arch:
100+
enabled: true
101+
host.ip:
102+
enabled: true
103+
host.mac:
104+
enabled: true
105+
host.cpu.vendor.id:
106+
enabled: true
107+
host.cpu.family:
108+
enabled: true
109+
host.cpu.model.id:
110+
enabled: true
111+
host.cpu.model.name:
112+
enabled: true
113+
host.cpu.stepping:
114+
enabled: true
115+
host.cpu.cache.l2.size:
116+
enabled: true
117+
os.description:
118+
enabled: true
119+
os.type:
120+
enabled: true
121+
ec2:
122+
resource_attributes:
123+
host.name:
124+
enabled: false
125+
host.id:
126+
enabled: true
127+
resource/cloud:
128+
attributes:
129+
- key: cloud.instance.id
130+
from_attribute: host.id
131+
action: insert
132+
# [K8s Attributes Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor)
133+
k8sattributes:
134+
filter:
135+
# Only retrieve pods running on the same node as the collector
136+
node_from_env_var: OTEL_K8S_NODE_NAME
137+
passthrough: false
138+
pod_association:
139+
# The associations below look at the k8s.pod.ip and k8s.pod.uid resource attributes, or at the connection's context, and try to match them with the pod that has the same attribute.
140+
- sources:
141+
- from: resource_attribute
142+
name: k8s.pod.ip
143+
- sources:
144+
- from: resource_attribute
145+
name: k8s.pod.uid
146+
- sources:
147+
- from: connection
148+
extract:
149+
metadata:
150+
- "k8s.namespace.name"
151+
- "k8s.deployment.name"
152+
- "k8s.replicaset.name"
153+
- "k8s.statefulset.name"
154+
- "k8s.daemonset.name"
155+
- "k8s.cronjob.name"
156+
- "k8s.job.name"
157+
- "k8s.node.name"
158+
- "k8s.pod.name"
159+
- "k8s.pod.ip"
160+
- "k8s.pod.uid"
161+
- "k8s.pod.start_time"
162+
# Service attributes added based on https://opentelemetry.io/docs/specs/semconv/non-normative/k8s-attributes/#service-attributes
163+
- "service.name"
164+
- "service.version"
165+
receivers:
166+
otlp: null
167+
# [File Log Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/filelogreceiver)
168+
filelog:
169+
exclude:
170+
# exclude opentelemetry-kube-stack pod logs
171+
- /var/log/pods/*opentelemetry-kube-stack*/*/*.log
172+
# [Service Section](https://opentelemetry.io/docs/collector/configuration/#service)
173+
service:
174+
pipelines:
175+
logs: null
176+
metrics: null
177+
traces: null
178+
logs/node:
179+
receivers:
180+
- filelog
181+
processors:
182+
- batch
183+
- k8sattributes
184+
- resourcedetection/system
185+
- resourcedetection/eks
186+
- resourcedetection/gcp
187+
- resourcedetection/aks
188+
- resource/hostname
189+
- resource/cloud
190+
exporters:
191+
- otlp/gateway
192+
# Gateway is a K8s deployment EDOT collector focused on processing and
193+
# forwarding telemetry to an Elasticsearch endpoint.
194+
gateway:
195+
fullnameOverride: "opentelemetry-kube-stack-gateway"
196+
suffix: gateway
197+
replicas: 1
198+
autoscaler:
199+
minReplicas: 1 # Minimum number of replicas; consider raising to 2 or more for better availability.
200+
maxReplicas: 5 # Allow more scale-out if needed.
201+
targetCPUUtilization: 70 # Scale when CPU usage exceeds 70%.
202+
targetMemoryUtilization: 75 # Scale when memory usage exceeds 75%.
203+
resources:
204+
limits:
205+
cpu: 500m
206+
memory: 500Mi
207+
requests:
208+
cpu: 100m
209+
memory: 250Mi
210+
enabled: true
211+
env:
212+
- name: ELASTIC_AGENT_OTEL
213+
value: '"true"'
214+
- name: ELASTIC_OTLP_ENDPOINT
215+
valueFrom:
216+
secretKeyRef:
217+
name: elastic-secret-otel
218+
key: elastic_otlp_endpoint
219+
- name: ELASTIC_API_KEY
220+
valueFrom:
221+
secretKeyRef:
222+
name: elastic-secret-otel
223+
key: elastic_api_key
224+
config:
225+
receivers:
226+
otlp:
227+
protocols:
228+
grpc:
229+
endpoint: ${env:MY_POD_IP}:4317
230+
http:
231+
endpoint: ${env:MY_POD_IP}:4318
232+
processors:
233+
batch:
234+
send_batch_size: 1000
235+
timeout: 1s
236+
send_batch_max_size: 1500
237+
exporters:
238+
debug:
239+
otlp/ingest:
240+
endpoint: ${env:ELASTIC_OTLP_ENDPOINT}
241+
headers:
242+
Authorization: ApiKey ${env:ELASTIC_API_KEY}
243+
timeout: 15s
244+
service:
245+
pipelines:
246+
logs:
247+
receivers: [otlp]
248+
processors: [batch]
249+
exporters: [debug, otlp/ingest]
250+
# For more details on OpenTelemetry's zero-code instrumentation, see:
251+
# https://opentelemetry.io/docs/concepts/instrumentation/zero-code/
252+
instrumentation:
253+
name: elastic-instrumentation
254+
enabled: false # Enable/disable auto-instrumentation.

magefile.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3807,6 +3807,9 @@ func (Helm) UpdateAgentVersion() error {
38073807
filepath.Join(helmMOtelChartPath, "values.yaml"): {
38083808
{"defaultCRConfig.image.tag", agentVersion},
38093809
},
3810+
filepath.Join(helmMOtelChartPath, "logs-values.yaml"): {
3811+
{"defaultCRConfig.image.tag", agentVersion},
3812+
},
38103813
} {
38113814
if err := updateYamlFile(yamlFile, keyVals...); err != nil {
38123815
return fmt.Errorf("failed to update agent version: %w", err)

testing/integration/k8s/otel_helm_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,33 @@ func TestOtelKubeStackHelm(t *testing.T) {
122122
k8sStepCheckRunningPods("app.kubernetes.io/managed-by=opentelemetry-operator", 4, "otc-container"),
123123
},
124124
},
125+
{
126+
name: "mOTel logs only helm kube-stack operator standalone agent kubernetes privileged",
127+
steps: []k8sTestStep{
128+
k8sStepCreateNamespace(),
129+
k8sStepHelmDeployWithValueOptions(KubeStackChartPath, "kube-stack-otel",
130+
values.Options{
131+
ValueFiles: []string{"../../../deploy/helm/edot-collector/kube-stack/managed_otlp/logs-values.yaml"},
132+
Values: []string{fmt.Sprintf("defaultCRConfig.image.repository=%s", kCtx.agentImageRepo), fmt.Sprintf("defaultCRConfig.image.tag=%s", kCtx.agentImageTag)},
133+
134+
// override secrets reference with env variables
135+
JSONValues: []string{
136+
// TODO: replace with managed OTLP ingest endpoint/apiKey when available
137+
fmt.Sprintf(`collectors.gateway.env[1]={"name":"ELASTIC_OTLP_ENDPOINT","value":"%s"}`, "https://otlp.ingest:433"),
138+
fmt.Sprintf(`collectors.gateway.env[2]={"name":"ELASTIC_API_KEY","value":"%s"}`, "CHANGEME=="),
139+
},
140+
},
141+
),
142+
// - An OpenTelemetry Operator Deployment (1 pod per
143+
// cluster)
144+
k8sStepCheckRunningPods("app.kubernetes.io/name=opentelemetry-operator", 1, "manager"),
145+
// - A Daemonset to collect K8s node's logs
146+
// (1 EDOT collector pod per node)
147+
// - One Gateway replica to collect, aggregate and forward
148+
// telemetry.
149+
k8sStepCheckRunningPods("app.kubernetes.io/managed-by=opentelemetry-operator", 2, "otc-container"),
150+
},
151+
},
125152
}
126153

127154
for _, tc := range testCases {

0 commit comments

Comments
 (0)