
Commit 81597bf

Add node local DNS cache

1 parent c003b5b

File tree: 9 files changed, +323 -0 lines changed
Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
apiVersion: v1
kind: Service
metadata:
  name: kube-dns-upstream
  namespace: kube-system
  labels:
    app: node-local-dns
    k8s-app: kube-dns-upstream
spec:
  selector:
    k8s-app: kube-dns
  ports:
    - name: dns
      port: 53
      protocol: UDP
      targetPort: 53
    - name: dns-tcp
      port: 53
      protocol: TCP
      targetPort: 53
  type: ClusterIP
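Once this service exists, its assigned ClusterIP can be read back with the same kind of command the values file already suggests for kube-dns; the ConfigMap template below prefers this IP over the plain kube-dns IP. A verification sketch, not part of the commit:

kubectl get svc kube-dns-upstream -n kube-system -o jsonpath='{.spec.clusterIP}'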
Lines changed: 79 additions & 0 deletions

@@ -0,0 +1,79 @@
{{- if .Values.nodeLocalDns.enabled }}
{{- if not .Values.nodeLocalDns.kubeDnsIp }}
{{- fail "nodeLocalDns.kubeDnsIp is required when nodeLocalDns is enabled. Get it with: kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}'" }}
{{- end }}
{{- /* Get the kube-dns-upstream service IP. If it doesn't exist yet, use kubeDnsUpstreamIp from values or fall back to kubeDnsIp */ -}}
{{- $upstreamSvc := lookup "v1" "Service" "kube-system" "kube-dns-upstream" }}
{{- $upstreamIp := "" }}
{{- if $upstreamSvc }}
{{- $upstreamIp = $upstreamSvc.spec.clusterIP }}
{{- else if .Values.nodeLocalDns.kubeDnsUpstreamIp }}
{{- $upstreamIp = .Values.nodeLocalDns.kubeDnsUpstreamIp }}
{{- else }}
{{- /* First deploy: upstream service doesn't exist yet, will be created by this release */ -}}
{{- /* Use kubeDnsIp as fallback - the DaemonSet will need to be restarted after service is created */ -}}
{{- $upstreamIp = .Values.nodeLocalDns.kubeDnsIp }}
{{- end }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: node-local-dns
  namespace: kube-system
  labels:
    app: node-local-dns
  annotations:
    # Force ConfigMap update when upstream IP changes
    nodelocaldns.kubernetes.io/upstream-ip: {{ $upstreamIp | quote }}
data:
  Corefile: |
    {{ .Values.nodeLocalDns.clusterDomain }}:53 {
        errors
        cache {
            success {{ .Values.nodeLocalDns.successTtl }}
            denial {{ .Values.nodeLocalDns.denialTtl }}
        }
        reload
        loop
        bind {{ .Values.nodeLocalDns.localDnsIp }}
        forward . {{ $upstreamIp }} {
            force_tcp
        }
        prometheus :9253
        health {{ .Values.nodeLocalDns.localDnsIp }}:8080
    }
    in-addr.arpa:53 {
        errors
        cache 30
        reload
        loop
        bind {{ .Values.nodeLocalDns.localDnsIp }}
        forward . {{ $upstreamIp }} {
            force_tcp
        }
        prometheus :9253
    }
    ip6.arpa:53 {
        errors
        cache 30
        reload
        loop
        bind {{ .Values.nodeLocalDns.localDnsIp }}
        forward . {{ $upstreamIp }} {
            force_tcp
        }
        prometheus :9253
    }
    .:53 {
        errors
        cache 30
        reload
        loop
        bind {{ .Values.nodeLocalDns.localDnsIp }}
        {{- if .Values.nodeLocalDns.upstreamServers }}
        forward . {{ .Values.nodeLocalDns.upstreamServers }}
        {{- else }}
        forward . /etc/resolv.conf
        {{- end }}
        prometheus :9253
    }
{{- end }}
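To sanity-check which upstream IP the template actually resolved to (the lookup result, kubeDnsUpstreamIp, or the kubeDnsIp fallback), the rendered Corefile and the tracking annotation can be inspected after install. A verification sketch, not part of the commit:

# Show the forward targets in the rendered Corefile:
kubectl get configmap node-local-dns -n kube-system -o jsonpath='{.data.Corefile}' | grep 'forward \.'
# Show the upstream IP recorded in the annotation:
kubectl get configmap node-local-dns -n kube-system \
  -o jsonpath='{.metadata.annotations.nodelocaldns\.kubernetes\.io/upstream-ip}'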
Lines changed: 91 additions & 0 deletions

@@ -0,0 +1,91 @@
{{- if .Values.nodeLocalDns.enabled }}
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-local-dns
  namespace: kube-system
  labels:
    app: node-local-dns
spec:
  selector:
    matchLabels:
      app: node-local-dns
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 10%
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: node-local-dns
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9253"
        # Trigger pod restart when ConfigMap changes (includes upstream IP)
        checksum/config: {{ include (print $.Template.BasePath "/node-local-dns-config.yaml") . | sha256sum }}
    spec:
      serviceAccountName: node-local-dns
      priorityClassName: system-node-critical
      hostNetwork: true
      dnsPolicy: Default
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - operator: Exists
          effect: NoSchedule
        - operator: Exists
          effect: NoExecute
      containers:
        - name: node-cache
          image: "{{ .Values.nodeLocalDns.image }}:{{ .Values.nodeLocalDns.imageTag }}"
          imagePullPolicy: {{ .Values.nodeLocalDns.imagePullPolicy }}
          args:
            - "-localip"
            - "{{ .Values.nodeLocalDns.localDnsIp }}"
            - "-conf"
            - "/etc/Corefile"
            - "-upstreamsvc"
            - "kube-dns"
            - "-skipteardown=true"
            - "-setupinterface=true"
            - "-setupiptables=true"
          securityContext:
            privileged: true
          ports:
            - name: dns
              containerPort: 53
              protocol: UDP
            - name: dns-tcp
              containerPort: 53
              protocol: TCP
            - name: metrics
              containerPort: 9253
              protocol: TCP
          livenessProbe:
            httpGet:
              host: {{ .Values.nodeLocalDns.localDnsIp }}
              path: /health
              port: 8080
            initialDelaySeconds: 60
            timeoutSeconds: 5
          resources:
            {{- toYaml .Values.nodeLocalDns.resources | nindent 12 }}
          volumeMounts:
            - name: config
              mountPath: /etc/Corefile
              subPath: Corefile
            - name: xtables-lock
              mountPath: /run/xtables.lock
              readOnly: false
      volumes:
        - name: config
          configMap:
            name: node-local-dns
            items:
              - key: Corefile
                path: Corefile
        - name: xtables-lock
          hostPath:
            path: /run/xtables.lock
            type: FileOrCreate
{{- end }}
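Because the DaemonSet runs with hostNetwork and binds the link-local address, the cache can be exercised directly from a node's network namespace once the rollout finishes. A sketch assuming the default localDnsIp of 169.254.20.10 and a placeholder node name:

kubectl -n kube-system rollout status daemonset/node-local-dns
# Resolve through the local cache and hit the CoreDNS health endpoint from a host-network debug pod:
kubectl debug node/<node-name> -it --image=busybox:1.36 -- sh -c \
  'nslookup kubernetes.default.svc.cluster.local 169.254.20.10 && wget -qO- http://169.254.20.10:8080/health'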
Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
{{- if .Values.nodeLocalDns.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: node-local-dns
  namespace: kube-system
  labels:
    app: node-local-dns
spec:
  clusterIP: None
  selector:
    app: node-local-dns
  ports:
    - name: metrics
      port: 9253
      targetPort: 9253
      protocol: TCP
{{- end }}
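Since the cache pods use hostNetwork, this headless service should end up with one endpoint per node, all on port 9253. A quick check, not part of the commit:

kubectl get endpoints node-local-dns -n kube-system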
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
{{- if .Values.nodeLocalDns.enabled }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-local-dns
  namespace: kube-system
  labels:
    app: node-local-dns
{{- end }}

kubernetes/linera-validator/templates/otel-collector-router-deployment.yaml

Lines changed: 26 additions & 0 deletions

@@ -26,6 +26,32 @@ spec:
       tolerations:
         {{- toYaml .Values.otelCollector.router.tolerations | nindent 8 }}
       {{- end }}
+      {{- if .Values.nodeLocalDns.enabled }}
+      dnsPolicy: None
+      dnsConfig:
+        nameservers:
+          - {{ .Values.nodeLocalDns.localDnsIp }}
+        searches:
+          - {{ .Release.Namespace }}.svc.cluster.local
+          - svc.cluster.local
+          - cluster.local
+        options:
+          - name: ndots
+            value: "5"
+      {{- end }}
+      initContainers:
+        - name: wait-for-sampler-dns
+          image: busybox:1.36
+          command:
+            - sh
+            - -c
+            - |
+              echo "Waiting for sampler DNS to be resolvable..."
+              until nslookup otel-collector-sampler-headless.{{ .Release.Namespace }}.svc.cluster.local; do
+                echo "DNS not ready, retrying in 2 seconds..."
+                sleep 2
+              done
+              echo "DNS resolution successful!"
       containers:
         - name: otel-collector-router
           image: {{ .Values.otelCollector.router.image }}:{{ .Values.otelCollector.router.imageTag }}
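With dnsPolicy: None the pod's resolv.conf comes entirely from dnsConfig, so it should list only the node-local cache IP plus the search domains above. A spot-check sketch with placeholder pod and namespace names, not part of the commit:

kubectl exec -n <namespace> <otel-collector-router-pod> -- cat /etc/resolv.conf
# expected with default values: "nameserver 169.254.20.10" and the three search domains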

kubernetes/linera-validator/templates/proxy.yaml

Lines changed: 13 additions & 0 deletions

@@ -58,6 +58,19 @@ spec:
           value: "true"
           effect: NoSchedule
       {{- end }}
+      {{- if .Values.nodeLocalDns.enabled }}
+      dnsPolicy: None
+      dnsConfig:
+        nameservers:
+          - {{ .Values.nodeLocalDns.localDnsIp }}
+        searches:
+          - {{ .Release.Namespace }}.svc.cluster.local
+          - svc.cluster.local
+          - cluster.local
+        options:
+          - name: ndots
+            value: "5"
+      {{- end }}
       terminationGracePeriodSeconds: 10
       initContainers:
         - name: linera-proxy-initializer

kubernetes/linera-validator/templates/shards.yaml

Lines changed: 13 additions & 0 deletions

@@ -38,6 +38,19 @@ spec:
           value: "true"
           effect: NoSchedule
       {{- end }}
+      {{- if .Values.nodeLocalDns.enabled }}
+      dnsPolicy: None
+      dnsConfig:
+        nameservers:
+          - {{ .Values.nodeLocalDns.localDnsIp }}
+        searches:
+          - {{ .Release.Namespace }}.svc.cluster.local
+          - svc.cluster.local
+          - cluster.local
+        options:
+          - name: ndots
+            value: "5"
+      {{- end }}
       terminationGracePeriodSeconds: 10
       initContainers:
         - name: linera-server-initializer

kubernetes/linera-validator/values.yaml

Lines changed: 53 additions & 0 deletions

@@ -335,3 +335,56 @@ otelCollector:
     # Tempo endpoint (where Tier 2 exports to)
     tempoEndpoint: "tempo.tempo.svc.cluster.local:4317"
     tempoInsecure: true
+
+# ============================================================================
+# NodeLocal DNSCache - Reduces CoreDNS load by caching DNS at node level
+# ============================================================================
+# Deploys a DaemonSet that runs a DNS cache on every node, reducing
+# latency and load on the cluster's CoreDNS pods.
+
+nodeLocalDns:
+  # Enable/disable NodeLocal DNSCache
+  enabled: false
+
+  # Local DNS IP - link-local address for the node cache
+  # Must not conflict with any existing cluster IPs
+  localDnsIp: "169.254.20.10"
+
+  # kube-dns service IP - REQUIRED for iptables interception
+  # Get this with: kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}'
+  # For GKE, typically 10.x.0.10 where x depends on your service CIDR
+  kubeDnsIp: ""
+
+  # kube-dns-upstream service IP - Used for forwarding DNS queries
+  # This service bypasses NOTRACK iptables rules, allowing NAT to work properly
+  # If empty, Helm will look up the service IP automatically (requires service to exist)
+  # On first deploy, falls back to kubeDnsIp (requires second helm upgrade to fix)
+  kubeDnsUpstreamIp: ""
+
+  # Image configuration
+  image: "registry.k8s.io/dns/k8s-dns-node-cache"
+  imageTag: "1.23.1"
+  imagePullPolicy: "IfNotPresent"
+
+  # Resource limits - lineractl calculates proportional values based on VM vCPUs
+  # Shard nodes have high DNS load from ScyllaDB connection queries
+  resources:
+    requests:
+      cpu: "400m"
+      memory: "64Mi"
+    limits:
+      cpu: "1200m"
+      memory: "256Mi"
+
+  # DNS configuration
+  # Cluster DNS domain (usually cluster.local)
+  clusterDomain: "cluster.local"
+
+  # Upstream DNS servers for external queries (empty = use node's resolv.conf)
+  upstreamServers: ""
+
+  # Cache TTL settings (in seconds)
+  # How long to cache successful DNS responses
+  successTtl: 30
+  # How long to cache negative DNS responses (NXDOMAIN)
+  denialTtl: 5
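Putting the values together, enabling the cache might look like the sketch below; the release name linera-validator is an assumption based on the chart path, and the second upgrade follows the kubeDnsUpstreamIp comment above (only needed on the first rollout, before kube-dns-upstream exists):

# Enable the cache (release name is assumed; adjust to the actual release):
KUBE_DNS_IP=$(kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}')
helm upgrade --install linera-validator kubernetes/linera-validator \
  --set nodeLocalDns.enabled=true \
  --set nodeLocalDns.kubeDnsIp="$KUBE_DNS_IP"

# On the first rollout the Corefile falls back to kubeDnsIp; a second upgrade lets the
# template's lookup pick up the now-existing kube-dns-upstream ClusterIP:
helm upgrade linera-validator kubernetes/linera-validator \
  --set nodeLocalDns.enabled=true \
  --set nodeLocalDns.kubeDnsIp="$KUBE_DNS_IP"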
