Skip to content

Commit 53e4f3c

Browse files
emosbaugh authored and xavpaice committed
Fix prometheus alerts
1 parent 9706a69 commit 53e4f3c

25 files changed

+71177
-9
lines changed

Diff for: addons/prometheus/0.55.0-34.10.0/Manifest

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
image alertmanager quay.io/prometheus/alertmanager:v0.24.0
image grafana grafana/grafana:8.4.5
image k8s-sidecar quay.io/kiwigrid/k8s-sidecar:1.15.6
image kube-state-metrics k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.4.1
image node-exporter quay.io/prometheus/node-exporter:v1.3.1
image prometheus quay.io/prometheus/prometheus:v2.34.0
image prometheus-adapter k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.1
image prometheus-config-reloader quay.io/prometheus-operator/prometheus-config-reloader:v0.55.0
image prometheus-operator quay.io/prometheus-operator/prometheus-operator:v0.55.0

Diff for: addons/prometheus/0.55.0-34.10.0/crds/crds.yaml

+26,605
Large diffs are not rendered by default.
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
# Kustomization for the prometheus-operator CRDs.
# Applies the preserveUnknown.yaml JSON6902 patch to the alertmanagers and
# prometheuses CRDs (targets are apiextensions.k8s.io/v1
# CustomResourceDefinitions).
resources:
- crds.yaml

patchesJson6902:
- target:
    group: "apiextensions.k8s.io"
    version: v1 # apiVersion
    kind: CustomResourceDefinition
    name: alertmanagers.monitoring.coreos.com
  path: preserveUnknown.yaml
- target:
    group: "apiextensions.k8s.io"
    version: v1 # apiVersion
    kind: CustomResourceDefinition
    name: prometheuses.monitoring.coreos.com
  path: preserveUnknown.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# JSON6902 patch: sets spec.preserveUnknownFields to false on the targeted
# CustomResourceDefinitions (see the targets in the CRDs kustomization).
- op: add
  path: "/spec/preserveUnknownFields"
  value: false

Diff for: addons/prometheus/0.55.0-34.10.0/host-preflight.yaml

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
# HostPreflight (troubleshoot.sh) that checks TCP port 9100 — the port the
# node-exporter metrics server uses. Both the collector and the analyzer are
# excluded during upgrades via the kurl template expression.
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
  name: prometheus
spec:
  collectors:
    - tcpPortStatus:
        collectorName: "Node Exporter Metrics Server TCP Port Status"
        port: 9100
        exclude: '{{kurl .IsUpgrade }}'

  analyzers:
    - tcpPortStatus:
        checkName: "Node Exporter Metrics Server TCP Port Status"
        collectorName: "Node Exporter Metrics Server TCP Port Status"
        exclude: '{{kurl .IsUpgrade }}'
        outcomes:
          - fail:
              when: "connection-refused"
              message: Connection to port 9100 was refused. This is likely to be a routing problem since this preflight configures a test server to listen on this port.
          - warn:
              when: "address-in-use"
              message: Another process was already listening on port 9100.
          - fail:
              when: "connection-timeout"
              message: Timed out connecting to port 9100. Check your firewall.
          - fail:
              when: "error"
              message: Unexpected port status
          - pass:
              when: "connected"
              message: Port 9100 is available
          # Fallback outcome: any status not matched above is a warning.
          - warn:
              message: Unexpected port status

Diff for: addons/prometheus/0.55.0-34.10.0/install.sh

+162
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
# shellcheck disable=SC2148

# Installs or upgrades the kube-prometheus stack (operator, Prometheus,
# Alertmanager, Grafana, exporters) by staging the addon kustomizations and
# applying them, deleting resources whose immutable fields changed between
# chart versions first.
# Globals read: DIR, KUBERNETES_TARGET_VERSION_MINOR, PROMETHEUS_SERVICE_TYPE
function prometheus() {
    local src="$DIR/addons/prometheus/0.55.0-34.10.0"
    local dst="$DIR/kustomize/prometheus"

    local operatorsrc="$src/operator"
    local operatordst="$dst/operator"

    local crdssrc="$src/crds"
    local crdsdst="$dst/crds"

    cp -r "$operatorsrc/" "$operatordst/"
    cp -r "$crdssrc/" "$crdsdst/"

    # Only patch the controller-manager/scheduler metrics Services on
    # Kubernetes versions before 1.22.
    if [ "$KUBERNETES_TARGET_VERSION_MINOR" -lt "22" ]; then
        local patchessrc="$src/patches"

        cp "$patchessrc/kube-controller-manager-service.yaml" "$operatordst/patches-kube-controller-manager-service.yaml"
        insert_patches_json_6902 "$operatordst/kustomization.yaml" "patches-kube-controller-manager-service.yaml" "" v1 Service prometheus-kube-controller-manager kube-system
        cp "$patchessrc/kube-scheduler-service.yaml" "$operatordst/patches-kube-scheduler-service.yaml"
        insert_patches_json_6902 "$operatordst/kustomization.yaml" "patches-kube-scheduler-service.yaml" "" v1 Service prometheus-kube-scheduler kube-system
    fi

    grafana_admin_secret "$src" "$operatordst"

    # Server-side apply is needed here because the CRDs are too large to keep in metadata
    # https://github.com/prometheus-community/helm-charts/issues/1500
    kubectl apply --server-side --force-conflicts -k "$crdsdst/"
    spinner_until -1 prometheus_crd_ready

    prometheus_rook_ceph "$operatordst"
    prometheus_longhorn "$operatordst"

    # remove deployments and daemonsets that had labelselectors change (as those are immutable)
    kubectl delete deployment -n monitoring kube-state-metrics || true
    kubectl delete daemonset -n monitoring node-exporter || true
    kubectl delete deployment -n monitoring grafana || true
    kubectl delete deployment -n monitoring prometheus-adapter || true

    # remove things that had names change during upgrades
    kubectl delete alertmanager -n monitoring main || true

    # remove services that had a clusterip change
    kubectl delete service -n monitoring kube-state-metrics || true
    kubectl delete service -n monitoring prometheus-operator || true

    # remove nodeport services that had names change
    kubectl delete service -n monitoring grafana || true
    kubectl delete service -n monitoring alertmanager-main || true
    kubectl delete service -n monitoring prometheus-k8s || true

    # if the prometheus-node-exporter daemonset exists and has a release labelSelector set, delete it
    if kubernetes_resource_exists monitoring daemonset prometheus-node-exporter; then
        # Declare and assign separately so the kubectl exit status is not
        # masked by `local` (ShellCheck SC2155); the lookup remains
        # best-effort, matching the sibling lookups below.
        local promNodeExporterLabelSelector
        promNodeExporterLabelSelector=$(kubectl get daemonset -n monitoring prometheus-node-exporter --output="jsonpath={.spec.selector.matchLabels.release}") || true
        if [ -n "$promNodeExporterLabelSelector" ]; then
            kubectl delete daemonset -n monitoring prometheus-node-exporter || true
        fi
    fi

    # if the prometheus-operator deployment exists and has the wrong labelSelectors set, delete it
    if kubernetes_resource_exists monitoring deployment prometheus-operator; then
        # Declare/assign split for the same SC2155 reason as above.
        local promOperatorLabelSelector
        promOperatorLabelSelector=$(kubectl get deployment -n monitoring prometheus-operator --output="jsonpath={.spec.selector.matchLabels.release}") || true
        if [ -n "$promOperatorLabelSelector" ]; then
            kubectl delete deployment -n monitoring prometheus-operator || true
        fi

        promOperatorLabelSelector=$(kubectl get deployment -n monitoring prometheus-operator --output="jsonpath={.spec.selector.matchLabels.app\.kubernetes\.io/component}") || true
        if [ -n "$promOperatorLabelSelector" ]; then
            kubectl delete deployment -n monitoring prometheus-operator || true
        fi
    fi

    # the metrics service has been renamed to v1beta1.custom.metrics.k8s.io, delete the old
    if kubectl get --no-headers apiservice v1beta1.metrics.k8s.io 2>/dev/null | grep -q 'monitoring/prometheus-adapter' ; then
        kubectl delete apiservice v1beta1.metrics.k8s.io
    fi

    # change ClusterIP services to NodePorts if required (NodePort is the default)
    if [ -z "$PROMETHEUS_SERVICE_TYPE" ] || [ "$PROMETHEUS_SERVICE_TYPE" = "NodePort" ] ; then
        cp "$src/nodeport-services.yaml" "$operatordst"
        insert_patches_strategic_merge "$operatordst/kustomization.yaml" nodeport-services.yaml
    fi

    kubectl apply -k "$operatordst/"
}
# Stage kubeadm init patches that expose control-plane metrics endpoints for
# Prometheus scraping.
# Globals read: DIR
function prometheus_pre_init() {
    local patchsrc="$DIR/addons/prometheus/0.55.0-34.10.0/patches"
    local patchdst="$DIR/kustomize/kubeadm/init-patches"
    local patch

    # Copy each metrics patch under a "prometheus-" prefixed file name.
    for patch in kubeproxy-config-v1alpha1.yaml kubeadm-cluster-config-v1beta2.yaml; do
        cp "$patchsrc/$patch" "$patchdst/prometheus-$patch"
    done
}
GRAFANA_ADMIN_USER=
98+
GRAFANA_ADMIN_PASS=
99+
function grafana_admin_secret() {
100+
if kubernetes_resource_exists monitoring secret grafana-admin; then
101+
return 0
102+
fi
103+
104+
local src="$1"
105+
local grafanadst="$2"
106+
107+
GRAFANA_ADMIN_USER=admin
108+
GRAFANA_ADMIN_PASS=$(< /dev/urandom tr -dc A-Za-z0-9 | head -c9)
109+
110+
insert_resources "$grafanadst/kustomization.yaml" grafana-secret.yaml
111+
112+
render_yaml_file "$src/tmpl-grafana-secret.yaml" > "$grafanadst/grafana-secret.yaml"
113+
}
114+
115+
# Prints post-install instructions: where the Prometheus, Grafana and
# Alertmanager UIs are reachable and, on first install, the generated
# Grafana credentials.
# Globals read: PROMETHEUS_SERVICE_TYPE, GRAFANA_ADMIN_USER,
#               GRAFANA_ADMIN_PASS, GREEN, NC
function prometheus_outro() {
    printf "\n"
    printf "\n"
    if [ -z "$PROMETHEUS_SERVICE_TYPE" ] || [ "$PROMETHEUS_SERVICE_TYPE" = "NodePort" ] ; then
        # Pass the color codes as printf arguments instead of embedding
        # variables in the format string (ShellCheck SC2059).
        printf "The UIs of Prometheus, Grafana and Alertmanager have been exposed on NodePorts %s30900%s, %s30902%s and %s30903%s respectively.\n" "$GREEN" "$NC" "$GREEN" "$NC" "$GREEN" "$NC"
    else
        printf "The UIs of Prometheus, Grafana and Alertmanager have been exposed on internal ClusterIP services.\n"
    fi
    if [ -n "$GRAFANA_ADMIN_PASS" ]; then
        printf "\n"
        printf "To access Grafana use the generated user:password of %s%s:%s .%s\n" "$GREEN" "${GRAFANA_ADMIN_USER:-admin}" "$GRAFANA_ADMIN_PASS" "$NC"
    fi
    printf "\n"
    printf "\n"
}
# Readiness gate polled by spinner_until: succeeds only once the
# prometheus-operator CRDs are registered AND the corresponding resources
# are served by the apiserver.
# https://github.com/coreos/kube-prometheus#quickstart
function prometheus_crd_ready() {
    local check
    for check in \
        "customresourcedefinitions servicemonitors.monitoring.coreos.com" \
        "servicemonitors --all-namespaces" \
        "customresourcedefinitions prometheuses.monitoring.coreos.com" \
        "prometheuses --all-namespaces"; do
        # shellcheck disable=SC2086 # intentional word splitting of the args
        if ! kubectl get $check &>/dev/null; then
            return 1
        fi
    done
    return 0
}
# Adds the rook-ceph-rolebindings.yaml resources to the kustomization in $1
# when the rook-ceph namespace exists.
# Arguments: $1 - destination kustomize directory
function prometheus_rook_ceph() {
    local dst="$1"

    # Query the namespace directly instead of grepping `kubectl get ns`,
    # which would also match any namespace merely containing "rook-ceph"
    # as a substring.
    if kubectl get namespace rook-ceph &>/dev/null; then
        insert_resources "$dst/kustomization.yaml" rook-ceph-rolebindings.yaml
    fi
}
# Adds the longhorn.yaml resources to the kustomization in $1 when the
# longhorn-system namespace exists.
# Arguments: $1 - destination kustomize directory
function prometheus_longhorn() {
    local dst="$1"

    # Query the namespace directly instead of grepping `kubectl get ns`,
    # which would also match any namespace merely containing the substring
    # "longhorn-system".
    if kubectl get namespace longhorn-system &>/dev/null; then
        insert_resources "$dst/kustomization.yaml" longhorn.yaml
    fi
}
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
# Patches that switch the Alertmanager, Prometheus and Grafana UI services
# to NodePort on fixed ports 30903, 30900 and 30902 respectively.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-alertmanager
  namespace: monitoring
spec:
  ports:
    - name: web
      port: 9093
      protocol: TCP
      nodePort: 30903
  type: "NodePort"
---
apiVersion: v1
kind: Service
metadata:
  name: prometheus-k8s
  namespace: monitoring
spec:
  ports:
    - name: web
      port: 9090
      nodePort: 30900
  type: "NodePort"
---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
spec:
  type: "NodePort"
  ports:
    - name: service
      port: 80
      protocol: TCP
      nodePort: 30902

0 commit comments

Comments
 (0)