Skip to content

Commit

Permalink
[test_verify_email] Add a block to separate test procedure and cleanup
Browse files Browse the repository at this point in the history
Add alarm cleanup
  • Loading branch information
elfiesmelfie committed Nov 27, 2024
1 parent 3091406 commit 4817858
Showing 1 changed file with 62 additions and 47 deletions.
109 changes: 62 additions & 47 deletions roles/test_verify_email/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,54 +5,69 @@
name: test_alerts
tasks_from: get_observability_api.yml

- name: "RHELOSP-176042 Create the alert"
ansible.builtin.shell:
cmd: |
oc apply -f - <<EOF
apiVersion: {{ observability_api }}/v1
kind: PrometheusRule
metadata:
creationTimestamp: null
labels:
prometheus: default
role: alert-rules
name: prometheus-alarm-rules
namespace: service-telemetry
spec:
groups:
- name: ./openstack.rules
rules:
- alert: Collectd metrics receive rate is zero
expr: rate(sg_total_collectd_msg_received_count[1m]) == 0
EOF
changed_when: false
register: cmd_output
failed_when: cmd_output.rc != 0
- name: Do the test procedure
block:
# Test step: pipe an inline PrometheusRule manifest to `oc apply -f -` via a heredoc.
# The rule is named fvt-testing-prometheus-alarm-rules-email in the
# service-telemetry namespace; its apiVersion group comes from the
# `observability_api` variable.
# It defines one alert, "FVT_TESTING Collectd metrics receive rate is zero",
# with the expression rate(sg_total_collectd_msg_received_count[1m]) == 0.
# The task never reports "changed" and fails when oc exits non-zero.
- name: "RHELOSP-176042 Create the alert"
ansible.builtin.shell:
cmd: |
oc apply -f - <<EOF
apiVersion: {{ observability_api }}/v1
kind: PrometheusRule
metadata:
creationTimestamp: null
labels:
prometheus: default
role: alert-rules
name: fvt-testing-prometheus-alarm-rules-email
namespace: service-telemetry
spec:
groups:
- name: ./openstack.rules
rules:
- alert: FVT_TESTING Collectd metrics receive rate is zero
expr: rate(sg_total_collectd_msg_received_count[1m]) == 0
EOF
changed_when: false
register: cmd_output
failed_when: cmd_output.rc != 0

- name: "RHELOSP-176043 Patch the ServiceTelemetry object for the STF deployment"
ansible.builtin.shell:
cmd: |
oc patch stf default --type merge -p '{"spec": {"alertmanagerConfigManifest": "apiVersion: v1\nkind: Secret\nmetadata:\n name: 'alertmanager-default'\n namespace: 'service-telemetry'\ntype: Opaque\nstringData:\n alertmanager.yaml: |-\n global:\n resolve_timeout: 10m\n route:\n group_by: ['job']\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 12h\n receiver: 'email'\n receivers:\n - name: 'email'\n - email_configs:\n - to: [email protected]"}}'
changed_when: false
# Test step: merge-patch the `default` stf object so that
# spec.alertmanagerConfigManifest holds an inline Secret manifest
# (alertmanager-default) whose alertmanager.yaml routes alerts to a receiver
# called 'email'.
# The inner single quotes sit inside the shell's single-quoted -p argument, so
# the shell strips them before oc sees the JSON.
# NOTE(review): `- email_configs:` begins a second list item under `receivers:`,
# separate from `- name: 'email'` - confirm this is intended and not meant to
# be a key of the 'email' receiver.
- name: "RHELOSP-176043 Patch the ServiceTelemetry object for the STF deployment"
ansible.builtin.shell:
cmd: |
oc patch stf default --type merge -p '{"spec": {"alertmanagerConfigManifest": "apiVersion: v1\nkind: Secret\nmetadata:\n name: 'alertmanager-default'\n namespace: 'service-telemetry'\ntype: Opaque\nstringData:\n alertmanager.yaml: |-\n global:\n resolve_timeout: 10m\n route:\n group_by: ['job']\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 12h\n receiver: 'email'\n receivers:\n - name: 'email'\n - email_configs:\n - to: [email protected]"}}'
changed_when: false

- name: "RHELOSP-176044 Interrupt metrics flow by preventing the QDR from running"
ansible.builtin.shell:
cmd: |
for i in {1..60}; do oc delete po -l application=default-interconnect; sleep 1; done
changed_when: false
# Test step: delete the pods labelled application=default-interconnect once a
# second, 60 times, so the QDR cannot stay up and the metrics flow is interrupted.
- name: "RHELOSP-176044 Interrupt metrics flow by preventing the QDR from running"
  ansible.builtin.shell:
    cmd: |
      for i in {1..60}; do oc delete po -l application=default-interconnect; sleep 1; done
    # `{1..60}` is bash brace expansion. The shell module runs /bin/sh by
    # default, and a non-bash sh would iterate once over the literal string,
    # so pin the interpreter explicitly.
    executable: /bin/bash
  changed_when: false

- name: "RHELOSP-176045 Check for alertmanager logs"
ansible.builtin.shell:
cmd: |
oc logs alertmanager-default-0 -c alertmanager | grep 'receiver=email' | wc -l
register: cmd_output
failed_when: "cmd_output.stdout|int == 0"
changed_when: false
# Test step: count the lines in the alertmanager container log of pod
# alertmanager-default-0 that contain 'receiver=email'. A count of zero fails
# the task; the task never reports "changed".
- name: "RHELOSP-176045 Check for alertmanager logs"
  ansible.builtin.shell:
    cmd: |
      oc logs alertmanager-default-0 -c alertmanager | grep 'receiver=email' | wc -l
  changed_when: false
  register: cmd_output
  failed_when: (cmd_output.stdout | int) == 0

- name: "RHELOSP-176046 Remove alertmanagerConfigManifest from the ServiceTelemetry object"
ansible.builtin.shell:
cmd: |
oc patch stf/default --type='json' -p '[{"op": "remove", "path": "/spec/alertmanagerConfigManifest"}]'
register: cmd_output
failed_when: cmd_output.rc != 0
changed_when: false
always:
# Cleanup: remove the PrometheusRule that the test procedure creates.
- name: "Delete the PrometheusRule"
  ansible.builtin.command:
    # --ignore-not-found keeps cleanup going when the rule was never created
    # (for example, the test failed before or during the create step).
    cmd: >-
      oc delete prometheusrule.{{ observability_api }}
      fvt-testing-prometheus-alarm-rules-email
      --ignore-not-found
  # Matches the other tasks in this file, which never report "changed".
  changed_when: false
# Cleanup: poll the Prometheus rules API until the test alert is no longer
# listed. Retries 12 times with a 10-second delay between attempts.
- name: "Wait up to two minutes until the rule is deleted"
  ansible.builtin.command:
    cmd: |
      curl -k {{ prom_auth_string }} https://{{ prom_url }}/api/v1/rules
  # Register this task's own result so that `until` inspects the fresh curl
  # output. Without it, `cmd_output` still holds the result of an earlier task
  # and the wait condition never looks at the rules API response.
  register: cmd_output
  changed_when: false
  retries: 12
  delay: 10
  until: '"FVT_TESTING Collectd metrics receive rate is zero" not in cmd_output.stdout'

# Cleanup: JSON-patch stf/default with a "remove" operation on
# /spec/alertmanagerConfigManifest. The task never reports "changed" and fails
# when oc exits non-zero.
- name: "RHELOSP-176046 Remove alertmanagerConfigManifest from the ServiceTelemetry object"
  ansible.builtin.shell:
    cmd: |
      oc patch stf/default --type='json' -p '[{"op": "remove", "path": "/spec/alertmanagerConfigManifest"}]'
  changed_when: false
  register: cmd_output
  failed_when: cmd_output.rc != 0

0 comments on commit 4817858

Please sign in to comment.