-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[test_verify_email] Add a block to separate test procedure and cleanup
Add alarm cleanup
- Loading branch information
1 parent
3091406
commit 4817858
Showing
1 changed file
with
62 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,54 +5,69 @@ | |
name: test_alerts | ||
tasks_from: get_observability_api.yml | ||
|
||
- name: "RHELOSP-176042 Create the alert" | ||
ansible.builtin.shell: | ||
cmd: | | ||
oc apply -f - <<EOF | ||
apiVersion: {{ observability_api }}/v1 | ||
kind: PrometheusRule | ||
metadata: | ||
creationTimestamp: null | ||
labels: | ||
prometheus: default | ||
role: alert-rules | ||
name: prometheus-alarm-rules | ||
namespace: service-telemetry | ||
spec: | ||
groups: | ||
- name: ./openstack.rules | ||
rules: | ||
- alert: Collectd metrics receive rate is zero | ||
expr: rate(sg_total_collectd_msg_received_count[1m]) == 0 | ||
EOF | ||
changed_when: false | ||
register: cmd_output | ||
failed_when: cmd_output.rc != 0 | ||
- name: Do the test procedure | ||
block: | ||
- name: "RHELOSP-176042 Create the alert"
  # Pipes an inline PrometheusRule manifest into `oc apply`. The rule holds a
  # single alert whose expression is true while the collectd receive rate
  # over the last minute is zero.
  register: cmd_output
  # Never reported as changed; a non-zero exit code from the shell fails it.
  changed_when: false
  failed_when: cmd_output.rc != 0
  ansible.builtin.shell:
    cmd: |
      oc apply -f - <<EOF
      apiVersion: {{ observability_api }}/v1
      kind: PrometheusRule
      metadata:
        creationTimestamp: null
        labels:
          prometheus: default
          role: alert-rules
        name: fvt-testing-prometheus-alarm-rules-email
        namespace: service-telemetry
      spec:
        groups:
          - name: ./openstack.rules
            rules:
              - alert: FVT_TESTING Collectd metrics receive rate is zero
                expr: rate(sg_total_collectd_msg_received_count[1m]) == 0
      EOF
|
||
- name: "RHELOSP-176043 Patch the ServiceTelemetry object for the STF deployment" | ||
ansible.builtin.shell: | ||
cmd: | | ||
oc patch stf default --type merge -p '{"spec": {"alertmanagerConfigManifest": "apiVersion: v1\nkind: Secret\nmetadata:\n name: 'alertmanager-default'\n namespace: 'service-telemetry'\ntype: Opaque\nstringData:\n alertmanager.yaml: |-\n global:\n resolve_timeout: 10m\n route:\n group_by: ['job']\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 12h\n receiver: 'email'\n receivers:\n - name: 'email'\n - email_configs:\n - to: [email protected]"}}' | ||
changed_when: false | ||
- name: "RHELOSP-176043 Patch the ServiceTelemetry object for the STF deployment" | ||
# Merge-patches the `default` stf object so that spec.alertmanagerConfigManifest
# carries an inline Secret manifest (alertmanager-default, namespace
# service-telemetry) whose alertmanager.yaml routes to a receiver named 'email'.
# The whole patch is wrapped in shell single quotes, so every inner '...' pair
# closes and reopens the shell quoting; those quote characters are consumed by
# the shell and are not part of the argument passed to `oc`.
# NOTE(review): the `to:` value reads "[email protected]" in this capture, which
# looks like an obfuscation placeholder rather than a usable address, and the
# indentation inside the embedded manifest may have been collapsed as well -
# confirm both against the repository copy before reusing this command.
ansible.builtin.shell: | ||
cmd: | | ||
oc patch stf default --type merge -p '{"spec": {"alertmanagerConfigManifest": "apiVersion: v1\nkind: Secret\nmetadata:\n name: 'alertmanager-default'\n namespace: 'service-telemetry'\ntype: Opaque\nstringData:\n alertmanager.yaml: |-\n global:\n resolve_timeout: 10m\n route:\n group_by: ['job']\n group_wait: 30s\n group_interval: 5m\n repeat_interval: 12h\n receiver: 'email'\n receivers:\n - name: 'email'\n - email_configs:\n - to: [email protected]"}}' | ||
# Always reported as unchanged, even though the patch modifies the object.
changed_when: false | ||
|
||
- name: "RHELOSP-176044 Interrupt metrics flow by preventing the QDR from running" | ||
ansible.builtin.shell: | ||
cmd: | | ||
for i in {1..60}; do oc delete po -l application=default-interconnect; sleep 1; done | ||
changed_when: false | ||
- name: "RHELOSP-176044 Interrupt metrics flow by preventing the QDR from running"
  # Deletes the default-interconnect pods once per second, 60 times, so the
  # QDR is repeatedly removed for roughly one minute.
  #
  # The counter loop is plain POSIX sh on purpose: ansible.builtin.shell runs
  # /bin/sh, and the bash-only brace range `{1..60}` is not expanded by shells
  # such as dash, where the loop body would run just once.
  ansible.builtin.shell:
    cmd: |
      i=0
      while [ "$i" -lt 60 ]; do
        oc delete po -l application=default-interconnect
        sleep 1
        i=$((i + 1))
      done
  # Always reported as unchanged, matching the other tasks in this file.
  changed_when: false
|
||
- name: "RHELOSP-176045 Check for alertmanager logs" | ||
ansible.builtin.shell: | ||
cmd: | | ||
oc logs alertmanager-default-0 -c alertmanager | grep 'receiver=email' | wc -l | ||
register: cmd_output | ||
failed_when: "cmd_output.stdout|int == 0" | ||
changed_when: false | ||
- name: "RHELOSP-176045 Check for alertmanager logs"
  # Counts the lines in the alertmanager container log of pod
  # alertmanager-default-0 that contain 'receiver=email'. The count is the
  # command's stdout, and a count of zero fails the task.
  changed_when: false
  register: cmd_output
  failed_when: cmd_output.stdout | int == 0
  ansible.builtin.shell:
    cmd: |
      oc logs alertmanager-default-0 -c alertmanager | grep 'receiver=email' | wc -l
|
||
- name: "RHELOSP-176046 Remove alertmanagerConfigManifest from the ServiceTelemetry object" | ||
ansible.builtin.shell: | ||
cmd: | | ||
oc patch stf/default --type='json' -p '[{"op": "remove", "path": "/spec/alertmanagerConfigManifest"}]' | ||
register: cmd_output | ||
failed_when: cmd_output.rc != 0 | ||
changed_when: false | ||
always:
  # Cleanup section: these tasks run whether or not the test procedure in the
  # accompanying `block` succeeded.

  - name: "Delete the PrometheusRule"
    # --ignore-not-found keeps cleanup from failing (and hiding the original
    # error) when the rule was never created because an earlier task failed.
    ansible.builtin.command:
      cmd: |
        oc delete prometheusrule.{{ observability_api }} fvt-testing-prometheus-alarm-rules-email --ignore-not-found
    changed_when: false

  - name: "Wait up to two minutes until the rule is deleted"
    # Polls the Prometheus rules API (12 attempts, 10 seconds apart) until the
    # test alert name no longer appears in the response body.
    ansible.builtin.command:
      cmd: |
        curl -k {{ prom_auth_string }} https://{{ prom_url }}/api/v1/rules
    # The result must be registered here: `until` reads cmd_output.stdout, and
    # without this line it would test the stale output of an earlier task
    # instead of the response fetched on each attempt.
    register: cmd_output
    changed_when: false
    retries: 12
    delay: 10
    until: '"FVT_TESTING Collectd metrics receive rate is zero" not in cmd_output.stdout'
|
||
- name: "RHELOSP-176046 Remove alertmanagerConfigManifest from the ServiceTelemetry object"
  # Applies a JSON patch with a single "remove" operation on
  # /spec/alertmanagerConfigManifest of stf/default.
  changed_when: false
  register: cmd_output
  # A non-zero exit code from `oc patch` fails the task.
  failed_when: cmd_output.rc != 0
  ansible.builtin.shell:
    cmd: |
      oc patch stf/default --type='json' -p '[{"op": "remove", "path": "/spec/alertmanagerConfigManifest"}]'