Skip to content

Commit 7088a26

Browse files
authored
Add zenduty integration (#67)
* Add zenduty integration * comment * comment * bump minor version * address comments * mention new event type in readme * fix exponent operator
1 parent 19c5063 commit 7088a26

File tree

6 files changed

+138
-4
lines changed

6 files changed

+138
-4
lines changed

Makefile

+5-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@ cover: ve
2828
lint: lint.python
2929
#lint: lint.yaml - argh, RHEL is too old to do this by default
3030

31-
lint.python: ve
32-
. ve/bin/activate; flake8 observer.py pyth_observer/
31+
lint.python:
32+
poetry run isort pyth_observer/
33+
poetry run black pyth_observer/
34+
poetry run pyright pyth_observer/
35+
poetry run pyflakes pyth_observer/
3336

3437
lint.yaml:
3538
yamllint .

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ Event types are configured via environment variables:
3434
- `TelegramEvent`
3535
- `TELEGRAM_BOT_TOKEN` - API token for the Telegram bot
3636

37+
- `ZendutyEvent`
38+
- `ZENDUTY_INTEGRATION_KEY` - Integration key for Zenduty service API integration
39+
- `OPEN_ALERTS_FILE` - Path to local file used for persisting open alerts
40+
3741
## Finding the Telegram Group Chat ID
3842

3943
To integrate Telegram events with the Observer, you need the Telegram group chat ID. Here's how you can find it:

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ignore_missing_imports = true
44

55
[tool.poetry]
66
name = "pyth-observer"
7-
version = "0.2.5"
7+
version = "0.2.6"
88
description = "Alerts and stuff"
99
authors = []
1010
readme = "README.md"

pyth_observer/dispatch.py

+50
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import asyncio
2+
import json
3+
import os
24
from copy import deepcopy
5+
from datetime import datetime, timedelta
36
from typing import Any, Awaitable, Dict, List
47

8+
from loguru import logger
59
from prometheus_client import Gauge
610

711
from pyth_observer.check import Check, State
@@ -10,11 +14,14 @@
1014
from pyth_observer.event import DatadogEvent # Used dynamically
1115
from pyth_observer.event import LogEvent # Used dynamically
1216
from pyth_observer.event import TelegramEvent # Used dynamically
17+
from pyth_observer.event import ZendutyEvent # Used dynamically
1318
from pyth_observer.event import Event
19+
from pyth_observer.zenduty import send_zenduty_alert
1420

1521
assert DatadogEvent
1622
assert LogEvent
1723
assert TelegramEvent
24+
assert ZendutyEvent
1825

1926

2027
class Dispatch:
@@ -36,6 +43,16 @@ def __init__(self, config, publishers):
3643
"Publisher check failure status",
3744
["check", "symbol", "publisher"],
3845
)
46+
if "ZendutyEvent" in self.config["events"]:
47+
self.open_alerts_file = os.environ["OPEN_ALERTS_FILE"]
48+
self.open_alerts = self.load_alerts()
49+
50+
def load_alerts(self):
    """Load the persisted open Zenduty alerts from ``self.open_alerts_file``.

    The file is expected to hold the JSON object written at the end of
    ``run``: a mapping of alert identifier to the ISO-8601 timestamp of the
    last failure.

    Returns:
        The decoded dict, or an empty dict when the file does not exist,
        is empty/corrupt, or does not contain a JSON object.
    """
    try:
        with open(self.open_alerts_file, "r") as file:
            alerts = json.load(file)
    except FileNotFoundError:
        return {}  # Nothing has been persisted yet
    except json.JSONDecodeError as err:
        # An empty or truncated file (e.g. pre-created, or a crash while
        # writing) must not prevent the observer from starting.
        logger.warning(
            f"Ignoring unreadable open alerts file {self.open_alerts_file}: {err}"
        )
        return {}

    if not isinstance(alerts, dict):
        # Callers iterate with .items(); anything but an object is unusable.
        logger.warning(
            f"Ignoring open alerts file {self.open_alerts_file}: expected a JSON object"
        )
        return {}
    return alerts
3956

4057
async def run(self, states: List[State]):
4158
# First, run each check and store the ones that failed
@@ -62,8 +79,41 @@ async def run(self, states: List[State]):
6279

6380
sent_events.append(event.send())
6481

82+
if event_type == "ZendutyEvent":
83+
# Add failed check to open alerts
84+
alert_identifier = (
85+
f"{check.__class__.__name__}-{check.state().symbol}"
86+
)
87+
state = check.state()
88+
if isinstance(state, PublisherState):
89+
alert_identifier += f"-{state.publisher_name}"
90+
self.open_alerts[alert_identifier] = datetime.now().isoformat()
91+
6592
await asyncio.gather(*sent_events)
6693

94+
# Check open alerts and resolve those that are older than 2 minutes
95+
if "ZendutyEvent" in self.config["events"]:
96+
97+
to_remove = []
98+
current_time = datetime.now()
99+
for identifier, last_failure in self.open_alerts.items():
100+
if current_time - datetime.fromisoformat(last_failure) >= timedelta(
101+
minutes=2
102+
):
103+
logger.debug(f"Resolving Zenduty alert {identifier}")
104+
response = await send_zenduty_alert(
105+
alert_identifier=identifier, message=identifier, resolved=True
106+
)
107+
if response and 200 <= response.status < 300:
108+
to_remove.append(identifier)
109+
110+
for identifier in to_remove:
111+
del self.open_alerts[identifier]
112+
113+
# Write open alerts to file to ensure persistence
114+
with open(self.open_alerts_file, "w") as file:
115+
json.dump(self.open_alerts, file)
116+
67117
def check_price_feed(self, state: PriceFeedState) -> List[Check]:
68118
failed_checks: List[Check] = []
69119

pyth_observer/event.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
from loguru import logger
1212

1313
from pyth_observer.check import Check
14-
from pyth_observer.check.publisher import PublisherCheck
14+
from pyth_observer.check.publisher import PublisherCheck, PublisherState
1515
from pyth_observer.models import Publisher
16+
from pyth_observer.zenduty import send_zenduty_alert
1617

1718
load_dotenv()
1819

@@ -151,3 +152,27 @@ async def send(self):
151152
logger.error(
152153
f"Failed to send Telegram message: {response_text}"
153154
)
155+
156+
157+
class ZendutyEvent(Event):
    """Event that reports a failed check to Zenduty as an alert."""

    def __init__(self, check: Check, context: Context):
        self.check = check
        self.context = context

    async def send(self):
        """Send an alert for ``self.check`` through ``send_zenduty_alert``.

        The summary lists every entry of the check's error message as one
        ``key: value`` line. The alert identifier is
        ``<CheckClass>-<symbol>``, with ``-<publisher_name>`` appended when
        the check's state is a ``PublisherState``; it is also used as the
        alert message.
        """
        details = self.check.error_message()
        summary = "".join(f"{name}: {detail}\n" for name, detail in details.items())

        symbol = self.check.state().symbol
        alert_identifier = f"{self.check.__class__.__name__}-{symbol}"

        check_state = self.check.state()
        if isinstance(check_state, PublisherState):
            # Publisher-level checks get one alert per publisher.
            alert_identifier = f"{alert_identifier}-{check_state.publisher_name}"

        logger.debug(f"Sending Zenduty alert for {alert_identifier}")
        await send_zenduty_alert(
            alert_identifier=alert_identifier,
            message=alert_identifier,
            summary=summary,
        )

pyth_observer/zenduty.py

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import asyncio
2+
import hashlib
3+
import os
4+
5+
import aiohttp
6+
from loguru import logger
7+
8+
# HTTP headers passed with the POST request made by send_zenduty_alert.
headers = {"Content-Type": "application/json"}
9+
10+
11+
async def send_zenduty_alert(alert_identifier, message, resolved=False, summary=""):
    """Post a ``critical`` or ``resolved`` event to the Zenduty events API.

    Args:
        alert_identifier: Stable identifier of the alert. Its SHA-256 hash
            (first 32 hex characters) is sent as ``entity_id`` so that later
            events for the same identifier refer to the same alert.
        message: Value sent as the event ``message``.
        resolved: When true the event is sent with alert type ``resolved``,
            otherwise ``critical``.
        summary: Value sent as the event ``summary``.

    Returns:
        The aiohttp response of the last attempt (successful or not), or
        ``None`` when the request could not be completed because of a
        connection error or timeout. Callers should only rely on
        ``response.status``; the body has been released by then.

    Raises:
        KeyError: If ``ZENDUTY_INTEGRATION_KEY`` is not set in the environment.
    """
    url = f"https://www.zenduty.com/api/events/{os.environ['ZENDUTY_INTEGRATION_KEY']}/"
    # Use a hash of the alert_identifier as a unique id for the alert.
    # Take the first 32 characters due to length limit of the api.
    entity_id = hashlib.sha256(alert_identifier.encode("utf-8")).hexdigest()[:32]

    data = {
        "alert_type": "resolved" if resolved else "critical",
        "message": message,
        "summary": summary,
        "entity_id": entity_id,
    }

    max_retries = 30
    max_backoff_seconds = 30

    try:
        async with aiohttp.ClientSession() as session:
            for attempt in range(1, max_retries + 1):
                async with session.post(url, json=data, headers=headers) as response:
                    if 200 <= response.status < 300:
                        return response  # Success case, return response

                    if response.status != 429:
                        response_text = await response.text()
                        logger.error(
                            f"{response.status} Failed to send Zenduty event message for {alert_identifier}: {response_text}"
                        )
                        return response  # Non-retryable failure

                    if attempt == max_retries:
                        logger.error(
                            f"Failed to send Zenduty event message for {alert_identifier} after {max_retries} retries."
                        )
                        return response  # Return response after max retries

                    # Exponential backoff, capped at max_backoff_seconds.
                    delay = min(max_backoff_seconds, 2**attempt)
                    logger.error(
                        f"Received 429 Too Many Requests for {alert_identifier}. Retrying in {delay} seconds..."
                    )

                # Sleep only after the rate-limited response has been released,
                # so the connection is not held open while waiting.
                await asyncio.sleep(delay)
    except (aiohttp.ClientError, asyncio.TimeoutError) as err:
        # Report transport failures as "no response" instead of raising, so a
        # network blip does not abort the caller's whole dispatch cycle.
        logger.error(
            f"Failed to send Zenduty event message for {alert_identifier}: {err!r}"
        )
        return None

0 commit comments

Comments
 (0)