Skip to content

Commit 5c10214

Browse files
authored
Merge pull request #167 from tarunps/fix/rq-monitor
fix: Fail silently with logs for misconfigured benches
2 parents 83d8b4f + cd4ed57 commit 5c10214

File tree

1 file changed

+25 -6 lines changed

1 file changed

+25 -6 lines changed

agent/web.py

+25-6
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from flask import Flask, Response, jsonify, request
1313
from passlib.hash import pbkdf2_sha256 as pbkdf2
1414
from playhouse.shortcuts import model_to_dict
15+
from redis.exceptions import ConnectionError as RedisConnectionError
1516
from rq.exceptions import NoSuchJobError
1617
from rq.job import Job as RQJob
1718
from rq.job import JobStatus
@@ -233,11 +234,21 @@ def get_benches():
233234
def get_metrics():
234235
from agent.exporter import get_metrics
235236

236-
benches_metrics = [
237-
get_metrics(name, rq_port)
238-
for name, bench in Server().benches.items()
239-
if (rq_port := bench.bench_config.get("rq_port")) is not None
240-
]
237+
benches_metrics = []
238+
server = Server()
239+
240+
for name, bench in server.benches.items():
241+
rq_port = bench.bench_config.get("rq_port")
242+
if rq_port is not None:
243+
try:
244+
metrics = get_metrics(name, rq_port)
245+
benches_metrics.append(metrics)
246+
except RedisConnectionError as e:
247+
# This is to specifically catch the error on old benches that had their
248+
# configs updated to render rq_port but the container doesn't actually
249+
# expose the rq_port
250+
log.error(f"Failed to get metrics for {name} on port {rq_port}: {e}")
251+
241252
return Response(benches_metrics, mimetype="text/plain")
242253

243254

@@ -254,7 +265,15 @@ def get_bench_metrics(bench_str):
254265
bench = Server().benches[bench_str]
255266
rq_port = bench.bench_config.get("rq_port")
256267
if rq_port:
257-
return Response(get_metrics(bench_str, rq_port), mimetype="text/plain")
268+
try:
269+
res = get_metrics(bench_str, rq_port)
270+
except RedisConnectionError as e:
271+
# This is to specifically catch the error on old benches that had their
272+
# configs updated to render rq_port but the container doesn't actually
273+
# expose the rq_port
274+
log.error(f"Failed to get metrics for {bench_str} on port {rq_port}: {e}")
275+
else:
276+
return Response(res, mimetype="text/plain")
258277

259278
return Response("Unavailable", status=400, mimetype="text/plain")
260279

0 commit comments

Comments (0)