Skip to content

Commit f802b9a

Browse files
authored
MRG: Merge pull request #586 from octue/refactor/move-crash-diagnostics-logic-into-class
Move crash diagnostics logic into class
2 parents 331c074 + 4f75b02 commit f802b9a

6 files changed

+582
-351
lines changed

docs/source/inter_service_compatibility.rst

+153-151
Large diffs are not rendered by default.

octue/crash_diagnostics.py

+188
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import copy
2+
import json
3+
import logging
4+
5+
import coolname
6+
7+
from octue.cloud import storage
8+
from octue.cloud.storage import GoogleCloudStorageClient
9+
from octue.resources import Dataset
10+
from octue.utils.encoders import OctueJSONEncoder
11+
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
class CrashDiagnostics:
    """A handler for crash diagnostics that allows uploading of explicitly added configuration and input data and any
    questions asked to the cloud.

    Data is accumulated on the instance via `add_data` and `add_question` and is only sent to the cloud when `upload`
    is called.

    :param str cloud_path: the cloud path of a directory to upload any added data into
    """

    def __init__(self, cloud_path):
        self.cloud_path = cloud_path
        self.analysis_id = None
        self.configuration_values = None
        self.configuration_manifest = None
        self.input_values = None
        self.input_manifest = None
        self.questions = []
        self._storage_client = GoogleCloudStorageClient()

    def add_data(
        self,
        analysis_id=None,
        configuration_values=None,
        configuration_manifest=None,
        input_values=None,
        input_manifest=None,
    ):
        """Add an analysis ID, configuration values, a configuration manifest, input values, and/or an input manifest to
        the crash diagnostics. The values and manifests are deep-copied before being added. This method can be called
        multiple times as data becomes available. Calling again with the same keyword arguments will overwrite any data
        of that type added previously. Arguments left as `None` leave any previously added data untouched.

        :param str analysis_id: the ID of the analysis to save crash diagnostics for
        :param any configuration_values: configuration values to save
        :param any configuration_manifest: a configuration manifest to save
        :param any input_values: input values to save
        :param any input_manifest: an input manifest to save
        :return None:
        """
        # An empty analysis ID is treated the same as no ID: `upload` generates one whenever the stored ID is falsy.
        if analysis_id:
            self.analysis_id = analysis_id

        # Values are compared against `None` rather than tested for truthiness so that valid but falsy values (e.g.
        # `{}`, `[]`, `0`, `False`) are recorded and can overwrite earlier data. This matches `_upload`, which uploads
        # any values that are not `None`.
        if configuration_values is not None:
            self.configuration_values = copy.deepcopy(configuration_values)

        # Manifests keep the truthiness check: `_upload_manifest` needs a "datasets" entry, which an empty manifest
        # cannot provide.
        if configuration_manifest:
            self.configuration_manifest = copy.deepcopy(configuration_manifest)

        if input_values is not None:
            self.input_values = copy.deepcopy(input_values)

        if input_manifest:
            self.input_manifest = copy.deepcopy(input_manifest)

    def add_question(self, question):
        """Add a question to the list of questions to save.

        :param dict question: the question to add
        :return None:
        """
        self.questions.append(question)

    def upload(self):
        """Check that a cloud path has been provided before uploading any added data to the crash diagnostics cloud
        path. Any errors encountered during upload are caught and logged.

        :return None:
        """
        if not self.cloud_path:
            logger.warning(
                "Cannot upload crash diagnostics as the child doesn't have the `crash_diagnostics_cloud_path` field "
                "set in its service configuration (`octue.yaml` file)."
            )
            return

        # Fall back to a generated ID so the diagnostics always get their own subdirectory of the cloud path.
        if not self.analysis_id:
            self.analysis_id = coolname.generate_slug(3)

        # Uploading is best-effort: it happens while handling a crash, so a failure here is logged, not raised.
        try:
            self._upload()
            logger.info("Crash diagnostics uploaded.")
        except Exception:
            logger.exception("Failed to upload crash diagnostics.")

    def _upload(self):
        """Upload any added data to the crash diagnostics cloud path. Values and manifests that were added as strings
        are replaced on the instance by their JSON-deserialised form (where deserialisation succeeds) before upload.

        :return None:
        """
        question_diagnostics_path = storage.path.join(self.cloud_path, self.analysis_id)
        logger.warning("App failed - saving crash diagnostics to %r.", question_diagnostics_path)

        for data_type in ("configuration", "input"):
            values_type = f"{data_type}_values"
            values = getattr(self, values_type)

            if values is not None:
                # The upload helper re-reads the attribute, so the deserialised form is stored back on the instance.
                if isinstance(values, str):
                    setattr(self, values_type, self._attempt_deserialise_json(values))

                self._upload_values(values_type, question_diagnostics_path)

            manifest_type = f"{data_type}_manifest"
            manifest = getattr(self, manifest_type)

            if manifest is not None:
                if isinstance(manifest, str):
                    setattr(self, manifest_type, self._attempt_deserialise_json(manifest))

                self._upload_manifest(manifest_type, question_diagnostics_path)

        # Upload the questions recorded via `add_question` before the crash.
        self._storage_client.upload_from_string(
            string=json.dumps(self.questions, cls=OctueJSONEncoder),
            cloud_path=storage.path.join(question_diagnostics_path, "questions.json"),
        )

    @staticmethod
    def _attempt_deserialise_json(string):
        """Attempt to deserialise the given string from JSON. If deserialisation fails, the original string is returned.

        :param str string: the string to attempt to deserialise
        :return any: the deserialised python object or the original string
        """
        try:
            return json.loads(string)
        except json.decoder.JSONDecodeError:
            return string

    def _upload_values(self, values_type, question_diagnostics_path):
        """Upload the values of the given type as part of the crash diagnostics. They're serialised to JSON and stored
        as `<values_type>.json` in the given directory.

        :param str values_type: one of "configuration_values" or "input_values"
        :param str question_diagnostics_path: the path to a cloud directory to upload the values into
        :return None:
        """
        values = getattr(self, values_type)

        self._storage_client.upload_from_string(
            json.dumps(values, cls=OctueJSONEncoder),
            cloud_path=storage.path.join(question_diagnostics_path, f"{values_type}.json"),
        )

    def _upload_manifest(self, manifest_type, question_diagnostics_path):
        """Upload the serialised manifest of the given type and its datasets as part of the crash diagnostics. Each
        dataset is uploaded to `<manifest_type>_datasets/<dataset name>` in the given directory and the manifest stored
        on the instance is updated in place to point at the uploaded copies before being uploaded itself.

        :param str manifest_type: one of "configuration_manifest" or "input_manifest"
        :param str question_diagnostics_path: the path to a cloud directory to upload the manifest into
        :return None:
        """
        manifest = getattr(self, manifest_type)

        # Upload each dataset and update its path in the manifest. Only values of existing keys are rebound inside the
        # loop, so the dictionary's size doesn't change while it's being iterated over.
        for dataset_name, dataset_path in manifest["datasets"].items():

            # Handle manifests containing serialised datasets instead of just the datasets' paths. Datasets can be in
            # this state when serialised if they were instantiated using the `files` argument.
            if isinstance(dataset_path, dict):
                dataset_path = dataset_path["path"]

            new_dataset_path = storage.path.join(
                question_diagnostics_path,
                f"{manifest_type}_datasets",
                dataset_name,
            )

            Dataset(dataset_path).upload(new_dataset_path)
            manifest["datasets"][dataset_name] = new_dataset_path

        # Upload the serialised manifest.
        self._storage_client.upload_from_string(
            json.dumps(manifest, cls=OctueJSONEncoder),
            cloud_path=storage.path.join(question_diagnostics_path, f"{manifest_type}.json"),
        )

octue/metadata/recorded_questions.jsonl

+1
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,4 @@
7272
{"parent_sdk_version": "0.46.0", "question": {"data": "{\"input_values\": {\"height\": 4, \"width\": 72}, \"input_manifest\": {\"id\": \"a289353b-2708-4a69-a72e-54bf67b75e61\", \"name\": null, \"datasets\": {\"my_dataset\": \"/var/folders/sk/hf5fbp616c77tsys9lz55qn40000gp/T/tmp56ucgays\"}}, \"children\": null, \"message_number\": 0}", "attributes": {"question_uuid": "bb08fdfc-6f67-4942-8fc4-8a4adb7bb93c", "forward_logs": "1", "allow_save_diagnostics_data_on_crash": "1", "octue_sdk_version": "0.46.0"}}}
7373
{"parent_sdk_version": "0.46.1", "question": {"data": "{\"input_values\": {\"height\": 4, \"width\": 72}, \"input_manifest\": {\"id\": \"d45acf3c-cc17-4b5c-9d5d-4036341cbcf4\", \"name\": null, \"datasets\": {\"my_dataset\": \"/var/folders/sk/hf5fbp616c77tsys9lz55qn40000gp/T/tmp11q86f7l\"}}, \"children\": null, \"message_number\": 0}", "attributes": {"question_uuid": "3b880fcc-716b-4bdd-a5f8-044c6163bd20", "forward_logs": "1", "allow_save_diagnostics_data_on_crash": "1", "octue_sdk_version": "0.46.1"}}}
7474
{"parent_sdk_version": "0.46.2", "question": {"data": "{\"input_values\": {\"height\": 4, \"width\": 72}, \"input_manifest\": {\"id\": \"16781d76-e19b-478d-934a-913f75aaab99\", \"name\": null, \"datasets\": {\"my_dataset\": \"/var/folders/sk/hf5fbp616c77tsys9lz55qn40000gp/T/tmplvu6dv73\"}}, \"children\": null, \"message_number\": 0}", "attributes": {"question_uuid": "578f41ed-b412-4a2f-8803-4e3093731d5f", "forward_logs": "1", "allow_save_diagnostics_data_on_crash": "1", "octue_sdk_version": "0.46.2"}}}
75+
{"parent_sdk_version": "0.46.3", "question": {"data": "{\"input_values\": {\"height\": 4, \"width\": 72}, \"input_manifest\": {\"id\": \"c787baf9-9805-469a-85f6-830f069bda3c\", \"name\": null, \"datasets\": {\"my_dataset\": \"/var/folders/sk/hf5fbp616c77tsys9lz55qn40000gp/T/tmp_o_g9w28\"}}, \"children\": null, \"message_number\": 0}", "attributes": {"question_uuid": "4089746b-2a5f-4c15-bab7-e017c8c22884", "forward_logs": "1", "allow_save_diagnostics_data_on_crash": "1", "octue_sdk_version": "0.46.3"}}}

0 commit comments

Comments
 (0)