Skip to content

Commit 2a5a0f7

Browse files
committed
ENH: Improve output validation/upload logging
1 parent 98109e1 commit 2a5a0f7

File tree

2 files changed

+34
-15
lines changed

2 files changed

+34
-15
lines changed

octue/resources/analysis.py

+29-11
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,7 @@ def set_up_periodic_monitor_message(self, create_monitor_message, period=60):
140140
def finalise(self, upload_output_datasets_to=None, use_signed_urls=None):
141141
"""Validate the output values and output manifest and, if the analysis produced an output manifest, upload its
142142
output datasets to a unique subdirectory within the analysis's output location. This output location can be
143-
overridden by providing a different cloud path via the `upload_output_datasets_to` parameter. Either way, the
144-
dataset paths in the output manifest are replaced with signed URLs for easier, expiring access.
143+
overridden by providing a different cloud path via the `upload_output_datasets_to` parameter.
145144
146145
:param str|None upload_output_datasets_to: If not provided but an output location was provided at instantiation, upload any output datasets into a unique subdirectory within this output location; if provided, upload into this location instead. The output manifest is updated with the upload locations.
147146
:param bool|None use_signed_urls: if `True`, use signed URLs instead of cloud URIs for dataset paths in the output manifest; if `None`, use the value of `use_signed_urls_for_output_datasets` given at instantiation
@@ -150,34 +149,53 @@ def finalise(self, upload_output_datasets_to=None, use_signed_urls=None):
150149
serialised_strands = {"output_values": None, "output_manifest": None}
151150

152151
if self.output_values:
152+
logger.info("The analysis produced output values.")
153153
serialised_strands["output_values"] = json.dumps(self.output_values, cls=OctueJSONEncoder)
154+
else:
155+
logger.info("The analysis didn't produce output values.")
154156

155157
if self.output_manifest:
158+
logger.info("The analysis produced an output manifest.")
159+
160+
if not (self.output_location or upload_output_datasets_to):
161+
logger.info("No output location was set in the app configuration - can't upload output datasets.")
162+
156163
serialised_strands["output_manifest"] = self.output_manifest.serialise()
157164

165+
else:
166+
logger.info("The analysis didn't produce an output manifest.")
167+
158168
self.twine.validate(**serialised_strands)
159169
self._finalised = True
160-
logger.info("Validated output values and output manifest against the twine.")
170+
logger.info("Validated outputs against the twine.")
161171

172+
if self.output_manifest and (self.output_location or upload_output_datasets_to):
173+
self._upload_output_datasets(upload_output_datasets_to, use_signed_urls)
174+
175+
def _upload_output_datasets(self, upload_output_datasets_to, use_signed_urls):
    """Upload each of the output manifest's datasets to the cloud.

    :param str|None upload_output_datasets_to: if provided, upload the output datasets into this location; if `None`, upload them into a unique subdirectory within the output location provided at instantiation. The output manifest is updated with the upload locations.
    :param bool|None use_signed_urls: if `True`, use signed URLs instead of cloud URIs for dataset paths in the output manifest; if `None`, use the value of `use_signed_urls_for_output_datasets` given at instantiation
    :return None:
    """
    # Fall back to a unique subdirectory of the instantiation-time output location when no
    # explicit upload location has been given.
    if not upload_output_datasets_to:
        upload_output_datasets_to = storage.path.join(self.output_location, coolname.generate_slug())

    logger.info("Beginning upload of output datasets to %r...", upload_output_datasets_to)

    for dataset_name, dataset in self.output_manifest.datasets.items():
        dataset_cloud_path = storage.path.join(upload_output_datasets_to, dataset_name)
        dataset.upload(cloud_path=dataset_cloud_path)

    # Defer to the instantiation-time setting if the caller didn't specify whether to use
    # signed URLs.
    if use_signed_urls is None:
        use_signed_urls = self.use_signed_urls_for_output_datasets

    if use_signed_urls:
        self.output_manifest.use_signed_urls_for_datasets()

    logger.info("Finished uploading output datasets to %r.", upload_output_datasets_to)
181199

182200
def _calculate_strand_hashes(self, strands):
183201
"""Calculate the hashes of the strands specified in the HASH_FUNCTIONS constant.

tests/cloud/pub_sub/test_service.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -708,10 +708,11 @@ def test_child_messages_can_be_recorded_by_parent(self):
708708

709709
# Check that the child's messages have been recorded by the parent.
710710
self.assertEqual(parent.received_events[0]["event"]["kind"], "delivery_acknowledgement")
711-
self.assertEqual(parent.received_events[1]["event"]["kind"], "log_record")
712-
self.assertEqual(parent.received_events[2]["event"]["kind"], "log_record")
713-
self.assertEqual(parent.received_events[3]["event"]["kind"], "log_record")
714-
self.assertEqual(parent.received_events[4]["event"], {"kind": "result", "output_values": "Hello! It worked!"})
711+
712+
for i in range(1, 6):
713+
self.assertEqual(parent.received_events[i]["event"]["kind"], "log_record")
714+
715+
self.assertEqual(parent.received_events[6]["event"], {"kind": "result", "output_values": "Hello! It worked!"})
715716

716717
def test_child_exception_message_can_be_recorded_by_parent(self):
717718
"""Test that the parent can record exceptions raised by the child."""

0 commit comments

Comments (0)