5
5
import importlib .metadata
6
6
import json
7
7
import logging
8
+ import os
9
+ import time
8
10
import uuid
9
11
10
12
import google .api_core .exceptions
@@ -232,11 +234,12 @@ def answer(self, question, heartbeat_interval=120, timeout=30):
232
234
233
235
try :
234
236
self ._send_delivery_acknowledgment (** routing_metadata )
237
+ start_time = time .perf_counter ()
235
238
236
239
heartbeater = RepeatingTimer (
237
240
interval = heartbeat_interval ,
238
- function = self ._send_heartbeat ,
239
- kwargs = routing_metadata ,
241
+ function = self ._send_heartbeat_and_check_runtime ,
242
+ kwargs = { "start_time" : start_time , ** routing_metadata } ,
240
243
)
241
244
242
245
heartbeater .daemon = True
@@ -666,24 +669,29 @@ def _send_delivery_acknowledgment(
666
669
667
670
logger .info ("%r acknowledged receipt of question %r." , self , question_uuid )
668
671
669
- def _send_heartbeat (
672
+ def _send_heartbeat_and_check_runtime (
670
673
self ,
671
674
question_uuid ,
672
675
parent_question_uuid ,
673
676
originator_question_uuid ,
674
677
parent ,
675
678
originator ,
676
679
retry_count ,
680
+ start_time ,
681
+ runtime_timeout_warning_time = 3480 , # This is 58 minutes in seconds.
677
682
timeout = 30 ,
678
683
):
679
- """Send a heartbeat to the parent, indicating that the service is alive.
684
+ """Send a heartbeat to the parent, indicating that the service is alive. If it's running on Cloud Run and it's
685
+ been running for longer than the runtime timeout warning time, log a warning that it will be stopped soon.
680
686
681
687
:param str question_uuid: the UUID of the question this event relates to
682
688
:param str|None parent_question_uuid: the UUID of the question that triggered this question
683
689
:param str|None originator_question_uuid: the UUID of the question that triggered all ancestor questions of this question
684
690
:param str parent: the SRUID of the parent that asked the question this event is related to
685
691
:param str originator: the SRUID of the service revision that triggered all ancestor questions of this question
686
692
:param int retry_count: the retry count of the question (this is zero if it's the first attempt at the question)
693
+ :param int|float start_time: the `time.perf_counter` time that the analysis was started [s]
694
+ :param int|float runtime_timeout_warning_time: the amount of time after which to warn that the runtime timeout is approaching [s]
687
695
:param float timeout: time in seconds after which to give up sending
688
696
:return None:
689
697
"""
@@ -700,6 +708,12 @@ def _send_heartbeat(
700
708
timeout = timeout ,
701
709
)
702
710
711
+ if (
712
+ os .environ .get ("COMPUTE_PROVIDER" ) == "GOOGLE_CLOUD_RUN"
713
+ and time .perf_counter () - start_time > runtime_timeout_warning_time
714
+ ):
715
+ logger .warning ("This analysis will reach the maximum runtime and be stopped soon." )
716
+
703
717
logger .debug ("Heartbeat sent by %r." , self )
704
718
705
719
def _send_monitor_message (
0 commit comments