Time limit for resource_allocation_plot

Gossty · Gossty · commit 8e148b6c9442 · 2025-02-25T11:03:12.000-08:00
1. Added a time limit for resource_allocation_plot using the signal library.
2. Don’t show failures in the legend if there are no failures.
3. Added more robust print statements with timestamps.
4. Only populate commands to qiita_pet when plots were created.
diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py
@@ -36,6 +36,8 @@
 from hashlib import md5
 from re import sub
 from json import loads, dump, dumps
+import signal
+import traceback
 
 from qiita_db.util import create_nested_path, retrieve_resource_data
 from qiita_db.util import resource_allocation_plot
@@ -555,7 +557,8 @@ def generate_plugin_releases():
         f(redis_key, v)
 
 
-def update_resource_allocation_redis(active=True, verbose=False):
+def update_resource_allocation_redis(active=True, verbose=False,
+                                     time_limit=300):
     """Updates redis with plots and information about current software.
 
     Parameters
@@ -564,7 +567,11 @@ def update_resource_allocation_redis(active=True, verbose=False):
         Defaults to True. Should only be False when testing.
 
     verbose: boolean, optional
-        Defaults to False. Prints status on what function
+        Defaults to False. Prints status on what function is running.
+
+    time_limit: integer, optional
+        Defaults to 300 (5 minutes). Time limit, in seconds, for each call to
+        the resource_allocation_plot function.
 
     """
     time = datetime.now().strftime('%m-%d-%y')
@@ -592,24 +599,24 @@ def update_resource_allocation_redis(active=True, verbose=False):
             cmd_name = command.name
             scommands[sname][sversion][cmd_name] = col_names
 
-    redis_key = 'resources:commands'
-    r_client.set(redis_key, str(scommands))
-
+    # software commands for which resource allocations were successfully
+    # calculated
+    scommands_allocation = {}
     for sname, versions in scommands.items():
         for version, commands in versions.items():
             for cname, col_names in commands.items():
                 df = retrieve_resource_data(cname, sname, version, COLUMNS)
                 if verbose:
-                    print(("Retrieving allocation resources for " +
-                           f" software: {sname}" +
-                           f" version: {version}" +
-                           f" command: {cname}"))
+                    print(("\nRetrieving allocation resources for:\n" +
+                           f"  software: {sname}\n" +
+                           f"  version: {version}\n" +
+                           f"  command: {cname}"))
                 if len(df) == 0:
                     if verbose:
-                        print(("No allocation resources available for" +
+                        print(("\nNo allocation resources available for" +
                                f" software: {sname}" +
                                f" version: {version}" +
-                               f" command: {cname}"))
+                               f" command: {cname}\n"))
                     continue
                 # column_name_str looks like col1*col2*col3, etc
                 for col_name in col_names:
@@ -624,16 +631,38 @@ def update_resource_allocation_redis(active=True, verbose=False):
                         else:
                             new_column *= df_copy[curr_column]
                     if verbose:
-                        print(("Building resource allocation plot for " +
-                               f" software: {sname}" +
-                               f" version: {version}" +
-                               f" command: {cname}" +
-                               f" column name: {col_name}"))
+                        print(
+                            ("\nBuilding resource allocation plot for:\n" +
+                             f"  software: {sname}\n" +
+                             f"  version: {version}\n" +
+                             f"  command: {cname}\n" +
+                             f"  column name: {col_name}\n" +
+                             f"  {datetime.now().strftime('%b %d %H:%M:%S')}"))
+
+                    def timeout_handler(signum, frame):
+                        raise TimeoutError((
+                            "\nresource_allocation_plot " +
+                            "execution exceeded time limit." +
+                            "For:\n"
+                            f"  software: {sname}\n" +
+                            f"  version: {version}\n" +
+                            f"  command: {cname}\n" +
+                            f"  column name: {col_name}\n" +
+                            f"  {datetime.now().strftime('%b %d %H:%M:%S')}"))
+
+                    signal.signal(signal.SIGALRM, timeout_handler)
+                    signal.alarm(time_limit)
+                    try:
+                        fig, axs = resource_allocation_plot(df_copy,
+                                                            col_name,
+                                                            new_column,
+                                                            verbose=verbose)
+                        signal.alarm(0)
+                    except TimeoutError:
+                        print("Timeout reached!")
+                        traceback.print_exc()
+                        continue
 
-                    fig, axs = resource_allocation_plot(df_copy,
-                                                        col_name,
-                                                        new_column,
-                                                        verbose=verbose)
                     titles = [0, 0]
                     images = [0, 0]
 
@@ -685,14 +714,28 @@ def update_resource_allocation_redis(active=True, verbose=False):
                         ("title_time", titles[1], r_client.set)
                     ]
                     if verbose:
-                        print(("Saving resource allocation image for " +
-                               f" software: {sname}" +
-                               f" version: {version}" +
-                               f" command: {cname}" +
-                               f" column name: {col_name}"))
+                        print(
+                            ("Saving resource allocation image for\n" +
+                             f"  software: {sname}\n" +
+                             f"  version: {version}\n" +
+                             f"  command: {cname}\n" +
+                             f"  column name: {col_name}\n" +
+                             f"  {datetime.now().strftime('%b %d %H:%M:%S')}"))
 
                     for k, v, f in values:
                         redis_key = 'resources$#%s$#%s$#%s$#%s:%s' % (
                                     cname, sname, version, col_name, k)
                         r_client.delete(redis_key)
                         f(redis_key, v)
+
+                    if sname not in scommands_allocation:
+                        scommands_allocation[sname] = {}
+                    if version not in scommands_allocation[sname]:
+                        scommands_allocation[sname][version] = {}
+                    if cname not in scommands_allocation[sname][version]:
+                        scommands_allocation[sname][version][cname] = []
+                    scommands_allocation[sname][version][cname].append(
+                        col_name)
+
+    redis_key = 'resources:commands'
+    r_client.set(redis_key, str(scommands_allocation))
diff --git a/qiita_db/util.py b/qiita_db/util.py
@@ -2527,7 +2527,7 @@ def _resource_allocation_plot_helper(
         print(f"\t\tFitting best model for {curr}; column {col_name}")
     # 50 - number of maximum iterations, 3 - number of failures we tolerate
     best_model_name, best_model, options = _resource_allocation_calculate(
-        df, x_data, y_data, models, curr, col_name, 50, 3)
+        df, x_data, y_data, models, curr, col_name, 50, 3, verbose)
     if verbose:
         print(
             f"\t\tSuccessfully chose best model for {curr}; column {col_name}")
@@ -2553,8 +2553,9 @@ def _resource_allocation_plot_helper(
     success_df, failures_df = _resource_allocation_success_failures(
         df, k, a, b, best_model, col_name, curr)
     failures = failures_df.shape[0]
-    ax.scatter(failures_df[col_name], failures_df[curr], color='red', s=3,
-               label="failures")
+    if failures != 0:
+        ax.scatter(failures_df[col_name], failures_df[curr], color='red', s=3,
+                   label="failures")
 
     success_df['node_name'].fillna('unknown', inplace=True)
 
@@ -2580,7 +2581,7 @@ def _resource_allocation_plot_helper(
 
 
 def _resource_allocation_calculate(
-        df, x, y, models, type_, col_name, depth, tolerance):
+        df, x, y, models, type_, col_name, depth, tolerance, verbose):
     """Helper function for resource allocation plot. Calculates best_model and
     best_result given the models list and x,y data.
 
@@ -2621,6 +2622,11 @@ def _resource_allocation_calculate(
     best_failures = np.inf
     best_max = np.inf
     for model_name, model in models.items():
+        if verbose:
+            print(
+                f"\t\t\tCalculating {model_name} for {type_}; "
+                f"{col_name} {datetime.now().strftime('%b %d %H:%M:%S')}"
+            )
         model_equation = model['equation']
         # start values for binary search, where sl is left, sr is right
         # penalty weight must be positive & non-zero, hence, sl >= 1.
@@ -2763,9 +2769,9 @@ def _resource_allocation_success_failures(df, k, a, b, model, col_name, type_):
     """
 
     x_plot = np.array(df[col_name])
-    df[f'c{type_}'] = model(x_plot, k, a, b)
-    success_df = df[df[type_] <= df[f'c{type_}']].copy()
-    failures_df = df[df[type_] > df[f'c{type_}']].copy()
+    y_plot = model(x_plot, k, a, b)
+    success_df = df[df[type_] <= y_plot].copy()
+    failures_df = df[df[type_] > y_plot].copy()
     return (success_df, failures_df)