Merge pull request #5 from climateintelligence/dev

climateintelligence · Apr 6, 2024 · c626197 · c626197
2 parents d3a23ba + afe67f1
commit c626197
Show file tree

Hide file tree

Showing 4 changed files with 102 additions and 23 deletions.
diff --git a/hawk/analysis/main.py b/hawk/analysis/main.py
@@ -70,11 +70,11 @@ def __init__(
         self.pcmci_features_lags = list(range(0, self.pcmci_max_lag + 1))
 
         self.baseline = None
-        self.plot_pcmci = None
+        self.plot_pcmci = {}
         self.details_pcmci = None
-        self.plot_tefs = None
+        self.plot_tefs = {}
         self.details_tefs = None
-        self.plot_tefs_wrapper = None
+        self.plot_tefs_wrapper = {}
         self.details_tefs_wrapper = None
 
     def run_baseline_analysis(self):
@@ -246,6 +246,7 @@ def run(self):
             target_column_name=self.target_column_name,
             datasets=self.datasets,
             destination_path=self.workdir,
+            image_formats=["pdf", "png"],
         )
         if self.response:
             self.response.update_status("Postprocessing TEFS", 90)
@@ -254,6 +255,7 @@ def run(self):
             target_column_name=self.target_column_name,
             datasets=self.datasets,
             destination_path=self.workdir,
+            image_formats=["pdf", "png"],
         )
         if self.response:
             self.response.update_status("Postprocessing TEFS Wrapper", 95)
@@ -262,4 +264,5 @@ def run(self):
             target_column_name=self.target_column_name,
             datasets=self.datasets,
             destination_path=self.workdir,
+            image_formats=["pdf", "png"],
         )
diff --git a/hawk/analysis/postprocessing.py b/hawk/analysis/postprocessing.py
@@ -86,6 +86,7 @@ def run_postprocessing_pcmci(
     target_column_name,
     datasets,
     destination_path,
+    image_formats=["pdf", "png"],
 ):
     all_basin_variables = set()
     results_table_pcmci = []
@@ -252,19 +253,26 @@ def run_postprocessing_pcmci(
         scores_values=[scores, scores_lag, scores_lag_ar],
         scores_labels=[r"$R^2$", r"$R^2$ (lag)", r"$R^2$ (lag + AR)"],
     )
-    target_file_plot = os.path.join(destination_path, "algorithm_results", "pcmci", "feature_presence.pdf")
-    os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
-    plt.savefig(target_file_plot, bbox_inches="tight")
+
+    target_file_plots = {}
+    for image_format in image_formats:
+        target_file_plot = os.path.join(
+            destination_path, "algorithm_results", "pcmci", f"feature_presence.{image_format}"
+        )
+        os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
+        plt.savefig(target_file_plot, bbox_inches="tight")
+        target_file_plots[image_format] = target_file_plot
     plt.close(fig)
 
-    return target_file_plot, target_file_results_details
+    return target_file_plots, target_file_results_details
 
 
 def run_postprocessing_tefs(
     results_tefs,
     target_column_name,
     datasets,
     destination_path,
+    image_formats=["pdf", "png"],
 ):
     all_basin_variables = set()
     results_table_te = []
@@ -374,23 +382,25 @@ def run_postprocessing_tefs(
         scores_values=[scores, scores_lag, scores_lag_ar],
         scores_labels=[r"$R^2$", r"$R^2$ (lag)", r"$R^2$ (lag + AR)"],
     )
-    target_file_plot = os.path.join(destination_path, "algorithm_results", "te", "feature_presence.pdf")
-    os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
-    plt.savefig(target_file_plot, bbox_inches="tight")
+    target_file_plots = {}
+    for image_format in image_formats:
+        target_file_plot = os.path.join(destination_path, "algorithm_results", "te", f"feature_presence.{image_format}")
+        os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
+        plt.savefig(target_file_plot, bbox_inches="tight")
+        target_file_plots[image_format] = target_file_plot
     plt.close(fig)
 
-    return target_file_plot, target_file_results_details
+    return target_file_plots, target_file_results_details
 
 
 def run_postprocessing_tefs_wrapper(
     results_tefs,
     target_column_name,
     datasets,
     destination_path,
+    image_formats=["pdf", "png"],
 ):
     results_table_tefs_wrapper = []
-    target_file_train_test = os.path.join(destination_path, "tefs_as_wrapper", "wrapper.pdf")
-    # target_file_cv = os.path.join(constants.path_figures, "tefs_as_wrapper_cv", f"{basename}_wrapper_cv.pdf")
 
     fig, ax = plt.subplots(figsize=(10, 5))
 
@@ -501,11 +511,16 @@ def run_postprocessing_tefs_wrapper(
     ax.set_ylim(-0.1, 1.1)
     ax.grid()
 
-    os.makedirs(os.path.dirname(target_file_train_test), exist_ok=True)
-    plt.savefig(target_file_train_test, bbox_inches="tight")
+    target_files_train_test = {}
+    for image_format in image_formats:
+        # target_file_cv = os.path.join(constants.path_figures, "tefs_as_wrapper_cv", f"{basename}_wrapper_cv.pdf")
+        target_file_train_test = os.path.join(destination_path, "tefs_as_wrapper", f"wrapper.{image_format}")
+        os.makedirs(os.path.dirname(target_file_train_test), exist_ok=True)
+        plt.savefig(target_file_train_test, bbox_inches="tight")
+        target_files_train_test[image_format] = target_file_train_test
     plt.close(fig)
 
-    return target_file_train_test, target_file_results_details
+    return target_files_train_test, target_file_results_details
 
     # # --------------------- Plot cross-validation version ---------------------
     # fig, ax = plt.subplots(figsize=(10, 5))

diff --git a/hawk/processes/wps_causal.py b/hawk/processes/wps_causal.py
@@ -9,6 +9,7 @@
 LOGGER = logging.getLogger("PYWPS")
 
 FORMAT_PNG = Format("image/png", extension=".png", encoding="base64")
+FORMAT_PDF = Format("application/pdf", extension=".pdf", encoding="utf-8")
 FORMAT_PICKLE = Format("application/octet-stream", extension=".pkl", encoding="utf-8")
 
 
@@ -120,7 +121,14 @@ def __init__(self):
                 supported_formats=[FORMAT_PICKLE],
             ),
             ComplexOutput(
-                "png_pcmci",
+                "plot_pcmci",
+                "Selected features by PCMCI",
+                abstract="The selected features by PCMCI.",
+                as_reference=True,
+                supported_formats=[FORMAT_PDF],
+            ),
+            ComplexOutput(
+                "plot_pcmci_preview",
                 "Selected features by PCMCI",
                 abstract="The selected features by PCMCI.",
                 as_reference=True,
@@ -134,7 +142,14 @@ def __init__(self):
                 supported_formats=[FORMAT_PICKLE],
             ),
             ComplexOutput(
-                "png_tefs",
+                "plot_tefs",
+                "Selected features by TEFS",
+                abstract="The selected features by TEFS.",
+                as_reference=True,
+                supported_formats=[FORMAT_PDF],
+            ),
+            ComplexOutput(
+                "plot_tefs_preview",
                 "Selected features by TEFS",
                 abstract="The selected features by TEFS.",
                 as_reference=True,
@@ -148,7 +163,14 @@ def __init__(self):
                 supported_formats=[FORMAT_PICKLE],
             ),
             ComplexOutput(
-                "png_tefs_wrapper",
+                "plot_tefs_wrapper",
+                "Wrapper scores by TEFS",
+                abstract="The wrapper scores evolution by TEFS.",
+                as_reference=True,
+                supported_formats=[FORMAT_PDF],
+            ),
+            ComplexOutput(
+                "plot_tefs_wrapper_preview",
                 "Wrapper scores by TEFS",
                 abstract="The wrapper scores evolution by TEFS.",
                 as_reference=True,
@@ -226,13 +248,18 @@ def _handler(self, request, response):
         causal_analysis.run()
 
         response.outputs["pkl_baseline"].file = causal_analysis.baseline
-        response.outputs["png_pcmci"].file = causal_analysis.plot_pcmci
+        response.outputs["plot_pcmci"].file = causal_analysis.plot_pcmci["pdf"]
         response.outputs["pkl_pcmci"].file = causal_analysis.details_pcmci
-        response.outputs["png_tefs"].file = causal_analysis.plot_tefs
+        response.outputs["plot_tefs"].file = causal_analysis.plot_tefs["pdf"]
         response.outputs["pkl_tefs"].file = causal_analysis.details_tefs
-        response.outputs["png_tefs_wrapper"].file = causal_analysis.plot_tefs_wrapper
+        response.outputs["plot_tefs_wrapper"].file = causal_analysis.plot_tefs_wrapper["pdf"]
         response.outputs["pkl_tefs_wrapper"].file = causal_analysis.details_tefs_wrapper
 
+        # Previews for the plots in png format
+        response.outputs["plot_pcmci_preview"].file = causal_analysis.plot_pcmci["png"]
+        response.outputs["plot_tefs_preview"].file = causal_analysis.plot_tefs["png"]
+        response.outputs["plot_tefs_wrapper_preview"].file = causal_analysis.plot_tefs_wrapper["png"]
+
         response.update_status("Processing completed", 100)
 
         return response
diff --git a/tests/test_hawk.py b/tests/test_hawk.py
@@ -2,12 +2,15 @@
 
 """Tests for `hawk` package."""
 
-import pytest
+import os
 
+import pandas as pd
+import pytest
 from click.testing import CliRunner  # noqa: F401
 
 import hawk  # noqa: F401
 from hawk import cli  # noqa: F401
+from hawk.analysis import CausalAnalysis
 
 
 @pytest.fixture
@@ -24,3 +27,34 @@ def test_content(response):
     """Sample pytest test function with the pytest fixture as an argument."""
     # from bs4 import BeautifulSoup
     # assert 'GitHub' in BeautifulSoup(response.content).title.string
+
+
+def test_causal_analysis():
+    """Smoke-test the full ``CausalAnalysis`` pipeline on the bundled Ticino demo data.
+
+    Passes only if ``run()`` finishes without raising; no output values are checked.
+    The working directory is removed afterwards even when the run fails.
+    """
+    import shutil  # function-scope import: only needed for the cleanup below
+
+    demo_dir = os.path.join("hawk", "demo")
+    workdir = os.path.join("tests", "output")
+    causal_analysis = CausalAnalysis(
+        pd.read_csv(os.path.join(demo_dir, "Ticino_train.csv"), header=0),
+        pd.read_csv(os.path.join(demo_dir, "Ticino_test.csv"), header=0),
+        "target",  # target_column_name
+        "ParCorr",  # pcmci_test_choice
+        0,  # pcmci_max_lag
+        "forward",  # tefs_direction
+        True,  # tefs_use_contemporary_features
+        1,  # tefs_max_lag_features
+        1,  # tefs_max_lag_target
+        workdir,
+        response=None,
+    )
+
+    try:
+        causal_analysis.run()
+    finally:
+        # Portable, shell-free cleanup that also runs when run() raises.
+        shutil.rmtree(workdir, ignore_errors=True)