Skip to content

Commit

Permalink
Merge pull request #5 from climateintelligence/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
nilshempelmann authored Apr 6, 2024
2 parents d3a23ba + afe67f1 commit c626197
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 23 deletions.
9 changes: 6 additions & 3 deletions hawk/analysis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ def __init__(
self.pcmci_features_lags = list(range(0, self.pcmci_max_lag + 1))

self.baseline = None
self.plot_pcmci = None
self.plot_pcmci = {}
self.details_pcmci = None
self.plot_tefs = None
self.plot_tefs = {}
self.details_tefs = None
self.plot_tefs_wrapper = None
self.plot_tefs_wrapper = {}
self.details_tefs_wrapper = None

def run_baseline_analysis(self):
Expand Down Expand Up @@ -246,6 +246,7 @@ def run(self):
target_column_name=self.target_column_name,
datasets=self.datasets,
destination_path=self.workdir,
image_formats=["pdf", "png"],
)
if self.response:
self.response.update_status("Postprocessing TEFS", 90)
Expand All @@ -254,6 +255,7 @@ def run(self):
target_column_name=self.target_column_name,
datasets=self.datasets,
destination_path=self.workdir,
image_formats=["pdf", "png"],
)
if self.response:
self.response.update_status("Postprocessing TEFS Wrapper", 95)
Expand All @@ -262,4 +264,5 @@ def run(self):
target_column_name=self.target_column_name,
datasets=self.datasets,
destination_path=self.workdir,
image_formats=["pdf", "png"],
)
41 changes: 28 additions & 13 deletions hawk/analysis/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def run_postprocessing_pcmci(
target_column_name,
datasets,
destination_path,
image_formats=["pdf", "png"],
):
all_basin_variables = set()
results_table_pcmci = []
Expand Down Expand Up @@ -252,19 +253,26 @@ def run_postprocessing_pcmci(
scores_values=[scores, scores_lag, scores_lag_ar],
scores_labels=[r"$R^2$", r"$R^2$ (lag)", r"$R^2$ (lag + AR)"],
)
target_file_plot = os.path.join(destination_path, "algorithm_results", "pcmci", "feature_presence.pdf")
os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
plt.savefig(target_file_plot, bbox_inches="tight")

target_file_plots = {}
for image_format in image_formats:
target_file_plot = os.path.join(
destination_path, "algorithm_results", "pcmci", f"feature_presence.{image_format}"
)
os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
plt.savefig(target_file_plot, bbox_inches="tight")
target_file_plots[image_format] = target_file_plot
plt.close(fig)

return target_file_plot, target_file_results_details
return target_file_plots, target_file_results_details


def run_postprocessing_tefs(
results_tefs,
target_column_name,
datasets,
destination_path,
image_formats=["pdf", "png"],
):
all_basin_variables = set()
results_table_te = []
Expand Down Expand Up @@ -374,23 +382,25 @@ def run_postprocessing_tefs(
scores_values=[scores, scores_lag, scores_lag_ar],
scores_labels=[r"$R^2$", r"$R^2$ (lag)", r"$R^2$ (lag + AR)"],
)
target_file_plot = os.path.join(destination_path, "algorithm_results", "te", "feature_presence.pdf")
os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
plt.savefig(target_file_plot, bbox_inches="tight")
target_file_plots = {}
for image_format in image_formats:
target_file_plot = os.path.join(destination_path, "algorithm_results", "te", f"feature_presence.{image_format}")
os.makedirs(os.path.dirname(target_file_plot), exist_ok=True)
plt.savefig(target_file_plot, bbox_inches="tight")
target_file_plots[image_format] = target_file_plot
plt.close(fig)

return target_file_plot, target_file_results_details
return target_file_plots, target_file_results_details


def run_postprocessing_tefs_wrapper(
results_tefs,
target_column_name,
datasets,
destination_path,
image_formats=["pdf", "png"],
):
results_table_tefs_wrapper = []
target_file_train_test = os.path.join(destination_path, "tefs_as_wrapper", "wrapper.pdf")
# target_file_cv = os.path.join(constants.path_figures, "tefs_as_wrapper_cv", f"{basename}_wrapper_cv.pdf")

fig, ax = plt.subplots(figsize=(10, 5))

Expand Down Expand Up @@ -501,11 +511,16 @@ def run_postprocessing_tefs_wrapper(
ax.set_ylim(-0.1, 1.1)
ax.grid()

os.makedirs(os.path.dirname(target_file_train_test), exist_ok=True)
plt.savefig(target_file_train_test, bbox_inches="tight")
target_files_train_test = {}
for image_format in image_formats:
# target_file_cv = os.path.join(constants.path_figures, "tefs_as_wrapper_cv", f"{basename}_wrapper_cv.pdf")
target_file_train_test = os.path.join(destination_path, "tefs_as_wrapper", f"wrapper.{image_format}")
os.makedirs(os.path.dirname(target_file_train_test), exist_ok=True)
plt.savefig(target_file_train_test, bbox_inches="tight")
target_files_train_test[image_format] = target_file_train_test
plt.close(fig)

return target_file_train_test, target_file_results_details
return target_files_train_test, target_file_results_details

# # --------------------- Plot cross-validation version ---------------------
# fig, ax = plt.subplots(figsize=(10, 5))
Expand Down
39 changes: 33 additions & 6 deletions hawk/processes/wps_causal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# Module-level logger, bound to the "PYWPS" logger name.
LOGGER = logging.getLogger("PYWPS")

# Output formats (MIME type, file extension, encoding) referenced by the
# `supported_formats` of the process outputs declared in this module.
# PNG is used for the "*_preview" plot outputs, PDF for the plot outputs.
FORMAT_PNG = Format("image/png", extension=".png", encoding="base64")
# NOTE(review): PDF and pickle payloads are binary, yet they declare
# encoding="utf-8" while the PNG format uses "base64" -- confirm against the
# PyWPS `Format` documentation whether this matters for referenced outputs.
FORMAT_PDF = Format("application/pdf", extension=".pdf", encoding="utf-8")
FORMAT_PICKLE = Format("application/octet-stream", extension=".pkl", encoding="utf-8")


Expand Down Expand Up @@ -120,7 +121,14 @@ def __init__(self):
supported_formats=[FORMAT_PICKLE],
),
ComplexOutput(
"png_pcmci",
"plot_pcmci",
"Selected features by PCMCI",
abstract="The selected features by PCMCI.",
as_reference=True,
supported_formats=[FORMAT_PDF],
),
ComplexOutput(
"plot_pcmci_preview",
"Selected features by PCMCI",
abstract="The selected features by PCMCI.",
as_reference=True,
Expand All @@ -134,7 +142,14 @@ def __init__(self):
supported_formats=[FORMAT_PICKLE],
),
ComplexOutput(
"png_tefs",
"plot_tefs",
"Selected features by TEFS",
abstract="The selected features by TEFS.",
as_reference=True,
supported_formats=[FORMAT_PDF],
),
ComplexOutput(
"plot_tefs_preview",
"Selected features by TEFS",
abstract="The selected features by TEFS.",
as_reference=True,
Expand All @@ -148,7 +163,14 @@ def __init__(self):
supported_formats=[FORMAT_PICKLE],
),
ComplexOutput(
"png_tefs_wrapper",
"plot_tefs_wrapper",
"Wrapper scores by TEFS",
abstract="The wrapper scores evolution by TEFS.",
as_reference=True,
supported_formats=[FORMAT_PDF],
),
ComplexOutput(
"plot_tefs_wrapper_preview",
"Wrapper scores by TEFS",
abstract="The wrapper scores evolution by TEFS.",
as_reference=True,
Expand Down Expand Up @@ -226,13 +248,18 @@ def _handler(self, request, response):
causal_analysis.run()

response.outputs["pkl_baseline"].file = causal_analysis.baseline
response.outputs["png_pcmci"].file = causal_analysis.plot_pcmci
response.outputs["plot_pcmci"].file = causal_analysis.plot_pcmci["pdf"]
response.outputs["pkl_pcmci"].file = causal_analysis.details_pcmci
response.outputs["png_tefs"].file = causal_analysis.plot_tefs
response.outputs["plot_tefs"].file = causal_analysis.plot_tefs["pdf"]
response.outputs["pkl_tefs"].file = causal_analysis.details_tefs
response.outputs["png_tefs_wrapper"].file = causal_analysis.plot_tefs_wrapper
response.outputs["plot_tefs_wrapper"].file = causal_analysis.plot_tefs_wrapper["pdf"]
response.outputs["pkl_tefs_wrapper"].file = causal_analysis.details_tefs_wrapper

# Previews for the plots in png format
response.outputs["plot_pcmci_preview"].file = causal_analysis.plot_pcmci["png"]
response.outputs["plot_tefs_preview"].file = causal_analysis.plot_tefs["png"]
response.outputs["plot_tefs_wrapper_preview"].file = causal_analysis.plot_tefs_wrapper["png"]

response.update_status("Processing completed", 100)

return response
36 changes: 35 additions & 1 deletion tests/test_hawk.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@

"""Tests for `hawk` package."""

import pytest
import os

import pandas as pd
import pytest
from click.testing import CliRunner # noqa: F401

import hawk # noqa: F401
from hawk import cli # noqa: F401
from hawk.analysis import CausalAnalysis


@pytest.fixture
Expand All @@ -24,3 +27,34 @@ def test_content(response):
"""Sample pytest test function with the pytest fixture as an argument."""
# from bs4 import BeautifulSoup
# assert 'GitHub' in BeautifulSoup(response.content).title.string


def test_causal_analysis():
    """Smoke-test the full ``CausalAnalysis`` pipeline on the demo CSV files.

    The test passes as long as ``CausalAnalysis.run()`` completes without
    raising. All paths are relative, so the test must be started from the
    repository root.

    The working directory is removed afterwards even when the analysis
    fails, so a broken run does not leave stale artefacts behind for the
    next one.
    """
    # Local import keeps this function self-contained; shutil is only
    # needed for the cleanup step below.
    import shutil

    df_train = pd.read_csv("hawk/demo/Ticino_train.csv", header=0)
    df_test = pd.read_csv("hawk/demo/Ticino_test.csv", header=0)
    target_column_name = "target"
    pcmci_test_choice = "ParCorr"
    pcmci_max_lag = 0
    tefs_direction = "forward"
    tefs_use_contemporary_features = True
    tefs_max_lag_features = 1
    tefs_max_lag_target = 1
    workdir = os.path.join("tests", "output")

    try:
        causal_analysis = CausalAnalysis(
            df_train,
            df_test,
            target_column_name,
            pcmci_test_choice,
            pcmci_max_lag,
            tefs_direction,
            tefs_use_contemporary_features,
            tefs_max_lag_features,
            tefs_max_lag_target,
            workdir,
            response=None,
        )

        causal_analysis.run()
    finally:
        # Portable, shell-free replacement for `rm -r tests/output`.
        # ignore_errors=True keeps the cleanup best-effort: a missing
        # directory must not mask the real test outcome.
        shutil.rmtree(workdir, ignore_errors=True)

0 comments on commit c626197

Please sign in to comment.