From a1042b73831273cc0b736538f1dc15dd58235bbe Mon Sep 17 00:00:00 2001 From: jvivian Date: Tue, 20 Feb 2024 23:24:22 -0800 Subject: [PATCH 1/2] Choose the start date on the Dashboard for running the model Fixes #27 --- coverage.xml | 2 +- covid19_drdfm/streamlit/Dashboard.py | 23 ++++++------------- .../streamlit/pages/1_Factor_Analysis.py | 5 +++- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/coverage.xml b/coverage.xml index 4a32fad..85b1816 100644 --- a/coverage.xml +++ b/coverage.xml @@ -1,5 +1,5 @@ - + diff --git a/covid19_drdfm/streamlit/Dashboard.py b/covid19_drdfm/streamlit/Dashboard.py index a2ccc6a..1037f67 100644 --- a/covid19_drdfm/streamlit/Dashboard.py +++ b/covid19_drdfm/streamlit/Dashboard.py @@ -1,3 +1,4 @@ +from datetime import datetime import json import time from pathlib import Path @@ -63,10 +64,12 @@ def get_data(): # State selections state_sel = st.multiselect("States", df.State.unique(), default=df.State.unique()) - c1, c2, c3 = st.columns([0.5, 0.25, 0.25]) + c1, c2, c3, c4 = st.columns([0.35, 0.25, 0.20, 0.20]) outdir = c1.text_input("Output Directory", value="./") - mult_sel = c2.slider("Global Multiplier", 0, 4, 2) - maxiter = c3.slider("Max EM Iterations", 1000, 20_000, 10_000, 100) + date_start = c2.date_input("Start Date", value=df.Time.min(), min_value=df.Time.min(), max_value=df.Time.max()) + mult_sel = c3.slider("Global Multiplier", 0, 4, 2) + maxiter = c4.slider("Max EM Iterations", 1000, 20_000, 10_000, 100) + df = df[df.Time > date_start.isoformat()] # Metrics lengths = [len(selectors[x]) for x in selectors] @@ -110,10 +113,7 @@ def get_data(): subdir / "filtered-factors.csv" for subdir in outdir.iterdir() if (subdir / "filtered-factors.csv").exists() ] dfs = [pd.read_csv(x) for x in filt_paths] -try: - filt_df = pd.concat([x for x in dfs if ~x.empty]).set_index("Time") -except ValueError: - filt_df = pd.DataFrame() +filt_df = pd.concat([x for x in dfs if ~x.empty]).set_index("Time") filt_df.to_csv(outdir / "filtered-factors.csv") st.dataframe(filt_df) @@ -122,13 +122,4 @@ def get_data(): minutes, seconds = divmod(rem, 60) st.write(f"Runtime: {int(hours):0>2}H:{int(minutes):0>2}M:{seconds:05.2f}S") -fail_path = outdir / "failed.txt" -if fail_path.exists(): - with open(fail_path) as f: - lines = f.readlines() - _, c1, c2, _ = st.columns([0.1, 0.2, 0.6, 0.1]) - c1.metric("Failures", value=len(lines)) - lines = "\n".join(lines) - c2.warning(f"\t\tFailures Detected\n\n{lines}") - st.balloons() diff --git a/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py b/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py index 3305d2b..14c1171 100644 --- a/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py +++ b/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py @@ -21,6 +21,7 @@ factor_path = path_to_results / "filtered-factors.csv" df = pd.read_csv(factor_path, index_col=0) df["Time"] = df.index +df.index.name = "Time" filter_list = ["Global", "Unnamed", "Time", "State"] factor = st.sidebar.selectbox("Factor", [x for x in df.columns if x not in filter_list]) @@ -40,7 +41,9 @@ columns = [*factor_vars, "State", "Time"] # Normalize original data for state / valid variables -new = normalize(raw.query("State == @state")[columns].iloc[1:]) # .reset_index(drop=True) +raw = raw.set_index("Time", drop=False) +raw = raw.loc[df.index, :] +new = normalize(raw.query("State == @state")[columns]) # .iloc[1:]) # .reset_index(drop=True) # Normalize factors and add to new dataframe df = normalize(df[df.State == state]).reset_index(drop=True) From 6f19075ba146ca624d679927ef6ab8cbb4ce113a Mon Sep 17 00:00:00 2001 From: jvivian Date: Tue, 20 Feb 2024 23:26:25 -0800 Subject: [PATCH 2/2] Update coverage --- coverage.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coverage.xml b/coverage.xml index 85b1816..728e050 100644 --- a/coverage.xml +++ b/coverage.xml @@ -1,5 +1,5 @@ - +