From a1042b73831273cc0b736538f1dc15dd58235bbe Mon Sep 17 00:00:00 2001
From: jvivian <jtvivian@gmail.com>
Date: Tue, 20 Feb 2024 23:24:22 -0800
Subject: [PATCH 1/2] Choose the start date on the Dashboard for running the
 model. Fixes #27

---
 coverage.xml                                  |  2 +-
 covid19_drdfm/streamlit/Dashboard.py          | 23 ++++++-------------
 .../streamlit/pages/1_Factor_Analysis.py      |  5 +++-
 3 files changed, 12 insertions(+), 18 deletions(-)
diff --git a/coverage.xml b/coverage.xml
index 4a32fad..85b1816 100644
--- a/coverage.xml
+++ b/coverage.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" ?>
-<coverage version="7.3.1" timestamp="1708495065791" lines-valid="147" lines-covered="145" line-rate="0.9864" branches-valid="54" branches-covered="51" branch-rate="0.9444" complexity="0">
+<coverage version="7.3.1" timestamp="1708500234310" lines-valid="147" lines-covered="145" line-rate="0.9864" branches-valid="54" branches-covered="51" branch-rate="0.9444" complexity="0">
 	<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.3.1 -->
 	<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
 	<sources>
diff --git a/covid19_drdfm/streamlit/Dashboard.py b/covid19_drdfm/streamlit/Dashboard.py
index a2ccc6a..1037f67 100644
--- a/covid19_drdfm/streamlit/Dashboard.py
+++ b/covid19_drdfm/streamlit/Dashboard.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 import json
 import time
 from pathlib import Path
@@ -63,10 +64,12 @@ def get_data():
 
     # State selections
     state_sel = st.multiselect("States", df.State.unique(), default=df.State.unique())
-    c1, c2, c3 = st.columns([0.5, 0.25, 0.25])
+    c1, c2, c3, c4 = st.columns([0.35, 0.25, 0.20, 0.20])
     outdir = c1.text_input("Output Directory", value="./")
-    mult_sel = c2.slider("Global Multiplier", 0, 4, 2)
-    maxiter = c3.slider("Max EM Iterations", 1000, 20_000, 10_000, 100)
+    date_start = c2.date_input("Start Date", value=df.Time.min(), min_value=df.Time.min(), max_value=df.Time.max())
+    mult_sel = c3.slider("Global Multiplier", 0, 4, 2)
+    maxiter = c4.slider("Max EM Iterations", 1000, 20_000, 10_000, 100)
+    df = df[df.Time > date_start.isoformat()]
 
     # Metrics
     lengths = [len(selectors[x]) for x in selectors]
@@ -110,10 +113,7 @@ def get_data():
     subdir / "filtered-factors.csv" for subdir in outdir.iterdir() if (subdir / "filtered-factors.csv").exists()
 ]
 dfs = [pd.read_csv(x) for x in filt_paths]
-try:
-    filt_df = pd.concat([x for x in dfs if ~x.empty]).set_index("Time")
-except ValueError:
-    filt_df = pd.DataFrame()
+filt_df = pd.concat([x for x in dfs if ~x.empty]).set_index("Time")
 filt_df.to_csv(outdir / "filtered-factors.csv")
 st.dataframe(filt_df)
 
@@ -122,13 +122,4 @@ def get_data():
 minutes, seconds = divmod(rem, 60)
 st.write(f"Runtime: {int(hours):0>2}H:{int(minutes):0>2}M:{seconds:05.2f}S")
 
-fail_path = outdir / "failed.txt"
-if fail_path.exists():
-    with open(fail_path) as f:
-        lines = f.readlines()
-    _, c1, c2, _ = st.columns([0.1, 0.2, 0.6, 0.1])
-    c1.metric("Failures", value=len(lines))
-    lines = "\n".join(lines)
-    c2.warning(f"\t\tFailures Detected\n\n{lines}")
-
 st.balloons()
diff --git a/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py b/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py
index 3305d2b..14c1171 100644
--- a/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py
+++ b/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py
@@ -21,6 +21,7 @@
 factor_path = path_to_results / "filtered-factors.csv"
 df = pd.read_csv(factor_path, index_col=0)
 df["Time"] = df.index
+df.index.name = "Time"
 
 filter_list = ["Global", "Unnamed", "Time", "State"]
 factor = st.sidebar.selectbox("Factor", [x for x in df.columns if x not in filter_list])
@@ -40,7 +41,9 @@
 columns = [*factor_vars, "State", "Time"]
 
 # Normalize original data for state / valid variables
-new = normalize(raw.query("State == @state")[columns].iloc[1:])  # .reset_index(drop=True)
+raw = raw.set_index("Time", drop=False)
+raw = raw.loc[df.index, :]
+new = normalize(raw.query("State == @state")[columns])  # .iloc[1:])  # .reset_index(drop=True)
 
 # Normalize factors and add to new dataframe
 df = normalize(df[df.State == state]).reset_index(drop=True)

From 6f19075ba146ca624d679927ef6ab8cbb4ce113a Mon Sep 17 00:00:00 2001
From: jvivian <jtvivian@gmail.com>
Date: Tue, 20 Feb 2024 23:26:25 -0800
Subject: [PATCH 2/2] Update coverage

---
 coverage.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/coverage.xml b/coverage.xml
index 85b1816..728e050 100644
--- a/coverage.xml
+++ b/coverage.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" ?>
-<coverage version="7.3.1" timestamp="1708500234310" lines-valid="147" lines-covered="145" line-rate="0.9864" branches-valid="54" branches-covered="51" branch-rate="0.9444" complexity="0">
+<coverage version="7.3.1" timestamp="1708500367269" lines-valid="147" lines-covered="145" line-rate="0.9864" branches-valid="54" branches-covered="51" branch-rate="0.9444" complexity="0">
 	<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.3.1 -->
 	<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
 	<sources>