From 93eb7d9efb7fe2719a3b95919585b2ff4bb9c98c Mon Sep 17 00:00:00 2001 From: jvivian Date: Thu, 13 Jun 2024 19:18:10 -0700 Subject: [PATCH] Rename repository before making public Fixes #73 --- .../streamlit/pages/1_Factor_Analysis.py | 87 +++++++++++++------ 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py b/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py index 1016899..5f9becf 100644 --- a/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py +++ b/covid19_drdfm/streamlit/pages/1_Factor_Analysis.py @@ -36,28 +36,26 @@ def normalize(df): # TEST_DIR = Path('covid19_drdfm/data/example-output/') # Parameter for results +def get_factors(res_dir): + factor_path = res_dir / "factors.csv" + df = pd.read_csv(factor_path, index_col=0) + df["Time"] = df.index + df.index.name = "Time" + cols_to_drop = [x for x in df.columns if "Time." in x] + df = df.drop(columns=cols_to_drop) + df.columns = [x.lstrip("Factor_") for x in df.columns] + return df + + res_dir = Path(st.text_input("Path to results", value=EX_PATH)) -factor_path = res_dir / "factors.csv" -df = pd.read_csv(factor_path, index_col=0) -df["Time"] = df.index -df.index.name = "Time" -cols_to_drop = [x for x in df.columns if "Time." in x] -df = df.drop(columns=cols_to_drop) -df.columns = [x.lstrip("Factor_") for x in df.columns] -# st.dataframe(df) - -# AnnData -# dfs = [] -# for subdir in res_dir.iterdir(): -# if not subdir.is_dir(): -# continue -# dfs.append(pd.read_csv(res_dir / 'df.csv')) +if not res_dir: + st.warning("Please provide and hit ") + st.stop() +df = get_factors(res_dir) filter_list = ["Unnamed", "Time", "State"] -factor = st.sidebar.selectbox("Factor", [x for x in df.columns if x not in filter_list]) state = st.sidebar.selectbox("State", sorted(df.State.unique())) - with st.expander("State Factors"): st.dataframe(df[df.State == state]) @@ -70,28 +68,61 @@ def normalize(df): # factor_vars = [x for x in FACTORS_GROUPED[factor.split("_")[1]] if x in valid_cols] # and '_' in x] # columns = [*factor_vars, "State", "Time"] +# Make df from res_dir +dfs = [] +for subdir in res_dir.iterdir(): + if not subdir.is_dir(): + continue + path = subdir / "df.csv" + if not path.exists(): + st.write(f"Skipping {path}, not found") + continue + sub = pd.read_csv(path, index_col=0) + sub["State"] = subdir.stem + dfs.append(sub) + +new = pd.concat(dfs) # Normalize original data for state / valid variables ad = ann.read_h5ad(res_dir / "data.h5ad") -new = ad.to_df().reset_index() -new["State"] = ad.obs["State"].to_list() -new = normalize(new[new.State == state]) +factor_map = ad.var["factor"].to_frame() +factor_set = factor_map["factor"].unique().to_list() + [x for x in df.columns if "Global" in x] +# st.dataframe(factor_map) +factor = st.sidebar.selectbox("Factor", factor_set) +# new = ad.to_df().reset_index() +# new["State"] = ad.obs["State"].to_list() +# new = normalize(new[new.State == state]) # Normalize factors and add to new dataframe -if st.sidebar.checkbox("Invert Factor"): - df[factor] = df[factor] * -1 -df = normalize(df[df.State == state]).reset_index(drop=True) +# if st.sidebar.checkbox("Invert Factor"): +# df[factor] = df[factor] * -1 +# df = normalize(df[df.State == state]).reset_index(drop=True) + +df = df[df["State"] == state] +df = df[[factor]].join(new, on="Time") +# st.dataframe(df.head()) +# st.dataframe(new.head()) # Coerce time bullshit to get dates standardized -df["Time"] = pd.to_datetime(df["Time"]).dt.date -new["Time"] = pd.to_datetime(new["Time"]).dt.date -df = df[[factor, "Time"]].merge(new, on="Time") +# df["Time"] = pd.to_datetime(df["Time"]).dt.date +# new["Time"] = pd.to_datetime(new["Time"]).dt.date with st.expander("Graph Data"): - st.dataframe(df) + factor_cols = factor_map[factor_map["factor"] == factor] + if factor_cols.empty: + factor_cols = new.columns + else: + factor_cols = factor_cols.index.to_list() + factor_cols += [factor] + factor_cols = [x for x in factor_cols if x in df.columns] + st.write(factor_cols) + st.dataframe(df[factor_cols]) + +df = df[factor_cols].reset_index() # Melt into format for plotting -melted_df = df.drop(columns="State").melt(id_vars=["Time"], value_name="value") +# melted_df = df.drop(columns="State").melt(id_vars=["Time"], value_name="value") +melted_df = df.melt(id_vars=["Time"], value_name="value") melted_df["Label"] = [5 if x == factor else 1 for x in melted_df.variable] # Plot