Skip to content

Commit

Permalink
Merge pull request #86 from jvivian/fix-factor-analysis-page
Browse files Browse the repository at this point in the history
Rename repository before making public
  • Loading branch information
jvivian authored Jun 14, 2024
2 parents d870bf6 + 93eb7d9 commit 58907a1
Showing 1 changed file with 59 additions and 28 deletions.
87 changes: 59 additions & 28 deletions covid19_drdfm/streamlit/pages/1_Factor_Analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,26 @@ def normalize(df):
# TEST_DIR = Path('covid19_drdfm/data/example-output/')

# Parameter for results
def get_factors(res_dir):
    """Load ``factors.csv`` from a results directory and tidy its columns.

    Args:
        res_dir: Directory path object (must support the ``/`` operator)
            that contains ``factors.csv``.

    Returns:
        A DataFrame indexed by the first CSV column (index named ``Time``),
        with a ``Time`` column copied from the index, any column whose name
        contains ``"Time."`` dropped, and a leading ``"Factor_"`` prefix
        removed from the remaining column names.
    """
    prefix = "Factor_"
    df = pd.read_csv(res_dir / "factors.csv", index_col=0)
    df["Time"] = df.index
    df.index.name = "Time"
    df = df.drop(columns=[x for x in df.columns if "Time." in x])
    # str.lstrip("Factor_") strips any leading run of those *characters*
    # (e.g. "Factor_cases" -> "ses"), so remove the literal prefix instead.
    df.columns = [x[len(prefix):] if x.startswith(prefix) else x for x in df.columns]
    return df


res_dir = Path(st.text_input("Path to results", value=EX_PATH))
factor_path = res_dir / "factors.csv"
df = pd.read_csv(factor_path, index_col=0)
df["Time"] = df.index
df.index.name = "Time"
cols_to_drop = [x for x in df.columns if "Time." in x]
df = df.drop(columns=cols_to_drop)
df.columns = [x.lstrip("Factor_") for x in df.columns]
# st.dataframe(df)

# AnnData
# dfs = []
# for subdir in res_dir.iterdir():
# if not subdir.is_dir():
# continue
# dfs.append(pd.read_csv(res_dir / 'df.csv'))
# A pathlib.Path is always truthy (Path("") normalizes to Path(".")), so
# `if not res_dir` can never trigger. Check for the results file that is
# about to be read instead, and stop the script run with a warning when it
# is missing rather than failing inside get_factors.
if not (res_dir / "factors.csv").is_file():
    st.warning("Please provide and hit <ENTER>")
    st.stop()
df = get_factors(res_dir)

filter_list = ["Unnamed", "Time", "State"]
factor = st.sidebar.selectbox("Factor", [x for x in df.columns if x not in filter_list])
state = st.sidebar.selectbox("State", sorted(df.State.unique()))


with st.expander("State Factors"):
st.dataframe(df[df.State == state])

Expand All @@ -70,28 +68,61 @@ def normalize(df):
# factor_vars = [x for x in FACTORS_GROUPED[factor.split("_")[1]] if x in valid_cols] # and '_' in x]
# columns = [*factor_vars, "State", "Time"]

# Make df from res_dir
dfs = []
for subdir in res_dir.iterdir():
if not subdir.is_dir():
continue
path = subdir / "df.csv"
if not path.exists():
st.write(f"Skipping {path}, not found")
continue
sub = pd.read_csv(path, index_col=0)
sub["State"] = subdir.stem
dfs.append(sub)

new = pd.concat(dfs)

# Normalize original data for state / valid variables
ad = ann.read_h5ad(res_dir / "data.h5ad")
new = ad.to_df().reset_index()
new["State"] = ad.obs["State"].to_list()
new = normalize(new[new.State == state])
factor_map = ad.var["factor"].to_frame()
factor_set = factor_map["factor"].unique().to_list() + [x for x in df.columns if "Global" in x]
# st.dataframe(factor_map)
factor = st.sidebar.selectbox("Factor", factor_set)
# new = ad.to_df().reset_index()
# new["State"] = ad.obs["State"].to_list()
# new = normalize(new[new.State == state])

# Normalize factors and add to new dataframe
if st.sidebar.checkbox("Invert Factor"):
df[factor] = df[factor] * -1
df = normalize(df[df.State == state]).reset_index(drop=True)
# if st.sidebar.checkbox("Invert Factor"):
# df[factor] = df[factor] * -1
# df = normalize(df[df.State == state]).reset_index(drop=True)

df = df[df["State"] == state]
df = df[[factor]].join(new, on="Time")

# st.dataframe(df.head())
# st.dataframe(new.head())

# Coerce the Time columns to plain dates so both frames use a standardized format
df["Time"] = pd.to_datetime(df["Time"]).dt.date
new["Time"] = pd.to_datetime(new["Time"]).dt.date
df = df[[factor, "Time"]].merge(new, on="Time")
# df["Time"] = pd.to_datetime(df["Time"]).dt.date
# new["Time"] = pd.to_datetime(new["Time"]).dt.date
with st.expander("Graph Data"):
st.dataframe(df)
factor_cols = factor_map[factor_map["factor"] == factor]
if factor_cols.empty:
factor_cols = new.columns
else:
factor_cols = factor_cols.index.to_list()
factor_cols += [factor]
factor_cols = [x for x in factor_cols if x in df.columns]
st.write(factor_cols)
st.dataframe(df[factor_cols])

df = df[factor_cols].reset_index()

# Melt into format for plotting
melted_df = df.drop(columns="State").melt(id_vars=["Time"], value_name="value")
# melted_df = df.drop(columns="State").melt(id_vars=["Time"], value_name="value")
melted_df = df.melt(id_vars=["Time"], value_name="value")
melted_df["Label"] = [5 if x == factor else 1 for x in melted_df.variable]

# Plot
Expand Down

0 comments on commit 58907a1

Please sign in to comment.