Skip to content

Commit

Permalink
Merge pull request #6 from jvivian/fix-processing-bugs-after-file-update
Browse files Browse the repository at this point in the history
Fix processing bugs after file update
  • Loading branch information
AaronCooke2718 authored Dec 19, 2023
2 parents 9a6c9c9 + 0591e68 commit 11394e1
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 96 deletions.
Binary file modified .coverage
Binary file not shown.
155 changes: 79 additions & 76 deletions coverage.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" ?>
<coverage version="7.3.1" timestamp="1701986374806" lines-valid="162" lines-covered="144" line-rate="0.8889" branches-valid="54" branches-covered="49" branch-rate="0.9074" complexity="0">
<coverage version="7.3.1" timestamp="1703026094115" lines-valid="165" lines-covered="147" line-rate="0.8909" branches-valid="54" branches-covered="49" branch-rate="0.9074" complexity="0">
<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.3.1 -->
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
<sources>
<source>/Users/jvivian/Library/CloudStorage/[email protected]/My Drive/projects/covid19-drDFM/covid19_drdfm</source>
<source>/home/jvivian/covid19-drDFM/covid19_drdfm</source>
</sources>
<packages>
<package name="." line-rate="0.8889" branch-rate="0.9074" complexity="0">
<package name="." line-rate="0.8909" branch-rate="0.9074" complexity="0">
<classes>
<class name="cli.py" filename="cli.py" complexity="0" line-rate="1" branch-rate="1">
<methods/>
Expand Down Expand Up @@ -42,7 +42,7 @@
<line number="147" hits="1"/>
</lines>
</class>
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="0.9118" branch-rate="0.7917">
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="0.9143" branch-rate="0.7917">
<methods/>
<lines>
<line number="7" hits="1"/>
Expand All @@ -52,67 +52,69 @@
<line number="12" hits="1"/>
<line number="13" hits="1"/>
<line number="15" hits="1"/>
<line number="18" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="19" hits="1"/>
<line number="16" hits="1"/>
<line number="19" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="20" hits="1"/>
<line number="21" hits="1"/>
<line number="25" hits="1"/>
<line number="27" hits="1"/>
<line number="30" hits="1"/>
<line number="40" hits="1"/>
<line number="42" hits="1"/>
<line number="43" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="44" hits="1"/>
<line number="22" hits="1"/>
<line number="26" hits="1"/>
<line number="28" hits="1"/>
<line number="31" hits="1"/>
<line number="41" hits="1"/>
<line number="43" hits="1"/>
<line number="45" hits="1"/>
<line number="46" hits="1"/>
<line number="47" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="48" hits="1"/>
<line number="49" hits="1"/>
<line number="58" hits="1"/>
<line number="59" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="60" hits="1"/>
<line number="61" hits="1"/>
<line number="62" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="63" hits="1"/>
<line number="50" hits="1"/>
<line number="53" hits="1"/>
<line number="62" hits="1"/>
<line number="63" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="64" hits="1"/>
<line number="65" hits="1"/>
<line number="66" hits="1"/>
<line number="66" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="67" hits="1"/>
<line number="69" hits="1"/>
<line number="81" hits="1"/>
<line number="82" hits="1"/>
<line number="84" hits="1"/>
<line number="70" hits="1"/>
<line number="73" hits="1"/>
<line number="85" hits="1"/>
<line number="86" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="88" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="89"/>
<line number="89" hits="0"/>
<line number="90" hits="0"/>
<line number="92" hits="1"/>
<line number="93" hits="1"/>
<line number="94" hits="1"/>
<line number="95" hits="1"/>
<line number="96" hits="0"/>
<line number="97" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="98,99"/>
<line number="98" hits="0"/>
<line number="99" hits="0"/>
<line number="101" hits="1"/>
<line number="102" hits="1"/>
<line number="86" hits="1"/>
<line number="88" hits="1"/>
<line number="89" hits="1"/>
<line number="90" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="92" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="93"/>
<line number="93" hits="0"/>
<line number="94" hits="0"/>
<line number="96" hits="1"/>
<line number="97" hits="1"/>
<line number="98" hits="1"/>
<line number="99" hits="1"/>
<line number="100" hits="0"/>
<line number="101" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="102,103"/>
<line number="102" hits="0"/>
<line number="103" hits="0"/>
<line number="105" hits="1"/>
<line number="113" hits="1"/>
<line number="114" hits="1"/>
<line number="115" hits="1"/>
<line number="116" hits="1"/>
<line number="106" hits="1"/>
<line number="109" hits="1"/>
<line number="117" hits="1"/>
<line number="118" hits="1"/>
<line number="119" hits="1"/>
<line number="120" hits="1"/>
<line number="121" hits="1"/>
<line number="131" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="136"/>
<line number="132" hits="1"/>
<line number="133" hits="1"/>
<line number="135" hits="1"/>
<line number="136" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="122" hits="1"/>
<line number="125" hits="1"/>
<line number="135" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="140"/>
<line number="136" hits="1"/>
<line number="137" hits="1"/>
<line number="138" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="139" hits="1"/>
<line number="140" hits="1"/>
<line number="141" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="exit"/>
<line number="140" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="141" hits="1"/>
<line number="142" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="143" hits="1"/>
<line number="144" hits="1"/>
<line number="145" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="exit"/>
<line number="146" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="147" hits="1"/>
</lines>
</class>
<class name="processing.py" filename="processing.py" complexity="0" line-rate="1" branch-rate="1">
Expand All @@ -134,48 +136,49 @@
<line number="33" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="34" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="35" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="53" hits="1"/>
<line number="55" hits="1"/>
<line number="56" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="57" hits="1"/>
<line number="58" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="59" hits="1"/>
<line number="60" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="61" hits="1"/>
<line number="62" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="63" hits="1"/>
<line number="65" hits="1"/>
<line number="68" hits="1"/>
<line number="74" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="75" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="78" hits="1"/>
<line number="87" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="98" hits="1"/>
<line number="107" hits="1"/>
<line number="66" hits="1"/>
<line number="72" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="73" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="76" hits="1"/>
<line number="85" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="96" hits="1"/>
<line number="105" hits="1"/>
<line number="106" hits="1"/>
<line number="107" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="108" hits="1"/>
<line number="109" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="109" hits="1"/>
<line number="110" hits="1"/>
<line number="111" hits="1"/>
<line number="111" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="112" hits="1"/>
<line number="113" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="114" hits="1"/>
<line number="115" hits="1"/>
<line number="118" hits="1"/>
<line number="127" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="128" hits="1"/>
<line number="129" hits="1"/>
<line number="132" hits="1"/>
<line number="141" hits="1"/>
<line number="144" hits="1"/>
<line number="113" hits="1"/>
<line number="116" hits="1"/>
<line number="125" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="126" hits="1"/>
<line number="127" hits="1"/>
<line number="130" hits="1"/>
<line number="139" hits="1"/>
<line number="142" hits="1"/>
<line number="155" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="157" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="159" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="161" hits="1"/>
<line number="162" hits="1"/>
<line number="165" hits="1"/>
<line number="159" hits="1"/>
<line number="160" hits="1"/>
<line number="163" hits="1"/>
<line number="172" hits="1"/>
<line number="174" hits="1"/>
<line number="176" hits="1"/>
<line number="177" hits="1"/>
<line number="178" hits="1"/>
<line number="179" hits="1"/>
<line number="180" hits="1"/>
<line number="181" hits="1"/>
</lines>
</class>
<class name="scm.py" filename="scm.py" complexity="0" line-rate="0" branch-rate="1">
Expand Down
2 changes: 1 addition & 1 deletion covid19_drdfm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"Demand_5": "Cons5",
"Demand_6": "Employment1",
"Demand_7": "Employment2",
"Supply_1": "GDP",
"GDP": "GDP",
"Supply_2": "UI",
"Supply_3": "PartR",
"Supply_4": "UR",
Expand Down
6 changes: 5 additions & 1 deletion covid19_drdfm/dfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from statsmodels.tsa.stattools import adfuller

from covid19_drdfm.constants import FACTORS
from covid19_drdfm.processing import normalize


@dataclass
Expand Down Expand Up @@ -40,9 +41,12 @@ def state_process(df: pd.DataFrame, state: str) -> pd.DataFrame:
df = df[df.State == state]
#! The truncation will be removed when data is updated in OCT - A.C.
df = df[:-12]
#! Test double-norm
df = normalize(df).fillna(0)
#! TEST REMOVE
const_cols = [x for x in df.columns if is_constant(df[x])]
pprint(f"Constant Columns...dropping\n{const_cols}")
df = df.drop(columns=const_cols)
df = df.drop(columns=const_cols).set_index("Time", drop=True)
return df


Expand Down
7 changes: 3 additions & 4 deletions covid19_drdfm/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def get_df() -> pd.DataFrame:
dfs = [pd.read_csv(x) for x in paths]
return (
reduce(lambda x, y: pd.merge(x, y, on=["State", "Year", "Period"], how="left"), dfs)
.fillna(0)
.drop(columns=["Monetary_1_x", "Monetary_11_x"])
.rename(columns={"Monetary_1_y": "Monetary_1", "Monetary_11_y": "Monetary_11"})
.drop(
Expand All @@ -46,7 +45,6 @@ def get_df() -> pd.DataFrame:
.pipe(adjust_pandemic_response)
.pipe(diff_vars, cols=DIFF_COLS)
.pipe(diff_vars, cols=LOG_DIFF_COLS, log=True)
.fillna(0)
.pipe(normalize)
.drop(index=0) # Drop first row with NaNs from diff
)
Expand Down Expand Up @@ -90,7 +88,7 @@ def adjust_inflation(df: pd.DataFrame) -> pd.DataFrame:
.assign(Demand_3=lambda x: x.Demand_3.div(x.Monetary_3 / 100))
.assign(Demand_4=lambda x: x.Demand_4.div(x.Monetary_3 / 100))
.assign(Demand_5=lambda x: x.Demand_5.div(x.Monetary_3 / 100))
.assign(Supply_1=lambda x: x.Supply_1.div(x.Monetary_3 / 100))
.assign(GDP=lambda x: x.GDP.div(x.Monetary_3 / 100))
.assign(Supply_6=lambda x: x.Supply_6.div(x.Monetary_3 / 100))
)

Expand Down Expand Up @@ -171,11 +169,12 @@ def normalize(df: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.DataFrame: Normalized and stationary DataFrame
"""
meta_cols = df[["State", "Time"]]
meta_cols = df[["State", "Time"]].copy().reset_index(drop=True)
# df = df.drop(columns=["Time"]) if "Time" in df.columns else df
df = df.drop(columns=["State", "Time"])
# Normalize data
scaler = MinMaxScaler()
new = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
new["State"] = meta_cols["State"]
new["Time"] = meta_cols["Time"]
return new
21 changes: 10 additions & 11 deletions covid19_drdfm/streamlit/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
from rich import print as pprint
from sklearn.preprocessing import MinMaxScaler

from covid19_drdfm.constants import FACTORS
from covid19_drdfm.dfm import state_process
from covid19_drdfm.processing import get_df, get_factors
from covid19_drdfm.processing import NAME_MAP
from covid19_drdfm.processing import NAME_MAP, get_df, normalize

st.set_page_config(layout="wide")
pio.templates.default = "plotly_white"

DEFAULTS = {
"Uncat": ["Monetary_5", "Monetary_9", "Monetary_10", "Supply_1", "Supply_7"],
"Uncat": ["Monetary_5", "Monetary_9", "Monetary_10", "GDP", "Supply_7"],
"Consumption": ["Demand_3", "Demand_4", "Demand_5"],
"Response": [
"Pandemic_Response_1",
Expand All @@ -32,8 +32,8 @@
"Inflation": ["Monetary_2", "Monetary_3", "Monetary_1"],
"Pandemic": ["Pandemic_1", "Pandemic_2", "Pandemic_6", "Pandemic_9", "Pandemic_7", "Pandemic_10"],
}
DEFAULTS = {NAME_MAP[x]: [NAME_MAP[z] for z in y] for x, y in DEFAULTS.items() if x in NAME_MAP in NAME_MAP}
print(DEFAULTS)
DEFAULTS = {x: [NAME_MAP[z] for z in y] for x, y in DEFAULTS.items()}
# st.write(DEFAULTS)


def center_title(text):
Expand All @@ -55,10 +55,9 @@ def run_parameterized_model(
"""
# Factors and input data
factors = get_factors()
factor_multiplicities = {"Global": global_multiplier}
df = state_process(df, state)
columns = list(columns) + ["State", "Time"]
columns = list(columns) # + ["State", "Time"]
columns = [x for x in columns if x in df.columns]
new = df[columns]
variables = list(factors.keys())
Expand All @@ -73,9 +72,9 @@ def run_parameterized_model(
# Run Model
if (out / "model.csv").exists():
return
model = sm.tsa.DynamicFactorMQ(new, factors=factors, factor_multiplicities=factor_multiplicities)
model = sm.tsa.DynamicFactorMQ(new, factors=FACTORS, factor_multiplicities=factor_multiplicities)
try:
results = model.fit(disp=10, maxiter=5_000)
results = model.fit(disp=10, maxiter=10_000)
except Exception as e:
with open(outdir / "failed.txt", "a") as f:
f.write(f"{state}\t{e}\n")
Expand All @@ -86,7 +85,7 @@ def run_parameterized_model(
f.write(results.summary().as_csv())
filtered = results.factors["filtered"]
filtered["State"] = state
filtered.to_csv(out / "filtered-factors.csv")
filtered.to_csv(out / "filtered-factors.csv", index=None)
return model


Expand All @@ -97,7 +96,7 @@ def get_data():

df = get_df()
sub = pd.Series([x for x in df.columns if x not in ["State", "Time"]], name="Variables").to_frame()
factors = get_factors()
factors = FACTORS.copy()
factor_vars = list(factors.keys())
_ = [factors.pop(x) for x in factor_vars if x not in df.columns]
sub["Group"] = [factors[x][1] for x in sub.Variables if x in df.columns]
Expand Down
7 changes: 4 additions & 3 deletions tests/test_dfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
# TODO: output should go in a dedicated directory instead of dumping files everywhere
def test_run_model():
df = get_df()
run_model(df, "NY", Path("./testdir"))
assert Path("./testdir/NY/model.csv").exists()
assert Path("./testdir/NY/results.csv").exists()
state = "SD"
run_model(df, state, Path("./testdir"))
assert Path("./testdir/SD/model.csv").exists()
assert Path("./testdir/SD/results.csv").exists()
shutil.rmtree("./testdir")

0 comments on commit 11394e1

Please sign in to comment.