Skip to content

Commit

Permalink
Merge pull request #32 from jvivian/jvivian/issue31
Browse files Browse the repository at this point in the history
Improve coverage
  • Loading branch information
jvivian authored Feb 21, 2024
2 parents 19be5a3 + bf5591e commit 779f594
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 129 deletions.
83 changes: 31 additions & 52 deletions coverage.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" ?>
<coverage version="7.3.1" timestamp="1708493672279" lines-valid="182" lines-covered="138" line-rate="0.7582" branches-valid="66" branches-covered="47" branch-rate="0.7121" complexity="0">
<coverage version="7.3.1" timestamp="1708494529412" lines-valid="159" lines-covered="145" line-rate="0.9119" branches-valid="54" branches-covered="51" branch-rate="0.9444" complexity="0">
<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.3.1 -->
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
<sources>
<source>/home/jvivian/covid19-drDFM/covid19_drdfm</source>
</sources>
<packages>
<package name="." line-rate="0.7778" branch-rate="0.7344" complexity="0">
<package name="." line-rate="0.9119" branch-rate="0.9444" complexity="0">
<classes>
<class name="cli.py" filename="cli.py" complexity="0" line-rate="1" branch-rate="1">
<methods/>
Expand Down Expand Up @@ -47,7 +47,7 @@
<line number="153" hits="1"/>
</lines>
</class>
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="0.65" branch-rate="0.4688">
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="0.9661" branch-rate="0.8636">
<methods/>
<lines>
<line number="7" hits="1"/>
Expand All @@ -68,68 +68,47 @@
<line number="38" hits="1"/>
<line number="39" hits="1"/>
<line number="42" hits="1"/>
<line number="51" hits="0"/>
<line number="52" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="53,58"/>
<line number="53" hits="0"/>
<line number="54" hits="0"/>
<line number="55" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="52,56"/>
<line number="56" hits="0"/>
<line number="58" hits="0"/>
<line number="59" hits="0"/>
<line number="51" hits="1"/>
<line number="52" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="53" hits="1"/>
<line number="54" hits="1"/>
<line number="55" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="56" hits="1"/>
<line number="58" hits="1"/>
<line number="59" hits="1"/>
<line number="62" hits="1"/>
<line number="82" hits="1"/>
<line number="83" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="84"/>
<line number="84" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="exit,85"/>
<line number="85" hits="0"/>
<line number="83" hits="1"/>
<line number="84" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="85"/>
<line number="85" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="exit,86"/>
<line number="86" hits="0"/>
<line number="88" hits="1"/>
<line number="89" hits="1"/>
<line number="90" hits="1"/>
<line number="91" hits="1"/>
<line number="92" hits="1"/>
<line number="95" hits="1"/>
<line number="96" hits="1"/>
<line number="97" hits="1"/>
<line number="98" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="99" hits="1"/>
<line number="93" hits="1"/>
<line number="94" hits="1"/>
<line number="95" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="96" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="97" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="98" hits="1"/>
<line number="100" hits="1"/>
<line number="101" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="101" hits="1"/>
<line number="102" hits="1"/>
<line number="103" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="103" hits="1"/>
<line number="104" hits="1"/>
<line number="105" hits="1"/>
<line number="105" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="106" hits="1"/>
<line number="107" hits="1"/>
<line number="108" hits="1"/>
<line number="108" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="109" hits="1"/>
<line number="110" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="111" hits="1"/>
<line number="112" hits="1"/>
<line number="125" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="127" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="128" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="129" hits="1"/>
<line number="131" hits="1"/>
<line number="132" hits="1"/>
<line number="134" hits="1"/>
<line number="135" hits="1"/>
<line number="138" hits="1"/>
<line number="146" hits="0"/>
<line number="147" hits="0"/>
<line number="148" hits="0"/>
<line number="149" hits="0"/>
<line number="150" hits="0"/>
<line number="151" hits="0"/>
<line number="154" hits="1"/>
<line number="164" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="165,169"/>
<line number="165" hits="0"/>
<line number="166" hits="0"/>
<line number="168" hits="0"/>
<line number="169" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="170,171"/>
<line number="170" hits="0"/>
<line number="171" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="172,173"/>
<line number="172" hits="0"/>
<line number="173" hits="0"/>
<line number="174" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="exit,175"/>
<line number="175" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="exit,176"/>
<line number="176" hits="0"/>
<line number="113" hits="1"/>
<line number="114" hits="1"/>
<line number="115" hits="1"/>
<line number="116" hits="1"/>
</lines>
</class>
<class name="processing.py" filename="processing.py" complexity="0" line-rate="1" branch-rate="1">
Expand Down
94 changes: 17 additions & 77 deletions covid19_drdfm/dfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,29 @@ def run_parameterized_model(
Returns:
sm.tsa.DynamicFactor: Dynamic Factor Model
"""
# Subset for state and columns of interest
# Factors and input data
df = state_process(df, state)
_ = get_nonstationary_columns(df)
if columns:
columns = [x for x in list(columns) if x in df.columns]
df = df[columns]

# Save input to outdir
new = df[columns]
else:
new = df
# Save input data
outdir.mkdir(exist_ok=True)
out = outdir / state
out.mkdir(exist_ok=True)
df.to_excel(out / "df.xlsx")
df.to_csv(out / "df.tsv", sep="\t")

# Run model, save output, log if failure occurs
new.to_excel(out / "df.xlsx")
new.to_csv(out / "df.tsv", sep="\t")
factors = {k: v for k, v in factors.items() if k in new.columns}
if global_multiplier == 0:
factors = {k: {v[1]} for k, v in factors.items()}
model = sm.tsa.DynamicFactorMQ(new, factors=factors)
else:
factor_multiplicities = {"Global": global_multiplier}
model = sm.tsa.DynamicFactorMQ(new, factors=factors, factor_multiplicities=factor_multiplicities)
try:
model, results = _run_model(df, factors, global_multiplier, maxiter)
results = model.fit(disp=10, maxiter=maxiter)
except Exception as e:
with open(outdir / "failed.txt", "a") as f:
f.write(f"{state}\t{e}\n")
Expand All @@ -104,73 +111,6 @@ def run_parameterized_model(
f.write(results.summary().as_csv())
filtered = results.factors["filtered"]
filtered["State"] = state
filtered.index = df.index
filtered.index = new.index
filtered.to_csv(out / "filtered-factors.csv")
return model


def _run_model(df: pd.DataFrame, factors: dict[str, tuple[str, str]], global_multiplier: int, maxiter: int):
    """Build and fit a ``DynamicFactorMQ`` model on ``df``.

    Args:
        df (pd.DataFrame): Input data handed to the model.
        factors (dict[str, tuple[str, str]]): Mapping of variable name to factor names.
            Entries whose key is not a column of ``df`` are dropped.
        global_multiplier (int): Multiplicity assigned to the "Global" factor. When 0,
            every variable keeps only the second entry of its factor tuple and no
            multiplicities are passed to the model.
        maxiter (int): Maximum number of iterations passed to ``fit``.

    Returns:
        tuple: ``(model, results)`` -- the constructed model and its fitted results.
    """
    # Keep only the factor assignments for variables actually present in the data
    usable = {name: spec for name, spec in factors.items() if name in df.columns}
    if global_multiplier != 0:
        model = sm.tsa.DynamicFactorMQ(
            df,
            factors=usable,
            factor_multiplicities={"Global": global_multiplier},
        )
    else:
        # Edge case: multiplier of 0 -> reduce each variable to the second factor of its tuple
        usable = {name: {spec[1]} for name, spec in usable.items()}
        model = sm.tsa.DynamicFactorMQ(df, factors=usable)
    results = model.fit(disp=10, maxiter=maxiter)
    return model, results


def save_df(df: pd.DataFrame, outdir: Path, state: str):
    """Write the model input DataFrame to ``outdir/state`` as Excel and TSV.

    Args:
        df (pd.DataFrame): Input DataFrame to model.
        outdir (Path): Output directory; created along with any missing parents.
        state (str): State name, used as the sub-directory under ``outdir``.
    """
    state_dir = outdir / state
    pprint(f"Saving state input information to {state_dir}")
    # parents=True creates outdir and every missing ancestor in one call, so a
    # nested output path no longer fails with FileNotFoundError.
    state_dir.mkdir(parents=True, exist_ok=True)
    df.to_excel(state_dir / "df.xlsx")
    df.to_csv(state_dir / "df.tsv", sep="\t")


def save_results(df: pd.DataFrame, model, results, outdir: Path, verbose: bool = False):
    """Write model/results summaries and any non-stationary column names to ``outdir``.

    Args:
        df (pd.DataFrame): Data passed to ``get_nonstationary_columns``.
        model: Object whose ``summary().as_csv()`` is written to ``model.csv``.
        results: Object whose ``summary().as_csv()`` is written to ``results.csv``.
        outdir (Path): Directory receiving the output files (not created here).
        verbose (bool, optional): When exactly ``True``, print both summaries
            before saving. Defaults to False.
    """
    if verbose is True:
        for fitted in (model, results):
            pprint(fitted.summary())
    # Output
    pprint(f"Saving output to {outdir}")
    for filename, fitted in (("model.csv", model), ("results.csv", results)):
        with open(outdir / filename, "w") as handle:
            handle.write(fitted.summary().as_csv())
    flagged = get_nonstationary_columns(df)
    if not flagged:
        return
    # Only written when at least one column was reported
    with open(outdir / "non-stationary-columns.txt", "w") as handle:
        handle.write("\n".join(flagged))

0 comments on commit 779f594

Please sign in to comment.