Skip to content

Commit

Permalink
Merge pull request #60 from jvivian/jvivian/issue55
Browse files (browse the repository at this point in the history)
Add raw data to filtered factors and add unittest
jvivian authored Mar 6, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents 00e853a + 031f048 commit cde822a
Showing 7 changed files with 279 additions and 93 deletions.
134 changes: 68 additions & 66 deletions coverage.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" ?>
<coverage version="7.3.1" timestamp="1709534576492" lines-valid="191" lines-covered="190" line-rate="0.9948" branches-valid="66" branches-covered="63" branch-rate="0.9545" complexity="0">
<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.3.1 -->
<coverage version="7.4.3" timestamp="1709686796870" lines-valid="193" lines-covered="188" line-rate="0.9741" branches-valid="66" branches-covered="62" branch-rate="0.9394" complexity="0">
<!-- Generated by coverage.py: https://coverage.readthedocs.io/en/7.4.3 -->
<!-- Based on https://raw.githubusercontent.com/cobertura/web/master/htdocs/xml/coverage-04.dtd -->
<sources>
<source>/home/jvivian/covid19-drDFM/covid19_drdfm</source>
</sources>
<packages>
<package name="." line-rate="0.9948" branch-rate="0.9545" complexity="0">
<package name="." line-rate="0.9741" branch-rate="0.9394" complexity="0">
<classes>
<class name="cli.py" filename="cli.py" complexity="0" line-rate="1" branch-rate="1">
<methods/>
@@ -47,54 +47,54 @@
<line number="153" hits="1"/>
</lines>
</class>
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="1" branch-rate="0.95">
<class name="dfm.py" filename="dfm.py" complexity="0" line-rate="0.9394" branch-rate="0.9">
<methods/>
<lines>
<line number="7" hits="1"/>
<line number="8" hits="1"/>
<line number="9" hits="1"/>
<line number="10" hits="1"/>
<line number="11" hits="1"/>
<line number="12" hits="1"/>
<line number="13" hits="1"/>
<line number="14" hits="1"/>
<line number="15" hits="1"/>
<line number="16" hits="1"/>
<line number="17" hits="1"/>
<line number="20" hits="1"/>
<line number="22" hits="1"/>
<line number="25" hits="1"/>
<line number="19" hits="1"/>
<line number="21" hits="1"/>
<line number="24" hits="1"/>
<line number="34" hits="1"/>
<line number="35" hits="1"/>
<line number="36" hits="1"/>
<line number="37" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="36" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="37" hits="1"/>
<line number="38" hits="1"/>
<line number="39" hits="1"/>
<line number="42" hits="1"/>
<line number="51" hits="1"/>
<line number="52" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="41" hits="1"/>
<line number="50" hits="1"/>
<line number="51" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="52" hits="1"/>
<line number="53" hits="1"/>
<line number="54" hits="1"/>
<line number="55" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="56" hits="1"/>
<line number="54" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="55" hits="1"/>
<line number="57" hits="1"/>
<line number="58" hits="1"/>
<line number="59" hits="1"/>
<line number="62" hits="1"/>
<line number="61" hits="1"/>
<line number="80" hits="1"/>
<line number="81" hits="1"/>
<line number="82" hits="1"/>
<line number="83" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="exit"/>
<line number="84" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="85" hits="1"/>
<line number="82" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="83" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="84" hits="1"/>
<line number="85" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="86" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="87" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="88" hits="1"/>
<line number="87" hits="1"/>
<line number="89" hits="1"/>
<line number="90" hits="1"/>
<line number="91" hits="1"/>
<line number="92" hits="1"/>
<line number="93" hits="1"/>
<line number="94" hits="1"/>
<line number="95" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="96" hits="1"/>
<line number="93" hits="0"/>
<line number="94" hits="0" branch="true" condition-coverage="0% (0/2)" missing-branches="95,96"/>
<line number="95" hits="0"/>
<line number="96" hits="0"/>
<line number="97" hits="1"/>
<line number="98" hits="1"/>
<line number="99" hits="1"/>
<line number="101" hits="1"/>
<line number="102" hits="1"/>
<line number="103" hits="1"/>
<line number="104" hits="1"/>
@@ -114,6 +114,8 @@
<line number="120" hits="1"/>
<line number="121" hits="1"/>
<line number="122" hits="1"/>
<line number="123" hits="1"/>
<line number="124" hits="1"/>
</lines>
</class>
<class name="processing.py" filename="processing.py" complexity="0" line-rate="0.9846" branch-rate="0.9688">
@@ -137,53 +139,53 @@
<line number="36" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="39" hits="1"/>
<line number="46" hits="1"/>
<line number="55" hits="1"/>
<line number="62" hits="1"/>
<line number="65" hits="1"/>
<line number="79" hits="1"/>
<line number="80" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="81" hits="1"/>
<line number="82" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="83" hits="1"/>
<line number="84" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="85" hits="1"/>
<line number="86" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="87"/>
<line number="87" hits="0"/>
<line number="89" hits="1"/>
<line number="92" hits="1"/>
<line number="98" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="56" hits="1"/>
<line number="63" hits="1"/>
<line number="66" hits="1"/>
<line number="80" hits="1"/>
<line number="81" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="82" hits="1"/>
<line number="83" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="84" hits="1"/>
<line number="85" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="86" hits="1"/>
<line number="87" hits="1" branch="true" condition-coverage="50% (1/2)" missing-branches="88"/>
<line number="88" hits="0"/>
<line number="90" hits="1"/>
<line number="93" hits="1"/>
<line number="99" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="102" hits="1"/>
<line number="111" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="122" hits="1"/>
<line number="131" hits="1"/>
<line number="100" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="103" hits="1"/>
<line number="112" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="123" hits="1"/>
<line number="132" hits="1"/>
<line number="133" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="134" hits="1"/>
<line number="133" hits="1"/>
<line number="134" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="135" hits="1"/>
<line number="136" hits="1"/>
<line number="137" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="138" hits="1"/>
<line number="137" hits="1"/>
<line number="138" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="139" hits="1"/>
<line number="142" hits="1"/>
<line number="151" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="152" hits="1"/>
<line number="140" hits="1"/>
<line number="143" hits="1"/>
<line number="152" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="153" hits="1"/>
<line number="156" hits="1"/>
<line number="165" hits="1"/>
<line number="168" hits="1"/>
<line number="181" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="154" hits="1"/>
<line number="157" hits="1"/>
<line number="166" hits="1"/>
<line number="169" hits="1"/>
<line number="182" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="184" hits="1"/>
<line number="183" hits="1" branch="true" condition-coverage="100% (2/2)"/>
<line number="185" hits="1"/>
<line number="188" hits="1"/>
<line number="197" hits="1"/>
<line number="186" hits="1"/>
<line number="189" hits="1"/>
<line number="198" hits="1"/>
<line number="200" hits="1"/>
<line number="199" hits="1"/>
<line number="201" hits="1"/>
<line number="202" hits="1"/>
<line number="203" hits="1"/>
<line number="204" hits="1"/>
<line number="205" hits="1"/>
</lines>
</class>
<class name="results.py" filename="results.py" complexity="0" line-rate="1" branch-rate="0.9167">
10 changes: 6 additions & 4 deletions covid19_drdfm/dfm.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@
- `c19_dfm run`
"""

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

@@ -14,7 +13,7 @@
from statsmodels.tsa.stattools import adfuller

from covid19_drdfm.constants import DIFF_COLS, FACTORS, LOG_DIFF_COLS
from covid19_drdfm.processing import diff_vars, normalize, get_raw, write
from covid19_drdfm.processing import diff_vars, get_raw, normalize, write


def is_constant(column) -> bool:
@@ -104,7 +103,8 @@ def _save_input(df, state, columns, outdir):
out = outdir / state
out.mkdir(exist_ok=True)
raw = get_raw().query("State == @state")
raw = raw[columns] if columns else raw
raw = raw[columns + ["Time"]] if columns else raw
raw = raw.set_index("Time").loc[df.index]
write(raw, out / "raw.csv")
write(df, (out / "df.xlsx"))
write(df, out / "df.csv")
@@ -118,5 +118,7 @@ def _save_output(df, model, results, state, outdir):
f.write(results.summary().as_csv())
filtered = results.factors["filtered"]
filtered["State"] = state
filtered.index = df.index
raw = pd.read_csv(out / "raw.csv", index_col=0)
filtered.index = raw.index
filtered = filtered.merge(raw, left_index=True, right_index=True)
filtered.to_csv(out / "filtered-factors.csv")
3 changes: 2 additions & 1 deletion covid19_drdfm/processing.py
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@ def get_raw() -> pd.DataFrame:
.drop(columns=["Monetary_1_x", "Monetary_11_x"])
.rename(columns={"Monetary_1_y": "Monetary_1", "Monetary_11_y": "Monetary_11"})
.drop(columns=["Proportion", "proportion_vax2", "Pandemic_Response_8", "Distributed"])
.pipe(add_datetime)
.pipe(fix_names)
)

@@ -59,7 +60,7 @@ def get_df() -> pd.DataFrame:
Returns:
pd.DataFrame: The cleaned DataFrame.
"""
return get_raw().pipe(adjust_inflation).pipe(add_datetime).pipe(adjust_pandemic_response)
return get_raw().pipe(adjust_inflation).pipe(adjust_pandemic_response)


def write(df: pd.DataFrame, outpath: Path) -> None:
Loading

0 comments on commit cde822a

Please sign in to comment.