Skip to content

Commit 04fbeef

Browse files
committed
add ssm models to monte cover
1 parent 0c8ec4d commit 04fbeef

File tree

3 files changed

+257
-0
lines changed

3 files changed

+257
-0
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
"""Monte Carlo coverage simulations for SSM."""
2+
3+
from montecover.ssm.ssm_mar_ate import SSMMarATECoverageSimulation
4+
from montecover.ssm.ssm_nonig_ate import SSMNonIgnorableATECoverageSimulation
5+
6+
# Names re-exported as the public API of this package.
__all__ = ["SSMMarATECoverageSimulation", "SSMNonIgnorableATECoverageSimulation"]
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
from typing import Any, Dict, Optional
2+
3+
import doubleml as dml
4+
from doubleml.datasets import make_ssm_data
5+
6+
from montecover.base import BaseSimulation
7+
from montecover.utils import create_learner_from_config
8+
9+
10+
class SSMMarATECoverageSimulation(BaseSimulation):
    """Simulation class for coverage properties of DoubleMLSSM with missing at random for ATE estimation.

    Each repetition fits a ``dml.DoubleMLSSM`` model with ``score="missing-at-random"``
    on data generated by ``make_ssm_data(mar=True)`` and records the coverage
    results for every configured confidence level.
    """

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
    ):
        """Initialise the base simulation and then set up the oracle values.

        All arguments are forwarded unchanged to ``BaseSimulation.__init__``.
        """
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Calculate oracle values
        self._calculate_oracle_values()

    def _process_config_parameters(self):
        """Validate the learner section of the config.

        Raises
        ------
        AssertionError
            If ``dml_parameters`` has no ``"learners"`` entry, or if any learner
            configuration lacks one of ``ml_g``, ``ml_m`` or ``ml_pi``.
        """
        # Explicit raises instead of ``assert`` statements: asserts are stripped
        # under ``python -O``, which would let an incomplete config through.
        # AssertionError is kept so existing handlers keep matching.
        if "learners" not in self.dml_parameters:
            raise AssertionError("No learners specified in the config file")

        required_learners = ("ml_g", "ml_m", "ml_pi")
        for learner in self.dml_parameters["learners"]:
            for ml in required_learners:
                if ml not in learner:
                    raise AssertionError(f"No {ml} specified in the config file")

    def _calculate_oracle_values(self):
        """Store the ``theta`` entry of the DGP parameters as the oracle value."""
        self.logger.info("Calculating oracle values")

        # NOTE(review): the value is stored exactly as found in ``dgp_parameters``.
        # If the config supplies several thetas, confirm that ``_compute_coverage``
        # compares each repetition against the matching one.
        self.oracle_values = {"theta": self.dgp_parameters["theta"]}

    def run_single_rep(self, dml_data: dml.DoubleMLData, dml_params: Dict[str, Any]) -> Dict[str, Any]:
        """Run a single repetition with the given parameters.

        Fits one ``dml.DoubleMLSSM`` model and evaluates coverage once per entry
        of ``confidence_parameters["level"]``.

        Returns
        -------
        dict
            ``{"coverage": [...]}`` with one entry per confidence level. Each entry
            is the result of ``_compute_coverage`` extended with the learner names
            and the level.
        """
        # Extract parameters
        learner_config = dml_params["learners"]
        learner_g_name, ml_g = create_learner_from_config(learner_config["ml_g"])
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        learner_pi_name, ml_pi = create_learner_from_config(learner_config["ml_pi"])

        # Model
        dml_model = dml.DoubleMLSSM(
            obj_dml_data=dml_data,
            ml_g=ml_g,
            ml_m=ml_m,
            ml_pi=ml_pi,
            score="missing-at-random",
        )
        dml_model.fit()

        # The learner labels do not depend on the level, so build them once.
        learner_labels = {
            "Learner g": learner_g_name,
            "Learner m": learner_m_name,
            "Learner pi": learner_pi_name,
        }

        result = {"coverage": []}
        for level in self.confidence_parameters["level"]:
            level_result = {
                "coverage": self._compute_coverage(
                    thetas=dml_model.coef,
                    oracle_thetas=self.oracle_values["theta"],
                    confint=dml_model.confint(level=level),
                    joint_confint=None,
                ),
            }

            # add parameters to the result
            for res_metric in level_result.values():
                res_metric.update({**learner_labels, "level": level})
            for key, res in level_result.items():
                result[key].append(res)

        return result

    def summarize_results(self):
        """Summarize the simulation results.

        Groups every DataFrame in ``self.results`` by learner names and level and
        aggregates the mean of ``Coverage``, ``CI Length`` and ``Bias`` plus the
        count of ``repetition``.

        Returns
        -------
        dict
            Maps each result name to its aggregated DataFrame.
        """
        self.logger.info("Summarizing simulation results")

        # Group by parameter combinations
        groupby_cols = ["Learner g", "Learner m", "Learner pi", "level"]
        aggregation_dict = {
            "Coverage": "mean",
            "CI Length": "mean",
            "Bias": "mean",
            "repetition": "count",
        }

        # Aggregate results (possibly multiple result dfs)
        result_summary = {}
        for result_name, result_df in self.results.items():
            result_summary[result_name] = result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index()
            self.logger.debug(f"Summarized {result_name} results")

        return result_summary

    def _generate_dml_data(self, dgp_params: Dict[str, Any]) -> dml.DoubleMLData:
        """Generate data for the simulation.

        Draws a DataFrame from ``make_ssm_data`` with ``mar=True`` using the
        ``theta``, ``n_obs`` and ``dim_x`` entries of ``dgp_params`` and wraps it
        in a ``dml.DoubleMLData`` with outcome ``"y"``, treatment ``"d"`` and
        selection column ``"s"``.
        """
        data = make_ssm_data(
            theta=dgp_params["theta"],
            n_obs=dgp_params["n_obs"],
            dim_x=dgp_params["dim_x"],
            mar=True,
            return_type="DataFrame",
        )
        dml_data = dml.DoubleMLData(data, "y", "d", s_col="s")
        return dml_data
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from typing import Any, Dict, Optional
2+
3+
import doubleml as dml
4+
from doubleml.datasets import make_ssm_data
5+
6+
from montecover.base import BaseSimulation
7+
from montecover.utils import create_learner_from_config
8+
9+
10+
class SSMNonIgnorableATECoverageSimulation(BaseSimulation):
    """
    Simulation class for coverage properties of DoubleMLSSM with nonignorable nonresponse for ATE estimation.

    Each repetition fits a ``dml.DoubleMLSSM`` model with ``score="nonignorable"``
    on data generated by ``make_ssm_data(mar=False)`` and records the coverage
    results for every configured confidence level.
    """

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
    ):
        """Initialise the base simulation and then set up the oracle values.

        All arguments are forwarded unchanged to ``BaseSimulation.__init__``.
        """
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Calculate oracle values
        self._calculate_oracle_values()

    def _process_config_parameters(self):
        """Validate the learner section of the config.

        Raises
        ------
        AssertionError
            If ``dml_parameters`` has no ``"learners"`` entry, or if any learner
            configuration lacks one of ``ml_g``, ``ml_m`` or ``ml_pi``.
        """
        # Explicit raises instead of ``assert`` statements: asserts are stripped
        # under ``python -O``, which would let an incomplete config through.
        # AssertionError is kept so existing handlers keep matching.
        if "learners" not in self.dml_parameters:
            raise AssertionError("No learners specified in the config file")

        required_learners = ("ml_g", "ml_m", "ml_pi")
        for learner in self.dml_parameters["learners"]:
            for ml in required_learners:
                if ml not in learner:
                    raise AssertionError(f"No {ml} specified in the config file")

    def _calculate_oracle_values(self):
        """Store the ``theta`` entry of the DGP parameters as the oracle value."""
        self.logger.info("Calculating oracle values")

        # NOTE(review): the value is stored exactly as found in ``dgp_parameters``.
        # If the config supplies several thetas, confirm that ``_compute_coverage``
        # compares each repetition against the matching one.
        self.oracle_values = {"theta": self.dgp_parameters["theta"]}

    def run_single_rep(self, dml_data: dml.DoubleMLData, dml_params: Dict[str, Any]) -> Dict[str, Any]:
        """Run a single repetition with the given parameters.

        Fits one ``dml.DoubleMLSSM`` model and evaluates coverage once per entry
        of ``confidence_parameters["level"]``.

        Returns
        -------
        dict
            ``{"coverage": [...]}`` with one entry per confidence level. Each entry
            is the result of ``_compute_coverage`` extended with the learner names
            and the level.
        """
        # Extract parameters
        learner_config = dml_params["learners"]
        learner_g_name, ml_g = create_learner_from_config(learner_config["ml_g"])
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        learner_pi_name, ml_pi = create_learner_from_config(learner_config["ml_pi"])

        # Model
        dml_model = dml.DoubleMLSSM(
            obj_dml_data=dml_data,
            ml_g=ml_g,
            ml_m=ml_m,
            ml_pi=ml_pi,
            score="nonignorable",
        )
        dml_model.fit()

        # The learner labels do not depend on the level, so build them once.
        learner_labels = {
            "Learner g": learner_g_name,
            "Learner m": learner_m_name,
            "Learner pi": learner_pi_name,
        }

        result = {"coverage": []}
        for level in self.confidence_parameters["level"]:
            level_result = {
                "coverage": self._compute_coverage(
                    thetas=dml_model.coef,
                    oracle_thetas=self.oracle_values["theta"],
                    confint=dml_model.confint(level=level),
                    joint_confint=None,
                ),
            }

            # add parameters to the result
            for res_metric in level_result.values():
                res_metric.update({**learner_labels, "level": level})
            for key, res in level_result.items():
                result[key].append(res)

        return result

    def summarize_results(self):
        """Summarize the simulation results.

        Groups every DataFrame in ``self.results`` by learner names and level and
        aggregates the mean of ``Coverage``, ``CI Length`` and ``Bias`` plus the
        count of ``repetition``.

        Returns
        -------
        dict
            Maps each result name to its aggregated DataFrame.
        """
        self.logger.info("Summarizing simulation results")

        # Group by parameter combinations
        groupby_cols = ["Learner g", "Learner m", "Learner pi", "level"]
        aggregation_dict = {
            "Coverage": "mean",
            "CI Length": "mean",
            "Bias": "mean",
            "repetition": "count",
        }

        # Aggregate results (possibly multiple result dfs)
        result_summary = {}
        for result_name, result_df in self.results.items():
            result_summary[result_name] = result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index()
            self.logger.debug(f"Summarized {result_name} results")

        return result_summary

    def _generate_dml_data(self, dgp_params: Dict[str, Any]) -> dml.DoubleMLData:
        """Generate data for the simulation.

        Draws a DataFrame from ``make_ssm_data`` with ``mar=False`` using the
        ``theta``, ``n_obs`` and ``dim_x`` entries of ``dgp_params`` and wraps it
        in a ``dml.DoubleMLData`` with outcome ``"y"``, treatment ``"d"``,
        instrument column ``"z"`` and selection column ``"s"``.
        """
        data = make_ssm_data(
            theta=dgp_params["theta"],
            n_obs=dgp_params["n_obs"],
            dim_x=dgp_params["dim_x"],
            mar=False,
            return_type="DataFrame",
        )
        dml_data = dml.DoubleMLData(data, "y", "d", z_cols="z", s_col="s")
        return dml_data

0 commit comments

Comments
 (0)