Add sensitivity analysis methods #967

Draft · wants to merge 7 commits into base: main
76 changes: 76 additions & 0 deletions econml/dml/causal_forest.py
@@ -17,6 +17,7 @@
from .._cate_estimator import LinearCateEstimator
from .._shap import _shap_explain_multitask_model_cate
from .._ortho_learner import _OrthoLearner
from ..validate import sensitivity_interval, RV, dml_sensitivity_values


class _CausalForestFinalWrapper:
@@ -56,6 +57,11 @@ def fit(self, X, T, T_res, Y_res, sample_weight=None, freq_weight=None, sample_v
T_res = T_res.reshape((-1, 1))
if Y_res.ndim == 1:
Y_res = Y_res.reshape((-1, 1))

# if binary/continuous treatment and single outcome, can calculate sensitivity params
if not ((self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1)):
self.sensitivity_params = dml_sensitivity_values(T_res, Y_res)

self._model.fit(fts, T_res, Y_res, sample_weight=sample_weight)
# Fit a doubly robust average effect
if self._discrete_treatment and self._drate:
@@ -811,6 +817,76 @@ def tune(self, Y, T, *, X=None, W=None,

return self

def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE.

The sensitivity interval is the range of values for the ATE that are
consistent with the observed data, given a specified level of confounding.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.0
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

interval_type: str, default 'ci'
The type of interval to return. Can be 'ci' or 'theta'.

Returns
-------
(lb, ub): tuple of floats
sensitivity interval for the ATE
"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_interval(**sensitivity_params, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)

def robustness_value(self, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE.

The robustness value is the minimal level of confounding (between 0 and 1) in
*both* the treatment and outcome that would render
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
alpha: float, default 0.05
The significance level for the robustness value.

interval_type: str, default 'ci'
The type of interval to return. Can be 'ci' or 'theta'.

Returns
-------
float
The robustness value
"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)

# override only so that we can update the docstring to indicate support for `blb`
def fit(self, Y, T, *, X=None, W=None, sample_weight=None, groups=None,
cache_values=False, inference='auto'):
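As a quick usage sketch of the new CausalForestDML methods (not part of the diff; the data-generating code and hyperparameters below are illustrative assumptions):

import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from econml.dml import CausalForestDML

rng = np.random.default_rng(0)
X = rng.normal(size=(2000, 5))
T = rng.binomial(1, 0.5, size=2000)              # single binary treatment
Y = T * (1.0 + X[:, 0]) + rng.normal(size=2000)  # single outcome

est = CausalForestDML(model_y=RandomForestRegressor(min_samples_leaf=20),
                      model_t=RandomForestClassifier(min_samples_leaf=20),
                      discrete_treatment=True, random_state=0)
est.fit(Y, T, X=X)

# range of ATEs consistent with 5% confounding in both treatment and outcome
lb, ub = est.sensitivity_interval(alpha=0.05, c_y=0.05, c_t=0.05)
# smallest equal confounding level at which the ATE loses significance
rv = est.robustness_value(alpha=0.05)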
86 changes: 84 additions & 2 deletions econml/dml/dml.py
@@ -25,6 +25,7 @@
shape, get_feature_names_or_default, filter_none_kwargs)
from .._shap import _shap_explain_model_cate
from ..sklearn_extensions.model_selection import get_selector, SingleModelSelector
from ..validate import sensitivity_interval, RV, dml_sensitivity_values


def _combine(X, W, n_samples):
@@ -105,10 +106,12 @@ def _make_first_stage_selector(model, is_discrete, random_state):


class _FinalWrapper:
def __init__(self, model_final, fit_cate_intercept, featurizer, use_weight_trick):
def __init__(self, model_final, fit_cate_intercept, featurizer,
use_weight_trick, allow_sensitivity_analysis=False):
self._model = clone(model_final, safe=False)
self._use_weight_trick = use_weight_trick
self._original_featurizer = clone(featurizer, safe=False)
self.allow_sensitivity_analysis = allow_sensitivity_analysis
if self._use_weight_trick:
self._fit_cate_intercept = False
self._featurizer = self._original_featurizer
@@ -147,6 +150,14 @@ def fit(self, X, T, T_res, Y_res, sample_weight=None, freq_weight=None, sample_v
self._d_t = shape(T_res)[1:]
self._d_y = shape(Y_res)[1:]
if not self._use_weight_trick:

# if binary/continuous treatment and single outcome, can calculate sensitivity params
if self.allow_sensitivity_analysis and not (
(self._d_t and self._d_t[0] > 1) or (
self._d_y and self._d_y[0] > 1)
):
self.sensitivity_params = dml_sensitivity_values(T_res, Y_res)

fts = self._combine(X, T_res)
filtered_kwargs = filter_none_kwargs(sample_weight=sample_weight,
freq_weight=freq_weight, sample_var=sample_var)
@@ -535,7 +546,8 @@ def _gen_model_final(self):
return clone(self.model_final, safe=False)

def _gen_rlearner_model_final(self):
return _FinalWrapper(self._gen_model_final(), self.fit_cate_intercept, self._gen_featurizer(), False)
return _FinalWrapper(self._gen_model_final(), self.fit_cate_intercept,
self._gen_featurizer(), False, allow_sensitivity_analysis=True)

# override only so that we can update the docstring to indicate support for `LinearModelFinalInference`
def fit(self, Y, T, *, X=None, W=None, sample_weight=None, freq_weight=None, sample_var=None, groups=None,
@@ -594,6 +606,76 @@ def bias_part_of_coef(self):
def fit_cate_intercept_(self):
return self.rlearner_model_final_._fit_cate_intercept

def sensitivity_interval(self, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE.

The sensitivity interval is the range of values for the ATE that are
consistent with the observed data, given a specified level of confounding.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.0
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

interval_type: str, default 'ci'
The type of interval to return. Can be 'ci' or 'theta'.

Returns
-------
(lb, ub): tuple of floats
sensitivity interval for the ATE
"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return sensitivity_interval(**sensitivity_params, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)

def robustness_value(self, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE.

The robustness value is the minimal level of confounding (between 0 and 1) in
*both* the treatment and outcome that would render
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.

Can only be calculated when Y and T are single arrays, and T is binary or continuous.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
alpha: float, default 0.05
The significance level for the robustness value.

interval_type: str, default 'ci'
The type of interval to return. Can be 'ci' or 'theta'.

Returns
-------
float
The robustness value
"""
if (self._d_t and self._d_t[0] > 1) or (self._d_y and self._d_y[0] > 1):
raise ValueError(
"Sensitivity analysis for DML is not supported for multi-dimensional outcomes or treatments.")
sensitivity_params = self._ortho_learner_model_final._model_final.sensitivity_params
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)


class LinearDML(StatsModelsCateEstimatorMixin, DML):
"""
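The same API is exposed on the DML base class, so LinearDML works with a continuous treatment as well. Another illustrative sketch (the reading of interval_type — 'theta' bounding the estimate itself, 'ci' additionally covering sampling uncertainty at level alpha — is an assumption based on the referenced paper, not spelled out in the diff):

import numpy as np
from econml.dml import LinearDML

rng = np.random.default_rng(0)
X = rng.normal(size=(2000, 3))
T = X[:, 0] + rng.normal(size=2000)            # single continuous treatment
Y = 2.0 * T + X[:, 1] + rng.normal(size=2000)  # single outcome

est = LinearDML(random_state=0)
est.fit(Y, T, X=X)

lb, ub = est.sensitivity_interval(c_y=0.1, c_t=0.1, interval_type='theta')
rv = est.robustness_value(interval_type='ci')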
92 changes: 88 additions & 4 deletions econml/dr/_drlearner.py
@@ -52,6 +52,7 @@
from ..utilities import (check_high_dimensional,
filter_none_kwargs, inverse_onehot, get_feature_names_or_default)
from .._shap import _shap_explain_multitask_model_cate, _shap_explain_model_cate
from ..validate import sensitivity_interval, RV, dr_sensitivity_values


class _ModelNuisance(ModelSelector):
@@ -118,9 +119,7 @@ def predict(self, Y, T, X=None, W=None, *, sample_weight=None, groups=None):
else:
Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n)
Y_pred[:, t + 1] += (Y.reshape(n) - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
T_complete = np.hstack(((np.all(T == 0, axis=1) * 1).reshape(-1, 1), T))
propensities_weight = np.sum(propensities * T_complete, axis=1)
return Y_pred.reshape(Y.shape + (T.shape[1] + 1,)), propensities_weight.reshape((n,))
return Y_pred.reshape(Y.shape + (T.shape[1] + 1,)), propensities


def _make_first_stage_selector(model, is_discrete, random_state):
Expand All @@ -143,9 +142,15 @@ def __init__(self, model_final, featurizer, multitask_model_final):

def fit(self, Y, T, X=None, W=None, *, nuisances,
sample_weight=None, freq_weight=None, sample_var=None, groups=None):
Y_pred, propensities = nuisances
Y_pred, T_pred = nuisances
T_complete = np.hstack(((np.all(T == 0, axis=1) * 1).reshape(-1, 1), T))
propensities = np.sum(T_pred * T_complete, axis=1).reshape((T.shape[0],))

self.d_y = Y_pred.shape[1:-1] # track whether there's a Y dimension (must be a singleton)
self.d_t = Y_pred.shape[-1] - 1 # track # of treatment (exclude baseline treatment)

self.sensitivity_params = dr_sensitivity_values(Y, T_complete, Y_pred, T_pred)

if (X is not None) and (self._featurizer is not None):
X = self._featurizer.fit_transform(X)

@@ -751,6 +756,85 @@ def shap_values(self, X, *, feature_names=None, treatment_names=None, output_nam
background_samples=background_samples)
shap_values.__doc__ = LinearCateEstimator.shap_values.__doc__

def sensitivity_interval(self, T, alpha=0.05, c_y=0.05, c_t=0.05, rho=1., interval_type='ci'):
"""
Calculate the sensitivity interval for the ATE for a given treatment category.

The sensitivity interval is the range of values for the ATE that are
consistent with the observed data, given a specified level of confounding.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
T: alphanumeric
The treatment with respect to which to calculate the sensitivity interval.

alpha: float, default 0.05
The significance level for the sensitivity interval.

c_y: float, default 0.05
The level of confounding in the outcome. Ranges from 0 to 1.

c_t: float, default 0.05
The level of confounding in the treatment. Ranges from 0 to 1.

rho: float, default 1.0
The correlation between the confounding in the outcome and the confounding in the treatment. Ranges from -1 to 1.

interval_type: str, default 'ci'
The type of interval to return. Can be 'ci' or 'theta'.

Returns
-------
(lb, ub): tuple of floats
sensitivity interval for the ATE for treatment T
"""
if T not in self.transformer.categories_[0]:
# raise our own ValueError here because the error from sklearn is sometimes not transparent
raise ValueError(f"Treatment {T} not in the list of treatments {self.transformer.categories_[0]}")
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params.items()}
return sensitivity_interval(**sensitivity_params, alpha=alpha,
c_y=c_y, c_t=c_t, rho=rho, interval_type=interval_type)

def robustness_value(self, T, alpha=0.05, interval_type='ci'):
"""
Calculate the robustness value for the ATE for a given treatment category.

The robustness value is the minimal level of confounding (between 0 and 1) in
*both* the treatment and outcome that would render
the ATE not statistically significant. A higher value indicates
a more robust estimate.
Returns 0 if the original interval already includes zero.

Based on `Chernozhukov et al. (2022) <https://www.nber.org/papers/w30302>`_

Parameters
----------
T: alphanumeric
The treatment with respect to which to calculate the robustness value.

alpha: float, default 0.05
The significance level for the robustness value.

interval_type: str, default 'ci'
The type of interval to return. Can be 'ci' or 'theta'.

Returns
-------
float
The robustness value
"""
if T not in self.transformer.categories_[0]:
# raise our own ValueError here because the error from sklearn is sometimes not transparent
raise ValueError(f"Treatment {T} not in the list of treatments {self.transformer.categories_[0]}")
_, T = self._expand_treatments(None, T)
T_ind = inverse_onehot(T).item() - 1
assert T_ind >= 0, "No model was fitted for the control"
sensitivity_params = {k: v[T_ind] for k, v in self._ortho_learner_model_final.sensitivity_params.items()}
return RV(**sensitivity_params, alpha=alpha, interval_type=interval_type)


class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):
"""
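For DRLearner the methods are per treatment category, matching the tests below. An illustrative sketch ('a' sorts first, so it becomes the control):

import numpy as np
from econml.dr import DRLearner

rng = np.random.default_rng(0)
X = rng.normal(size=(1500, 4))
T = rng.choice(['a', 'b', 'c'], size=1500)
Y = (T == 'b') * 1.0 + (T == 'c') * 2.0 + X[:, 0] + rng.normal(size=1500)

est = DRLearner(random_state=0)
est.fit(Y, T, X=X)

lb, ub = est.sensitivity_interval(T='b')  # ATE of 'b' vs. the control 'a'
rv = est.robustness_value(T='c')
# est.sensitivity_interval(T='a')  # AssertionError: no model fit for the control
# est.robustness_value(T=1)        # ValueError: unknown treatment value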
13 changes: 13 additions & 0 deletions econml/tests/test_dml.py
@@ -247,6 +247,19 @@ def make_random(n, is_discrete, d):
with pytest.raises(AttributeError):
self.assertEqual(shape(est.intercept_), intercept_shape)

if d_y > 1 or is_discrete or d_t > 1:
# sensitivity analysis should raise when d_y > 1,
# or when T is multi-category discrete or multi-dimensional continuous
with pytest.raises(ValueError):
est.sensitivity_interval()

with pytest.raises(ValueError):
est.robustness_value()

else:
est.sensitivity_interval()
est.robustness_value()

if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
marg_eff_int = est.marginal_effect_interval(T, X)
22 changes: 22 additions & 0 deletions econml/tests/test_drlearner.py
@@ -142,6 +142,28 @@ def make_random(is_discrete, d):
# ensure that we can serialize fit estimator
pickle.dumps(est)

est.sensitivity_interval(T='b')
est.robustness_value(T='b')

est.sensitivity_interval(T='c')
est.robustness_value(T='c')

# ensure sensitivity analysis fails on control
with pytest.raises(AssertionError):
est.sensitivity_interval(T='a')

with pytest.raises(AssertionError):
est.robustness_value(T='a')

# ensure failure on unknown treatment values
with pytest.raises(ValueError):
est.sensitivity_interval(T=1)

with pytest.raises(ValueError):
est.robustness_value(T=1)

# make sure we can call the marginal_effect and effect methods
const_marg_eff = est.const_marginal_effect(
X)
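For reviewers wanting the intuition behind dml_sensitivity_values: in the partially linear DML setting, the quantities driving the Chernozhukov et al. (2022) bound can be computed directly from the first-stage residuals. The sketch below is a conceptual assumption, not the PR's implementation — the names and the exact set of returned values may differ, and the bound formula itself is omitted (see the paper):

import numpy as np

def plm_sensitivity_ingredients(T_res, Y_res):
    # Point estimate and the two moments the omitted-variable-bias bound of
    # Chernozhukov et al. (2022) is built from, for the partially linear
    # model; illustrative only.
    T_res, Y_res = T_res.ravel(), Y_res.ravel()
    theta = np.mean(Y_res * T_res) / np.mean(T_res ** 2)  # residual-on-residual OLS
    sigma2 = np.mean((Y_res - theta * T_res) ** 2)        # outcome residual variance
    nu2 = 1.0 / np.mean(T_res ** 2)                       # E[alpha^2] for the PLM Riesz representer
    # The paper bounds the squared bias by sigma2 * nu2 scaled by factors
    # in c_y, c_t and rho; the PR's validate helpers implement that bound.
    return theta, sigma2, nu2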