py-why · carl-offerfit · Mar 21, 2025 · Mar 28, 2025 · Mar 31, 2025 · Apr 2, 2025
diff --git a/README.md b/README.md
@@ -357,9 +357,9 @@ lb, ub = est.effect_interval(X_test, alpha=0.05) # OLS confidence intervals
 ```Python
 from econml.iv.dml import NonParamDMLIV
 
-est = NonParamDMLIV(projection=False, 
-                    discrete_treatment=True, 
-                    discrete_instrument=True)
+est = NonParamDMLIV(discrete_treatment=True, 
+                    discrete_instrument=True,
+                    model_final=RandomForestRegressor())
 est.fit(Y, T, Z=Z, X=X, W=W) # no analytical confidence interval available
 treatment_effects = est.effect(X_test)
 ```

diff --git a/doc/spec/references.rst b/doc/spec/references.rst
@@ -17,6 +17,12 @@ References
     Two-Stage Estimation with a High-Dimensional Second Stage.
     2018.
 
+.. [Chernozhukov2022]
+    V. Chernozhukov, C. Cinelli, N. Kallus, W. Newey, A. Sharma, and V. Syrgkanis.
+    Long Story Short: Omitted Variable Bias in Causal Machine Learning.
+    *NBER Working Paper No. 30302*, 2022.
+    URL https://www.nber.org/papers/w30302.
+
 .. [Hartford2017]
     Jason Hartford, Greg Lewis, Kevin Leyton-Brown, and Matt Taddy.
     Deep IV: A flexible approach for counterfactual prediction.

diff --git a/doc/spec/spec.rst b/doc/spec/spec.rst
@@ -13,6 +13,7 @@ EconML User Guide
     estimation_dynamic
     inference
     model_selection
+    validation
     interpretability
     federated_learning
     references

diff --git a/doc/spec/validation.rst b/doc/spec/validation.rst
@@ -0,0 +1,68 @@
+Validation
+======================
+
+Validating causal estimates is inherently challenging, as the true counterfactual outcome for a given treatment is
+unobservable. However, there are several checks and tools available in EconML to help assess the credibility of causal
+estimates.
+
+
+Sensitivity Analysis
+---------------------
+
+For many EconML estimators, unobserved confounding can lead to biased causal estimates.
+Moreover, it is impossible to prove the absence of unobserved confounders.
+This is a fundamental problem for observational causal inference.
+
+To mitigate this problem, EconML provides a suite of sensitivity analysis tools,
+based on [Chernozhukov2022]_,
+to assess the robustness of causal estimates to unobserved confounding. 
+
+Specifically, select estimators (subclasses of :class:`.DML` and :class:`.DRLearner`)
+have access to ``sensitivity_analysis``, ``robustness_value``, and ``sensitivity_summary`` methods.
+
+``sensitivity_analysis`` provides an updated confidence interval for the ATE based on a specified level of unobserved confounding.
+
+
+``robustness_value`` computes the minimum level of unobserved confounding required
+so that confidence intervals around the ATE would begin to include the given point (0 by default).
+
+
+``sensitivity_summary`` provides a summary of the two methods above.
+
+DRTester
+----------------
+
+EconML provides the :class:`.DRTester` class, which implements Best Linear Predictor (BLP), calibration r-squared,
+and uplift modeling methods for validation.
+
+See an example notebook `here <https://github.com/py-why/EconML/blob/main/notebooks/CATE%20validation.ipynb>`__.
+
+Scoring
+-------
+
+Many EconML estimators implement a ``.score`` method to evaluate the goodness-of-fit of the final model. While it may be 
+difficult to make direct sense of results from ``.score``, EconML offers the :class:`.RScorer` class to facilitate model 
+selection based on scoring.
+
+:class:`.RScorer` enables comparison and selection among different causal models.
+
+See an example notebook `here
+<https://github.com/py-why/EconML/blob/main/notebooks/Causal%20Model%20Selection%20with%20the%20RScorer.ipynb>`__.
+
+Confidence Intervals and Inference
+----------------------------------
+
+Most EconML estimators allow for inference, including standard errors, confidence intervals, and p-values for
+estimated effects. A common validation approach is to check whether the p-values are below a chosen significance level
+(e.g., 0.05). If not, the null hypothesis that the causal effect is zero cannot be rejected.
+
+**Note:** Inference results are only valid if the model specification is correct. For example, if a linear model is used
+but the true data-generating process is nonlinear, the inference may not be reliable. It is generally not possible to
+guarantee correct specification, so p-value inspection should be considered a surface-level check.
+
+DoWhy Refutation Tests
+----------------------
+
+The DoWhy library, which complements EconML, includes several refutation tests for validating causal estimates. These
+tests work by comparing the original causal estimate to estimates obtained from perturbed versions of the data, helping
+to assess the robustness of causal conclusions.
diff --git a/econml/_cate_estimator.py b/econml/_cate_estimator.py
@@ -880,7 +880,12 @@ def _postfit(self, Y, T, *args, **kwargs):
             self._set_transformed_treatment_names()
 
     def _expand_treatments(self, X=None, *Ts, transform=True):
-        X, *Ts = check_input_arrays(X, *Ts)
+        if 'X' in self._gen_allowed_missing_vars():
+            force_all_finite = 'allow-nan'
+        else:
+            force_all_finite = False
+        X, = check_input_arrays(X, force_all_finite=force_all_finite)
+        Ts = check_input_arrays(*Ts)
         n_rows = 1 if X is None else shape(X)[0]
         outTs = []
         for T in Ts:

diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py
@@ -990,7 +990,11 @@ def _fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight
                                                                                            groups=groups))
 
     def const_marginal_effect(self, X=None):
-        X, = check_input_arrays(X)
+        if 'X' in self._gen_allowed_missing_vars():
+            force_all_finite = 'allow-nan'
+        else:
+            force_all_finite = False
+        X, = check_input_arrays(X, force_all_finite=force_all_finite)
         self._check_fitted_dims(X)
         if X is None:
             return self._ortho_learner_model_final.predict()
@@ -1000,34 +1004,52 @@ def const_marginal_effect(self, X=None):
     const_marginal_effect.__doc__ = LinearCateEstimator.const_marginal_effect.__doc__
 
     def const_marginal_effect_interval(self, X=None, *, alpha=0.05):
-        X, = check_input_arrays(X)
+        if 'X' in self._gen_allowed_missing_vars():
+            force_all_finite = 'allow-nan'
+        else:
+            force_all_finite = False
+        X, = check_input_arrays(X, force_all_finite=force_all_finite)
         self._check_fitted_dims(X)
         return super().const_marginal_effect_interval(X, alpha=alpha)
 
     const_marginal_effect_interval.__doc__ = LinearCateEstimator.const_marginal_effect_interval.__doc__
 
     def const_marginal_effect_inference(self, X=None):
-        X, = check_input_arrays(X)
+        if 'X' in self._gen_allowed_missing_vars():
+            force_all_finite = 'allow-nan'
+        else:
+            force_all_finite = False
+        X, = check_input_arrays(X, force_all_finite=force_all_finite)
         self._check_fitted_dims(X)
         return super().const_marginal_effect_inference(X)
 
     const_marginal_effect_inference.__doc__ = LinearCateEstimator.const_marginal_effect_inference.__doc__
 
     def effect_interval(self, X=None, *, T0=0, T1=1, alpha=0.05):
-        X, T0, T1 = check_input_arrays(X, T0, T1)
+        if 'X' in self._gen_allowed_missing_vars():
+            force_all_finite = 'allow-nan'
+        else:
+            force_all_finite = False
+        X, = check_input_arrays(X, force_all_finite=force_all_finite)
+        T0, T1 = check_input_arrays(T0, T1)
         self._check_fitted_dims(X)
         return super().effect_interval(X, T0=T0, T1=T1, alpha=alpha)
 
     effect_interval.__doc__ = LinearCateEstimator.effect_interval.__doc__
 
     def effect_inference(self, X=None, *, T0=0, T1=1):
-        X, T0, T1 = check_input_arrays(X, T0, T1)
+        if 'X' in self._gen_allowed_missing_vars():
+            force_all_finite = 'allow-nan'
+        else:
+            force_all_finite = False
+        X, = check_input_arrays(X, force_all_finite=force_all_finite)
+        T0, T1 = check_input_arrays(T0, T1)
         self._check_fitted_dims(X)
         return super().effect_inference(X, T0=T0, T1=T1)
 
     effect_inference.__doc__ = LinearCateEstimator.effect_inference.__doc__
 
-    def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
+    def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None, scoring=None):
         """
         Score the fitted CATE model on a new data set.
 
@@ -1055,6 +1077,9 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
             Weights for each samples
         groups: (n,) vector, optional
             All rows corresponding to the same group will be kept together during splitting.
+        scoring: str, optional
+            Name of an sklearn scoring function to use instead of the default. Supports f1_score, log_loss,
+            mean_absolute_error, mean_squared_error, r2_score, and roc_auc_score.
 
         Returns
         -------
@@ -1113,9 +1138,24 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
 
             accumulated_nuisances += nuisances
 
+        score_kwargs = {
+            'X': X,
+            'W': W,
+            'Z': Z,
+            'sample_weight': sample_weight,
+            'groups': groups
+        }
+        # If using an _rlearner, the scoring parameter can be passed along, if provided
+        if scoring is not None:
+            # Cannot import in header, or circular imports
+            from .dml._rlearner import _ModelFinal
+            if isinstance(self._ortho_learner_model_final, _ModelFinal):
+                score_kwargs['scoring'] = scoring
+            else:
+                raise NotImplementedError("scoring parameter only implemented for "
+                                          "_rlearner._ModelFinal")
         return self._ortho_learner_model_final.score(Y, T, nuisances=accumulated_nuisances,
-                                                     **filter_none_kwargs(X=X, W=W, Z=Z,
-                                                                          sample_weight=sample_weight, groups=groups))
+                                                     **filter_none_kwargs(**score_kwargs))
 
     @property
     def ortho_learner_model_final_(self):