automate the first stage model T and update DML notebook (#172)

heimengqi · Miruna Oprescu · commit 818c8320d691 · 2019-11-21T17:58:34.000-05:00
* automate the first stage model T and update DML notebook
* Changed model defaults in ORF and fixed a bug in WeightedKFold
diff --git a/econml/dml.py b/econml/dml.py
@@ -39,9 +39,9 @@
 from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose, inverse_onehot,
                         broadcast_unit_treatments, reshape_treatmentwise_effects,
                         StatsModelsLinearRegression, LassoCVWrapper, check_high_dimensional)
-from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso
+from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso, WeightedLassoCVWrapper
 from sklearn.model_selection import KFold, StratifiedKFold, check_cv
-from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
+from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegressionCV, ElasticNetCV
 from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder,
                                    FunctionTransformer)
 from sklearn.base import clone, TransformerMixin
@@ -52,6 +52,7 @@
                              DebiasedLassoCateEstimatorMixin)
 from .inference import StatsModelsInference
 from ._rlearner import _RLearner
+from .sklearn_extensions.model_selection import WeightedStratifiedKFold
 
 
 class DMLCateEstimator(_RLearner):
@@ -116,9 +117,15 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.  Must be a linear model for correctness when linear_first_stages is ``True``.
 
-    model_t: estimator
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods.  Must be a linear model for correctness when linear_first_stages is ``True``.
+    model_t: estimator or 'auto' (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods.  Must be a linear model for correctness
+        when linear_first_stages is ``True``;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     model_final: estimator
         The estimator for fitting the response residuals to the treatment residuals. Must implement
@@ -170,6 +177,12 @@ def __init__(self,
         # TODO: consider whether we need more care around stateful featurizers,
         #       since we clone it and fit separate copies
 
+        if model_t == 'auto':
+            if discrete_treatment:
+                model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
+            else:
+                model_t = WeightedLassoCVWrapper()
+
         class FirstStageWrapper:
             def __init__(self, model, is_Y):
                 self._model = clone(model, safe=False)
@@ -284,13 +297,19 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):
 
     Parameters
     ----------
-    model_y: estimator
+    model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
+        <econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
 
-    model_t: estimator
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods.
+    model_t: estimator or 'auto', optional (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     featurizer: transformer, optional (default is \
         :class:`PolynomialFeatures(degree=1, include_bias=True) <sklearn.preprocessing.PolynomialFeatures>`)
@@ -329,7 +348,7 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):
     """
 
     def __init__(self,
-                 model_y=LassoCV(), model_t=LassoCV(),
+                 model_y=WeightedLassoCVWrapper(), model_t='auto',
                  featurizer=PolynomialFeatures(degree=1, include_bias=True),
                  linear_first_stages=True,
                  discrete_treatment=False,
@@ -389,13 +408,20 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim
 
     Parameters
     ----------
-    model_y: estimator
+    model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
+        <econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
 
-    model_t: estimator
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods, and must be a linear model for correctness.
+    model_t: estimator or 'auto', optional (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods, and must be a
+        linear model for correctness;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     alpha: string | float, optional. Default='auto'.
         CATE L1 regularization applied through the debiased lasso in the final model.
@@ -446,7 +472,7 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim
     """
 
     def __init__(self,
-                 model_y=LassoCV(), model_t=LassoCV(),
+                 model_y=WeightedLassoCVWrapper(), model_t='auto',
                  alpha='auto',
                  max_iter=1000,
                  tol=1e-4,
@@ -511,13 +537,18 @@ class KernelDMLCateEstimator(DMLCateEstimator):
 
     Parameters
     ----------
-    model_y: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
+    model_y: estimator, optional (default is :class:`~econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper`)
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
 
-    model_t: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods.
+    model_t: estimator or 'auto', optional (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     dim: int, optional (default is 20)
         The number of random Fourier features to generate
@@ -551,7 +582,7 @@ class KernelDMLCateEstimator(DMLCateEstimator):
         by :mod:`np.random<numpy.random>`.
     """
 
-    def __init__(self, model_y=LassoCV(), model_t=LassoCV(),
+    def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto',
                  dim=20, bw=1.0, discrete_treatment=False, n_splits=2, random_state=None):
         class RandomFeatures(TransformerMixin):
             def __init__(self, random_state):
diff --git a/econml/ortho_forest.py b/econml/ortho_forest.py
@@ -33,9 +33,10 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder, LabelEncoder, PolynomialFeatures, FunctionTransformer
 from sklearn.utils import check_random_state, check_array, column_or_1d
+from .sklearn_extensions.linear_model import WeightedLassoCVWrapper
 from .cate_estimator import BaseCateEstimator, LinearCateEstimator, TreatmentExpansionMixin
 from .causal_tree import CausalTree
-from .utilities import reshape, reshape_Y_T, MAX_RAND_SEED, check_inputs, WeightedModelWrapper, cross_product
+from .utilities import reshape, reshape_Y_T, MAX_RAND_SEED, check_inputs, cross_product
 
 
 def _build_tree_in_parallel(Y, T, X, W,
@@ -399,8 +400,8 @@ def __init__(self,
                  subsample_ratio=0.7,
                  bootstrap=False,
                  lambda_reg=0.01,
-                 model_T=WeightedModelWrapper(LassoCV(cv=3)),
-                 model_Y=WeightedModelWrapper(LassoCV(cv=3)),
+                 model_T=WeightedLassoCVWrapper(cv=3),
+                 model_Y=WeightedLassoCVWrapper(cv=3),
                  model_T_final=None,
                  model_Y_final=None,
                  n_jobs=-1,
@@ -627,7 +628,7 @@ def __init__(self,
                  lambda_reg=0.01,
                  propensity_model=LogisticRegression(penalty='l1', solver='saga',
                                                      multi_class='auto'),  # saga solver supports l1
-                 model_Y=WeightedModelWrapper(LassoCV(cv=3)),
+                 model_Y=WeightedLassoCVWrapper(cv=3),
                  propensity_model_final=None,
                  model_Y_final=None,
                  n_jobs=-1,
diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py
@@ -9,6 +9,7 @@
 from collections.abc import Iterable
 from scipy.stats import norm
 from econml.sklearn_extensions.model_selection import WeightedKFold, WeightedStratifiedKFold
+from econml.utilities import ndim, shape, reshape
 from sklearn.linear_model import LassoCV, MultiTaskLassoCV, Lasso, MultiTaskLasso
 from sklearn.model_selection import KFold, StratifiedKFold
 from sklearn.model_selection._split import _CVIterableWrapper, CV_WARNING
@@ -1048,3 +1049,40 @@ def _set_attribute(self, attribute_name, condition=True, default=None):
         else:
             attribute_value = default
         setattr(self, attribute_name, attribute_value)
+
+
+class WeightedLassoCVWrapper:
+    """Helper class to wrap either WeightedLassoCV or WeightedMultiTaskLassoCV depending on the shape of the target."""
+
+    def __init__(self, *args, **kwargs):
+        self.args = args  # stored as-is and forwarded to the wrapped model's constructor in fit
+        self.kwargs = kwargs
+
+    def fit(self, X, y, sample_weight=None):
+        self.needs_unravel = False  # becomes True when a single-column 2D y is flattened below
+        if ndim(y) == 2 and shape(y)[1] > 1:  # more than one target column: use the multi-task model
+            self.model = WeightedMultiTaskLassoCV(*self.args, **self.kwargs)
+        else:
+            if ndim(y) == 2 and shape(y)[1] == 1:
+                y = np.ravel(y)  # fit on a 1D target; predict restores the column shape
+                self.needs_unravel = True
+            self.model = WeightedLassoCV(*self.args, **self.kwargs)
+        self.model.fit(X, y, sample_weight)
+        # copy the fitted model's intercept_ onto the wrapper
+        self.intercept_ = self.model.intercept_
+        # copy the fitted model's coef_ onto the wrapper
+        self.coef_ = self.model.coef_
+        # copy the fitted model's alpha_ onto the wrapper
+        self.alpha_ = self.model.alpha_
+        # copy the fitted model's alphas_ onto the wrapper
+        self.alphas_ = self.model.alphas_
+        # copy the fitted model's n_iter_ onto the wrapper
+        self.n_iter_ = self.model.n_iter_
+        return self
+
+    def predict(self, X):
+        predictions = self.model.predict(X)
+        return reshape(predictions, (-1, 1)) if self.needs_unravel else predictions  # (-1, 1) if y was raveled
+
+    def score(self, X, y, sample_weight=None):
+        return self.model.score(X, y, sample_weight)  # NOTE(review): y is passed through without fit's ravel
diff --git a/econml/sklearn_extensions/model_selection.py b/econml/sklearn_extensions/model_selection.py
@@ -30,7 +30,7 @@ def _split_weighted_sample(self, X, y, sample_weight, is_stratified=False):
             return self._get_folds_from_splits(splits, X.shape[0])
         # Record all splits in case the stratification by weight yields a worse partition
         all_splits.append(splits)
-        max_deviation = np.abs(weight_fracs - 1 / self.n_splits)
+        max_deviation = np.max(np.abs(weight_fracs - 1 / self.n_splits))
         max_deviations.append(max_deviation)
         # Reseed random generator and try again
         kfold_model.shuffle = True
@@ -57,7 +57,7 @@ def _split_weighted_sample(self, X, y, sample_weight, is_stratified=False):
         # Did not find a good split
         # Record the deviation for the weight-stratified split to compare with KFold splits
         all_splits.append(stratified_weight_splits)
-        max_deviation = np.abs(weight_fracs - 1 / self.n_splits)
+        max_deviation = np.max(np.abs(weight_fracs - 1 / self.n_splits))
         max_deviations.append(max_deviation)
     # Return most weight-balanced partition
     min_deviation_index = np.argmin(max_deviations)
diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py
@@ -76,7 +76,7 @@ def make_random(is_discrete, d):
                                 all_infs.append(BootstrapInference(1))
 
                             for est, multi, infs in [(LinearDMLCateEstimator(model_y=Lasso(),
-                                                                             model_t=model_t,
+                                                                             model_t='auto',
                                                                              discrete_treatment=is_discrete),
                                                       False,
                                                       all_infs),
@@ -149,8 +149,8 @@ def test_can_use_vectors(self):
     def test_can_use_sample_weights(self):
         """Test that we can pass sample weights to an estimator."""
         dmls = [
-            LinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer()),
-            SparseLinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer())
+            LinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer()),
+            SparseLinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer())
         ]
         for dml in dmls:
             dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]),
diff --git a/econml/tests/test_orf.py b/econml/tests/test_orf.py
@@ -10,8 +10,8 @@
 from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression, LogisticRegressionCV
 from sklearn.multioutput import MultiOutputRegressor
 from sklearn.pipeline import Pipeline
-from econml.ortho_forest import ContinuousTreatmentOrthoForest, DiscreteTreatmentOrthoForest, \
-    WeightedModelWrapper
+from econml.ortho_forest import ContinuousTreatmentOrthoForest, DiscreteTreatmentOrthoForest
+from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper
 
 
 class TestOrthoForest(unittest.TestCase):
@@ -53,8 +53,8 @@ def test_continuous_treatments(self):
         est = ContinuousTreatmentOrthoForest(n_jobs=4, n_trees=10,
                                              model_T=Lasso(),
                                              model_Y=Lasso(),
-                                             model_T_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
-                                             model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
+                                             model_T_final=WeightedLassoCVWrapper(),
+                                             model_Y_final=WeightedLassoCVWrapper())
         # Test inputs for continuous treatments
         # --> Check that one can pass in regular lists
         est.fit(list(Y), list(T), list(TestOrthoForest.X), list(TestOrthoForest.W))
@@ -69,8 +69,8 @@ def test_continuous_treatments(self):
                                              max_depth=50, subsample_ratio=0.30, bootstrap=False, n_jobs=4,
                                              model_T=Lasso(alpha=0.024),
                                              model_Y=Lasso(alpha=0.024),
-                                             model_T_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"),
-                                             model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
+                                             model_T_final=WeightedLassoCVWrapper(),
+                                             model_Y_final=WeightedLassoCVWrapper())
         est.fit(Y, T, TestOrthoForest.X, TestOrthoForest.W)
         self._test_te(est, TestOrthoForest.expected_exp_te, tol=0.5)
         # Test continuous treatments without controls
@@ -94,7 +94,7 @@ def test_binary_treatments(self):
         est = DiscreteTreatmentOrthoForest(n_trees=10, n_jobs=4,
                                            propensity_model=LogisticRegression(), model_Y=Lasso(),
                                            propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
-                                           model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
+                                           model_Y_final=WeightedLassoCVWrapper())
         # Test inputs for binary treatments
         # --> Check that one can pass in regular lists
         est.fit(list(Y), list(T), list(TestOrthoForest.X), list(TestOrthoForest.W))
@@ -118,7 +118,7 @@ def test_binary_treatments(self):
                                            propensity_model=LogisticRegression(C=1 / 0.024, penalty='l1'),
                                            model_Y=Lasso(alpha=0.024),
                                            propensity_model_final=LogisticRegressionCV(penalty='l1', solver='saga'),
-                                           model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
+                                           model_Y_final=WeightedLassoCVWrapper())
         est.fit(Y, T, TestOrthoForest.X, TestOrthoForest.W)
         self._test_te(est, TestOrthoForest.expected_exp_te, tol=0.7, treatment_type='discrete')
         # Test binary treatments without controls
@@ -146,9 +146,8 @@ def test_multiple_treatments(self):
                                              max_depth=50, subsample_ratio=0.30, bootstrap=False, n_jobs=4,
                                              model_T=MultiOutputRegressor(Lasso(alpha=0.024)),
                                              model_Y=Lasso(alpha=0.024),
-                                             model_T_final=WeightedModelWrapper(
-                                                 MultiOutputRegressor(LassoCV()), sample_type="weighted"),
-                                             model_Y_final=WeightedModelWrapper(LassoCV(), sample_type="weighted"))
+                                             model_T_final=WeightedLassoCVWrapper(),
+                                             model_Y_final=WeightedLassoCVWrapper())
         est.fit(Y, T, TestOrthoForest.X, TestOrthoForest.W)
         expected_te = np.array([TestOrthoForest.expected_exp_te, TestOrthoForest.expected_const_te]).T
         self._test_te(est, expected_te, tol=0.5, treatment_type='multi')
diff --git a/notebooks/Double Machine Learning Examples.ipynb b/notebooks/Double Machine Learning Examples.ipynb
diff --git a/notebooks/Orthogonal Random Forest Examples.ipynb b/notebooks/Orthogonal Random Forest Examples.ipynb