py-why · moprescu · Nov 21, 2019 · Nov 19, 2019 · Nov 19, 2019 · Nov 19, 2019
diff --git a/econml/dml.py b/econml/dml.py
@@ -39,9 +39,9 @@
 from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose, inverse_onehot,
                         broadcast_unit_treatments, reshape_treatmentwise_effects,
                         StatsModelsLinearRegression, LassoCVWrapper, check_high_dimensional)
-from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso
+from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso, WeightedLassoCVWrapper
 from sklearn.model_selection import KFold, StratifiedKFold, check_cv
-from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
+from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegressionCV, ElasticNetCV
 from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder,
                                    FunctionTransformer)
 from sklearn.base import clone, TransformerMixin
@@ -52,6 +52,7 @@
                              DebiasedLassoCateEstimatorMixin)
 from .inference import StatsModelsInference
 from ._rlearner import _RLearner
+from .sklearn_extensions.model_selection import WeightedStratifiedKFold
 
 
 class DMLCateEstimator(_RLearner):
@@ -116,9 +117,15 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.  Must be a linear model for correctness when linear_first_stages is ``True``.
 
-    model_t: estimator
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods.  Must be a linear model for correctness when linear_first_stages is ``True``.
+    model_t: estimator or 'auto' (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods.  Must be a linear model for correctness
+        when linear_first_stages is ``True``;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     model_final: estimator
         The estimator for fitting the response residuals to the treatment residuals. Must implement
@@ -170,6 +177,12 @@ def __init__(self,
         # TODO: consider whether we need more care around stateful featurizers,
         #       since we clone it and fit separate copies
 
+        if model_t == 'auto':
+            if discrete_treatment:
+                model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
+            else:
+                model_t = WeightedLassoCVWrapper()
+
         class FirstStageWrapper:
             def __init__(self, model, is_Y):
                 self._model = clone(model, safe=False)
@@ -284,13 +297,19 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):
 
     Parameters
     ----------
-    model_y: estimator
+    model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
+        <econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
 
-    model_t: estimator
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods.
+    model_t: estimator or 'auto', optional (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     featurizer: transformer, optional (default is \
         :class:`PolynomialFeatures(degree=1, include_bias=True) <sklearn.preprocessing.PolynomialFeatures>`)
@@ -329,7 +348,7 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):
     """
 
     def __init__(self,
-                 model_y=LassoCV(), model_t=LassoCV(),
+                 model_y=WeightedLassoCVWrapper(), model_t='auto',
                  featurizer=PolynomialFeatures(degree=1, include_bias=True),
                  linear_first_stages=True,
                  discrete_treatment=False,
@@ -389,13 +408,20 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim
 
     Parameters
     ----------
-    model_y: estimator
+    model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
+        <econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
 
-    model_t: estimator
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods, and must be a linear model for correctness.
+    model_t: estimator or 'auto', optional (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods, and must be a
+        linear model for correctness;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     alpha: string | float, optional. Default='auto'.
         CATE L1 regularization applied through the debiased lasso in the final model.
@@ -446,7 +472,7 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim
     """
 
     def __init__(self,
-                 model_y=LassoCV(), model_t=LassoCV(),
+                 model_y=WeightedLassoCVWrapper(), model_t='auto',
                  alpha='auto',
                  max_iter=1000,
                  tol=1e-4,
@@ -511,13 +537,18 @@ class KernelDMLCateEstimator(DMLCateEstimator):
 
     Parameters
     ----------
-    model_y: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
+    model_y: estimator, optional (default is :class:`~econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper`)
         The estimator for fitting the response to the features. Must implement
         `fit` and `predict` methods.
 
-    model_t: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
-        The estimator for fitting the treatment to the features. Must implement
-        `fit` and `predict` methods.
+    model_t: estimator or 'auto', optional (default is 'auto')
+        The estimator for fitting the treatment to the features.
+        If estimator, it must implement `fit` and `predict` methods;
+        If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
+        will be applied for discrete treatment,
+        and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
+        :class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
+        will be applied for continuous treatment.
 
     dim: int, optional (default is 20)
         The number of random Fourier features to generate
@@ -551,7 +582,7 @@ class KernelDMLCateEstimator(DMLCateEstimator):
         by :mod:`np.random<numpy.random>`.
     """
 
-    def __init__(self, model_y=LassoCV(), model_t=LassoCV(),
+    def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto',
                  dim=20, bw=1.0, discrete_treatment=False, n_splits=2, random_state=None):
         class RandomFeatures(TransformerMixin):
             def __init__(self, random_state):

diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py
@@ -9,6 +9,7 @@
 from collections.abc import Iterable
 from scipy.stats import norm
 from econml.sklearn_extensions.model_selection import WeightedKFold, WeightedStratifiedKFold
+from econml.utilities import ndim, shape, reshape
 from sklearn.linear_model import LassoCV, MultiTaskLassoCV, Lasso, MultiTaskLasso
 from sklearn.model_selection import KFold, StratifiedKFold
 from sklearn.model_selection._split import _CVIterableWrapper, CV_WARNING
@@ -1048,3 +1049,40 @@ def _set_attribute(self, attribute_name, condition=True, default=None):
         else:
             attribute_value = default
         setattr(self, attribute_name, attribute_value)
+
+
+class WeightedLassoCVWrapper:
+    """Dispatch to WeightedLassoCV or WeightedMultiTaskLassoCV according to the shape of the target."""
+
+    def __init__(self, *args, **kwargs):
+        # Stored untouched; forwarded to whichever model `fit` builds.
+        self.args, self.kwargs = args, kwargs
+
+    def fit(self, X, y, sample_weight=None):
+        """Build the model matching the shape of `y`, fit it, and mirror its fitted attributes."""
+        multi_target = ndim(y) == 2 and shape(y)[1] > 1
+        # A single-column 2D target is flattened for fitting; `predict` restores the column.
+        self.needs_unravel = bool(not multi_target and ndim(y) == 2 and shape(y)[1] == 1)
+        if multi_target:
+            estimator_cls = WeightedMultiTaskLassoCV
+        else:
+            estimator_cls = WeightedLassoCV
+            if self.needs_unravel:
+                y = np.ravel(y)
+        self.model = estimator_cls(*self.args, **self.kwargs)
+        self.model.fit(X, y, sample_weight)
+        # Expose the fitted attributes of the wrapped model on the wrapper itself.
+        for fitted_attr in ("intercept_", "coef_", "alpha_", "alphas_", "n_iter_"):
+            setattr(self, fitted_attr, getattr(self.model, fitted_attr))
+        return self
+
+    def predict(self, X):
+        """Predict with the wrapped model, returning a column when the fit target was one."""
+        fitted_values = self.model.predict(X)
+        if self.needs_unravel:
+            return reshape(fitted_values, (-1, 1))
+        return fitted_values
+
+    def score(self, X, y, sample_weight=None):
+        """Delegate scoring to the wrapped model."""
+        return self.model.score(X, y, sample_weight)
diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py
@@ -76,7 +76,7 @@ def make_random(is_discrete, d):
                                 all_infs.append(BootstrapInference(1))
 
                             for est, multi, infs in [(LinearDMLCateEstimator(model_y=Lasso(),
-                                                                             model_t=model_t,
+                                                                             model_t='auto',
                                                                              discrete_treatment=is_discrete),
                                                       False,
                                                       all_infs),
@@ -149,8 +149,8 @@ def test_can_use_vectors(self):
     def test_can_use_sample_weights(self):
         """Test that we can pass sample weights to an estimator."""
         dmls = [
-            LinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer()),
-            SparseLinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer())
+            LinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer()),
+            SparseLinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer())
         ]
         for dml in dmls:
             dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]),

diff --git a/notebooks/Double Machine Learning Examples.ipynb b/notebooks/Double Machine Learning Examples.ipynb