Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

automate the first stage model T and update DML notebook #172

Merged
merged 11 commits into from
Nov 21, 2019
71 changes: 51 additions & 20 deletions econml/dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose, inverse_onehot,
broadcast_unit_treatments, reshape_treatmentwise_effects,
StatsModelsLinearRegression, LassoCVWrapper, check_high_dimensional)
from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso
from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso, WeightedLassoCVWrapper
from sklearn.model_selection import KFold, StratifiedKFold, check_cv
from sklearn.linear_model import LinearRegression, LassoCV, ElasticNetCV
from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegressionCV, ElasticNetCV
from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder,
FunctionTransformer)
from sklearn.base import clone, TransformerMixin
Expand All @@ -52,6 +52,7 @@
DebiasedLassoCateEstimatorMixin)
from .inference import StatsModelsInference
from ._rlearner import _RLearner
from .sklearn_extensions.model_selection import WeightedStratifiedKFold


class DMLCateEstimator(_RLearner):
Expand Down Expand Up @@ -116,9 +117,15 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods. Must be a linear model for correctness when linear_first_stages is ``True``.

model_t: estimator
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods. Must be a linear model for correctness when linear_first_stages is ``True``.
model_t: estimator or 'auto' (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods. Must be a linear model for correctness
when linear_first_stages is ``True``;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.

model_final: estimator
The estimator for fitting the response residuals to the treatment residuals. Must implement
Expand Down Expand Up @@ -170,6 +177,12 @@ def __init__(self,
# TODO: consider whether we need more care around stateful featurizers,
# since we clone it and fit separate copies

if model_t == 'auto':
if discrete_treatment:
model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold())
else:
model_t = WeightedLassoCVWrapper()

class FirstStageWrapper:
def __init__(self, model, is_Y):
self._model = clone(model, safe=False)
Expand Down Expand Up @@ -284,13 +297,19 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):

Parameters
----------
model_y: estimator
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.

model_t: estimator
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.

featurizer: transformer, optional (default is \
:class:`PolynomialFeatures(degree=1, include_bias=True) <sklearn.preprocessing.PolynomialFeatures>`)
Expand Down Expand Up @@ -329,7 +348,7 @@ class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator):
"""

def __init__(self,
model_y=LassoCV(), model_t=LassoCV(),
model_y=WeightedLassoCVWrapper(), model_t='auto',
featurizer=PolynomialFeatures(degree=1, include_bias=True),
linear_first_stages=True,
discrete_treatment=False,
Expand Down Expand Up @@ -389,13 +408,20 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim

Parameters
----------
model_y: estimator
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.

model_t: estimator
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods, and must be a linear model for correctness.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods, and must be a
linear model for correctness;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.

alpha: string | float, optional. Default='auto'.
CATE L1 regularization applied through the debiased lasso in the final model.
Expand Down Expand Up @@ -446,7 +472,7 @@ class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstim
"""

def __init__(self,
model_y=LassoCV(), model_t=LassoCV(),
model_y=WeightedLassoCVWrapper(), model_t='auto',
alpha='auto',
max_iter=1000,
tol=1e-4,
Expand Down Expand Up @@ -511,13 +537,18 @@ class KernelDMLCateEstimator(DMLCateEstimator):

Parameters
----------
model_y: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
model_y: estimator, optional (default is :class:`WeightedLassoCVWrapper()
<econml.sklearn_extensions.linear_model.WeightedLassoCVWrapper>`)
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.

model_t: estimator, optional (default is :class:`LassoCV() <sklearn.linear_model.LassoCV>`)
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods;
If 'auto', :class:`LogisticRegressionCV() <sklearn.linear_model.LogisticRegressionCV>`
will be applied for discrete treatment,
and :class:`WeightedLassoCV() <econml.sklearn_extensions.linear_model.WeightedLassoCV>`/
:class:`WeightedMultiTaskLassoCV() <econml.sklearn_extensions.linear_model.WeightedMultiTaskLassoCV>`
will be applied for continuous treatment.

dim: int, optional (default is 20)
The number of random Fourier features to generate
Expand Down Expand Up @@ -551,7 +582,7 @@ class KernelDMLCateEstimator(DMLCateEstimator):
by :mod:`np.random<numpy.random>`.
"""

def __init__(self, model_y=LassoCV(), model_t=LassoCV(),
def __init__(self, model_y=WeightedLassoCVWrapper(), model_t='auto',
dim=20, bw=1.0, discrete_treatment=False, n_splits=2, random_state=None):
class RandomFeatures(TransformerMixin):
def __init__(self, random_state):
Expand Down
38 changes: 38 additions & 0 deletions econml/sklearn_extensions/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from collections.abc import Iterable
from scipy.stats import norm
from econml.sklearn_extensions.model_selection import WeightedKFold, WeightedStratifiedKFold
from econml.utilities import ndim, shape, reshape
from sklearn.linear_model import LassoCV, MultiTaskLassoCV, Lasso, MultiTaskLasso
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection._split import _CVIterableWrapper, CV_WARNING
Expand Down Expand Up @@ -1048,3 +1049,40 @@ def _set_attribute(self, attribute_name, condition=True, default=None):
else:
attribute_value = default
setattr(self, attribute_name, attribute_value)


class WeightedLassoCVWrapper:
    """Dispatch to WeightedLassoCV or WeightedMultiTaskLassoCV based on the target's shape.

    The constructor only records its arguments; the underlying estimator is
    built inside `fit`, once the shape of the target is known, and receives
    those recorded arguments unchanged.
    """

    def __init__(self, *args, **kwargs):
        # Stored verbatim and forwarded to whichever estimator `fit` selects.
        self.args = args
        self.kwargs = kwargs

    def fit(self, X, y, sample_weight=None):
        """Build the estimator matching the shape of `y`, fit it, and return `self`.

        A two-dimensional `y` with more than one column selects
        WeightedMultiTaskLassoCV; anything else selects WeightedLassoCV.
        A two-dimensional `y` with exactly one column is flattened with
        `np.ravel` before fitting, and that fact is remembered so `predict`
        can reshape its output back to a column.
        """
        self.needs_unravel = False
        target_is_2d = ndim(y) == 2
        if target_is_2d and shape(y)[1] > 1:
            estimator_cls = WeightedMultiTaskLassoCV
        else:
            if target_is_2d and shape(y)[1] == 1:
                y = np.ravel(y)
                self.needs_unravel = True
            estimator_cls = WeightedLassoCV
        self.model = estimator_cls(*self.args, **self.kwargs)
        self.model.fit(X, y, sample_weight)
        # Mirror the fitted attributes of the wrapped model onto the wrapper.
        for fitted_attr in ("intercept_", "coef_", "alpha_", "alphas_", "n_iter_"):
            setattr(self, fitted_attr, getattr(self.model, fitted_attr))
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for `X`.

        When `fit` flattened a single-column target, the predictions are
        passed through `reshape(..., (-1, 1))`; otherwise they are returned
        as produced by the wrapped model.
        """
        fitted_values = self.model.predict(X)
        if self.needs_unravel:
            return reshape(fitted_values, (-1, 1))
        return fitted_values

    def score(self, X, y, sample_weight=None):
        """Delegate scoring to the wrapped model and return its result."""
        return self.model.score(X, y, sample_weight)
6 changes: 3 additions & 3 deletions econml/tests/test_dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def make_random(is_discrete, d):
all_infs.append(BootstrapInference(1))

for est, multi, infs in [(LinearDMLCateEstimator(model_y=Lasso(),
model_t=model_t,
model_t='auto',
discrete_treatment=is_discrete),
False,
all_infs),
Expand Down Expand Up @@ -149,8 +149,8 @@ def test_can_use_vectors(self):
def test_can_use_sample_weights(self):
"""Test that we can pass sample weights to an estimator."""
dmls = [
LinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer()),
SparseLinearDMLCateEstimator(LinearRegression(), LinearRegression(), featurizer=FunctionTransformer())
LinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer()),
SparseLinearDMLCateEstimator(LinearRegression(), 'auto', featurizer=FunctionTransformer())
]
for dml in dmls:
dml.fit(np.array([1, 2, 3, 1, 2, 3]), np.array([1, 2, 3, 1, 2, 3]),
Expand Down
46 changes: 23 additions & 23 deletions notebooks/Double Machine Learning Examples.ipynb

Large diffs are not rendered by default.