py-why
diff --git a/‎econml/cate_estimator.py
+40-10 b/‎econml/cate_estimator.py
+40-10
@@ -10,7 +10,7 @@
 from warnings import warn
 from .bootstrap import BootstrapEstimator
 from .inference import BootstrapInference
-from .utilities import tensordot, ndim, reshape, shape
+from .utilities import tensordot, ndim, reshape, shape, parse_final_model_params
 from .inference import StatsModelsInference, StatsModelsInferenceDiscrete, LinearModelFinalInference,\
     LinearModelFinalInferenceDiscrete
 
@@ -383,7 +383,20 @@ def effect(self, X=None, *, T0=0, T1=1):
 
 
 class LinearModelFinalCateEstimatorMixin(BaseCateEstimator):
-    """Base class for models where the final stage is a linear model."""
+    """
+    Base class for models where the final stage is a linear model.
+
+    Subclasses must expose a ``model_final`` attribute containing the model's
+    final stage model.
+
+    Attributes
+    ----------
+    bias_part_of_coef: bool
+        Whether the CATE model's intercept is contained in the final model's ``coef_`` rather
+        than as a separate ``intercept_``
+    """
+
+    bias_part_of_coef = False
 
     @property
     def coef_(self):
@@ -392,15 +405,17 @@ def coef_(self):
 
         Returns
         -------
-        coef: (n_x * n_t,) or (n_y, n_x * n_t) array like
+        coef: (n_x,) or (n_t, n_x) or (n_y, n_t, n_x) array like
             Where n_x is the number of features that enter the final model (either the
             dimension of X or the dimension of featurizer.fit_transform(X) if the CATE
             estimator has a featurizer.), n_t is the number of treatments, n_y is
-            the number of outcomes. The coefficient is flattened in a manner that
-            the first block of n_x columns are the coefficients associated with treatment 0,
-            the next n_x columns are the coefficients associated with treatment 1 etc.
+            the number of outcomes. Dimensions are omitted if the original input was
+            a vector and not a 2D array. For binary treatment the n_t dimension is
+            also omitted.
         """
-        return self.model_final.coef_
+        return parse_final_model_params(self.model_final.coef_, self.model_final.intercept_,
+                                        self._d_y, self._d_t, self._d_t_in, self.bias_part_of_coef,
+                                        self.fit_cate_intercept)[0]
 
     @property
     def intercept_(self):
@@ -409,9 +424,17 @@ def intercept_(self):
 
         Returns
         -------
-        intercept: float or (n_y,) array like
+        intercept: float or (n_y,) or (n_y, n_t) array like
+            Where n_t is the number of treatments, n_y is
+            the number of outcomes. Dimensions are omitted if the original input was
+            a vector and not a 2D array. For binary treatment the n_t dimension is
+            also omitted.
         """
-        return self.model_final.intercept_
+        if not self.fit_cate_intercept:
+            raise AttributeError("No intercept was fitted!")
+        return parse_final_model_params(self.model_final.coef_, self.model_final.intercept_,
+                                        self._d_y, self._d_t, self._d_t_in, self.bias_part_of_coef,
+                                        self.fit_cate_intercept)[1]
 
     @BaseCateEstimator._defer_to_inference
     def coef__interval(self, *, alpha=0.1):
@@ -479,6 +502,12 @@ def _get_inference_options(self):
 
 class LinearModelFinalCateEstimatorDiscreteMixin(BaseCateEstimator):
     # TODO Share some logic with non-discrete version
+    """
+    Base class for models where the final stage is a linear model.
+
+    Subclasses must expose a ``fitted_models_final`` attribute
+    returning an array of the fitted models for each non-control treatment
+    """
 
     def coef_(self, T):
         """ The coefficients in the linear model of the constant marginal treatment
@@ -498,7 +527,8 @@ def coef_(self, T):
         """
         _, T = self._expand_treatments(None, T)
         ind = (T @ np.arange(T.shape[1])).astype(int)[0]
-        return self.fitted_models_final[ind].coef_
+        all_coefs = self.fitted_models_final[ind].coef_
+        return all_coefs
 
     def intercept_(self, T):
         """ The intercept in the linear model of the constant marginal treatment