Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Default of observed=False in DataFrame.pivot_table #56237

Merged
merged 6 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/user_guide/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ Pivot tables:

raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
pd.pivot_table(df, values="values", index=["A", "B"])
pd.pivot_table(df, values="values", index=["A", "B"], observed=False)

Data munging
------------
Expand Down
31 changes: 26 additions & 5 deletions doc/source/whatsnew/v0.23.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,33 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna``
df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
df

.. ipython:: python

pd.pivot_table(df, values='values', index=['A', 'B'],
dropna=True)
pd.pivot_table(df, values='values', index=['A', 'B'],
dropna=False)
.. code-block:: ipython

In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True)

Out[1]:
values
A B
a c 1.0
d 2.0
b c 3.0
d 4.0

In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False)

Out[2]:
values
A B
a c 1.0
d 2.0
y NaN
b c 3.0
d 4.0
y NaN
z c NaN
d NaN
y NaN


.. _whatsnew_0230.enhancements.window_raw:
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ Other Deprecations
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; the default will change to ``True`` in a future version (:issue:`56236`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9297,6 +9297,11 @@ def pivot(
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.

.. deprecated:: 2.2.0

The default value of ``False`` is deprecated and will change to
``True`` in a future version of pandas.

sort : bool, default True
Specifies if the result should be sorted.

Expand Down Expand Up @@ -9407,7 +9412,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Level = "All",
observed: bool = False,
observed: bool | lib.NoDefault = lib.no_default,
sort: bool = True,
) -> DataFrame:
from pandas.core.reshape.pivot import pivot_table
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Literal,
cast,
)
import warnings

import numpy as np

Expand All @@ -18,6 +19,7 @@
Appender,
Substitution,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -68,7 +70,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Hashable = "All",
observed: bool = False,
observed: bool | lib.NoDefault = lib.no_default,
sort: bool = True,
) -> DataFrame:
index = _convert_by(index)
Expand Down Expand Up @@ -123,7 +125,7 @@ def __internal_pivot_table(
margins: bool,
dropna: bool,
margins_name: Hashable,
observed: bool,
observed: bool | lib.NoDefault,
sort: bool,
) -> DataFrame:
"""
Expand Down Expand Up @@ -166,7 +168,18 @@ def __internal_pivot_table(
pass
values = list(values)

grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
observed_bool = False if observed is lib.no_default else observed
grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
if observed is lib.no_default and any(
ping._passed_categorical for ping in grouped.grouper.groupings
):
warnings.warn(
"The default value of observed=False is deprecated and will change "
"to observed=True in a future version of pandas. Specify "
"observed=False to silence this warning and retain the current behavior",
category=FutureWarning,
stacklevel=find_stack_level(),
)
agged = grouped.agg(aggfunc)

if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
Expand Down Expand Up @@ -719,6 +732,7 @@ def crosstab(
margins=margins,
margins_name=margins_name,
dropna=dropna,
observed=False,
**kwargs, # type: ignore[arg-type]
)

Expand Down
41 changes: 29 additions & 12 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,9 @@ def test_pivot_table_categorical(self):
["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
)
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)

exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
Expand All @@ -220,7 +222,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
)

df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
expected_columns = Series(["a", "b", "c"], name="A")
expected_columns = expected_columns.astype(
CategoricalDtype(categories, ordered=False)
Expand Down Expand Up @@ -250,7 +254,9 @@ def test_pivot_with_non_observable_dropna(self, dropna):
}
)

result = df.pivot_table(index="A", values="B", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
if dropna:
values = [2.0, 3.0]
codes = [0, 1]
Expand Down Expand Up @@ -283,7 +289,9 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
}
)

result = df.pivot_table(index="A", values="B", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame(
{"B": [2.0, 3.0, 0.0]},
index=Index(
Expand All @@ -301,7 +309,10 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
def test_pivot_with_interval_index(self, interval_values, dropna):
# GH 25814
df = DataFrame({"A": interval_values, "B": 1})
result = df.pivot_table(index="A", values="B", dropna=dropna)

msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame(
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
)
Expand All @@ -322,9 +333,11 @@ def test_pivot_with_interval_index_margins(self):
}
)

pivot_tab = pivot_table(
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
pivot_tab = pivot_table(
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
)

result = pivot_tab["All"]
expected = Series(
Expand Down Expand Up @@ -1827,7 +1840,9 @@ def test_categorical_margins_category(self, observed):

df.y = df.y.astype("category")
df.z = df.z.astype("category")
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
tm.assert_frame_equal(table, expected)

def test_margins_casted_to_float(self):
Expand Down Expand Up @@ -1889,9 +1904,11 @@ def test_categorical_aggfunc(self, observed):
{"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
)
df["C1"] = df["C1"].astype("category")
result = df.pivot_table(
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
)

expected_index = pd.CategoricalIndex(
["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"
Expand Down