Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Remove deprecated read_*(date_parser=) #58624

Merged
merged 3 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,19 +279,6 @@ parse_dates : boolean or list of ints or names or list of lists or dict, default
keep_date_col : boolean, default ``False``
If ``True`` and parse_dates specifies combining multiple columns then keep the
original columns.
date_parser : function, default ``None``
Function to use for converting a sequence of string columns to an array of
datetime instances. The default uses ``dateutil.parser.parser`` to do the
conversion. pandas will try to call date_parser in three different ways,
advancing to the next if an exception occurs: 1) Pass one or more arrays (as
defined by parse_dates) as arguments; 2) concatenate (row-wise) the string
values from the columns defined by parse_dates into a single array and pass
that; and 3) call date_parser once for each row using one or more strings
(corresponding to the columns defined by parse_dates) as arguments.

.. deprecated:: 2.0.0
Use ``date_format`` instead, or read in as ``object`` and then apply
:func:`to_datetime` as-needed.
date_format : str or dict of column -> format, default ``None``
If used in conjunction with ``parse_dates``, will parse dates according to this
format. For anything more complex,
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ Removal of prior version deprecations/changes
- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`)
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`)
- Enforced deprecation of ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock`` (:issue:`58467`)
- Enforced deprecation of ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`)
- Enforced deprecation of ``quantile`` keyword in :meth:`.Rolling.quantile` and :meth:`.Expanding.quantile`, renamed to ``q`` instead. (:issue:`52550`)
- Enforced deprecation of argument ``infer_datetime_format`` in :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
- Enforced deprecation of non-standard (``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`)
Expand Down
38 changes: 0 additions & 38 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,20 +240,6 @@
For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``.

Note: A fast-path exists for iso8601-formatted dates.
date_parser : function, optional
Function to use for converting a sequence of string columns to an array of
datetime instances. The default uses ``dateutil.parser.parser`` to do the
conversion. Pandas will try to call `date_parser` in three different ways,
advancing to the next if an exception occurs: 1) Pass one or more arrays
(as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
string values from the columns defined by `parse_dates` into a single array
and pass that; and 3) call `date_parser` once for each row using one or
more strings (corresponding to the columns defined by `parse_dates`) as
arguments.

.. deprecated:: 2.0.0
Use ``date_format`` instead, or read in as ``object`` and then apply
:func:`to_datetime` as-needed.
date_format : str or dict of column -> format, default ``None``
If used in conjunction with ``parse_dates``, will parse dates according to this
format. For anything more complex,
Expand Down Expand Up @@ -398,7 +384,6 @@ def read_excel(
na_filter: bool = ...,
verbose: bool = ...,
parse_dates: list | dict | bool = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: dict[Hashable, str] | str | None = ...,
thousands: str | None = ...,
decimal: str = ...,
Expand Down Expand Up @@ -436,7 +421,6 @@ def read_excel(
na_filter: bool = ...,
verbose: bool = ...,
parse_dates: list | dict | bool = ...,
date_parser: Callable | lib.NoDefault = ...,
date_format: dict[Hashable, str] | str | None = ...,
thousands: str | None = ...,
decimal: str = ...,
Expand Down Expand Up @@ -474,7 +458,6 @@ def read_excel(
na_filter: bool = True,
verbose: bool = False,
parse_dates: list | dict | bool = False,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: dict[Hashable, str] | str | None = None,
thousands: str | None = None,
decimal: str = ".",
Expand Down Expand Up @@ -521,7 +504,6 @@ def read_excel(
na_filter=na_filter,
verbose=verbose,
parse_dates=parse_dates,
date_parser=date_parser,
date_format=date_format,
thousands=thousands,
decimal=decimal,
Expand Down Expand Up @@ -726,7 +708,6 @@ def parse(
na_values=None,
verbose: bool = False,
parse_dates: list | dict | bool = False,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: dict[Hashable, str] | str | None = None,
thousands: str | None = None,
decimal: str = ".",
Expand Down Expand Up @@ -795,7 +776,6 @@ def parse(
false_values=false_values,
na_values=na_values,
parse_dates=parse_dates,
date_parser=date_parser,
date_format=date_format,
thousands=thousands,
decimal=decimal,
Expand Down Expand Up @@ -829,7 +809,6 @@ def _parse_sheet(
false_values: Iterable[Hashable] | None = None,
na_values=None,
parse_dates: list | dict | bool = False,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: dict[Hashable, str] | str | None = None,
thousands: str | None = None,
decimal: str = ".",
Expand Down Expand Up @@ -942,7 +921,6 @@ def _parse_sheet(
na_values=na_values,
skip_blank_lines=False, # GH 39808
parse_dates=parse_dates,
date_parser=date_parser,
date_format=date_format,
thousands=thousands,
decimal=decimal,
Expand Down Expand Up @@ -1648,7 +1626,6 @@ def parse(
nrows: int | None = None,
na_values=None,
parse_dates: list | dict | bool = False,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: str | dict[Hashable, str] | None = None,
thousands: str | None = None,
comment: str | None = None,
Expand Down Expand Up @@ -1737,20 +1714,6 @@ def parse(
``pd.to_datetime`` after ``pd.read_excel``.

Note: A fast-path exists for iso8601-formatted dates.
date_parser : function, optional
Function to use for converting a sequence of string columns to an array of
datetime instances. The default uses ``dateutil.parser.parser`` to do the
conversion. Pandas will try to call `date_parser` in three different ways,
advancing to the next if an exception occurs: 1) Pass one or more arrays
(as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
string values from the columns defined by `parse_dates` into a single array
and pass that; and 3) call `date_parser` once for each row using one or
more strings (corresponding to the columns defined by `parse_dates`) as
arguments.

.. deprecated:: 2.0.0
Use ``date_format`` instead, or read in as ``object`` and then apply
:func:`to_datetime` as-needed.
date_format : str or dict of column -> format, default ``None``
If used in conjunction with ``parse_dates``, will parse dates
according to this format. For anything more complex,
Expand Down Expand Up @@ -1810,7 +1773,6 @@ def parse(
nrows=nrows,
na_values=na_values,
parse_dates=parse_dates,
date_parser=date_parser,
date_format=date_format,
thousands=thousands,
comment=comment,
Expand Down
95 changes: 23 additions & 72 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from collections import defaultdict
from copy import copy
import csv
import datetime
from enum import Enum
import itertools
from typing import (
Expand Down Expand Up @@ -127,7 +126,6 @@ def __init__(self, kwds) -> None:

self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
self._parse_date_cols: Iterable = []
self.date_parser = kwds.pop("date_parser", lib.no_default)
self.date_format = kwds.pop("date_format", None)
self.dayfirst = kwds.pop("dayfirst", False)
self.keep_date_col = kwds.pop("keep_date_col", False)
Expand All @@ -146,7 +144,6 @@ def __init__(self, kwds) -> None:
self.cache_dates = kwds.pop("cache_dates", True)

self._date_conv = _make_date_converter(
date_parser=self.date_parser,
date_format=self.date_format,
dayfirst=self.dayfirst,
cache_dates=self.cache_dates,
Expand Down Expand Up @@ -1120,84 +1117,39 @@ def _get_empty_meta(


def _make_date_converter(
date_parser=lib.no_default,
dayfirst: bool = False,
cache_dates: bool = True,
date_format: dict[Hashable, str] | str | None = None,
):
if date_parser is not lib.no_default:
warnings.warn(
"The argument 'date_parser' is deprecated and will "
"be removed in a future version. "
"Please use 'date_format' instead, or read your data in as 'object' dtype "
"and then call 'to_datetime'.",
FutureWarning,
stacklevel=find_stack_level(),
)
if date_parser is not lib.no_default and date_format is not None:
raise TypeError("Cannot use both 'date_parser' and 'date_format'")

def unpack_if_single_element(arg):
# NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
if isinstance(arg, np.ndarray) and arg.ndim == 1 and len(arg) == 1:
return arg[0]
return arg

def converter(*date_cols, col: Hashable):
if len(date_cols) == 1 and date_cols[0].dtype.kind in "Mm":
return date_cols[0]
# TODO: Can we remove concat_date_cols after deprecation of parsing
# multiple cols?
strs = parsing.concat_date_cols(date_cols)
date_fmt = (
date_format.get(col) if isinstance(date_format, dict) else date_format
)

if date_parser is lib.no_default:
strs = parsing.concat_date_cols(date_cols)
date_fmt = (
date_format.get(col) if isinstance(date_format, dict) else date_format
str_objs = ensure_object(strs)
try:
result = tools.to_datetime(
str_objs,
format=date_fmt,
utc=False,
dayfirst=dayfirst,
cache=cache_dates,
)
except (ValueError, TypeError):
# test_usecols_with_parse_dates4
# test_multi_index_parse_dates
return str_objs

str_objs = ensure_object(strs)
try:
result = tools.to_datetime(
str_objs,
format=date_fmt,
utc=False,
dayfirst=dayfirst,
cache=cache_dates,
)
except (ValueError, TypeError):
# test_usecols_with_parse_dates4
return str_objs

if isinstance(result, DatetimeIndex):
arr = result.to_numpy()
arr.flags.writeable = True
return arr
return result._values
else:
try:
pre_parsed = date_parser(
*(unpack_if_single_element(arg) for arg in date_cols)
)
try:
result = tools.to_datetime(
pre_parsed,
cache=cache_dates,
)
except (ValueError, TypeError):
# test_read_csv_with_custom_date_parser
result = pre_parsed
if isinstance(result, datetime.datetime):
raise Exception("scalar parser")
return result
except Exception:
# e.g. test_datetime_fractional_seconds
pre_parsed = parsing.try_parse_dates(
parsing.concat_date_cols(date_cols),
parser=date_parser,
)
try:
return tools.to_datetime(pre_parsed)
except (ValueError, TypeError):
# TODO: not reached in tests 2023-10-27; needed?
return pre_parsed
if isinstance(result, DatetimeIndex):
arr = result.to_numpy()
arr.flags.writeable = True
return arr
return result._values

return converter

Expand Down Expand Up @@ -1230,7 +1182,6 @@ def converter(*date_cols, col: Hashable):
"parse_dates": False,
"keep_date_col": False,
"dayfirst": False,
"date_parser": lib.no_default,
"date_format": None,
"usecols": None,
# 'iterator': False,
Expand Down
30 changes: 3 additions & 27 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
skip_blank_lines: bool
parse_dates: bool | Sequence[Hashable] | None
keep_date_col: bool | lib.NoDefault
date_parser: Callable | lib.NoDefault
date_format: str | dict[Hashable, str] | None
dayfirst: bool
cache_dates: bool
Expand Down Expand Up @@ -306,8 +305,7 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
The behavior is as follows:

* ``bool``. If ``True`` -> try parsing the index.
* ``None``. Behaves like ``True`` if ``date_parser`` or ``date_format`` are
specified.
* ``None``. Behaves like ``True`` if ``date_format`` is specified.
* ``list`` of ``int`` or names. e.g. If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3
each as a separate date column.
* ``list`` of ``list``. e.g. If ``[[1, 3]]`` -> combine columns 1 and 3 and parse
Expand All @@ -325,20 +323,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
keep_date_col : bool, default False
If ``True`` and ``parse_dates`` specifies combining multiple columns then
keep the original columns.
date_parser : Callable, optional
Function to use for converting a sequence of string columns to an array of
``datetime`` instances. The default uses ``dateutil.parser.parser`` to do the
conversion. pandas will try to call ``date_parser`` in three different ways,
advancing to the next if an exception occurs: 1) Pass one or more arrays
(as defined by ``parse_dates``) as arguments; 2) concatenate (row-wise) the
string values from the columns defined by ``parse_dates`` into a single array
and pass that; and 3) call ``date_parser`` once for each row using one or
more strings (corresponding to the columns defined by ``parse_dates``) as
arguments.

.. deprecated:: 2.0.0
Use ``date_format`` instead, or read in as ``object`` and then apply
:func:`~pandas.to_datetime` as-needed.
date_format : str or dict of column -> format, optional
Format to use for parsing dates when used in conjunction with ``parse_dates``.
The strftime format string used to parse the dates, e.g. :const:`"%d/%m/%Y"`. See
Expand Down Expand Up @@ -624,13 +608,10 @@ def _read(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
) -> DataFrame | TextFileReader:
"""Generic reader of line files."""
# if we pass a date_parser and parse_dates=False, we should not parse the
# if we pass a date_format and parse_dates=False, we should not parse the
# dates GH#44366
if kwds.get("parse_dates", None) is None:
if (
kwds.get("date_parser", lib.no_default) is lib.no_default
and kwds.get("date_format", None) is None
):
if kwds.get("date_format", None) is None:
kwds["parse_dates"] = False
else:
kwds["parse_dates"] = True
Expand Down Expand Up @@ -749,7 +730,6 @@ def read_csv(
# Datetime Handling
parse_dates: bool | Sequence[Hashable] | None = None,
keep_date_col: bool | lib.NoDefault = lib.no_default,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: str | dict[Hashable, str] | None = None,
dayfirst: bool = False,
cache_dates: bool = True,
Expand Down Expand Up @@ -928,7 +908,6 @@ def read_table(
# Datetime Handling
parse_dates: bool | Sequence[Hashable] | None = None,
keep_date_col: bool | lib.NoDefault = lib.no_default,
date_parser: Callable | lib.NoDefault = lib.no_default,
date_format: str | dict[Hashable, str] | None = None,
dayfirst: bool = False,
cache_dates: bool = True,
Expand Down Expand Up @@ -1638,9 +1617,6 @@ def TextParser(*args, **kwds) -> TextFileReader:
Comment out remainder of line
parse_dates : bool, default False
keep_date_col : bool, default False
date_parser : function, optional

.. deprecated:: 2.0.0
date_format : str or dict of column -> format, default ``None``

.. versionadded:: 2.0.0
Expand Down
13 changes: 0 additions & 13 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,19 +295,6 @@ def test_read_excel_parse_dates(self, tmp_excel):
res = pd.read_excel(tmp_excel, parse_dates=["date_strings"], index_col=0)
tm.assert_frame_equal(df, res)

date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y")
with tm.assert_produces_warning(
FutureWarning,
match="use 'date_format' instead",
raise_on_extra_warnings=False,
):
res = pd.read_excel(
tmp_excel,
parse_dates=["date_strings"],
date_parser=date_parser,
index_col=0,
)
tm.assert_frame_equal(df, res)
res = pd.read_excel(
tmp_excel, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0
)
Expand Down
Loading