DEP: Enforce deprecations of read_csv keywords #48849

Merged: 7 commits, Nov 3, 2022
69 changes: 0 additions & 69 deletions doc/source/user_guide/io.rst
@@ -154,25 +154,6 @@ usecols : list-like or callable, default ``None``
Using this parameter results in much faster parsing time and lower memory usage
when using the c engine. The Python engine loads the data first before deciding
which columns to drop.
squeeze : boolean, default ``False``
If the parsed data only contains one column then return a ``Series``.

.. deprecated:: 1.4.0
Append ``.squeeze("columns")`` to the call to ``{func_name}`` to squeeze
the data.
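    The migration described above can be sketched with a minimal example (the
    sample data is illustrative):

    ```python
    from io import StringIO

    import pandas as pd

    data = "col1\n1\n2\n3"

    # Old (removed): pd.read_csv(StringIO(data), squeeze=True)
    # New: squeeze the single-column DataFrame explicitly after parsing.
    ser = pd.read_csv(StringIO(data)).squeeze("columns")

    print(type(ser).__name__)  # Series
    ```

    ``.squeeze("columns")`` is a no-op on a DataFrame with more than one
    column, so the call is safe even when the column count is not known in
    advance.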
prefix : str, default ``None``
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...

.. deprecated:: 1.4.0
Use a list comprehension on the DataFrame's columns after calling ``read_csv``.

.. ipython:: python

data = "col1,col2,col3\na,b,1"

df = pd.read_csv(StringIO(data))
df.columns = [f"pre_{col}" for col in df.columns]
df

mangle_dupe_cols : boolean, default ``True``
Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'.
@@ -395,23 +376,6 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None``
Error handling
++++++++++++++

error_bad_lines : boolean, optional, default ``None``
Lines with too many fields (e.g. a csv line with too many commas) will by
default cause an exception to be raised, and no ``DataFrame`` will be
returned. If ``False``, then these "bad lines" will be dropped from the
``DataFrame`` that is returned. See :ref:`bad lines <io.bad_lines>`
below.

.. deprecated:: 1.3.0
Use the ``on_bad_lines`` parameter to specify behavior upon
encountering a bad line instead.
warn_bad_lines : boolean, optional, default ``None``
If error_bad_lines is ``False``, and warn_bad_lines is ``True``, a warning for
each "bad line" will be output.

.. deprecated:: 1.3.0
Use the ``on_bad_lines`` parameter to specify behavior upon
encountering a bad line instead.
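The replacement of the two boolean keywords by ``on_bad_lines`` can be
sketched as follows (the sample data is illustrative):

```python
from io import StringIO

import pandas as pd

# The second data row has three fields where two are expected.
data = "a,b\n1,2\n3,4,5\n6,7"

# Old (removed): pd.read_csv(StringIO(data), error_bad_lines=False,
#                            warn_bad_lines=False)
# New: a single keyword selects the behavior for malformed rows.
df = pd.read_csv(StringIO(data), on_bad_lines="skip")
```

With ``on_bad_lines="warn"`` the bad row is likewise dropped but a warning
is emitted, and with the default ``"error"`` parsing raises an exception.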
on_bad_lines : {{'error', 'warn', 'skip'}}, default 'error'
Specifies what to do upon encountering a bad line (a line with too many fields).
Allowed values are:
@@ -1221,37 +1185,6 @@ Infinity
``inf`` like values will be parsed as ``np.inf`` (positive infinity), and ``-inf`` as ``-np.inf`` (negative infinity).
These will ignore the case of the value, meaning ``Inf`` will also be parsed as ``np.inf``.


Returning Series
''''''''''''''''

Using the ``squeeze`` keyword, the parser will return output with a single column
as a ``Series``:

.. deprecated:: 1.4.0
Users should append ``.squeeze("columns")`` to the DataFrame returned by
``read_csv`` instead.

.. ipython:: python
:okwarning:

data = "level\nPatient1,123000\nPatient2,23000\nPatient3,1234018"

with open("tmp.csv", "w") as fh:
fh.write(data)

print(open("tmp.csv").read())

output = pd.read_csv("tmp.csv", squeeze=True)
output

type(output)

.. ipython:: python
:suppress:

os.remove("tmp.csv")

.. _io.boolean:

Boolean values
@@ -1708,8 +1641,6 @@ Options that are unsupported by the pyarrow engine which are not covered by the
* ``thousands``
* ``memory_map``
* ``dialect``
* ``warn_bad_lines``
* ``error_bad_lines``
* ``on_bad_lines``
* ``delim_whitespace``
* ``quoting``
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
@@ -213,6 +213,7 @@ Removal of prior version deprecations/changes
- Removed argument ``sort_columns`` in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`)
- Removed argument ``is_copy`` from :meth:`DataFrame.take` and :meth:`Series.take` (:issue:`30615`)
- Removed argument ``kind`` from :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer` and :meth:`Index.slice_locs` (:issue:`41378`)
- Removed arguments ``prefix``, ``squeeze``, ``error_bad_lines`` and ``warn_bad_lines`` from :func:`read_csv` (:issue:`40413`, :issue:`43427`)
- Removed argument ``datetime_is_numeric`` from :meth:`DataFrame.describe` and :meth:`Series.describe` as datetime data will always be summarized as numeric data (:issue:`34798`)
- Disallow subclass-specific keywords (e.g. "freq", "tz", "names", "closed") in the :class:`Index` constructor (:issue:`38597`)
- Removed argument ``inplace`` from :meth:`Categorical.remove_unused_categories` (:issue:`37918`)
4 changes: 1 addition & 3 deletions pandas/io/parsers/arrow_parser_wrapper.py
@@ -95,9 +95,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
multi_index_named = True
if self.header is None:
if self.names is None:
if self.prefix is not None:
self.names = [f"{self.prefix}{i}" for i in range(num_cols)]
elif self.header is None:
if self.header is None:
self.names = range(num_cols)
if len(self.names) != num_cols:
# usecols is passed through to pyarrow, we only handle index col here
10 changes: 0 additions & 10 deletions pandas/io/parsers/base_parser.py
@@ -97,7 +97,6 @@ def __init__(self, kwds) -> None:

self.names = kwds.get("names")
self.orig_names: list | None = None
self.prefix = kwds.pop("prefix", None)

self.index_col = kwds.get("index_col", None)
self.unnamed_cols: set = set()
@@ -155,11 +154,6 @@ def __init__(self, kwds) -> None:
"index_col must only contain row numbers "
"when specifying a multi-index header"
)
elif self.header is not None and self.prefix is not None:
# GH 27394
raise ValueError(
"Argument prefix must be None if argument header is not None"
)

self._name_processed = False

@@ -1161,7 +1155,6 @@ def converter(*date_cols):
"header": "infer",
"index_col": None,
"names": None,
"prefix": None,
"skiprows": None,
"skipfooter": 0,
"nrows": None,
@@ -1185,15 +1178,12 @@
"chunksize": None,
"verbose": False,
"encoding": None,
"squeeze": None,
"compression": None,
"mangle_dupe_cols": True,
"infer_datetime_format": False,
"skip_blank_lines": True,
"encoding_errors": "strict",
"on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,
"error_bad_lines": None,
"warn_bad_lines": None,
"use_nullable_dtypes": False,
}

14 changes: 2 additions & 12 deletions pandas/io/parsers/c_parser_wrapper.py
@@ -71,8 +71,6 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
"encoding",
"memory_map",
"compression",
"error_bad_lines",
"warn_bad_lines",
):
kwds.pop(key, None)

@@ -102,16 +100,8 @@ def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:

# error: Cannot determine type of 'names'
if self.names is None: # type: ignore[has-type]
if self.prefix:
# error: Cannot determine type of 'names'
self.names = [ # type: ignore[has-type]
f"{self.prefix}{i}" for i in range(self._reader.table_width)
]
else:
# error: Cannot determine type of 'names'
self.names = list( # type: ignore[has-type]
range(self._reader.table_width)
)
# error: Cannot determine type of 'names'
self.names = list(range(self._reader.table_width)) # type: ignore[has-type]

# gh-9755
#
5 changes: 1 addition & 4 deletions pandas/io/parsers/python_parser.py
@@ -536,10 +536,7 @@ def _infer_columns(
num_original_columns = ncols

if not names:
if self.prefix:
columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
else:
columns = [list(range(ncols))]
columns = [list(range(ncols))]
columns = self._handle_usecols(
columns, columns[0], num_original_columns
)
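The parser changes above all replace the old ``prefix`` branch with plain
integer column labels. The user-facing effect, and the documented migration
path, can be sketched as follows (the ``X`` prefix and the sample data are
illustrative):

```python
from io import StringIO

import pandas as pd

data = "1,2,3\n4,5,6"

# With header=None the parser now always assigns integer column labels.
df = pd.read_csv(StringIO(data), header=None)
print(list(df.columns))  # [0, 1, 2]

# Old (removed): pd.read_csv(StringIO(data), header=None, prefix="X")
# New: rename the columns after parsing.
df.columns = [f"X{i}" for i in df.columns]
```

This mirrors the list-comprehension recipe given in the ``io.rst`` change:
the prefix is applied as a post-processing step rather than inside the
parser.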