address pd.read_csv deprecation:
tobiscode committed Mar 11, 2024
1 parent 3d01e28 commit 93f53e5
Showing 2 changed files with 8 additions and 8 deletions.
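
For context: pandas 2.2.0 deprecated the delim_whitespace keyword of pd.read_csv in favor of a whitespace-matching regex separator, and the keyword is slated for removal in pandas 3.0. Below is a minimal before/after sketch of the pattern this commit applies throughout (the file name "example.txt" is illustrative, not from the repository):

import pandas as pd

# Deprecated since pandas 2.2.0 (emits a FutureWarning):
# df = pd.read_csv("example.txt", delim_whitespace=True, header=None)

# Replacement: a regex matching runs of whitespace. pandas special-cases
# r"\s+" so the fast C parsing engine is still used; "delimiter" is an
# alias of "sep".
df = pd.read_csv("example.txt", delimiter=r"\s+", header=None)
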
12 changes: 6 additions & 6 deletions disstans/timeseries.py
@@ -1211,11 +1211,11 @@ def __init__(self,
var_cols = ["east_var", "north_var", "up_var"]
cov_cols = ["east_north_cov", "east_up_cov", "north_up_cov"]
all_cols = data_cols + var_cols + cov_cols
- time = pd.read_csv(self._path, delim_whitespace=True, header=None,
+ time = pd.read_csv(self._path, delimiter=r"\s+", header=None,
usecols=[11, 12, 13, 14, 15, 16],
names=["year", "month", "day", "hour", "minute", "second"])
time = pd.to_datetime(time).to_frame(name="time")
- data = pd.read_csv(self._path, delim_whitespace=True, header=None,
+ data = pd.read_csv(self._path, delimiter=r"\s+", header=None,
usecols=[1, 2, 3, 4, 5, 6, 7, 8, 9],
names=all_cols)
# compute covariance from correlation, still in meters
@@ -1347,7 +1347,7 @@ def __init__(self,
else:
raise ValueError(f"'data_unit' needs to be 'mm' or 'm', got {data_unit}.")
# load data and check for some warnings
- df = pd.read_csv(self._path, delim_whitespace=True,
+ df = pd.read_csv(self._path, delimiter=r"\s+",
usecols=[0, 3] + list(range(6, 13)) + list(range(14, 20)))
if show_warnings and len(df['site'].unique()) > 1:
warn(f"Timeseries file {self._path} contains multiple site codes: "
@@ -1541,18 +1541,18 @@ def __init__(self,
).with_traceback(e.__traceback__) from e
# load data into pandas
f.seek(0)
- df = pd.read_csv(f, delim_whitespace=True,
+ df = pd.read_csv(f, delimiter=r"\s+",
names=["site", "sec-J2000", "___e-ref(m)", "___n-ref(m)",
"___v-ref(m)", "sig_e(m)", "sig_n(m)", "sig_v(m)"],
usecols=[0, 1] + list(range(8, 11)) + list(range(14, 17)))
# if the path is a .kenv.gz file, we only need to extract the single file
elif pathobj.match("*.kenv.gz"):
with gzip.open(self._path, mode="r") as f:
- df = pd.read_csv(f, delim_whitespace=True,
+ df = pd.read_csv(f, delimiter=r"\s+",
usecols=[0, 1] + list(range(8, 11)) + list(range(14, 17)))
# in all other cases, try loading directly
else:
- df = pd.read_csv(self._path, delim_whitespace=True,
+ df = pd.read_csv(self._path, delimiter=r"\s+",
usecols=[0, 1] + list(range(8, 11)) + list(range(14, 17)))
# check for duplicate sites
if show_warnings and len(df['site'].unique()) > 1:
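
A quick, behavior-preserving sanity check for this substitution (a sketch; the inline sample and the assertion are illustrative, and the deprecated keyword still has to exist in the installed pandas version for the comparison to run):

import io
import pandas as pd

sample = "A 1  2\nB 3 4\n"  # runs of one or more spaces between fields
old = pd.read_csv(io.StringIO(sample), delim_whitespace=True, header=None)  # deprecated spelling
new = pd.read_csv(io.StringIO(sample), delimiter=r"\s+", header=None)
pd.testing.assert_frame_equal(old, new)  # both parse identically
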
4 changes: 2 additions & 2 deletions disstans/tools.py
@@ -998,7 +998,7 @@ def get_sta_url(sta, year, doy, date):
except error.HTTPError as e:
raise RuntimeError("Failed to download the station list from "
f"{station_list_url}.").with_traceback(e.__traceback__) from e
- stations = pd.read_csv(station_list_path, delim_whitespace=True, usecols=list(range(11)),
+ stations = pd.read_csv(station_list_path, delimiter=r"\s+", usecols=list(range(11)),
parse_dates=[7, 8, 9])
# subset according to station_list_or_bbox
if all([isinstance(site, str) for site in station_list_or_bbox]):
@@ -1261,7 +1261,7 @@ def parse_unr_steps(filepath: str,
# load the file
col_names = ["station", "time", "code", "type", "distance", "magnitude", "usgsid"]
# (for earthquake events, the "type" column is actually the "threshold" column)
- raw = pd.read_csv(filepath, names=col_names, delim_whitespace=True)
+ raw = pd.read_csv(filepath, names=col_names, delimiter=r"\s+")
# we now have a locale-dependent time column in the non-standard format yymmmdd
# (%y%b%d in strptime language) which we need to convert in a hard-coded way, because we
# shouldn't change the locale temporarily as it affects the entire system
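
The comment above explains why the yymmmdd dates are converted in a hard-coded way rather than via a temporary locale switch; a minimal sketch of such a locale-independent conversion (the helper name, century assumption, and sample value are illustrative, not the repository's actual code):

import pandas as pd

# English month abbreviations, independent of the system locale.
_MONTHS = {"JAN": "01", "FEB": "02", "MAR": "03", "APR": "04",
           "MAY": "05", "JUN": "06", "JUL": "07", "AUG": "08",
           "SEP": "09", "OCT": "10", "NOV": "11", "DEC": "12"}

def parse_unr_date(value: str) -> pd.Timestamp:
    # "24MAR11" -> Timestamp("2024-03-11"); naively assumes a 20xx century.
    yy, mon, dd = value[:2], value[2:5].upper(), value[5:]
    return pd.Timestamp(f"20{yy}-{_MONTHS[mon]}-{dd}")
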
