|
12 | 12 | import time
|
13 | 13 | import webbrowser
|
14 | 14 | from collections.abc import Iterable, Mapping, Sequence
|
15 |
| -from itertools import islice |
16 | 15 | from pathlib import Path
|
17 | 16 | from typing import Any, Literal
|
18 | 17 |
|
|
40 | 39 | "ISO-8859-15",
|
41 | 40 | "ISO-8859-16",
|
42 | 41 | ]
|
| 42 | +# Type hints for the list of possible data kinds. |
| 43 | +Kind = Literal[ |
| 44 | + "arg", "empty", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors" |
| 45 | +] |
43 | 46 |
|
44 | 47 |
|
45 |
| -def _validate_data_input( # noqa: PLR0912 |
46 |
| - data=None, x=None, y=None, z=None, required_z=False, required_data=True, kind=None |
47 |
| -) -> None: |
| 48 | +def _validate_data_input(data: Any, kind: Kind, required_z: bool = False) -> None: |
48 | 49 | """
|
49 |
| - Check if the combination of data/x/y/z is valid. |
| 50 | + Check if the data to be passed to the virtualfile_from_ functions is valid. |
50 | 51 |
|
51 | 52 | Examples
|
52 | 53 | --------
|
53 |
| - >>> _validate_data_input(data="infile") |
54 |
| - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6]) |
55 |
| - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], z=[7, 8, 9]) |
56 |
| - >>> _validate_data_input(data=None, required_data=False) |
57 |
| - >>> _validate_data_input() |
| 54 | + The "empty" kind means the data is given via a series of vectors like x/y/z. |
| 55 | +
|
| 56 | + >>> _validate_data_input(data=[[1, 2, 3], [4, 5, 6]], kind="empty") |
| 57 | + >>> _validate_data_input(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], kind="empty") |
| 58 | + >>> _validate_data_input(data=[None, [4, 5, 6]], kind="empty") |
58 | 59 | Traceback (most recent call last):
|
59 | 60 | ...
|
60 |
| - pygmt.exceptions.GMTInvalidInput: No input data provided. |
61 |
| - >>> _validate_data_input(x=[1, 2, 3]) |
| 61 | + pygmt.exceptions.GMTInvalidInput: Must provide both x and y. |
| 62 | + >>> _validate_data_input(data=[[1, 2, 3], None], kind="empty") |
62 | 63 | Traceback (most recent call last):
|
63 | 64 | ...
|
64 | 65 | pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
|
65 |
| - >>> _validate_data_input(y=[4, 5, 6]) |
| 66 | + >>> _validate_data_input(data=[None, None], kind="empty") |
66 | 67 | Traceback (most recent call last):
|
67 | 68 | ...
|
68 | 69 | pygmt.exceptions.GMTInvalidInput: Must provide both x and y.
|
69 |
| - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], required_z=True) |
| 70 | + >>> _validate_data_input(data=[[1, 2, 3], [4, 5, 6]], kind="empty", required_z=True) |
70 | 71 | Traceback (most recent call last):
|
71 | 72 | ...
|
72 | 73 | pygmt.exceptions.GMTInvalidInput: Must provide x, y, and z.
|
| 74 | +
|
| 75 | + The "matrix" kind means the data is given via a 2-D numpy.ndarray. |
| 76 | +
|
73 | 77 | >>> import numpy as np
|
74 | 78 | >>> import pandas as pd
|
75 | 79 | >>> import xarray as xr
|
76 | 80 | >>> data = np.arange(8).reshape((4, 2))
|
77 |
| - >>> _validate_data_input(data=data, required_z=True, kind="matrix") |
| 81 | + >>> _validate_data_input(data=data, kind="matrix", required_z=True) |
78 | 82 | Traceback (most recent call last):
|
79 | 83 | ...
|
80 |
| - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. |
| 84 | + pygmt.exceptions.GMTInvalidInput: Need at least 3 columns but 2 column(s) are given. |
| 85 | +
|
| 86 | + The "vectors" kind means the original data is a dictionary, list, tuple,
| 87 | + pandas.DataFrame, pandas.Series, xarray.Dataset, or xarray.DataArray. |
| 88 | +
|
81 | 89 | >>> _validate_data_input(
|
82 | 90 | ... data=pd.DataFrame(data, columns=["x", "y"]),
|
83 |
| - ... required_z=True, |
84 | 91 | ... kind="vectors",
|
| 92 | + ... required_z=True, |
85 | 93 | ... )
|
86 | 94 | Traceback (most recent call last):
|
87 | 95 | ...
|
88 |
| - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. |
| 96 | + pygmt.exceptions.GMTInvalidInput: Need at least 3 columns but 2 column(s) are given. |
89 | 97 | >>> _validate_data_input(
|
90 | 98 | ... data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])),
|
91 |
| - ... required_z=True, |
92 | 99 | ... kind="vectors",
|
| 100 | + ... required_z=True, |
93 | 101 | ... )
|
94 | 102 | Traceback (most recent call last):
|
95 | 103 | ...
|
96 |
| - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. |
97 |
| - >>> _validate_data_input(data="infile", x=[1, 2, 3]) |
98 |
| - Traceback (most recent call last): |
99 |
| - ... |
100 |
| - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. |
101 |
| - >>> _validate_data_input(data="infile", y=[4, 5, 6]) |
102 |
| - Traceback (most recent call last): |
103 |
| - ... |
104 |
| - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. |
105 |
| - >>> _validate_data_input(data="infile", x=[1, 2, 3], y=[4, 5, 6]) |
106 |
| - Traceback (most recent call last): |
107 |
| - ... |
108 |
| - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. |
109 |
| - >>> _validate_data_input(data="infile", z=[7, 8, 9]) |
110 |
| - Traceback (most recent call last): |
111 |
| - ... |
112 |
| - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. |
| 104 | + pygmt.exceptions.GMTInvalidInput: Need at least 3 columns but 2 column(s) are given. |
113 | 105 |
|
114 | 106 | Raises
|
115 | 107 | ------
|
116 | 108 | GMTInvalidInput
|
117 | 109 | If the data input is not valid.
|
118 | 110 | """
|
119 |
| - if data is None: # data is None |
120 |
| - if x is None and y is None: # both x and y are None |
121 |
| - if required_data: # data is not optional |
122 |
| - msg = "No input data provided." |
| 111 | + # Determine the required number of columns based on the required_z flag. |
| 112 | + required_cols = 3 if required_z else 1 |
| 113 | + |
| 114 | + match kind: |
| 115 | + case "empty": # data = [x, y], [x, y, z], [x, y, z, ...] |
| 116 | + if len(data) < 2 or any(v is None for v in data[:2]): |
| 117 | + msg = "Must provide both x and y." |
123 | 118 | raise GMTInvalidInput(msg)
|
124 |
| - elif x is None or y is None: # either x or y is None |
125 |
| - msg = "Must provide both x and y." |
126 |
| - raise GMTInvalidInput(msg) |
127 |
| - if required_z and z is None: # both x and y are not None, now check z |
128 |
| - msg = "Must provide x, y, and z." |
129 |
| - raise GMTInvalidInput(msg) |
130 |
| - else: # data is not None |
131 |
| - if x is not None or y is not None or z is not None: |
132 |
| - msg = "Too much data. Use either data or x/y/z." |
133 |
| - raise GMTInvalidInput(msg) |
134 |
| - # check if data has the required z column |
135 |
| - if required_z: |
136 |
| - msg = "data must provide x, y, and z columns." |
137 |
| - if kind == "matrix" and data.shape[1] < 3: |
| 119 | + if required_z and (len(data) < 3 or data[2] is None): |
| 120 | + msg = "Must provide x, y, and z." |
138 | 121 | raise GMTInvalidInput(msg)
|
139 |
| - if kind == "vectors": |
140 |
| - if hasattr(data, "shape") and ( |
141 |
| - (len(data.shape) == 1 and data.shape[0] < 3) |
142 |
| - or (len(data.shape) > 1 and data.shape[1] < 3) |
143 |
| - ): # np.ndarray or pd.DataFrame |
144 |
| - raise GMTInvalidInput(msg) |
145 |
| - if hasattr(data, "data_vars") and len(data.data_vars) < 3: # xr.Dataset |
146 |
| - raise GMTInvalidInput(msg) |
147 |
| - if kind == "vectors" and isinstance(data, dict): |
148 |
| - # Iterator over the up-to-3 first elements. |
149 |
| - arrays = list(islice(data.values(), 3)) |
150 |
| - if len(arrays) < 2 or any(v is None for v in arrays[:2]): # Check x/y |
151 |
| - msg = "Must provide x and y." |
| 122 | + case "matrix": # 2-D numpy.ndarray |
| 123 | + if (actual_cols := data.shape[1]) < required_cols: |
| 124 | + msg = f"Need at least {required_cols} columns but {actual_cols} column(s) are given." |
152 | 125 | raise GMTInvalidInput(msg)
|
153 |
| - if required_z and (len(arrays) < 3 or arrays[2] is None): # Check z |
154 |
| - msg = "Must provide x, y, and z." |
| 126 | + case "vectors": |
| 127 | + # "vectors" means the original data is a dictionary, list, tuple, |
| 128 | + # pandas.DataFrame, pandas.Series, xarray.Dataset, or xarray.DataArray. |
| 129 | + # The original data is converted to a list of vectors or a 2-D numpy.ndarray |
| 130 | + # in the virtualfile_in function. |
| 131 | + if (actual_cols := len(data)) < required_cols: |
| 132 | + msg = f"Need at least {required_cols} columns but {actual_cols} column(s) are given." |
155 | 133 | raise GMTInvalidInput(msg)
|
156 | 134 |
|
157 | 135 |
|
@@ -271,11 +249,7 @@ def _check_encoding(argstr: str) -> Encoding:
|
271 | 249 | return "ISOLatin1+"
|
272 | 250 |
|
273 | 251 |
|
274 |
| -def data_kind( |
275 |
| - data: Any, required: bool = True |
276 |
| -) -> Literal[ |
277 |
| - "arg", "empty", "file", "geojson", "grid", "image", "matrix", "stringio", "vectors" |
278 |
| -]: |
| 252 | +def data_kind(data: Any, required: bool = True) -> Kind: |
279 | 253 | r"""
|
280 | 254 | Check the kind of data that is provided to a module.
|
281 | 255 |
|
|
0 commit comments