Skip to content

Commit 14d3e40

Browse files
authored
Merge pull request #50 from tanbro/develop
Develop
2 parents bc1cf7b + da96563 commit 14d3e40

File tree

13 files changed

+202
-37
lines changed

13 files changed

+202
-37
lines changed

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ repos:
2020
- id: check-docstring-first
2121

2222
- repo: https://github.com/astral-sh/ruff-pre-commit
23-
rev: v0.6.2
23+
rev: v0.6.5
2424
hooks:
2525
- id: ruff # Run the linter.
2626
types_or: [python, pyi, jupyter]
@@ -36,7 +36,7 @@ repos:
3636
additional_dependencies: [types-PyYAML]
3737

3838
- repo: https://github.com/python-jsonschema/check-jsonschema
39-
rev: "0.28.6"
39+
rev: "0.29.2"
4040
hooks:
4141
- id: check-github-workflows
4242
- id: check-readthedocs

README.md

+47-2
Original file line numberDiff line numberDiff line change
@@ -414,11 +414,56 @@ xyz: !http-include xyz.yml
414414

415415
the actual URL to access is `http://$HOST:$PORT/sub_1/sub_1_1/xyz.yml`
416416

417+
### Flatten sequence object in multiple matched files
418+
419+
Suppose we have a YAML file like this:
420+
421+
```yaml
422+
items: !include "*.yaml"
423+
```
424+
425+
If every file that matches `*.yaml` contains a sequence object at its top level, what is parsed and loaded will be:
426+
427+
```yaml
428+
items: [
429+
[item 0 of 1st file, item 1 of 1st file, ... , item n of 1st file, ...],
430+
[item 0 of 2nd file, item 1 of 2nd file, ... , item n of 2nd file, ...],
431+
# ....
432+
[item 0 of nth file, item 1 of nth file, ... , item n of nth file, ...],
433+
# ...
434+
]
435+
```
436+
437+
It's a two-dimensional array, because the YAML content of each matched file is treated as a member of the list (sequence).
438+
439+
But if the `flatten` parameter is set to `true`, like:
440+
441+
```yaml
442+
items: !include {urlpath: "*.yaml", flatten: true}
443+
```
444+
445+
we'll get:
446+
447+
```yaml
448+
items: [
449+
item 0 of 1st file, item 1 of 1st file, ... , item n of 1st file, # ...
450+
item 0 of 2nd file, item 1 of 2nd file, ... , item n of 2nd file, # ...
451+
# ....
452+
item 0 of n-th file, item 1 of n-th file, ... , item n of n-th file, # ...
453+
# ...
454+
]
455+
```
456+
457+
> ℹ️ **Note**
458+
>
459+
> - Only available when multiple files were matched.
460+
> - **Every matched file should have a Sequence object in its top level**, or a `TypeError` exception may be thrown.
461+
417462
### Serialization
418463

419-
When load [YAML][] string with include statement, the including files are default parsed into python objects. Thant is, if we call `yaml.dump()` on the object, what dumped is the parsed python object, and can not serialize the include statement itself.
464+
When loading a [YAML][] string with an include statement, the included files are parsed into Python objects by default. That is, if we call `yaml.dump()` on the object, what is dumped is the parsed Python object; the include statement itself is not serialized.
420465

421-
To serialize the statement, we shall first create an `yaml_include.Constructor` object whose **`autoload` is `False`**:
466+
To serialize the statement, we shall first create a `yaml_include.Constructor` object whose **`autoload` attribute is `False`**:
422467

423468
```python
424469
import yaml

docs/README.rst

+16-3
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,19 @@ How to build docs
2323
2424
#. Build HTML documentation:
2525

26-
.. code:: sh
26+
* Make tool:
27+
28+
.. code:: sh
29+
30+
make -C docs/make html
31+
32+
* Windows:
2733

28-
docs/make html
34+
.. code:: bat
2935
30-
The built static web site is output to ``docs/_build/html``, we can serve it:
36+
docs\make html
37+
38+
The built static web site is output to ``docs/_build/html``; we can serve it:
3139

3240
.. code:: sh
3341
@@ -44,3 +52,8 @@ then open http://localhost:8000/ in a web browser.
4452
python -m http.server -d docs/_build/html 8080
4553
4654
.. seealso:: Python ``stdlib``'s :mod:`http.server`
55+
56+
.. tip::
57+
If you want to build a PDF, use ``make rinoh`` instead.
58+
59+
.. seealso:: <https://www.sphinx-doc.org/en/master/usage/builders/index.html#sphinx.builders.latex.LaTeXBuilder>

docs/apidocs/yaml_include.rst

-8
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
yaml\_include package
22
=====================
33

4-
.. automodule:: yaml_include
5-
:members:
6-
:undoc-members:
7-
:show-inheritance:
8-
9-
Submodules
10-
----------
11-
124
.. toctree::
135
:maxdepth: 4
146

docs/conf.py

+3
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,6 @@
106106
napoleon_use_admonition_for_examples = True
107107
napoleon_use_admonition_for_notes = True
108108
napoleon_use_admonition_for_references = True
109+
110+
111+
latex_engine = "xelatex"

docs/requirements.txt

+4
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ sphinx-copybutton
1111
sphinx-inline-tabs
1212
sphinx_tippy
1313
sphinx-version-warning
14+
15+
# for direct PDF generation
16+
rinohtype
17+
pillow

pyproject.toml

-2
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ classifiers = [
3333
"Programming Language :: Python",
3434
"Programming Language :: Python :: 3",
3535
"Programming Language :: Python :: 3 :: Only",
36-
"Programming Language :: Python :: 3.7",
37-
"Programming Language :: Python :: 3.7",
3836
"Programming Language :: Python :: 3.8",
3937
"Programming Language :: Python :: 3.9",
4038
"Programming Language :: Python :: 3.10",

requirements.txt

-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
-r docs/requirements.txt
77
-r tests/requirements.txt
88

9-
setuptools_scm
10-
119
coverage
1210

1311
mypy

src/yaml_include/constructor.py

+39-17
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from itertools import chain
1212
from os import PathLike
1313
from pathlib import Path
14-
from typing import TYPE_CHECKING, Any, Callable, Generator, Iterable, Mapping, Optional, Sequence, Type, TypeVar, Union
14+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Mapping, Optional, Sequence, Type, TypeVar, Union
1515
from urllib.parse import urlsplit, urlunsplit
1616

1717
if sys.version_info >= (3, 10): # pragma: no cover
@@ -31,13 +31,13 @@
3131

3232
if TYPE_CHECKING: # pragma: no cover
3333
from yaml import Node
34-
from yaml.constructor import _Scalar
34+
from yaml.constructor import _Scalar # type: ignore[attr-defined]
3535
from yaml.cyaml import _CLoader
3636
from yaml.loader import _Loader
3737
from yaml.reader import _ReadStream
3838

39-
_TOpenFile = TypeVar("_TOpenFile", bound=_ReadStream)
40-
_TLoaderType = TypeVar("_TLoaderType", bound=Type[Union[_Loader, _CLoader]])
39+
OpenFileT = TypeVar("OpenFileT", bound=_ReadStream)
40+
LoaderTypeT = TypeVar("LoaderTypeT", bound=Type[Union[_Loader, _CLoader]])
4141

4242

4343
__all__ = ["Constructor"]
@@ -47,11 +47,17 @@
4747
) # We support "**", "?" and "[..]". We do not support "^" for pattern negation.
4848

4949

50+
if yaml.__with_libyaml__: # pragma: no cover
51+
DEFAULT_YAML_LOAD_FUNCTION = lambda x: yaml.load(x, yaml.CSafeLoader) # noqa: E731
52+
else: # pragma: no cover
53+
DEFAULT_YAML_LOAD_FUNCTION = yaml.safe_load
54+
55+
5056
def load_open_file(
51-
file: _TOpenFile,
52-
loader_type: _TLoaderType,
57+
file: OpenFileT,
58+
loader_type: LoaderTypeT,
5359
path: str,
54-
custom_loader: Optional[Callable[[str, _TOpenFile, _TLoaderType], Any]] = None,
60+
custom_loader: Optional[Callable[[str, OpenFileT, LoaderTypeT], Any]] = None,
5561
) -> Any:
5662
if custom_loader is None:
5763
return yaml.load(file, loader_type)
@@ -181,7 +187,7 @@ def read(self, length: int = ..., /) -> bytes | str: ...
181187
"""
182188

183189
@contextmanager
184-
def managed_autoload(self, autoload: bool) -> Generator[Self, None, None]:
190+
def managed_autoload(self, autoload: bool) -> Iterator[Self]:
185191
"""``with`` statement context manager for :attr:`autoload`
186192
187193
Args:
@@ -207,9 +213,19 @@ def __call__(self, loader: Union[_Loader, _CLoader], node: Node) -> Union[Data,
207213
elif is_yaml_mapping_node(node):
208214
val = loader.construct_mapping(node)
209215
if is_kwds(val):
210-
data = Data(val["urlpath"], mapping_params={k: v for k, v in val.items() if k != "urlpath"})
216+
kdargs = {
217+
"urlpath": val["urlpath"],
218+
"mapping_params": {k: v for k, v in val.items() if k not in ("urlpath", "flatten")},
219+
}
220+
if (flatten := val.get("flatten")) is not None:
221+
if isinstance(flatten, str):
222+
flatten = DEFAULT_YAML_LOAD_FUNCTION(flatten)
223+
if not isinstance(flatten, bool): # pragma: no cover
224+
raise ValueError("`flatten` must be a boolean")
225+
kdargs["flatten"] = flatten
226+
data = Data(**kdargs)
211227
else: # pragma: no cover
212-
raise ValueError("not all key of the YAML mapping node is `str`")
228+
raise ValueError("not all keys type of the YAML mapping node are identifier string")
213229
else: # pragma: no cover
214230
raise TypeError(f"{type(node)}")
215231
if self.autoload:
@@ -333,8 +349,8 @@ def load(self, loader_type: Type[Union[_Loader, _CLoader]], data: Data) -> Any:
333349
result = []
334350
with fsspec.open_files(urlpath, *data.sequence_params, **data.mapping_params) as ofs:
335351
for of_ in ofs:
336-
data = load_open_file(of_, loader_type, urlpath, self.custom_loader)
337-
result.append(data)
352+
loaded_data = load_open_file(of_, loader_type, urlpath, self.custom_loader)
353+
result.append(loaded_data)
338354
return result
339355
# else if no wildcard, returns a single object
340356
with fsspec.open(urlpath, *data.sequence_params, **data.mapping_params) as of_:
@@ -374,7 +390,10 @@ def load(self, loader_type: Type[Union[_Loader, _CLoader]], data: Data) -> Any:
374390
glob_fn = lambda: self.fs.glob(urlpath, *pos_args) # noqa: E731
375391
else:
376392
# special for maxdepth, because PyYAML sometimes treat number as string for constructor's parameter
377-
maxdepth = int(glob_params)
393+
try:
394+
maxdepth = int(glob_params)
395+
except ValueError:
396+
maxdepth = None
378397
glob_fn = lambda: self.fs.glob(urlpath, maxdepth=maxdepth) # noqa: E731
379398

380399
if open_params is None:
@@ -392,9 +411,12 @@ def load(self, loader_type: Type[Union[_Loader, _CLoader]], data: Data) -> Any:
392411
if not isinstance(file, str): # pragma: no cover
393412
raise RuntimeError(f"`fs.glob()` function does not return a `str` ({file})")
394413
with open_fn(file) as of_:
395-
data = load_open_file(of_, loader_type, file, self.custom_loader)
396-
result.append(data)
397-
return result
414+
loaded_data = load_open_file(of_, loader_type, file, self.custom_loader)
415+
result.append(loaded_data)
416+
if data.flatten:
417+
return [child for item in result for child in item]
418+
else:
419+
return result
398420

399421
# else if no wildcards, return a single object
400422
with self.fs.open(urlpath, *data.sequence_params, **data.mapping_params) as of_:
@@ -415,4 +437,4 @@ def is_yaml_mapping_node(node) -> TypeGuard[yaml.MappingNode]:
415437

416438

417439
def is_kwds(val) -> TypeGuard[Mapping[str, Any]]:
418-
return isinstance(val, Mapping) and all(isinstance(k, str) for k in val)
440+
return isinstance(val, Mapping) and all(isinstance(k, str) and k.isidentifier() for k in val)

src/yaml_include/data.py

+47-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class Data:
1111
urlpath: str
1212
"""url/path of the YAML include statement
1313
14-
urlpath can be either absolute (like `/usr/src/Python-1.5/*.yml`) or relative (like `../../Tools/*/*.yml`), and can contain shell-style wildcards.
14+
``urlpath`` can be either absolute (like `/usr/src/Python-1.5/*.yml`) or relative (like `../../Tools/*/*.yml`), and can contain shell-style wildcards.
1515
1616
We support ``"**"``, ``"?"`` and ``"[..]"``. We do not support ``"^"`` for pattern negation.
1717
The ``maxdepth`` option is applied on the first ``"**"`` found in the path.
@@ -20,6 +20,52 @@ class Data:
2020
Using the ``"**"`` pattern in large directory trees or remote files may consume an inordinate amount of time.
2121
"""
2222

23+
flatten: bool = False
24+
"""Whether to flatten sequence objects parsed from multiple matched YAML files.
25+
26+
* Only available when multiple files were matched
27+
* **Every matched file should have a Sequence object in its top level**, or a :class:`TypeError` exception may be thrown.
28+
29+
Example:
30+
Consider we have such a YAML:
31+
32+
.. code-block:: yaml
33+
34+
items: !include "*.yaml"
35+
36+
If every file that matches `*.yaml` contains a sequence object at its top level, what is parsed and loaded will be:
37+
38+
.. code-block:: yaml
39+
40+
items: [
41+
[item 0 of 1st file, item 1 of 1st file, ... , item n of 1st file, ...],
42+
[item 0 of 2nd file, item 1 of 2nd file, ... , item n of 2nd file, ...],
43+
# ....
44+
[item 0 of nth file, item 1 of nth file, ... , item n of nth file, ...],
45+
# ...
46+
]
47+
48+
It's a two-dimensional array, because the YAML content of each matched file is treated as a member of the list (sequence).
49+
50+
But if the ``flatten`` parameter is set to ``true``, like:
51+
52+
.. code-block:: yaml
53+
54+
items: !include {urlpath: "*.yaml", flatten: true}
55+
56+
we'll get:
57+
58+
.. code-block:: yaml
59+
60+
items: [
61+
item 0 of 1st file, item 1 of 1st file, ... , item n of 1st file, # ...
62+
item 0 of 2nd file, item 1 of 2nd file, ... , item n of 2nd file, # ...
63+
# ....
64+
item 0 of n-th file, item 1 of n-th file, ... , item n of n-th file, # ...
65+
# ...
66+
]
67+
"""
68+
2369
sequence_params: Sequence[Any] = field(default_factory=list)
2470
"""sequence parameters of the YAML include statement.
2571
"""

tests/data/include3.d/1.yml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[1, 2, 3]

tests/data/include3.d/2.yml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[4, 5, 6]

tests/test_basic.py

+42
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,48 @@ def test_include_wildcards_5(self):
162162
data = yaml.load(StringIO(yml), loader_cls)
163163
self.assertListEqual(sorted(data["files"], key=lambda m: m["name"]), [YAML1, YAML2])
164164

165+
def test_flatten_true(self):
166+
yml = dedent(
167+
"""
168+
items: !inc {urlpath: "include3.d/*.yml", flatten: true}
169+
"""
170+
)
171+
172+
for loader_cls in YAML_LOADERS:
173+
two_dim_sequence = []
174+
for pth in Path().glob("tests/data/include3.d/*.yml"):
175+
two_dim_sequence.append(yaml.load(pth.read_bytes(), loader_cls))
176+
flattened_sequence = sorted([member for data in two_dim_sequence for member in data])
177+
178+
data = yaml.load(StringIO(yml), loader_cls)
179+
result = sorted(data["items"])
180+
self.assertListEqual(result, flattened_sequence)
181+
182+
def test_flatten_false_or_default(self):
183+
yml1 = dedent(
184+
"""
185+
items: !inc {urlpath: "include3.d/*.yml", flatten: false}
186+
"""
187+
)
188+
yml2 = dedent(
189+
"""
190+
items: !inc "include3.d/*.yml"
191+
"""
192+
)
193+
for loader_cls in YAML_LOADERS:
194+
two_dim_sequence = []
195+
for pth in Path().glob("tests/data/include3.d/*.yml"):
196+
two_dim_sequence.append(yaml.load(pth.read_bytes(), loader_cls))
197+
two_dim_sequence = sorted(two_dim_sequence)
198+
199+
data1 = yaml.load(StringIO(yml1), loader_cls)
200+
result1 = data1["items"]
201+
self.assertListEqual(result1, two_dim_sequence)
202+
203+
data2 = yaml.load(StringIO(yml2), loader_cls)
204+
result2 = data2["items"]
205+
self.assertListEqual(result2, two_dim_sequence)
206+
165207

166208
class DefaultFsBasicTestCase(BaseTestCase):
167209
@classmethod

0 commit comments

Comments
 (0)