Skip to content

Commit 89ee3ba

Browse files
author
Michael Panchenko
committed
RemoteStorage: allow passing absolute paths when pulling
This further increases convenience by introducing a useful default behavior when the `local_base_dir` passed to `pull` is an absolute path.
1 parent 423f4a8 commit 89ee3ba

File tree

2 files changed

+51
-2
lines changed

2 files changed

+51
-2
lines changed

src/accsr/remote_storage.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -648,12 +648,15 @@ def pull(
648648
convert_to_linux_path: bool = True,
649649
dryrun: bool = False,
650650
path_regex: Optional[Union[Pattern, str]] = None,
651+
strip_abspath_prefix: Optional[str] = None,
652+
strip_abs_local_base_dir: bool = True,
651653
) -> TransactionSummary:
652654
r"""
653655
Pull either a file or a directory under the given path relative to local_base_dir.
654656
655657
:param remote_path: remote path on storage bucket relative to the configured remote base path.
656-
e.g. 'data/ground_truth/some_file.json'
658+
e.g. 'data/ground_truth/some_file.json'. Can also be an absolute local path if ``strip_abspath_prefix``
659+
is specified.
657660
:param local_base_dir: Local base directory for constructing local path
658661
e.g. passing 'local_base_dir' will download to the path
659662
'local_base_dir/data/ground_truth/some_file.json' in the above example
@@ -669,8 +672,39 @@ def pull(
669672
(which is discouraged).
670673
:param dryrun: If True, simulates the pull operation and returns the remote objects that would have been pulled.
671674
:param path_regex: DEPRECATED! Use ``include_regex`` instead.
675+
:param strip_abspath_prefix: Will only have an effect if the `remote_path` is absolute.
676+
Then the given prefix is removed from it before pulling. This is useful for pulling files from a remote storage
677+
by directly specifying absolute local paths instead of first converting them to actual remote paths.
678+
Similar in logic to `local_path_prefix` in `push`.
679+
A common use case is to always set `local_base_dir` to the same value and to always pass absolute paths
680+
as `remote_path` to `pull`.
681+
:param strip_abs_local_base_dir: If True, and `local_base_dir` is an absolute path, then
682+
the `local_base_dir` will be treated as `strip_abspath_prefix`. See explanation of `strip_abspath_prefix`.
672683
:return: An object describing the summary of the operation.
673684
"""
685+
686+
if strip_abs_local_base_dir and os.path.isabs(local_base_dir):
687+
if strip_abspath_prefix is not None:
688+
raise ValueError(
689+
f"Cannot specify both `strip_abs_local_base_dir`={strip_abs_local_base_dir} "
690+
f"and `strip_abspath_prefix`={strip_abspath_prefix}"
691+
f"when `local_base_dir`={local_base_dir} is an absolute path."
692+
)
693+
strip_abspath_prefix = local_base_dir
694+
695+
remote_path_is_abs = remote_path.startswith("/") or os.path.isabs(remote_path)
696+
697+
if strip_abspath_prefix is not None and remote_path_is_abs:
698+
remote_path = remote_path.replace("\\", "/")
699+
strip_abspath_prefix = strip_abspath_prefix.replace("\\", "/").rstrip("/")
700+
if not remote_path.startswith(strip_abspath_prefix):
701+
raise ValueError(
702+
f"Remote path {remote_path} is absolute but does not start "
703+
f"with the given prefix {strip_abspath_prefix}"
704+
)
705+
# +1 for removing the leading '/'
706+
remote_path = remote_path[len(strip_abspath_prefix) + 1 :]
707+
674708
include_regex = self._handle_deprecated_path_regex(include_regex, path_regex)
675709
summary = self._get_pull_summary(
676710
remote_path,
@@ -921,7 +955,7 @@ def push(
921955
Upload files into the remote storage.
922956
Does not upload files for which the md5sum matches existing remote files.
923957
The remote path for uploading will be constructed from the remote_base_path and the provided path.
924-
The local_path_prefix serves for finding the directory on the local system or for stripping off
958+
The `local_path_prefix` serves for finding the directory on the local system or for stripping off
925959
parts of absolute paths if path is absolute, see examples below.
926960
927961
Examples:

tests/accsr/test_remote_storage.py

+15
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,21 @@ def test_pull_file(self, storage, file_on_storage, tmpdir):
184184
pull_summary = storage.pull(file_on_storage, force=False)
185185
assert len(pull_summary.synced_files) == 0
186186

187+
@pytest.mark.parametrize(
188+
"file_on_storage",
189+
["sample.txt"],
190+
indirect=["file_on_storage"],
191+
)
192+
def test_pull_file_local_absolute_path(self, storage, file_on_storage, tmpdir):
193+
local_base_dir = os.path.abspath(tmpdir.mkdir("remote_storage"))
194+
pulled_file_abspath = os.path.join(local_base_dir, file_on_storage)
195+
assert not os.path.exists(pulled_file_abspath)
196+
storage.pull(
197+
pulled_file_abspath,
198+
local_base_dir=local_base_dir,
199+
)
200+
assert os.path.isfile(pulled_file_abspath)
201+
187202
@pytest.mark.parametrize(
188203
"file_on_storage",
189204
["sample.txt"],

0 commit comments

Comments
 (0)