From 261295a6f41d0c80ffcb6b183c7f980ef46df832 Mon Sep 17 00:00:00 2001
From: Gerrit Holl
Date: Thu, 8 Oct 2020 09:53:20 +0200
Subject: [PATCH 1/3] Test for #378 to verify find respects maxdepth.

Added a test to verify that the find method respects the maxdepth
parameter. With ``maxdepth=1``, the results of ``find`` should be the
same as those of ``ls``, without returning subdirectories. See also
issue 378.
---
 s3fs/tests/test_s3fs.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py
index 2978ee2c..f03008eb 100644
--- a/s3fs/tests/test_s3fs.py
+++ b/s3fs/tests/test_s3fs.py
@@ -1679,3 +1679,15 @@ async def async_wrapper():
         await s3._s3.close()
 
     asyncio.run(_())
+
+
+def test_shallow_find(s3):
+    """Test that find method respects maxdepth.
+
+    Verify that the ``find`` method respects the ``maxdepth`` parameter. With
+    ``maxdepth=1``, the results of ``find`` should be the same as those of
+    ``ls``, without returning subdirectories. See also issue 378.
+    """
+
+    assert s3.ls(test_bucket_name) == s3.find(test_bucket_name, maxdepth=1)
+    assert s3.ls(test_bucket_name) == s3.glob(test_bucket_name + "/*")

From 47f1a772a30c3db168182c06be059f0260039d8e Mon Sep 17 00:00:00 2001
From: Martin Durant
Date: Thu, 8 Oct 2020 09:47:17 -0400
Subject: [PATCH 2/3] Add old way

---
 s3fs/core.py            | 6 ++++--
 s3fs/tests/test_s3fs.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/s3fs/core.py b/s3fs/core.py
index b4e8a38f..900212c6 100644
--- a/s3fs/core.py
+++ b/s3fs/core.py
@@ -3,7 +3,6 @@
 import logging
 import os
 import socket
-import time
 from typing import Tuple, Optional
 import weakref
 
@@ -446,6 +445,9 @@ async def _find(self, path, maxdepth=None, withdirs=None, detail=False):
         bucket, key, _ = self.split_path(path)
         if not bucket:
             raise ValueError("Cannot traverse all of S3")
+        if maxdepth:
+            return super().find(bucket + "/" + key, maxdepth=maxdepth, withdirs=withdirs,
+                                detail=detail)
         # TODO: implement find from dircache, if all listings are present
         # if refresh is False:
         #     out = incomplete_tree_dirs(self.dircache, path)
@@ -481,7 +483,7 @@ async def _find(self, path, maxdepth=None, withdirs=None, detail=False):
             return {o['name']: o for o in out}
         return [o['name'] for o in out]
 
-    find = sync_wrapper(_find)
+    #find = sync_wrapper(_find)
 
     async def _mkdir(self, path, acl="", create_parents=True, **kwargs):
         path = self._strip_protocol(path).rstrip('/')
diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py
index f03008eb..5bf1b746 100644
--- a/s3fs/tests/test_s3fs.py
+++ b/s3fs/tests/test_s3fs.py
@@ -1689,5 +1689,5 @@ def test_shallow_find(s3):
     ``ls``, without returning subdirectories. See also issue 378.
""" - assert s3.ls(test_bucket_name) == s3.find(test_bucket_name, maxdepth=1) + assert s3.ls(test_bucket_name) == s3.find(test_bucket_name, maxdepth=1, withdirs=True) assert s3.ls(test_bucket_name) == s3.glob(test_bucket_name + "/*") From 07d51a4e77e9ec1c1f0d894343d7990d26bff8cf Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 8 Oct 2020 10:21:16 -0400 Subject: [PATCH 3/3] more maybe sync --- s3fs/core.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index 900212c6..b168a6a7 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -203,9 +203,7 @@ async def _call_s3(self, method, *akwarglist, **kwargs): **kwargs) for i in range(self.retries): try: - out = await method(**additional_kwargs) - locals().pop("err", None) # break cycle following retry - return out + return await method(**additional_kwargs) except S3_RETRYABLE_ERRORS as e: logger.debug("Retryable error: %s" % e) err = e @@ -483,7 +481,7 @@ async def _find(self, path, maxdepth=None, withdirs=None, detail=False): return {o['name']: o for o in out} return [o['name'] for o in out] - #find = sync_wrapper(_find) + find = sync_wrapper(_find) async def _mkdir(self, path, acl="", create_parents=True, **kwargs): path = self._strip_protocol(path).rstrip('/') @@ -857,7 +855,7 @@ def isdir(self, path): return False # This only returns things within the path and NOT the path object itself - return bool(sync(self.loop, self._lsdir, path)) + return bool(maybe_sync(self._lsdir, self, path)) def ls(self, path, detail=False, refresh=False, **kwargs): """ List single "directory" with or without details @@ -875,9 +873,9 @@ def ls(self, path, detail=False, refresh=False, **kwargs): additional arguments passed on """ path = self._strip_protocol(path).rstrip('/') - files = sync(self.loop, self._ls, path, refresh=refresh) + files = maybe_sync(self._ls, self, path, refresh=refresh) if not files: - files = sync(self.loop, self._ls, self._parent(path), refresh=refresh) + files = maybe_sync(self._ls, self, self._parent(path), refresh=refresh) files = [o for o in files if o['name'].rstrip('/') == path and o['type'] != 'directory'] if detail: @@ -1082,7 +1080,7 @@ def url(self, path, expires=3600, **kwargs): the number of seconds this signature will be good for. """ bucket, key, version_id = self.split_path(path) - return sync(self.loop, self.s3.generate_presigned_url, + return maybe_sync(self.s3.generate_presigned_url, self, ClientMethod='get_object', Params=dict(Bucket=bucket, Key=key, **version_id_kw(version_id), **kwargs), ExpiresIn=expires) @@ -1276,7 +1274,7 @@ def rm(self, path, recursive=False, **kwargs): bucket, key, _ = self.split_path(path) if not key and self.is_bucket_versioned(bucket): # special path to completely remove versioned bucket - sync(self.loop, self._rm_versioned_bucket_contents, bucket) + maybe_sync(self._rm_versioned_bucket_contents, self, bucket) super().rm(path, recursive=recursive, **kwargs) def invalidate_cache(self, path=None):