Skip to content

Commit

Permalink
Merge pull request #360 from martindurant/simple_find
Browse files Browse the repository at this point in the history
Do find without recursion
  • Loading branch information
martindurant authored Sep 8, 2020
2 parents dde5c6b + dac99d5 commit 0697237
Showing 1 changed file with 24 additions and 4 deletions.
28 changes: 24 additions & 4 deletions s3fs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,17 +404,17 @@ def _open(self, path, mode='rb', block_size=None, acl='', version_id=None,
s3_additional_kwargs=kw, cache_type=cache_type,
autocommit=autocommit, requester_pays=requester_pays)

async def _lsdir(self, path, refresh=False, max_items=None):
async def _lsdir(self, path, refresh=False, max_items=None, delimiter="/"):
bucket, prefix, _ = self.split_path(path)
prefix = prefix + '/' if prefix else ""
if path not in self.dircache or refresh:
if path not in self.dircache or refresh or delimiter is None:
try:
logger.debug("Get directory listing page for %s" % path)
pag = self.s3.get_paginator('list_objects_v2')
config = {}
if max_items is not None:
config.update(MaxItems=max_items, PageSize=2 * max_items)
it = pag.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/',
it = pag.paginate(Bucket=bucket, Prefix=prefix, Delimiter=delimiter,
PaginationConfig=config, **self.req_kw)
files = []
dircache = []
Expand All @@ -435,10 +435,27 @@ async def _lsdir(self, path, refresh=False, max_items=None):
except ClientError as e:
raise translate_boto_error(e) from e

self.dircache[path] = files
if delimiter:
self.dircache[path] = files
return files
return self.dircache[path]

async def _find(self, path, maxdepth=None, withdirs=None, detail=False):
    """List every object below ``path`` without per-directory recursion.

    A single undelimited ``list_objects`` call returns all keys sharing the
    prefix, so no directory-by-directory walk is needed.

    Parameters
    ----------
    path : str
        Bucket (optionally with key prefix) to search under; must name a
        bucket — listing all of S3 is refused.
    maxdepth, withdirs :
        Accepted for interface compatibility; not used here.
    detail : bool
        If True return ``{name: info_dict}``; otherwise a list of names.

    Raises
    ------
    ValueError
        If ``path`` does not include a bucket.
    """
    bucket, key, _ = self.split_path(path)
    if not bucket:
        raise ValueError("Cannot traverse all of S3")
    # Empty delimiter => S3 returns every key under the prefix in one listing
    entries = await self._lsdir(path, delimiter="")
    if not entries and key:
        # The path may name a single object rather than a prefix
        try:
            entries = [await self._info(path)]
        except FileNotFoundError:
            entries = []
    if not detail:
        return [entry['name'] for entry in entries]
    return {entry['name']: entry for entry in entries}

# Blocking facade over the async ``_find`` coroutine for synchronous callers
find = sync_wrapper(_find)

async def _mkdir(self, path, acl="", create_parents=True, **kwargs):
path = self._strip_protocol(path).rstrip('/')
bucket, key, _ = self.split_path(path)
Expand Down Expand Up @@ -626,6 +643,9 @@ async def _pipe_file(self, path, data, chunksize=50*2**20, **kwargs):

async def _put_file(self, lpath, rpath, chunksize=50*2**20, **kwargs):
bucket, key, _ = self.split_path(rpath)
if os.path.isdir(lpath) and key:
# don't make remote "directory"
return
size = os.path.getsize(lpath)
with open(lpath, 'rb') as f0:
if size < 5 * 2**20:
Expand Down

0 comments on commit 0697237

Please sign in to comment.