From 7596c221371473b632352f5424124e3b8f8866f3 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 26 Sep 2020 21:10:20 -0700 Subject: [PATCH 1/3] initial attempt to update azure storage version in zarr --- requirements_dev_optional.txt | 2 +- zarr/storage.py | 91 ++++++++++++++++++----------------- 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index a5cc0e23bd..b1760fb22c 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -6,7 +6,7 @@ ipytree==0.1.3 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor -azure-storage-blob==2.0.1 # pyup: ignore +azure-storage-blob==12.5.0 # pyup: ignore redis==3.3.8 pymongo==3.9.0 # optional test requirements diff --git a/zarr/storage.py b/zarr/storage.py index f6fbe2d002..1e9d21c556 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2148,17 +2148,18 @@ class ABSStore(MutableMapping): def __init__(self, container, prefix='', account_name=None, account_key=None, blob_service_kwargs=None): - from azure.storage.blob import BlockBlobService + from azure.storage.blob import ContainerClient self.container = container self.prefix = normalize_storage_path(prefix) self.account_name = account_name + self.account_account_url = f"https://{self.account_name}.blob.core.windows.net" self.account_key = account_key if blob_service_kwargs is not None: self.blob_service_kwargs = blob_service_kwargs else: # pragma: no cover self.blob_service_kwargs = dict() - self.client = BlockBlobService(self.account_name, self.account_key, - **self.blob_service_kwargs) + self.client = ContainerClient(self.account_account_url, self.container, + credential=self.account_key, **self.blob_service_kwargs) # needed for pickling def __getstate__(self): @@ -2167,10 +2168,10 @@ def __getstate__(self): return state def __setstate__(self, state): - from azure.storage.blob import BlockBlobService + from azure.storage.blob import ContainerClient self.__dict__.update(state) - self.client = BlockBlobService(self.account_name, self.account_key, - **self.blob_service_kwargs) + self.client = ContainerClient(self.account_account_url, self.container, + credential=self.account_key, **self.blob_service_kwargs) def _append_path_to_prefix(self, path): if self.prefix == '': @@ -2189,24 +2190,23 @@ def _strip_prefix_from_path(path, prefix): return path_norm def __getitem__(self, key): - from azure.common import AzureMissingResourceHttpError + from azure.core.exceptions import ResourceNotFoundError blob_name = self._append_path_to_prefix(key) try: - blob = self.client.get_blob_to_bytes(self.container, blob_name) - return blob.content - except AzureMissingResourceHttpError: + return self.client.download_blob(blob_name).readall() + except ResourceNotFoundError: raise KeyError('Blob %s not found' % blob_name) def __setitem__(self, key, value): value = ensure_bytes(value) blob_name = self._append_path_to_prefix(key) - self.client.create_blob_from_bytes(self.container, blob_name, value) + self.client.upload_blob(blob_name, value) def __delitem__(self, key): - from azure.common import AzureMissingResourceHttpError + from azure.core.exceptions import ResourceNotFoundError try: - self.client.delete_blob(self.container, self._append_path_to_prefix(key)) - except AzureMissingResourceHttpError: + self.client.delete_blob(self._append_path_to_prefix(key)) + except ResourceNotFoundError: raise KeyError('Blob %s not found' % key) def __eq__(self, other): @@ -2224,7 +2224,7 @@ def __iter__(self): list_blobs_prefix = self.prefix + '/' else: list_blobs_prefix = None - for blob in self.client.list_blobs(self.container, list_blobs_prefix): + for blob in self.client.list_blobs(list_blobs_prefix): yield self._strip_prefix_from_path(blob.name, self.prefix) def __len__(self): @@ -2232,19 +2232,17 @@ def __len__(self): def __contains__(self, key): blob_name = self._append_path_to_prefix(key) - if self.client.exists(self.container, blob_name): - return True - else: - return False + return self.client.get_blob_client(blob_name).exists() def listdir(self, path=None): - from azure.storage.blob import Blob + # from azure.storage.blob import Blob dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: dir_path += '/' items = list() - for blob in self.client.list_blobs(self.container, prefix=dir_path, delimiter='/'): - if type(blob) == Blob: + for blob in self.client.list_blobs(name_starts_with=dir_path): + # items.append(self._strip_prefix_from_path(blob.name, dir_path)) + if '/' not in blob.name: # what is this doing? items.append(self._strip_prefix_from_path(blob.name, dir_path)) else: items.append(self._strip_prefix_from_path( @@ -2255,29 +2253,32 @@ def rmdir(self, path=None): dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: dir_path += '/' - for blob in self.client.list_blobs(self.container, prefix=dir_path): - self.client.delete_blob(self.container, blob.name) - - def getsize(self, path=None): - from azure.storage.blob import Blob - store_path = normalize_storage_path(path) - fs_path = self.prefix - if store_path: - fs_path = self._append_path_to_prefix(store_path) - if self.client.exists(self.container, fs_path): - return self.client.get_blob_properties(self.container, - fs_path).properties.content_length - else: - size = 0 - if fs_path == '': - fs_path = None - else: - fs_path += '/' - for blob in self.client.list_blobs(self.container, prefix=fs_path, - delimiter='/'): - if type(blob) == Blob: - size += blob.properties.content_length - return size + for blob in self.client.list_blobs(name_starts_with=dir_path): + self.client.delete_blob(blob) + + # It is possible azure.store.blob doesn't provide the content_length attribute on + # the blob propoeries object anymore. Something to look into. + # + # def getsize(self, path=None): + # from azure.storage.blob import Blob + # store_path = normalize_storage_path(path) + # fs_path = self.prefix + # if store_path: + # fs_path = self._append_path_to_prefix(store_path) + # if self.client.get_blob_client(fs_path).exists(): + # return self.client.get_blob_properties(self.container, + # fs_path).properties.content_length + # else: + # size = 0 + # if fs_path == '': + # fs_path = None + # else: + # fs_path += '/' + # for blob in self.client.list_blobs(self.container, prefix=fs_path, + # delimiter='/'): + # if type(blob) == Blob: + # size += blob.properties.content_length + # return size def clear(self): self.rmdir() From 08a9037c68158d7aa6a21aab91002be46207cc5c Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 27 Sep 2020 21:16:14 -0700 Subject: [PATCH 2/3] add getsize back and fixup listdir --- zarr/storage.py | 52 +++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 1e9d21c556..767d98c0ee 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2235,18 +2235,12 @@ def __contains__(self, key): return self.client.get_blob_client(blob_name).exists() def listdir(self, path=None): - # from azure.storage.blob import Blob dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: dir_path += '/' - items = list() - for blob in self.client.list_blobs(name_starts_with=dir_path): - # items.append(self._strip_prefix_from_path(blob.name, dir_path)) - if '/' not in blob.name: # what is this doing? - items.append(self._strip_prefix_from_path(blob.name, dir_path)) - else: - items.append(self._strip_prefix_from_path( - blob.name[:blob.name.find('/', len(dir_path))], dir_path)) + items = set() + for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter='/'): + items.add(self._strip_prefix_from_path(blob.name, dir_path)) return items def rmdir(self, path=None): @@ -2256,29 +2250,23 @@ def rmdir(self, path=None): for blob in self.client.list_blobs(name_starts_with=dir_path): self.client.delete_blob(blob) - # It is possible azure.store.blob doesn't provide the content_length attribute on - # the blob propoeries object anymore. Something to look into. - # - # def getsize(self, path=None): - # from azure.storage.blob import Blob - # store_path = normalize_storage_path(path) - # fs_path = self.prefix - # if store_path: - # fs_path = self._append_path_to_prefix(store_path) - # if self.client.get_blob_client(fs_path).exists(): - # return self.client.get_blob_properties(self.container, - # fs_path).properties.content_length - # else: - # size = 0 - # if fs_path == '': - # fs_path = None - # else: - # fs_path += '/' - # for blob in self.client.list_blobs(self.container, prefix=fs_path, - # delimiter='/'): - # if type(blob) == Blob: - # size += blob.properties.content_length - # return size + def getsize(self, path=None): + store_path = normalize_storage_path(path) + fs_path = self._append_path_to_prefix(store_path) + blob_client = self.client.get_blob_client(fs_path) + if blob_client.exists(): + return blob_client.get_blob_properties().size + else: + size = 0 + if fs_path == '': + fs_path = None + elif not fs_path.endswith('/'): + fs_path += '/' + for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'): + blob_client = self.client.get_blob_client(blob) + if blob_client.exists(): + size += blob_client.get_blob_properties().size + return size def clear(self): self.rmdir() From 4af839d7d8f7ed8e9bd903f12ed24da7bc5d5d74 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 27 Nov 2020 21:29:54 +0000 Subject: [PATCH 3/3] overwrite existing blob in setitem --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 36330eb7a4..f218ddc7ed 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2210,7 +2210,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): value = ensure_bytes(value) blob_name = self._append_path_to_prefix(key) - self.client.upload_blob(blob_name, value) + self.client.upload_blob(blob_name, value, overwrite=True) def __delitem__(self, key): from azure.core.exceptions import ResourceNotFoundError