From beba1c32be5b247ae5e7ab5c84b22066832c0e99 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Mon, 5 Feb 2024 10:45:59 +0100 Subject: [PATCH] Define public API for `url_operations` This implements the plan declared at https://github.com/datalad/datalad-next/issues/613#issuecomment-1923489248 A dedicated list of components is exposed at the package's top-level, and only that. All other functionality and imports have been moved to package-internal files. All other, now undesired, top-level imports have been annotated and scheduled for removal with the v2.0 release. --- .../annexremotes/tests/test_uncurl.py | 2 +- datalad_next/annexremotes/uncurl.py | 2 +- datalad_next/commands/download.py | 2 +- datalad_next/url_operations/__init__.py | 450 ++---------------- datalad_next/url_operations/any.py | 2 +- datalad_next/url_operations/base.py | 367 ++++++++++++++ datalad_next/url_operations/exceptions.py | 77 +++ datalad_next/url_operations/file.py | 4 +- datalad_next/url_operations/http.py | 4 +- datalad_next/url_operations/ssh.py | 4 +- .../url_operations/tests/test_http.py | 4 +- datalad_next/utils/__init__.py | 2 +- 12 files changed, 488 insertions(+), 432 deletions(-) create mode 100644 datalad_next/url_operations/base.py create mode 100644 datalad_next/url_operations/exceptions.py diff --git a/datalad_next/annexremotes/tests/test_uncurl.py b/datalad_next/annexremotes/tests/test_uncurl.py index f2808b7a..0ce4cfeb 100644 --- a/datalad_next/annexremotes/tests/test_uncurl.py +++ b/datalad_next/annexremotes/tests/test_uncurl.py @@ -14,7 +14,7 @@ UrlOperationsRemoteError, IncompleteResultsError, ) -from datalad_next.url_operations.any import AnyUrlOperations +from datalad_next.url_operations import AnyUrlOperations from ..uncurl import ( RemoteError, diff --git a/datalad_next/annexremotes/uncurl.py b/datalad_next/annexremotes/uncurl.py index 51dade90..3521d661 100644 --- a/datalad_next/annexremotes/uncurl.py +++ b/datalad_next/annexremotes/uncurl.py @@ -228,7 +228,7 @@ 
UrlOperationsRemoteError, UrlOperationsResourceUnknown, ) -from datalad_next.url_operations.any import AnyUrlOperations +from datalad_next.url_operations import AnyUrlOperations from datalad_next.utils import ensure_list from . import ( diff --git a/datalad_next/commands/download.py b/datalad_next/commands/download.py index 927079b3..479cf464 100644 --- a/datalad_next/commands/download.py +++ b/datalad_next/commands/download.py @@ -38,7 +38,7 @@ WithDescription, ) from datalad_next.constraints.dataset import EnsureDataset -from datalad_next.url_operations.any import AnyUrlOperations +from datalad_next.url_operations import AnyUrlOperations lgr = getLogger('datalad.local.download') diff --git a/datalad_next/url_operations/__init__.py b/datalad_next/url_operations/__init__.py index a66775c4..7dc8db36 100644 --- a/datalad_next/url_operations/__init__.py +++ b/datalad_next/url_operations/__init__.py @@ -6,14 +6,39 @@ .. autosummary:: :toctree: generated - any - file - http - ssh + UrlOperations + AnyUrlOperations + FileUrlOperations + HttpUrlOperations + SshUrlOperations + UrlOperationsRemoteError + UrlOperationsResourceUnknown + UrlOperationsInteractionError + UrlOperationsAuthenticationError + UrlOperationsAuthorizationError """ -# allow for |-type UnionType declarations -from __future__ import annotations +from .base import ( + # base class for 3rd-party extensions and implementations + UrlOperations, +) + +# operation support for different protocols +from .any import AnyUrlOperations +from .file import FileUrlOperations +from .http import HttpUrlOperations +from .ssh import SshUrlOperations + +# primary exceptions types +from .exceptions import ( + UrlOperationsRemoteError, + UrlOperationsResourceUnknown, + UrlOperationsInteractionError, + UrlOperationsAuthenticationError, + UrlOperationsAuthorizationError, +) + +# TODO REMOVE EVERYTHING BELOW FOR V2.0 import logging from functools import partial from more_itertools import side_effect @@ -34,416 +59,3 @@ ) lgr = 
logging.getLogger('datalad.ext.next.url_operations') - - -class UrlOperations: - """Abstraction for operations on URLs - - Support for specific URL schemes can be implemented via sub-classes. - Such classes must comply with the following conditions: - - - Any configuration look-up must be performed with the `self.cfg` - property, which is guaranteed to be a `ConfigManager` instance. - - - When downloads are to be supported, implement the `download()` method - and comply with the behavior described in its documentation. - - This class provides a range of helper methods to aid computation of - hashes and progress reporting. - """ - def __init__(self, *, cfg: ConfigManager | None = None): - """ - Parameters - ---------- - cfg: ConfigManager, optional - A config manager instance that implementations will consult for - any configuration items they may support. - """ - self._cfg = cfg - - @property - def cfg(self) -> ConfigManager: - - if self._cfg is None: - self._cfg = datalad.cfg - return self._cfg - - def stat(self, - url: str, - *, - credential: str | None = None, - timeout: float | None = None) -> Dict: - """Gather information on a URL target, without downloading it - - Returns - ------- - dict - A mapping of property names to values of the URL target. The - particular composition of properties depends on the specific - URL. A standard property is 'content-length', indicating the - size of a download. - - Raises - ------ - UrlOperationsRemoteError - This exception is raised on any access-related error on the remote - side, with a summary of the underlying issues as its message. - It may carry a status code (e.g. HTTP status code) as its - ``status_code`` property. - Any underlying exception must be linked via the `__cause__` - property (e.g. `raise UrlOperationsRemoteError(...) from ...`). 
- UrlOperationsInteractionError - UrlOperationsAuthenticationError - UrlOperationsAuthorizationError - UrlOperationsResourceUnknown - Implementations that can distinguish several remote error types - beyond indication a general ``UrlOperationsRemoteError``: - ``UrlOperationsInteractionError`` general issues in communicating - with the remote side; ``UrlOperationsAuthenticationError`` for - errors related to (failed) authentication at the remote; - ``UrlOperationsAuthorizationError`` for (lack of) authorizating - to access a particular resource of perform a particular operation; - ``UrlOperationsResourceUnknown`` if the target of an operation - does not exist. - TimeoutError - If `timeout` is given and the operation does not complete within the - number of seconds that a specified by `timeout`. - """ - raise NotImplementedError - - def download(self, - from_url: str, - to_path: Path | None, - *, - credential: str | None = None, - hash: list[str] | None = None, - timeout: float | None = None) -> Dict: - """Download from a URL to a local file or stream to stdout - - Parameters - ---------- - from_url: str - Valid URL with any scheme supported by a particular implementation. - to_path: Path or None - A local platform-native path or `None`. If `None` the downloaded - data is written to `stdout`, otherwise it is written to a file - at the given path. The path is assumed to not exist. Any existing - file will be overwritten. - credential: str, optional - The name of a dedicated credential to be used for authentication - in order to perform the download. Particular implementations may - or may not require or support authentication. They also may or - may not support automatic credential lookup. - hash: list(algorithm_names), optional - If given, must be a list of hash algorithm names supported by the - `hashlib` module. A corresponding hash will be computed simultaenous - to the download (without reading the data twice), and included - in the return value. 
- timeout: float, optional - If given, specifies a timeout in seconds. If the operation is not - completed within this time, it will raise a `TimeoutError`-exception. - If timeout is None, the operation will never timeout. - - Returns - ------- - dict - A mapping of property names to values for the completed download. - If `hash` algorithm names are provided, a corresponding key for - each algorithm is included in this mapping, with the hexdigest - of the corresponding checksum as the value. - - Raises - ------ - UrlOperationsRemoteError - This exception is raised on any deletion-related error on the remote - side, with a summary of the underlying issues as its message. - It may carry a status code (e.g. HTTP status code) as its - ``status_code`` property. - Any underlying exception must be linked via the `__cause__` - property (e.g. `raise UrlOperationsRemoteError(...) from ...`). - UrlOperationsInteractionError - UrlOperationsAuthenticationError - UrlOperationsAuthorizationError - UrlOperationsResourceUnknown - Implementations that can distinguish several remote error types - beyond indication a general ``UrlOperationsRemoteError``: - ``UrlOperationsInteractionError`` general issues in communicating - with the remote side; ``UrlOperationsAuthenticationError`` for - errors related to (failed) authentication at the remote; - ``UrlOperationsAuthorizationError`` for (lack of) authorizating - to access a particular resource of perform a particular operation; - ``UrlOperationsResourceUnknown`` if the target of an operation - does not exist. - TimeoutError - If `timeout` is given and the operation does not complete within the - number of seconds that a specified by `timeout`. 
- """ - raise NotImplementedError - - def upload(self, - from_path: Path | None, - to_url: str, - *, - credential: str | None = None, - hash: list[str] | None = None, - timeout: float | None = None) -> Dict: - """Upload from a local file or stream to a URL - - Parameters - ---------- - from_path: Path or None - A local platform-native path or `None`. If `None` the upload - data is read from `stdin`, otherwise it is read from a file - at the given path. - to_url: str - Valid URL with any scheme supported by a particular implementation. - The target is assumed to not conflict with existing content, and - may be overwritten. - credential: str, optional - The name of a dedicated credential to be used for authentication - in order to perform the upload. Particular implementations may - or may not require or support authentication. They also may or - may not support automatic credential lookup. - hash: list(algorithm_names), optional - If given, must be a list of hash algorithm names supported by the - `hashlib` module. A corresponding hash will be computed simultaenous - to the upload (without reading the data twice), and included - in the return value. - timeout: float, optional - If given, specifies a timeout in seconds. If the operation is not - completed within this time, it will raise a `TimeoutError`-exception. - If timeout is None, the operation will never timeout. - - Returns - ------- - dict - A mapping of property names to values for the completed upload. - If `hash` algorithm names are provided, a corresponding key for - each algorithm is included in this mapping, with the hexdigest - of the corresponding checksum as the value. - - Raises - ------ - FileNotFoundError - If the source file cannot be found. - UrlOperationsRemoteError - This exception is raised on any deletion-related error on the remote - side, with a summary of the underlying issues as its message. - It may carry a status code (e.g. HTTP status code) as its - ``status_code`` property. 
- Any underlying exception must be linked via the `__cause__` - property (e.g. `raise UrlOperationsRemoteError(...) from ...`). - UrlOperationsInteractionError - UrlOperationsAuthenticationError - UrlOperationsAuthorizationError - UrlOperationsResourceUnknown - Implementations that can distinguish several remote error types - beyond indication a general ``UrlOperationsRemoteError``: - ``UrlOperationsInteractionError`` general issues in communicating - with the remote side; ``UrlOperationsAuthenticationError`` for - errors related to (failed) authentication at the remote; - ``UrlOperationsAuthorizationError`` for (lack of) authorizating - to access a particular resource of perform a particular operation; - ``UrlOperationsResourceUnknown`` if the target of an operation - does not exist. - TimeoutError - If `timeout` is given and the operation does not complete within the - number of seconds that a specified by `timeout`. - """ - raise NotImplementedError - - def delete(self, - url: str, - *, - credential: str | None = None, - timeout: float | None = None) -> Dict: - """Delete a resource identified by a URL - - Parameters - ---------- - url: str - Valid URL with any scheme supported by a particular implementation. - credential: str, optional - The name of a dedicated credential to be used for authentication - in order to perform the deletion. Particular implementations may - or may not require or support authentication. They also may or - may not support automatic credential lookup. - timeout: float, optional - If given, specifies a timeout in seconds. If the operation is not - completed within this time, it will raise a `TimeoutError`-exception. - If timeout is None, the operation will never timeout. - - Returns - ------- - dict - A mapping of property names to values for the deletion. - - Raises - ------ - UrlOperationsRemoteError - This exception is raised on any deletion-related error on the remote - side, with a summary of the underlying issues as its message. 
- It may carry a status code (e.g. HTTP status code) as its - ``status_code`` property. - Any underlying exception must be linked via the `__cause__` - property (e.g. `raise UrlOperationsRemoteError(...) from ...`). - UrlOperationsInteractionError - UrlOperationsAuthenticationError - UrlOperationsAuthorizationError - UrlOperationsResourceUnknown - Implementations that can distinguish several remote error types - beyond indication a general ``UrlOperationsRemoteError``: - ``UrlOperationsInteractionError`` general issues in communicating - with the remote side; ``UrlOperationsAuthenticationError`` for - errors related to (failed) authentication at the remote; - ``UrlOperationsAuthorizationError`` for (lack of) authorizating - to access a particular resource of perform a particular operation; - ``UrlOperationsResourceUnknown`` if the target of an operation - does not exist. - TimeoutError - If `timeout` is given and the operation does not complete within the - number of seconds that a specified by `timeout`. 
- """ - raise NotImplementedError - - - def _get_progress_id(self, from_id: str, to_id: str): - return f'progress_transport_{from_id}_{to_id}' - - def _progress_report_start(self, - pid: str, - log_msg: tuple, - label: str, - expected_size: int | None): - log_progress( - lgr.info, - pid, - *log_msg, - unit=' Bytes', - label=label, - total=expected_size, - noninteractive_level=logging.DEBUG, - ) - - def _progress_report_update(self, pid: str, log_msg: tuple, increment: int): - log_progress( - lgr.info, pid, *log_msg, - update=increment, - increment=True, - noninteractive_level=logging.DEBUG, - ) - - def _progress_report_stop(self, pid: str, log_msg: tuple): - log_progress( - lgr.info, pid, *log_msg, - noninteractive_level=logging.DEBUG, - ) - - def _get_hasher(self, hash: list[str] | None) -> NoOpHash | MultiHash: - return MultiHash(hash) if hash is not None else NoOpHash() - - def _with_progress(self, - stream: Iterable[Any], - *, - progress_id: str, - label: str, - expected_size: int | None, - start_log_msg: tuple, - end_log_msg: tuple, - update_log_msg: tuple - ) -> Generator[Any, None, None]: - yield from side_effect( - lambda chunk: self._progress_report_update( - progress_id, - update_log_msg, - len(chunk) - ), - stream, - before=partial( - self._progress_report_start, - progress_id, - start_log_msg, - label, - expected_size - ), - after=partial( - self._progress_report_stop, - progress_id, - end_log_msg - ) - ) - - -# -# Exceptions to be used by all handlers -# - - -class UrlOperationsRemoteError(Exception): - def __init__(self, url, message=None, status_code: Any = None): - # use base exception feature to store all arguments in a tuple - # and have named properties to access them - super().__init__( - url, - message, - status_code, - ) - - def __str__(self): - url, message, status_code = self.args - if message: - return message - - if status_code: - return f"error {status_code} for {url!r}" - - return f"{self.__class__.__name__} for {url!r}" - - def 
__repr__(self) -> str: - url, message, status_code = self.args - return f"{self.__class__.__name__}(" \ - f"{url!r}, {message!r}, {status_code!r})" - - @property - def url(self): - return self.args[0] - - @property - def message(self): - return self.args[1] - - @property - def status_code(self): - return self.args[2] - - -class UrlOperationsResourceUnknown(UrlOperationsRemoteError): - """A connection request succeeded in principle, but target was not found - - Equivalent of an HTTP404 response. - """ - pass - - -class UrlOperationsInteractionError(UrlOperationsRemoteError): - pass - - -class UrlOperationsAuthenticationError(UrlOperationsInteractionError): - def __init__(self, - url: str, - credential: dict | None = None, - message: str | None = None, - status_code: Any = None): - super().__init__(url, message=message, status_code=status_code) - self.credential = credential - - -class UrlOperationsAuthorizationError(UrlOperationsRemoteError): - def __init__(self, - url: str, - credential: dict | None = None, - message: str | None = None, - status_code: Any | None = None): - super().__init__(url, message=message, status_code=status_code) - self.credential = credential diff --git a/datalad_next/url_operations/any.py b/datalad_next/url_operations/any.py index 9b2cd75e..a3d9d3ac 100644 --- a/datalad_next/url_operations/any.py +++ b/datalad_next/url_operations/any.py @@ -13,7 +13,7 @@ from datalad_next.config import ConfigManager from datalad_next.exceptions import CapturedException -from . 
import UrlOperations +from .base import UrlOperations lgr = logging.getLogger('datalad.ext.next.url_operations.any') diff --git a/datalad_next/url_operations/base.py b/datalad_next/url_operations/base.py new file mode 100644 index 00000000..a7b5d443 --- /dev/null +++ b/datalad_next/url_operations/base.py @@ -0,0 +1,367 @@ +"""API base class""" + +from __future__ import annotations + + +import logging +from functools import partial +from more_itertools import side_effect +from pathlib import Path +from typing import ( + Any, + Dict, + Generator, + Iterable, +) + +import datalad +from datalad_next.config import ConfigManager +from datalad_next.utils import log_progress +from datalad_next.utils.multihash import ( + MultiHash, + NoOpHash, +) + + +lgr = logging.getLogger('datalad.ext.next.url_operations') + + +class UrlOperations: + """Abstraction for operations on URLs + + Support for specific URL schemes can be implemented via sub-classes. + Such classes must comply with the following conditions: + + - Any configuration look-up must be performed with the `self.cfg` + property, which is guaranteed to be a `ConfigManager` instance. + + - When downloads are to be supported, implement the `download()` method + and comply with the behavior described in its documentation. + + This class provides a range of helper methods to aid computation of + hashes and progress reporting. + """ + def __init__(self, *, cfg: ConfigManager | None = None): + """ + Parameters + ---------- + cfg: ConfigManager, optional + A config manager instance that implementations will consult for + any configuration items they may support. 
+ """ + self._cfg = cfg + + @property + def cfg(self) -> ConfigManager: + + if self._cfg is None: + self._cfg = datalad.cfg + return self._cfg + + def stat(self, + url: str, + *, + credential: str | None = None, + timeout: float | None = None) -> Dict: + """Gather information on a URL target, without downloading it + + Returns + ------- + dict + A mapping of property names to values of the URL target. The + particular composition of properties depends on the specific + URL. A standard property is 'content-length', indicating the + size of a download. + + Raises + ------ + UrlOperationsRemoteError + This exception is raised on any access-related error on the remote + side, with a summary of the underlying issues as its message. + It may carry a status code (e.g. HTTP status code) as its + ``status_code`` property. + Any underlying exception must be linked via the `__cause__` + property (e.g. `raise UrlOperationsRemoteError(...) from ...`). + UrlOperationsInteractionError + UrlOperationsAuthenticationError + UrlOperationsAuthorizationError + UrlOperationsResourceUnknown + Implementations that can distinguish several remote error types + beyond indicating a general ``UrlOperationsRemoteError``: + ``UrlOperationsInteractionError`` general issues in communicating + with the remote side; ``UrlOperationsAuthenticationError`` for + errors related to (failed) authentication at the remote; + ``UrlOperationsAuthorizationError`` for (lack of) authorization + to access a particular resource or perform a particular operation; + ``UrlOperationsResourceUnknown`` if the target of an operation + does not exist. + TimeoutError + If `timeout` is given and the operation does not complete within the + number of seconds that is specified by `timeout`. 
+ """ + raise NotImplementedError + + def download(self, + from_url: str, + to_path: Path | None, + *, + credential: str | None = None, + hash: list[str] | None = None, + timeout: float | None = None) -> Dict: + """Download from a URL to a local file or stream to stdout + + Parameters + ---------- + from_url: str + Valid URL with any scheme supported by a particular implementation. + to_path: Path or None + A local platform-native path or `None`. If `None` the downloaded + data is written to `stdout`, otherwise it is written to a file + at the given path. The path is assumed to not exist. Any existing + file will be overwritten. + credential: str, optional + The name of a dedicated credential to be used for authentication + in order to perform the download. Particular implementations may + or may not require or support authentication. They also may or + may not support automatic credential lookup. + hash: list(algorithm_names), optional + If given, must be a list of hash algorithm names supported by the + `hashlib` module. A corresponding hash will be computed simultaneously + with the download (without reading the data twice), and included + in the return value. + timeout: float, optional + If given, specifies a timeout in seconds. If the operation is not + completed within this time, it will raise a `TimeoutError`-exception. + If timeout is None, the operation will never time out. + + Returns + ------- + dict + A mapping of property names to values for the completed download. + If `hash` algorithm names are provided, a corresponding key for + each algorithm is included in this mapping, with the hexdigest + of the corresponding checksum as the value. + + Raises + ------ + UrlOperationsRemoteError + This exception is raised on any download-related error on the remote + side, with a summary of the underlying issues as its message. + It may carry a status code (e.g. HTTP status code) as its + ``status_code`` property. 
+ Any underlying exception must be linked via the `__cause__` + property (e.g. `raise UrlOperationsRemoteError(...) from ...`). + UrlOperationsInteractionError + UrlOperationsAuthenticationError + UrlOperationsAuthorizationError + UrlOperationsResourceUnknown + Implementations that can distinguish several remote error types + beyond indicating a general ``UrlOperationsRemoteError``: + ``UrlOperationsInteractionError`` general issues in communicating + with the remote side; ``UrlOperationsAuthenticationError`` for + errors related to (failed) authentication at the remote; + ``UrlOperationsAuthorizationError`` for (lack of) authorization + to access a particular resource or perform a particular operation; + ``UrlOperationsResourceUnknown`` if the target of an operation + does not exist. + TimeoutError + If `timeout` is given and the operation does not complete within the + number of seconds that is specified by `timeout`. + """ + raise NotImplementedError + + def upload(self, + from_path: Path | None, + to_url: str, + *, + credential: str | None = None, + hash: list[str] | None = None, + timeout: float | None = None) -> Dict: + """Upload from a local file or stream to a URL + + Parameters + ---------- + from_path: Path or None + A local platform-native path or `None`. If `None` the upload + data is read from `stdin`, otherwise it is read from a file + at the given path. + to_url: str + Valid URL with any scheme supported by a particular implementation. + The target is assumed to not conflict with existing content, and + may be overwritten. + credential: str, optional + The name of a dedicated credential to be used for authentication + in order to perform the upload. Particular implementations may + or may not require or support authentication. They also may or + may not support automatic credential lookup. + hash: list(algorithm_names), optional + If given, must be a list of hash algorithm names supported by the + `hashlib` module. 
A corresponding hash will be computed simultaneously + with the upload (without reading the data twice), and included + in the return value. + timeout: float, optional + If given, specifies a timeout in seconds. If the operation is not + completed within this time, it will raise a `TimeoutError`-exception. + If timeout is None, the operation will never time out. + + Returns + ------- + dict + A mapping of property names to values for the completed upload. + If `hash` algorithm names are provided, a corresponding key for + each algorithm is included in this mapping, with the hexdigest + of the corresponding checksum as the value. + + Raises + ------ + FileNotFoundError + If the source file cannot be found. + UrlOperationsRemoteError + This exception is raised on any upload-related error on the remote + side, with a summary of the underlying issues as its message. + It may carry a status code (e.g. HTTP status code) as its + ``status_code`` property. + Any underlying exception must be linked via the `__cause__` + property (e.g. `raise UrlOperationsRemoteError(...) from ...`). + UrlOperationsInteractionError + UrlOperationsAuthenticationError + UrlOperationsAuthorizationError + UrlOperationsResourceUnknown + Implementations that can distinguish several remote error types + beyond indicating a general ``UrlOperationsRemoteError``: + ``UrlOperationsInteractionError`` general issues in communicating + with the remote side; ``UrlOperationsAuthenticationError`` for + errors related to (failed) authentication at the remote; + ``UrlOperationsAuthorizationError`` for (lack of) authorization + to access a particular resource or perform a particular operation; + ``UrlOperationsResourceUnknown`` if the target of an operation + does not exist. + TimeoutError + If `timeout` is given and the operation does not complete within the + number of seconds that is specified by `timeout`. 
+ """ + raise NotImplementedError + + def delete(self, + url: str, + *, + credential: str | None = None, + timeout: float | None = None) -> Dict: + """Delete a resource identified by a URL + + Parameters + ---------- + url: str + Valid URL with any scheme supported by a particular implementation. + credential: str, optional + The name of a dedicated credential to be used for authentication + in order to perform the deletion. Particular implementations may + or may not require or support authentication. They also may or + may not support automatic credential lookup. + timeout: float, optional + If given, specifies a timeout in seconds. If the operation is not + completed within this time, it will raise a `TimeoutError`-exception. + If timeout is None, the operation will never time out. + + Returns + ------- + dict + A mapping of property names to values for the deletion. + + Raises + ------ + UrlOperationsRemoteError + This exception is raised on any deletion-related error on the remote + side, with a summary of the underlying issues as its message. + It may carry a status code (e.g. HTTP status code) as its + ``status_code`` property. + Any underlying exception must be linked via the `__cause__` + property (e.g. `raise UrlOperationsRemoteError(...) from ...`). + UrlOperationsInteractionError + UrlOperationsAuthenticationError + UrlOperationsAuthorizationError + UrlOperationsResourceUnknown + Implementations that can distinguish several remote error types + beyond indicating a general ``UrlOperationsRemoteError``: + ``UrlOperationsInteractionError`` general issues in communicating + with the remote side; ``UrlOperationsAuthenticationError`` for + errors related to (failed) authentication at the remote; + ``UrlOperationsAuthorizationError`` for (lack of) authorization + to access a particular resource or perform a particular operation; + ``UrlOperationsResourceUnknown`` if the target of an operation + does not exist. 
+ TimeoutError + If `timeout` is given and the operation does not complete within the + number of seconds that is specified by `timeout`. + """ + raise NotImplementedError + + + def _get_progress_id(self, from_id: str, to_id: str): + return f'progress_transport_{from_id}_{to_id}' + + def _progress_report_start(self, + pid: str, + log_msg: tuple, + label: str, + expected_size: int | None): + log_progress( + lgr.info, + pid, + *log_msg, + unit=' Bytes', + label=label, + total=expected_size, + noninteractive_level=logging.DEBUG, + ) + + def _progress_report_update(self, pid: str, log_msg: tuple, increment: int): + log_progress( + lgr.info, pid, *log_msg, + update=increment, + increment=True, + noninteractive_level=logging.DEBUG, + ) + + def _progress_report_stop(self, pid: str, log_msg: tuple): + log_progress( + lgr.info, pid, *log_msg, + noninteractive_level=logging.DEBUG, + ) + + def _get_hasher(self, hash: list[str] | None) -> NoOpHash | MultiHash: + return MultiHash(hash) if hash is not None else NoOpHash() + + def _with_progress(self, + stream: Iterable[Any], + *, + progress_id: str, + label: str, + expected_size: int | None, + start_log_msg: tuple, + end_log_msg: tuple, + update_log_msg: tuple + ) -> Generator[Any, None, None]: + yield from side_effect( + lambda chunk: self._progress_report_update( + progress_id, + update_log_msg, + len(chunk) + ), + stream, + before=partial( + self._progress_report_start, + progress_id, + start_log_msg, + label, + expected_size + ), + after=partial( + self._progress_report_stop, + progress_id, + end_log_msg + ) + ) + + + diff --git a/datalad_next/url_operations/exceptions.py b/datalad_next/url_operations/exceptions.py new file mode 100644 index 00000000..52f9d047 --- /dev/null +++ b/datalad_next/url_operations/exceptions.py @@ -0,0 +1,77 @@ +"""Exceptions to be used by all handlers""" + +from __future__ import annotations + +from typing import ( + Any, +) + + +class UrlOperationsRemoteError(Exception): + def __init__(self, url, 
message=None, status_code: Any = None): + # use base exception feature to store all arguments in a tuple + # and have named properties to access them + super().__init__( + url, + message, + status_code, + ) + + def __str__(self): + url, message, status_code = self.args + if message: + return message + + if status_code: + return f"error {status_code} for {url!r}" + + return f"{self.__class__.__name__} for {url!r}" + + def __repr__(self) -> str: + url, message, status_code = self.args + return f"{self.__class__.__name__}(" \ + f"{url!r}, {message!r}, {status_code!r})" + + @property + def url(self): + return self.args[0] + + @property + def message(self): + return self.args[1] + + @property + def status_code(self): + return self.args[2] + + +class UrlOperationsResourceUnknown(UrlOperationsRemoteError): + """A connection request succeeded in principle, but target was not found + + Equivalent of an HTTP404 response. + """ + pass + + +class UrlOperationsInteractionError(UrlOperationsRemoteError): + pass + + +class UrlOperationsAuthenticationError(UrlOperationsInteractionError): + def __init__(self, + url: str, + credential: dict | None = None, + message: str | None = None, + status_code: Any = None): + super().__init__(url, message=message, status_code=status_code) + self.credential = credential + + +class UrlOperationsAuthorizationError(UrlOperationsRemoteError): + def __init__(self, + url: str, + credential: dict | None = None, + message: str | None = None, + status_code: Any | None = None): + super().__init__(url, message=message, status_code=status_code) + self.credential = credential diff --git a/datalad_next/url_operations/file.py b/datalad_next/url_operations/file.py index 7f72b4ad..4e76af68 100644 --- a/datalad_next/url_operations/file.py +++ b/datalad_next/url_operations/file.py @@ -14,8 +14,8 @@ from datalad_next.utils.consts import COPY_BUFSIZE -from . 
import ( - UrlOperations, +from .base import UrlOperations +from .exceptions import ( UrlOperationsRemoteError, UrlOperationsResourceUnknown, ) diff --git a/datalad_next/url_operations/http.py b/datalad_next/url_operations/http.py index 11eaddc1..46e5e710 100644 --- a/datalad_next/url_operations/http.py +++ b/datalad_next/url_operations/http.py @@ -16,8 +16,8 @@ DataladAuth, parse_www_authenticate, ) -from . import ( - UrlOperations, +from .base import UrlOperations +from .exceptions import ( UrlOperationsRemoteError, UrlOperationsResourceUnknown, ) diff --git a/datalad_next/url_operations/ssh.py b/datalad_next/url_operations/ssh.py index a46fd406..818a724e 100644 --- a/datalad_next/url_operations/ssh.py +++ b/datalad_next/url_operations/ssh.py @@ -35,8 +35,8 @@ ) -from . import ( - UrlOperations, +from .base import UrlOperations +from .exceptions import ( UrlOperationsRemoteError, UrlOperationsResourceUnknown, ) diff --git a/datalad_next/url_operations/tests/test_http.py b/datalad_next/url_operations/tests/test_http.py index 46a57234..1878bfce 100644 --- a/datalad_next/url_operations/tests/test_http.py +++ b/datalad_next/url_operations/tests/test_http.py @@ -119,8 +119,8 @@ def test_size_less_progress_reporting(http_server, monkeypatch): def catch_progress(*_, **kwargs): logs.append(kwargs) - import datalad_next.url_operations - monkeypatch.setattr(datalad_next.url_operations, 'log_progress', catch_progress) + import datalad_next.url_operations.base + monkeypatch.setattr(datalad_next.url_operations.base, 'log_progress', catch_progress) http_handler = HttpUrlOperations() http_handler._stream_download_from_request(r, None) diff --git a/datalad_next/utils/__init__.py b/datalad_next/utils/__init__.py index 97da7ad6..c80056d9 100644 --- a/datalad_next/utils/__init__.py +++ b/datalad_next/utils/__init__.py @@ -78,7 +78,7 @@ def get_specialremote_credential_properties(params): remote_type = params.get('type') if remote_type == 'webdav': from .http_helpers import 
get_auth_realm - from datalad_next.url_operations.http import HttpUrlOperations + from datalad_next.url_operations import HttpUrlOperations url = params.get('url') if not url: return