Skip to content

Commit

Permalink
Refactor Import Archive (#4510)
Browse files Browse the repository at this point in the history
This commit builds on [c6d38c1](c6d38c1),
to refactor the archive in order to decouple it from its export/import to the AiiDA database.

The `aiida/tools/importexport/archive` module has been created,
which contains the readers and writers used to create and interact with an archive.
Effectively archive formats are now defined by their associated
reader and writer classes, which must inherit and implement the
`ArchiveReaderAbstract` and `ArchiveWriterAbstract` interfaces respectively.

`aiida/tools/importexport/dbimport` has been refactored,
to interface with this new `ArchiveReaderAbstract` class,
and also utilise the new `progress_reporter` context manager.
Both the django and sqlalchemy backends have been "synchronized",
such that they conform to exactly the same code structure, which in turn
has allowed for the sharing of common code.

The commit is intended to be back-compatible,
in that no public API elements have been removed.
However, it does:

- remove the `Archive` class, replaced by the `ReaderJsonZip`/`ReaderJsonTar` classes.
- remove `aiida/tools/importexport/common/progress_bar.py`,
  now replaced by `aiida/common/progress_reporter.py`
- move `aiida/tools/importexport/dbexport/zip.py` → `aiida/tools/importexport/common/zip_folder.py`

The `verdi import --verbosity` option has been added (e.g. `verdi import --verbosity DEBUG`),
which sets the log level of the import process and controls whether progress bars are shown, and whether they are left on screen once complete.

The `verdi export inspect` code has also been refactored, to utilize the `ArchiveReaderAbstract`.
The `verdi export inspect --data` option has been deprecated,
since access to the `data.json` file is only an implementation
detail of the current archive format.
  • Loading branch information
chrisjsewell authored Oct 27, 2020
1 parent 02c8a0c commit 2f8e845
Show file tree
Hide file tree
Showing 32 changed files with 2,768 additions and 2,418 deletions.
5 changes: 4 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,13 @@ repos:
pass_filenames: true
files: >-
(?x)^(
aiida/common/progress_reporter.py|
aiida/engine/processes/calcjobs/calcjob.py|
aiida/tools/groups/paths.py|
aiida/tools/importexport/archive/.*py|
aiida/tools/importexport/common/zip_folder.py|
aiida/tools/importexport/dbexport/__init__.py|
aiida/common/progress_reporter.py|
aiida/tools/importexport/dbimport/backends/.*.py|
)$
- repo: local
Expand Down
36 changes: 27 additions & 9 deletions aiida/cmdline/commands/cmd_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,40 @@ def inspect(archive, version, data, meta_data):
By default a summary of the archive contents will be printed. The various options can be used to change exactly what
information is displayed.
.. deprecated:: 1.5.0
Support for the --data flag
"""
from aiida.tools.importexport import Archive, CorruptArchive
import dataclasses
from aiida.tools.importexport import CorruptArchive, detect_archive_type, get_reader

reader_cls = get_reader(detect_archive_type(archive))

with Archive(archive) as archive_object:
with reader_cls(archive) as reader:
try:
if version:
echo.echo(archive_object.version_format)
echo.echo(reader.export_version)
elif data:
echo.echo_dictionary(archive_object.data)
# data is an internal implementation detail
echo.echo_deprecated('--data is deprecated and will be removed in v2.0.0')
echo.echo_dictionary(reader._get_data()) # pylint: disable=protected-access
elif meta_data:
echo.echo_dictionary(archive_object.meta_data)
echo.echo_dictionary(dataclasses.asdict(reader.metadata))
else:
info = archive_object.get_info()
data = sorted([(k.capitalize(), v) for k, v in info.items()])
data.extend(sorted([(k.capitalize(), v) for k, v in archive_object.get_data_statistics().items()]))
echo.echo(tabulate.tabulate(data))
statistics = {
'Version aiida': reader.metadata.aiida_version,
'Version format': reader.metadata.export_version,
'Computers': reader.entity_count('Computer'),
'Groups': reader.entity_count('Group'),
'Links': reader.link_count,
'Nodes': reader.entity_count('Node'),
'Users': reader.entity_count('User'),
}
if reader.metadata.conversion_info:
statistics['Conversion info'] = '\n'.join(reader.metadata.conversion_info)

echo.echo(tabulate.tabulate(statistics.items()))
except CorruptArchive as exception:
echo.echo_critical(f'corrupt archive: {exception}')

Expand Down
31 changes: 22 additions & 9 deletions aiida/cmdline/commands/cmd_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,7 @@ def _echo_error( # pylint: disable=unused-argument
:param raised_exception: Exception raised during error.
:type raised_exception: `Exception`
"""
from aiida.tools.importexport import close_progress_bar, IMPORT_LOGGER

# Close progress bar, if it exists
close_progress_bar(leave=False)
from aiida.tools.importexport import IMPORT_LOGGER

IMPORT_LOGGER.debug('%s', traceback.format_exc())

Expand Down Expand Up @@ -89,6 +86,7 @@ def _try_import(migration_performed, file_to_import, archive, group, migration,
* `'extras_mode_new'`: `import_data`'s `'extras_mode_new'` keyword, determining import rules for Extras.
* `'comment_mode'`: `import_data`'s `'comment_mode'` keyword, determining import rules for Comments.
"""
from aiida.common.log import override_log_formatter_context
from aiida.tools.importexport import import_data, IncompatibleArchiveVersionError

# Checks
Expand All @@ -101,7 +99,8 @@ def _try_import(migration_performed, file_to_import, archive, group, migration,
migrate_archive = False

try:
import_data(file_to_import, group, **kwargs)
with override_log_formatter_context('%(message)s'):
import_data(file_to_import, group, **kwargs)
except IncompatibleArchiveVersionError as exception:
if migration_performed:
# Migration has been performed, something is still wrong
Expand Down Expand Up @@ -235,18 +234,33 @@ def _migrate_archive(ctx, temp_folder, file_to_import, archive, non_interactive,
show_default=True,
help='Force migration of archive file archives, if needed.'
)
@click.option(
'-v',
'--verbosity',
default='INFO',
type=click.Choice(['DEBUG', 'INFO', 'WARNING', 'CRITICAL']),
help='Control the verbosity of console logging'
)
@options.NON_INTERACTIVE()
@decorators.with_dbenv()
@click.pass_context
def cmd_import(
ctx, archives, webpages, group, extras_mode_existing, extras_mode_new, comment_mode, migration, non_interactive
ctx, archives, webpages, group, extras_mode_existing, extras_mode_new, comment_mode, migration, non_interactive,
verbosity
):
"""Import data from an AiiDA archive file.
The archive can be specified by its relative or absolute file path, or its HTTP URL.
"""
# pylint: disable=too-many-statements
from aiida.common.folders import SandboxFolder
from aiida.common.progress_reporter import set_progress_bar_tqdm
from aiida.tools.importexport.common.utils import get_valid_import_links
from aiida.tools.importexport.dbimport.utils import IMPORT_LOGGER

if verbosity in ['DEBUG', 'INFO']:
set_progress_bar_tqdm(leave=(verbosity == 'DEBUG'))
IMPORT_LOGGER.setLevel(verbosity)

archives_url = []
archives_file = []
Expand Down Expand Up @@ -289,7 +303,6 @@ def cmd_import(
'extras_mode_new': extras_mode_new,
'comment_mode': comment_mode,
'non_interactive': non_interactive,
'silent': False,
}

# Import local archives
Expand All @@ -308,7 +321,7 @@ def cmd_import(
# Migrate archive if needed and desired
if migrate_archive:
with SandboxFolder() as temp_folder:
import_opts['file_to_import'] = _migrate_archive(ctx, temp_folder, **import_opts)
import_opts['file_to_import'] = _migrate_archive(ctx, temp_folder, silent=False, **import_opts)
_try_import(migration_performed=True, **import_opts)

# Import web-archives
Expand Down Expand Up @@ -338,5 +351,5 @@ def cmd_import(

# Migrate archive if needed and desired
if migrate_archive:
import_opts['file_to_import'] = _migrate_archive(ctx, temp_folder, **import_opts)
import_opts['file_to_import'] = _migrate_archive(ctx, temp_folder, silent=False, **import_opts)
_try_import(migration_performed=True, **import_opts)
14 changes: 14 additions & 0 deletions aiida/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import re
import sys
from uuid import UUID

from .lang import classproperty

Expand All @@ -28,6 +29,19 @@ def get_new_uuid():
return str(uuid.uuid4())


def validate_uuid(given_uuid: str) -> bool:
    """Check whether a value is a canonically formatted version 4 UUID string.

    :param given_uuid: the candidate UUID string.
    :return: ``True`` if ``given_uuid`` is a string that parses as a UUID and is left
        unchanged by a round trip through ``UUID(..., version=4)``, ``False`` otherwise.
    """
    # ``UUID`` signals non-string input (e.g. ``None`` or an ``int``) with ``TypeError`` or
    # ``AttributeError`` rather than ``ValueError``; a validator should reject it, not crash.
    if not isinstance(given_uuid, str):
        return False

    try:
        parsed_uuid = UUID(given_uuid, version=4)
    except ValueError:
        # Not parseable as a UUID at all
        return False

    # Passing ``version=4`` makes ``UUID`` force the version/variant bits, and ``str()`` yields
    # the lower-case hyphenated form, so any difference from the input means the input was
    # not already a canonical version 4 UUID.
    return str(parsed_uuid) == given_uuid


def validate_list_of_string_tuples(val, tuple_length):
"""
Check that:
Expand Down
4 changes: 2 additions & 2 deletions aiida/tools/importexport/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
Functionality: <tree>/aiida/orm/importexport.py
Tests: <tree>/aiida/backends/tests/test_export_and_import.py
"""

from .archive import *
from .dbexport import *
from .dbimport import *
from .common import *

__all__ = (dbexport.__all__ + dbimport.__all__ + common.__all__)
__all__ = (archive.__all__ + dbexport.__all__ + dbimport.__all__ + common.__all__)
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,12 @@
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
# pylint: disable=wildcard-import,undefined-variable
# type: ignore
"""Readers and writers for archive formats, that work independently of a connection to an AiiDA profile."""

from .common import *
from .readers import *
from .writers import *

__all__ = (readers.__all__ + writers.__all__ + common.__all__)
63 changes: 63 additions & 0 deletions aiida/tools/importexport/archive/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
###########################################################################
# Copyright (c), The AiiDA team. All rights reserved. #
# This file is part of the AiiDA code. #
# #
# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
# For further information on the license, see the LICENSE.txt file #
# For further information please visit http://www.aiida.net #
###########################################################################
"""Shared resources for the archive."""
import dataclasses
import os
from typing import Dict, List, Optional, Set

__all__ = ('ArchiveMetadata', 'detect_archive_type')


@dataclasses.dataclass
class ArchiveMetadata:
    """Container for the metadata describing an archive.

    Fields without a default are required to import the data back into AiiDA;
    fields with a default record optional information about the
    export/migration process(es) that produced the archive.
    """

    # --- required ---
    export_version: str
    aiida_version: str
    # maps entity type -> database ID key
    unique_identifiers: Dict[str, str] = dataclasses.field(repr=False)
    # maps entity type -> database key -> meta parameters
    all_fields_info: Dict[str, Dict[str, Dict[str, str]]] = dataclasses.field(repr=False)

    # --- optional ---
    graph_traversal_rules: Optional[Dict[str, bool]] = None
    # maps entity type -> UUID list
    entities_starting_set: Optional[Dict[str, Set[str]]] = None
    include_comments: Optional[bool] = None
    include_logs: Optional[bool] = None
    # migration event notifications, one entry per event
    conversion_info: List[str] = dataclasses.field(default_factory=list, repr=False)


def detect_archive_type(in_path: str) -> str:
    """Identify the archive format found at a path.

    Kept for back-compatibility; should eventually be replaced with a direct comparison of classes.

    :param in_path: the path to the file
    :returns: the archive type identifier (currently one of 'zip', 'tar.gz', 'folder')
    :raises ImportValidationError: if the path is neither a folder, a tar file nor a zip file
    """
    import tarfile
    import zipfile
    from aiida.tools.importexport.common.config import ExportFileFormat
    from aiida.tools.importexport.common.exceptions import ImportValidationError

    # Probes run in this order and the first match wins: folder, then tar, then zip.
    probes = (
        (os.path.isdir, 'folder'),
        (tarfile.is_tarfile, ExportFileFormat.TAR_GZIPPED),
        (zipfile.is_zipfile, ExportFileFormat.ZIP),
    )
    for is_match, archive_type in probes:
        if is_match(in_path):
            return archive_type

    message = (
        'Unable to detect the input file format, it is neither a '
        'folder, tar file, nor a (possibly compressed) zip file.'
    )
    raise ImportValidationError(message)
Loading

0 comments on commit 2f8e845

Please sign in to comment.