diff --git a/aiida/backends/tests/cmdline/commands/test_export.py b/aiida/backends/tests/cmdline/commands/test_export.py index 13be629279..18b5536751 100644 --- a/aiida/backends/tests/cmdline/commands/test_export.py +++ b/aiida/backends/tests/cmdline/commands/test_export.py @@ -212,21 +212,6 @@ def test_migrate_silent(self): finally: delete_temporary_file(filename_output) - def test_migrate_tar_gz(self): - """Test that -F/--archive-format option can be used to write a tar.gz instead.""" - filename_input = get_archive_file(self.penultimate_archive, filepath=self.fixture_archive) - filename_output = next(tempfile._get_candidate_names()) # pylint: disable=protected-access - - for option in ['-F', '--archive-format']: - try: - options = [option, 'tar.gz', filename_input, filename_output] - result = self.cli_runner.invoke(cmd_export.migrate, options) - self.assertIsNone(result.exception, result.output) - self.assertTrue(os.path.isfile(filename_output)) - self.assertTrue(tarfile.is_tarfile(filename_output)) - finally: - delete_temporary_file(filename_output) - def test_inspect(self): """Test the functionality of `verdi export inspect`.""" archives = [] diff --git a/aiida/backends/tests/cmdline/commands/test_import.py b/aiida/backends/tests/cmdline/commands/test_import.py index c37553fc6d..1965b2ae78 100644 --- a/aiida/backends/tests/cmdline/commands/test_import.py +++ b/aiida/backends/tests/cmdline/commands/test_import.py @@ -67,6 +67,27 @@ def test_import_archive(self): self.assertIsNone(result.exception, result.output) self.assertEqual(result.exit_code, 0, result.output) + def test_import_folder(self): + """Test import for archive folder from disk""" + import os + + from aiida.tools.importexport import Archive + + archive_filepath = get_archive_file(self.newest_archive, filepath=self.archive_path) + + with Archive(archive_filepath, silent=True) as archive: + archive.unpack() + + # Make sure the JSON files and the nodes subfolder exists and are correctly extracted, + # then try to import it by passing the extracted folder to `verdi import`. 
+ for name in {'metadata.json', 'data.json', 'nodes'}: + self.assertTrue(os.path.exists(os.path.join(archive.folder.abspath, name))) + + result = self.cli_runner.invoke(cmd_import.cmd_import, [archive.folder.abspath]) + + self.assertIsNone(result.exception, msg=result.output) + self.assertEqual(result.exit_code, 0, msg=result.output) + def test_import_to_group(self): """ Test import to existing Group and that Nodes are added correctly for multiple imports of the same, diff --git a/aiida/backends/tests/tools/importexport/common/test_archive.py b/aiida/backends/tests/tools/importexport/common/test_archive.py index 65e20cb5f6..88a08632b6 100644 --- a/aiida/backends/tests/tools/importexport/common/test_archive.py +++ b/aiida/backends/tests/tools/importexport/common/test_archive.py @@ -10,9 +10,8 @@ """Tests for the Archive class.""" from aiida.backends.testbase import AiidaTestCase -from aiida.common.exceptions import InvalidOperation from aiida.backends.tests.utils.archives import get_archive_file -from aiida.tools.importexport import Archive, CorruptArchive +from aiida.tools.importexport import CorruptArchive, InvalidArchiveOperation, Archive class TestCommonArchive(AiidaTestCase): @@ -20,7 +19,7 @@ class TestCommonArchive(AiidaTestCase): def test_context_required(self): """Verify that accessing a property of an Archive outside of a context manager raises.""" - with self.assertRaises(InvalidOperation): + with self.assertRaises(InvalidArchiveOperation): filepath = get_archive_file('export_v0.1_simple.aiida', filepath='export/migrate') archive = Archive(filepath) archive.version_format # pylint: disable=pointless-statement diff --git a/aiida/backends/tests/tools/importexport/migration/test_migration.py b/aiida/backends/tests/tools/importexport/migration/test_migration.py index f63a953e8b..816118288b 100644 --- a/aiida/backends/tests/tools/importexport/migration/test_migration.py +++ b/aiida/backends/tests/tools/importexport/migration/test_migration.py @@ -13,11 +13,11 @@ from aiida import orm from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_archive_file, get_json_files, migrate_archive +from aiida.backends.tests.utils.archives import get_archive_file, NoContextArchive from aiida.backends.tests.utils.configuration import with_temp_dir -from aiida.tools.importexport import import_data, EXPORT_VERSION as newest_version -from aiida.tools.importexport.migration import migrate_recursively, verify_metadata_version -from aiida.common.utils import Capturing +from aiida.tools.importexport import import_data, Archive, EXPORT_VERSION as newest_version +from aiida.tools.importexport.common.exceptions import MigrationValidationError +from aiida.tools.importexport.migration import migrate_recursively, verify_archive_version, migrate_archive class TestExportFileMigration(AiidaTestCase): @@ -63,44 +63,16 @@ def setUp(self): super().setUp() self.reset_database() - def test_migrate_recursively(self): - """Test function 'migrate_recursively'""" - import io - import tarfile - import zipfile - - from aiida.common.exceptions import NotExistent - from aiida.common.folders import SandboxFolder - from aiida.common.json import load as jsonload - from aiida.tools.importexport.common.archive import extract_tar, extract_zip - - # Get metadata.json and data.json as dicts from v0.1 file archive - # Cannot use 'get_json_files' for 'export_v0.1_simple.aiida', - # because we need to pass the SandboxFolder to 'migrate_recursively' - dirpath_archive = 
get_archive_file('export_v0.1_simple.aiida', **self.core_archive) - - with SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(dirpath_archive): - extract_zip(dirpath_archive, folder, silent=True) - elif tarfile.is_tarfile(dirpath_archive): - extract_tar(dirpath_archive, folder, silent=True) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data = jsonload(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata = jsonload(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) - - verify_metadata_version(metadata, version='0.1') - - # Migrate to newest version - new_version = migrate_recursively(metadata, data, folder) - verify_metadata_version(metadata, version=newest_version) - self.assertEqual(new_version, newest_version) + @with_temp_dir + def test_migrate_archive(self, temp_dir): + """Test function 'migrate_archive'""" + input_file = get_archive_file('export_v0.1_simple.aiida', **self.core_archive) + migrated_file = os.path.join(temp_dir, 'migrated_file.aiida') + + # Migrate to newest version + old_version, new_version = migrate_archive(input_file, migrated_file, silent=True) + self.assertEqual(old_version, '0.1') + self.assertEqual(new_version, newest_version) @with_temp_dir def test_no_node_export(self, temp_dir): @@ -119,7 +91,7 @@ def test_no_node_export(self, temp_dir): user_emails.append('aiida@localhost') # Perform the migration - migrate_archive(input_file, output_file) + migrate_archive(input_file, output_file, silent=True) # Load the migrated file import_data(output_file, silent=True) @@ -156,51 +128,50 @@ def test_wrong_versions(self): msg="'{}' was not expected to be a legal version, legal version: {}".format(version, legal_versions) ) - # Make sure migrate_recursively throws a critical message and raises SystemExit + # Make sure migrate_recursively throws a critical message and raises MigrationValidationError for metadata in wrong_version_metadatas: - with self.assertRaises(SystemExit) as exception: - with Capturing(capture_stderr=True): - new_version = migrate_recursively(metadata, {}, None) - - self.assertIn( - 'Critical: Cannot migrate from version {}'.format(metadata['export_version']), - exception.exception, - msg="Expected a critical statement for the wrong export version '{}', " - 'instead got {}'.format(metadata['export_version'], exception.exception) - ) + archive = NoContextArchive(metadata=metadata) + with self.assertRaises(MigrationValidationError) as exception: + new_version = migrate_recursively(archive) + self.assertIsNone( new_version, msg='migrate_recursively should not return anything, ' "hence the 'return' should be None, but instead it is {}".format(new_version) ) + self.assertIn( + 'Cannot migrate from version {}'.format(metadata['export_version']), + str(exception.exception), + msg="Expected a critical statement for the wrong export version '{}', " + 'instead got {}'.format(metadata['export_version'], str(exception.exception)) + ) + def test_migrate_newest_version(self): """ Test critical message and SystemExit is raised, when an export file with the newest export version is migrated """ # Initialization metadata = {'export_version': newest_version} + archive = NoContextArchive(metadata=metadata) # Check - with self.assertRaises(SystemExit) as exception: - - with 
Capturing(capture_stderr=True): - new_version = migrate_recursively(metadata, {}, None) + with self.assertRaises(MigrationValidationError) as exception: + new_version = migrate_recursively(archive) - self.assertIn( - 'Critical: Your export file is already at the newest export version {}'.format( - metadata['export_version'] - ), - exception.exception, - msg="Expected a critical statement that the export version '{}' is the newest export version '{}', " - 'instead got {}'.format(metadata['export_version'], newest_version, exception.exception) - ) self.assertIsNone( new_version, msg='migrate_recursively should not return anything, ' "hence the 'return' should be None, but instead it is {}".format(new_version) ) + self.assertIn( + 'Your export file is already at the newest export version {}'.format(metadata['export_version']), + str(exception.exception), + msg="Expected a critical statement that the export version '{}' is the newest export version '{}', " + 'instead got {}'.format(metadata['export_version'], newest_version, str(exception.exception)) + ) + @with_temp_dir def test_v02_to_newest(self, temp_dir): """Test migration of exported files from v0.2 to newest export version""" @@ -209,9 +180,9 @@ def test_v02_to_newest(self, temp_dir): output_file = os.path.join(temp_dir, 'output_file.aiida') # Perform the migration - migrate_archive(input_file, output_file) - metadata, _ = get_json_files(output_file) - verify_metadata_version(metadata, version=newest_version) + migrate_archive(input_file, output=output_file, silent=True) + with Archive(output_file) as archive: + verify_archive_version(archive.version_format, version=newest_version) # Load the migrated file import_data(output_file, silent=True) @@ -264,9 +235,9 @@ def test_v03_to_newest(self, temp_dir): output_file = os.path.join(temp_dir, 'output_file.aiida') # Perform the migration - migrate_archive(input_file, output_file) - metadata, _ = get_json_files(output_file) - verify_metadata_version(metadata, version=newest_version) + migrate_archive(input_file, output_file, silent=True) + with Archive(output_file) as archive: + verify_archive_version(archive.version_format, version=newest_version) # Load the migrated file import_data(output_file, silent=True) @@ -319,9 +290,9 @@ def test_v04_to_newest(self, temp_dir): output_file = os.path.join(temp_dir, 'output_file.aiida') # Perform the migration - migrate_archive(input_file, output_file) - metadata, _ = get_json_files(output_file) - verify_metadata_version(metadata, version=newest_version) + migrate_archive(input_file, output_file, silent=True) + with Archive(output_file) as archive: + verify_archive_version(archive.version_format, version=newest_version) # Load the migrated file import_data(output_file, silent=True) @@ -374,9 +345,9 @@ def test_v05_to_newest(self, temp_dir): output_file = os.path.join(temp_dir, 'output_file.aiida') # Perform the migration - migrate_archive(input_file, output_file) - metadata, _ = get_json_files(output_file) - verify_metadata_version(metadata, version=newest_version) + migrate_archive(input_file, output_file, silent=True) + with Archive(output_file) as archive: + verify_archive_version(archive.version_format, version=newest_version) # Load the migrated file import_data(output_file, silent=True) @@ -429,9 +400,9 @@ def test_v06_to_newest(self, temp_dir): output_file = os.path.join(temp_dir, 'output_file.aiida') # Perform the migration - migrate_archive(input_file, output_file) - metadata, _ = get_json_files(output_file) - verify_metadata_version(metadata, 
version=newest_version) + migrate_archive(input_file, output_file, silent=True) + with Archive(output_file) as archive: + verify_archive_version(archive.version_format, version=newest_version) # Load the migrated file import_data(output_file, silent=True) @@ -484,9 +455,9 @@ def test_v07_to_newest(self, temp_dir): output_file = os.path.join(temp_dir, 'output_file.aiida') # Perform the migration - migrate_archive(input_file, output_file) - metadata, _ = get_json_files(output_file) - verify_metadata_version(metadata, version=newest_version) + migrate_archive(input_file, output_file, silent=True) + with Archive(output_file) as archive: + verify_archive_version(archive.version_format, version=newest_version) # Load the migrated file import_data(output_file, silent=True) diff --git a/aiida/backends/tests/tools/importexport/migration/test_v01_to_v02.py b/aiida/backends/tests/tools/importexport/migration/test_v01_to_v02.py index a72ebc61ec..0a8c328d9b 100644 --- a/aiida/backends/tests/tools/importexport/migration/test_v01_to_v02.py +++ b/aiida/backends/tests/tools/importexport/migration/test_v01_to_v02.py @@ -11,8 +11,9 @@ from aiida import get_version from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_json_files -from aiida.tools.importexport.migration.utils import verify_metadata_version +from aiida.backends.tests.utils.archives import get_archive_file +from aiida.tools.importexport import Archive +from aiida.tools.importexport.migration.utils import verify_archive_version from aiida.tools.importexport.migration.v01_to_v02 import migrate_v1_to_v2 @@ -21,17 +22,22 @@ class TestMigrateV01toV02(AiidaTestCase): def test_migrate_v1_to_v2(self): """Test function migrate_v1_to_v2""" - # Get metadata.json and data.json as dicts from v0.1 file archive - metadata_v1, data_v1 = get_json_files('export_v0.1_simple.aiida', filepath='export/migrate') - verify_metadata_version(metadata_v1, version='0.1') + archive_v1 = get_archive_file('export_v0.1_simple.aiida', filepath='export/migrate') + archive_v2 = get_archive_file('export_v0.2_simple.aiida', filepath='export/migrate') - # Get metadata.json and data.json as dicts from v0.2 file archive - metadata_v2, data_v2 = get_json_files('export_v0.2_simple.aiida', filepath='export/migrate') - verify_metadata_version(metadata_v2, version='0.2') + with Archive(archive_v1) as archive: + verify_archive_version(archive.version_format, '0.1') + migrate_v1_to_v2(archive) + verify_archive_version(archive.version_format, '0.2') - # Migrate to v0.2 - migrate_v1_to_v2(metadata_v1, data_v1) - verify_metadata_version(metadata_v1, version='0.2') + data_v1 = archive.data + metadata_v1 = archive.meta_data + + with Archive(archive_v2) as archive: + verify_archive_version(archive.version_format, '0.2') + + data_v2 = archive.data + metadata_v2 = archive.meta_data # Remove AiiDA version, since this may change irregardless of the migration function metadata_v1.pop('aiida_version') diff --git a/aiida/backends/tests/tools/importexport/migration/test_v02_to_v03.py b/aiida/backends/tests/tools/importexport/migration/test_v02_to_v03.py index 5912e22e16..a3ee0172a7 100644 --- a/aiida/backends/tests/tools/importexport/migration/test_v02_to_v03.py +++ b/aiida/backends/tests/tools/importexport/migration/test_v02_to_v03.py @@ -11,8 +11,9 @@ # pylint: disable=too-many-branches from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_json_files -from aiida.tools.importexport.migration.utils import 
verify_metadata_version +from aiida.backends.tests.utils.archives import get_archive_file +from aiida.tools.importexport import Archive +from aiida.tools.importexport.migration.utils import verify_archive_version from aiida.tools.importexport.migration.v02_to_v03 import migrate_v2_to_v3 @@ -31,17 +32,22 @@ def test_migrate_v2_to_v3(self): """Test function migrate_v2_to_v3""" from aiida import get_version - # Get metadata.json and data.json as dicts from v0.2 file archive - metadata_v2, data_v2 = get_json_files('export_v0.2_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v2, version='0.2') + archive_v2 = get_archive_file('export_v0.2_simple.aiida', **self.core_archive) + archive_v3 = get_archive_file('export_v0.3_simple.aiida', **self.core_archive) - # Get metadata.json and data.json as dicts from v0.3 file archive - metadata_v3, data_v3 = get_json_files('export_v0.3_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v3, version='0.3') + with Archive(archive_v2) as archive: + verify_archive_version(archive.version_format, '0.2') + migrate_v2_to_v3(archive) + verify_archive_version(archive.version_format, '0.3') + + data_v2 = archive.data + metadata_v2 = archive.meta_data + + with Archive(archive_v3) as archive: + verify_archive_version(archive.version_format, '0.3') - # Migrate to v0.3 - migrate_v2_to_v3(metadata_v2, data_v2) - verify_metadata_version(metadata_v2, version='0.3') + data_v3 = archive.data + metadata_v3 = archive.meta_data # Remove AiiDA version, since this may change irregardless of the migration function metadata_v2.pop('aiida_version') @@ -69,14 +75,14 @@ def test_migrate_v2_to_v3(self): def test_migrate_v2_to_v3_complete(self): """Test migration for file containing complete v0.2 era possibilities""" + archive_v2 = get_archive_file('export_v0.2.aiida', **self.external_archive) + with Archive(archive_v2) as archive: + verify_archive_version(archive.version_format, version='0.2') + migrate_v2_to_v3(archive) + verify_archive_version(archive.version_format, version='0.3') - # Get metadata.json and data.json as dicts from v0.2 file archive - metadata, data = get_json_files('export_v0.2.aiida', **self.external_archive) - verify_metadata_version(metadata, version='0.2') - - # Migrate to v0.3 - migrate_v2_to_v3(metadata, data) - verify_metadata_version(metadata, version='0.3') + data = archive.data + metadata = archive.meta_data self.maxDiff = None # pylint: disable=invalid-name # Check link types @@ -124,11 +130,18 @@ def test_compare_migration_with_aiida_made(self): """ # Get metadata.json and data.json as dicts from v0.2 file archive and migrate - metadata_v2, data_v2 = get_json_files('export_v0.2.aiida', **self.external_archive) - migrate_v2_to_v3(metadata_v2, data_v2) + archive_v2 = get_archive_file('export_v0.2.aiida', **self.external_archive) + with Archive(archive_v2) as archive: + migrate_v2_to_v3(archive) + + data_v2 = archive.data + metadata_v2 = archive.meta_data # Get metadata.json and data.json as dicts from v0.3 file archive - metadata_v3, data_v3 = get_json_files('export_v0.3.aiida', **self.external_archive) + archive_v3 = get_archive_file('export_v0.3.aiida', **self.external_archive) + with Archive(archive_v3) as archive: + data_v3 = archive.data + metadata_v3 = archive.meta_data # Compare 'metadata.json' metadata_v2.pop('conversion_info') diff --git a/aiida/backends/tests/tools/importexport/migration/test_v03_to_v04.py b/aiida/backends/tests/tools/importexport/migration/test_v03_to_v04.py index 4ed4be483c..bf5ca0bb73 
100644 --- a/aiida/backends/tests/tools/importexport/migration/test_v03_to_v04.py +++ b/aiida/backends/tests/tools/importexport/migration/test_v03_to_v04.py @@ -10,17 +10,10 @@ """Test export file migration from export version 0.3 to 0.4""" # pylint: disable=too-many-locals,too-many-branches,too-many-statements -import io -import tarfile -import zipfile - from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_archive_file, get_json_files -from aiida.common.exceptions import NotExistent -from aiida.common.folders import SandboxFolder -from aiida.common.json import load as jsonload -from aiida.tools.importexport.common.archive import extract_tar, extract_zip -from aiida.tools.importexport.migration.utils import verify_metadata_version +from aiida.backends.tests.utils.archives import get_archive_file +from aiida.tools.importexport.common.archive import Archive +from aiida.tools.importexport.migration.utils import verify_archive_version from aiida.tools.importexport.migration.v03_to_v04 import migrate_v3_to_v4 @@ -39,36 +32,22 @@ def test_migrate_v3_to_v4(self): """Test function migrate_v3_to_v4""" from aiida import get_version - # Get metadata.json and data.json as dicts from v0.4 file archive - metadata_v4, data_v4 = get_json_files('export_v0.4_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v4, version='0.4') + archive_v3 = get_archive_file('export_v0.3_simple.aiida', **self.core_archive) + archive_v4 = get_archive_file('export_v0.4_simple.aiida', **self.core_archive) - # Get metadata.json and data.json as dicts from v0.3 file archive - # Cannot use 'get_json_files' for 'export_v0.3_simple.aiida', - # because we need to pass the SandboxFolder to 'migrate_v3_to_v4' - dirpath_archive = get_archive_file('export_v0.3_simple.aiida', **self.core_archive) - - with SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(dirpath_archive): - extract_zip(dirpath_archive, folder, silent=True) - elif tarfile.is_tarfile(dirpath_archive): - extract_tar(dirpath_archive, folder, silent=True) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') + with Archive(archive_v3) as archive: + verify_archive_version(archive.version_format, '0.3') + migrate_v3_to_v4(archive) + verify_archive_version(archive.version_format, '0.4') - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data_v3 = jsonload(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata_v3 = jsonload(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) + data_v3 = archive.data + metadata_v3 = archive.meta_data - verify_metadata_version(metadata_v3, version='0.3') + with Archive(archive_v4) as archive: + verify_archive_version(archive.version_format, '0.4') - # Migrate to v0.4 - migrate_v3_to_v4(metadata_v3, data_v3, folder) - verify_metadata_version(metadata_v3, version='0.4') + data_v4 = archive.data + metadata_v4 = archive.meta_data # Remove AiiDA version, since this may change irregardless of the migration function metadata_v3.pop('aiida_version') @@ -98,42 +77,28 @@ def test_migrate_v3_to_v4_complete(self): """Test migration for file containing complete v0.3 era possibilities""" # Get metadata.json and data.json as dicts from v0.3 file archive - dirpath_archive = get_archive_file('export_v0.3.aiida', **self.external_archive) - - # Migrate - with 
SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(dirpath_archive): - extract_zip(dirpath_archive, folder, silent=True) - elif tarfile.is_tarfile(dirpath_archive): - extract_tar(dirpath_archive, folder, silent=True) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data = jsonload(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata = jsonload(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) + archive_v3 = get_archive_file('export_v0.3.aiida', **self.external_archive) - verify_metadata_version(metadata, version='0.3') + with Archive(archive_v3) as archive: + verify_archive_version(archive.version_format, '0.3') # Save pre-migration info - links_count_org = len(data['links_uuid']) + links_count_org = len(archive.data['links_uuid']) work_uuids = { value['uuid'] - for value in data['export_data']['Node'].values() + for value in archive.data['export_data']['Node'].values() if value['type'].startswith('calculation.function') or value['type'].startswith('calculation.work') } illegal_links = [] - for link in data['links_uuid']: + for link in archive.data['links_uuid']: if link['input'] in work_uuids and link['type'] == 'createlink': illegal_links.append(link) - # Migrate to v0.4 - migrate_v3_to_v4(metadata, data, folder) - verify_metadata_version(metadata, version='0.4') + migrate_v3_to_v4(archive) + verify_archive_version(archive.version_format, '0.4') + + data = archive.data + metadata = archive.meta_data ## Following checks are based on the archive-file ## Which means there are more legal entities, they are simply not relevant here. 
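The hunks above and below repeat the same replacement: the old `get_json_files`/`SandboxFolder`/`extract_zip` boilerplate gives way to the `Archive` context manager, and the migration functions now take the archive object directly. A minimal sketch of that test pattern, assuming the `Archive`, `verify_archive_version`, and `migrate_v3_to_v4` call signatures shown in these hunks (the fixture name and `filepath` value are taken from the tests above, not verified against a released AiiDA version):

```python
# Sketch of the test pattern introduced by this diff; names and signatures are
# assumed from the '+' lines above rather than from a published AiiDA API.
from aiida.backends.tests.utils.archives import get_archive_file
from aiida.tools.importexport.common.archive import Archive
from aiida.tools.importexport.migration.utils import verify_archive_version
from aiida.tools.importexport.migration.v03_to_v04 import migrate_v3_to_v4

archive_v3 = get_archive_file('export_v0.3_simple.aiida', filepath='export/migrate')

with Archive(archive_v3) as archive:
    # The context manager provides a sandbox folder; the file is unpacked on demand.
    verify_archive_version(archive.version_format, '0.3')
    migrate_v3_to_v4(archive)  # migration functions now operate on the Archive object
    verify_archive_version(archive.version_format, '0.4')

    # metadata.json and data.json are exposed as dictionaries on the instance
    data_v4 = archive.data
    metadata_v4 = archive.meta_data
```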
@@ -304,31 +269,17 @@ def test_compare_migration_with_aiida_made(self): (AiiDA versions 0.12.3 versus 1.0.0b2) NB: Since PKs and UUIDs will have changed, comparisons between 'data.json'-files will be made indirectly """ - # Get metadata.json and data.json as dicts from v0.3 file archive and migrate - dirpath_archive = get_archive_file('export_v0.3.aiida', **self.external_archive) - - # Migrate - with SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(dirpath_archive): - extract_zip(dirpath_archive, folder, silent=True) - elif tarfile.is_tarfile(dirpath_archive): - extract_tar(dirpath_archive, folder, silent=True) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data_v3 = jsonload(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata_v3 = jsonload(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) + archive_v3 = get_archive_file('export_v0.3.aiida', **self.external_archive) + archive_v4 = get_archive_file('export_v0.4.aiida', **self.external_archive) - # Migrate to v0.4 - migrate_v3_to_v4(metadata_v3, data_v3, folder) + with Archive(archive_v3) as archive: + migrate_v3_to_v4(archive) + data_v3 = archive.data + metadata_v3 = archive.meta_data - # Get metadata.json and data.json as dicts from v0.4 file archive - metadata_v4, data_v4 = get_json_files('export_v0.4.aiida', **self.external_archive) + with Archive(archive_v4) as archive: + data_v4 = archive.data + metadata_v4 = archive.meta_data # Compare 'metadata.json' self.maxDiff = None @@ -442,37 +393,21 @@ def test_compare_migration_with_aiida_made(self): def test_illegal_create_links(self): """Test illegal create links from workchain are detected and removed from exports using v0.3""" # Initialization - dirpath_archive = get_archive_file('export_v0.3.aiida', **self.external_archive) + archive_v3 = get_archive_file('export_v0.3.aiida', **self.external_archive) known_illegal_links = 2 - # Unpack archive, check data.json, and migrate to v0.4 - with SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(dirpath_archive): - extract_zip(dirpath_archive, folder, silent=True) - elif tarfile.is_tarfile(dirpath_archive): - extract_tar(dirpath_archive, folder, silent=True) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data = jsonload(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata = jsonload(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) - + with Archive(archive_v3) as archive: # Check illegal create links are present in org. 
export file - links_count = len(data['links_uuid']) + links_count = len(archive.data['links_uuid']) links_count_migrated = links_count - known_illegal_links workfunc_uuids = { value['uuid'] - for value in data['export_data']['Node'].values() + for value in archive.data['export_data']['Node'].values() if value['type'].startswith('calculation.function') or value['type'].startswith('calculation.work') } violations = [] - for link in data['links_uuid']: + for link in archive.data['links_uuid']: if link['input'] in workfunc_uuids and link['type'] == 'createlink': violations.append(link) self.assertEqual( @@ -483,8 +418,8 @@ def test_illegal_create_links(self): ) ) - # Migrate to v0.4 - migrate_v3_to_v4(metadata, data, folder) + migrate_v3_to_v4(archive) + data = archive.data # Check illegal create links were removed self.assertEqual( diff --git a/aiida/backends/tests/tools/importexport/migration/test_v04_to_v05.py b/aiida/backends/tests/tools/importexport/migration/test_v04_to_v05.py index 611af17ac2..fd881d9b10 100644 --- a/aiida/backends/tests/tools/importexport/migration/test_v04_to_v05.py +++ b/aiida/backends/tests/tools/importexport/migration/test_v04_to_v05.py @@ -9,17 +9,10 @@ ########################################################################### """Test export file migration from export version 0.4 to 0.5""" -import io -import tarfile -import zipfile - from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_archive_file, get_json_files -from aiida.common.exceptions import NotExistent -from aiida.common.folders import SandboxFolder -from aiida.common.json import load as jsonload -from aiida.tools.importexport.common.archive import extract_tar, extract_zip -from aiida.tools.importexport.migration.utils import verify_metadata_version +from aiida.backends.tests.utils.archives import get_archive_file +from aiida.tools.importexport.common.archive import Archive +from aiida.tools.importexport.migration.utils import verify_archive_version from aiida.tools.importexport.migration.v04_to_v05 import migrate_v4_to_v5 @@ -38,36 +31,22 @@ def test_migrate_v4_to_v5(self): """Test function migrate_v4_to_v5""" from aiida import get_version - # Get metadata.json and data.json as dicts from v0.5 file archive - metadata_v5, data_v5 = get_json_files('export_v0.5_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v5, version='0.5') - - # Get metadata.json and data.json as dicts from v0.4 file archive - # Cannot use 'get_json_files' for 'export_v0.4_simple.aiida', - # because we need to pass the SandboxFolder to 'migrate_v4_to_v5' - dirpath_archive = get_archive_file('export_v0.4_simple.aiida', **self.core_archive) - - with SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(dirpath_archive): - extract_zip(dirpath_archive, folder, silent=True) - elif tarfile.is_tarfile(dirpath_archive): - extract_tar(dirpath_archive, folder, silent=True) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data_v4 = jsonload(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata_v4 = jsonload(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) - - verify_metadata_version(metadata_v4, version='0.4') - - # Migrate to v0.5 - migrate_v4_to_v5(metadata_v4, data_v4) - 
verify_metadata_version(metadata_v4, version='0.5') + archive_v4 = get_archive_file('export_v0.4_simple.aiida', **self.core_archive) + archive_v5 = get_archive_file('export_v0.5_simple.aiida', **self.core_archive) + + with Archive(archive_v4) as archive: + verify_archive_version(archive.version_format, '0.4') + migrate_v4_to_v5(archive) + verify_archive_version(archive.version_format, '0.5') + + data_v4 = archive.data + metadata_v4 = archive.meta_data + + with Archive(archive_v5) as archive: + verify_archive_version(archive.version_format, '0.5') + + data_v5 = archive.data + metadata_v5 = archive.meta_data # Remove AiiDA version, since this may change irregardless of the migration function metadata_v4.pop('aiida_version') @@ -96,14 +75,14 @@ def test_migrate_v4_to_v5(self): def test_migrate_v4_to_v5_complete(self): """Test migration for file containing complete v0.4 era possibilities""" - - # Get metadata.json and data.json as dicts from v0.4 file archive - metadata, data = get_json_files('export_v0.4.aiida', **self.external_archive) - verify_metadata_version(metadata, version='0.4') - - # Migrate to v0.5 - migrate_v4_to_v5(metadata, data) - verify_metadata_version(metadata, version='0.5') + archive_v4 = get_archive_file('export_v0.4.aiida', **self.external_archive) + with Archive(archive_v4) as archive: + verify_archive_version(archive.version_format, version='0.4') + migrate_v4_to_v5(archive) + verify_archive_version(archive.version_format, version='0.5') + + data = archive.data + metadata = archive.meta_data self.maxDiff = None # pylint: disable=invalid-name # Check schema-changes diff --git a/aiida/backends/tests/tools/importexport/migration/test_v05_to_v06.py b/aiida/backends/tests/tools/importexport/migration/test_v05_to_v06.py index 5c4d4371b5..5fafef4fdb 100644 --- a/aiida/backends/tests/tools/importexport/migration/test_v05_to_v06.py +++ b/aiida/backends/tests/tools/importexport/migration/test_v05_to_v06.py @@ -11,8 +11,9 @@ from aiida.backends.general.migrations.calc_state import STATE_MAPPING from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_json_files -from aiida.tools.importexport.migration.utils import verify_metadata_version +from aiida.backends.tests.utils.archives import get_archive_file +from aiida.tools.importexport.common.archive import Archive +from aiida.tools.importexport.migration.utils import verify_archive_version from aiida.tools.importexport.migration.v05_to_v06 import migrate_v5_to_v6 @@ -31,17 +32,22 @@ def test_migrate_v5_to_v6(self): """Test migration for file containing complete v0.5 era possibilities""" from aiida import get_version - # Get metadata.json and data.json as dicts from v0.5 file archive - metadata_v5, data_v5 = get_json_files('export_v0.5_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v5, version='0.5') + archive_v5 = get_archive_file('export_v0.5_simple.aiida', **self.core_archive) + archive_v6 = get_archive_file('export_v0.6_simple.aiida', **self.core_archive) - # Get metadata.json and data.json as dicts from v0.6 file archive - metadata_v6, data_v6 = get_json_files('export_v0.6_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v6, version='0.6') + with Archive(archive_v5) as archive: + verify_archive_version(archive.version_format, '0.5') + migrate_v5_to_v6(archive) + verify_archive_version(archive.version_format, '0.6') - # Migrate to v0.6 - migrate_v5_to_v6(metadata_v5, data_v5) - verify_metadata_version(metadata_v5, version='0.6') + data_v5 
= archive.data + metadata_v5 = archive.meta_data + + with Archive(archive_v6) as archive: + verify_archive_version(archive.version_format, '0.6') + + data_v6 = archive.data + metadata_v6 = archive.meta_data # Remove AiiDA version, since this may change irregardless of the migration function metadata_v5.pop('aiida_version') @@ -73,23 +79,21 @@ def test_migrate_v5_to_v6_calc_states(self): This test has to use a local archive because the current archive from the `aiida-export-migration-tests` module does not include a `CalcJobNode` with a legacy `state` attribute. """ - # Get metadata.json and data.json as dicts from v0.5 file archive - metadata, data = get_json_files('export_v0.5_simple.aiida', **self.core_archive) - verify_metadata_version(metadata, version='0.5') + archive_v5 = get_archive_file('export_v0.5_simple.aiida', **self.core_archive) + + with Archive(archive_v5) as archive: - calc_job_node_type = 'process.calculation.calcjob.CalcJobNode.' - node_data = data['export_data'].get('Node', {}) - node_attributes = data['node_attributes'] - calc_jobs = {} - for pk, values in node_data.items(): - if values['node_type'] == calc_job_node_type and 'state' in data['node_attributes'].get(pk, {}): - calc_jobs[pk] = data['node_attributes'][pk]['state'] + calc_job_node_type = 'process.calculation.calcjob.CalcJobNode.' + node_data = archive.data['export_data'].get('Node', {}) + node_attributes = archive.data['node_attributes'] + calc_jobs = {} + for pk, values in node_data.items(): + if values['node_type'] == calc_job_node_type and 'state' in archive.data['node_attributes'].get(pk, {}): + calc_jobs[pk] = archive.data['node_attributes'][pk]['state'] - # Migrate to v0.6 - migrate_v5_to_v6(metadata, data) - verify_metadata_version(metadata, version='0.6') + migrate_v5_to_v6(archive) - node_attributes = data['node_attributes'] + node_attributes = archive.data['node_attributes'] # The export archive contains a single `CalcJobNode` that had `state=FINISHED`. for pk, state in calc_jobs.items(): @@ -115,41 +119,40 @@ def test_migrate_v5_to_v6_datetime(self): Here we test that the archive migration correctly reattaches the timezone information. The archive that we are using `export_v0.5_simple.aiida` contains a node with the attribute "scheduler_lastchecktime". """ - # Get metadata.json and data.json as dicts from v0.5 file archive - metadata, data = get_json_files('export_v0.5_simple.aiida', **self.core_archive) - verify_metadata_version(metadata, version='0.5') - - for key, values in data['node_attributes'].items(): - if 'scheduler_lastchecktime' not in values: - continue - - serialized_original = values['scheduler_lastchecktime'] - msg = 'the serialized datetime before migration should not contain a plus: {}'.format(serialized_original) - self.assertTrue('+' not in serialized_original, msg=msg) - - # Migrate to v0.6 - migrate_v5_to_v6(metadata, data) - verify_metadata_version(metadata, version='0.6') - - serialized_migrated = data['node_attributes'][key]['scheduler_lastchecktime'] - self.assertEqual(serialized_migrated, serialized_original + '+00:00') - break - - else: - raise RuntimeError( - 'the archive `export_v0.5_simple.aiida` did not contain a node with the attribute ' - '`scheduler_lastchecktime` which is required for this test.' 
- ) + archive_name = 'export_v0.5_simple.aiida' + archive_v5 = get_archive_file(archive_name, **self.core_archive) + + with Archive(archive_v5) as archive: + for key, values in archive.data['node_attributes'].items(): + if 'scheduler_lastchecktime' not in values: + continue + + serialized_original = values['scheduler_lastchecktime'] + msg = 'the serialized datetime before migration should not contain a plus: {}'.format( + serialized_original + ) + self.assertTrue('+' not in serialized_original, msg=msg) + + # Migrate to v0.6 + migrate_v5_to_v6(archive) + verify_archive_version(archive.version_format, version='0.6') + + serialized_migrated = archive.data['node_attributes'][key]['scheduler_lastchecktime'] + self.assertEqual(serialized_migrated, serialized_original + '+00:00') + break + + else: + raise RuntimeError( + 'the archive `{}` did not contain a node with the attribute ' + '`scheduler_lastchecktime` which is required for this test.'.format(archive_name) + ) def test_migrate_v5_to_v6_complete(self): """Test migration for file containing complete v0.5 era possibilities""" - # Get metadata.json and data.json as dicts from v0.5 file archive - metadata, data = get_json_files('export_v0.5_manual.aiida', **self.external_archive) - verify_metadata_version(metadata, version='0.5') - - # Migrate to v0.6 - migrate_v5_to_v6(metadata, data) - verify_metadata_version(metadata, version='0.6') + archive_v5 = get_archive_file('export_v0.5_manual.aiida', **self.external_archive) + with Archive(archive_v5) as archive: + migrate_v5_to_v6(archive) + data = archive.data self.maxDiff = None # pylint: disable=invalid-name # Explicitly check that conversion dictionaries were removed diff --git a/aiida/backends/tests/tools/importexport/migration/test_v06_to_v07.py b/aiida/backends/tests/tools/importexport/migration/test_v06_to_v07.py index e29949ac51..cb3a1aa2a2 100644 --- a/aiida/backends/tests/tools/importexport/migration/test_v06_to_v07.py +++ b/aiida/backends/tests/tools/importexport/migration/test_v06_to_v07.py @@ -10,8 +10,9 @@ """Test export file migration from export version 0.6 to 0.7""" from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_json_files -from aiida.tools.importexport.migration.utils import verify_metadata_version +from aiida.backends.tests.utils.archives import get_archive_file +from aiida.tools.importexport.common.archive import Archive +from aiida.tools.importexport.migration.utils import verify_archive_version from aiida.tools.importexport.migration.v06_to_v07 import ( migrate_v6_to_v7, migration_data_migration_legacy_process_attributes ) @@ -32,17 +33,22 @@ def test_migrate_v6_to_v7(self): """Test migration for file containing complete v0.6 era possibilities""" from aiida import get_version - # Get metadata.json and data.json as dicts from v0.6 file archive - metadata_v6, data_v6 = get_json_files('export_v0.6_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v6, version='0.6') + archive_v6 = get_archive_file('export_v0.6_simple.aiida', **self.core_archive) + archive_v7 = get_archive_file('export_v0.7_simple.aiida', **self.core_archive) - # Get metadata.json and data.json as dicts from v0.7 file archive - metadata_v7, data_v7 = get_json_files('export_v0.7_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v7, version='0.7') + with Archive(archive_v6) as archive: + verify_archive_version(archive.version_format, '0.6') + migrate_v6_to_v7(archive) + verify_archive_version(archive.version_format, 
'0.7') - # Migrate to v0.7 - migrate_v6_to_v7(metadata_v6, data_v6) - verify_metadata_version(metadata_v6, version='0.7') + data_v6 = archive.data + metadata_v6 = archive.meta_data + + with Archive(archive_v7) as archive: + verify_archive_version(archive.version_format, '0.7') + + data_v7 = archive.data + metadata_v7 = archive.meta_data # Remove AiiDA version, since this may change irregardless of the migration function metadata_v6.pop('aiida_version') @@ -70,13 +76,15 @@ def test_migrate_v6_to_v7(self): def test_migrate_v6_to_v7_complete(self): """Test migration for file containing complete v0.6 era possibilities""" - # Get metadata.json and data.json as dicts from v0.6 file archive - metadata, data = get_json_files('export_v0.6_manual.aiida', **self.external_archive) - verify_metadata_version(metadata, version='0.6') + archive_v6 = get_archive_file('export_v0.6_manual.aiida', **self.external_archive) + + with Archive(archive_v6) as archive: + verify_archive_version(archive.version_format, version='0.6') + migrate_v6_to_v7(archive) + verify_archive_version(archive.version_format, version='0.7') - # Migrate to v0.7 - migrate_v6_to_v7(metadata, data) - verify_metadata_version(metadata, version='0.7') + data = archive.data + metadata = archive.meta_data self.maxDiff = None # pylint: disable=invalid-name # Check attributes of process.* nodes diff --git a/aiida/backends/tests/tools/importexport/migration/test_v07_to_v08.py b/aiida/backends/tests/tools/importexport/migration/test_v07_to_v08.py index a3877bb234..a5f1a288aa 100644 --- a/aiida/backends/tests/tools/importexport/migration/test_v07_to_v08.py +++ b/aiida/backends/tests/tools/importexport/migration/test_v07_to_v08.py @@ -10,9 +10,10 @@ """Test export file migration from export version 0.7 to 0.8""" from aiida.backends.testbase import AiidaTestCase -from aiida.backends.tests.utils.archives import get_json_files -from aiida.tools.importexport.migration.utils import verify_metadata_version -from aiida.tools.importexport.migration.v07_to_v08 import (migrate_v7_to_v8, migration_default_link_label) +from aiida.backends.tests.utils.archives import get_archive_file +from aiida.tools.importexport.common.archive import Archive +from aiida.tools.importexport.migration.utils import verify_archive_version +from aiida.tools.importexport.migration.v07_to_v08 import migrate_v7_to_v8, migration_default_link_label class TestMigrateV07toV08(AiidaTestCase): @@ -30,17 +31,22 @@ def test_migrate_v7_to_v8(self): """Test migration for file containing complete v0.7 era possibilities""" from aiida import get_version - # Get metadata.json and data.json as dicts from v0.7 file archive - metadata_v7, data_v7 = get_json_files('export_v0.7_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v7, version='0.7') + archive_v7 = get_archive_file('export_v0.7_simple.aiida', **self.core_archive) + archive_v8 = get_archive_file('export_v0.8_simple.aiida', **self.core_archive) - # Get metadata.json and data.json as dicts from v0.8 file archive - metadata_v8, data_v8 = get_json_files('export_v0.8_simple.aiida', **self.core_archive) - verify_metadata_version(metadata_v8, version='0.8') + with Archive(archive_v7) as archive: + verify_archive_version(archive.version_format, '0.7') + migrate_v7_to_v8(archive) + verify_archive_version(archive.version_format, '0.8') - # Migrate to v0.8 - migrate_v7_to_v8(metadata_v7, data_v7) - verify_metadata_version(metadata_v7, version='0.8') + data_v7 = archive.data + metadata_v7 = archive.meta_data + + with 
Archive(archive_v8) as archive: + verify_archive_version(archive.version_format, '0.8') + + data_v8 = archive.data + metadata_v8 = archive.meta_data # Remove AiiDA version, since this may change irregardless of the migration function metadata_v7.pop('aiida_version') @@ -68,13 +74,14 @@ def test_migrate_v7_to_v8(self): def test_migrate_v7_to_v8_complete(self): """Test migration for file containing complete v0.7 era possibilities""" - # Get metadata.json and data.json as dicts from v0.7 file archive - metadata, data = get_json_files('export_v0.7_manual.aiida', **self.external_archive) - verify_metadata_version(metadata, version='0.7') + archive_v7 = get_archive_file('export_v0.7_manual.aiida', **self.external_archive) + + with Archive(archive_v7) as archive: + verify_archive_version(archive.version_format, version='0.7') + migrate_v7_to_v8(archive) + verify_archive_version(archive.version_format, version='0.8') - # Migrate to v0.8 - migrate_v7_to_v8(metadata, data) - verify_metadata_version(metadata, version='0.8') + data = archive.data self.maxDiff = None # pylint: disable=invalid-name # Check that no links have the label '_return', since it should now be 'result' diff --git a/aiida/backends/tests/tools/importexport/test_specific_import.py b/aiida/backends/tests/tools/importexport/test_specific_import.py index c548f20327..41fb402bc2 100644 --- a/aiida/backends/tests/tools/importexport/test_specific_import.py +++ b/aiida/backends/tests/tools/importexport/test_specific_import.py @@ -13,8 +13,6 @@ import shutil import tempfile -import unittest - import numpy as np from aiida import orm @@ -209,16 +207,13 @@ def test_missing_node_repo_folder_export(self, temp_dir): @with_temp_dir def test_missing_node_repo_folder_import(self, temp_dir): """ - Make sure `~aiida.tools.importexport.common.exceptions.CorruptArchive` is raised during import when missing - Node repository folder. + Make sure :py:class:`~aiida.tools.importexport.common.exceptions.CorruptArchive` is raised during import + when missing Node repository folder. Create and export a Node and manually remove its repository folder in the export file. - Attempt to import it and make sure `~aiida.tools.importexport.common.exceptions.CorruptArchive` is raised, - due to the missing folder. + Attempt to import it and make sure :py:class:`~aiida.tools.importexport.common.exceptions.CorruptArchive` + is raised, due to the missing folder. 
""" - import tarfile - - from aiida.common.folders import SandboxFolder - from aiida.tools.importexport.common.archive import extract_tar + from aiida.tools.importexport import Archive from aiida.tools.importexport.common.config import NODES_EXPORT_SUBFOLDER from aiida.tools.importexport.common.utils import export_shard_uuid @@ -239,16 +234,18 @@ def test_missing_node_repo_folder_import(self, temp_dir): # Untar export file, remove repository folder, re-tar node_shard_uuid = export_shard_uuid(node_uuid) node_top_folder = node_shard_uuid.split('/')[0] - with SandboxFolder() as folder: - extract_tar(filename, folder, silent=True, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) - node_folder = folder.get_subfolder(os.path.join(NODES_EXPORT_SUBFOLDER, node_shard_uuid)) + with Archive(filename, silent=True) as archive: + archive.unpack() + + node_folder = archive.folder.get_subfolder(os.path.join(NODES_EXPORT_SUBFOLDER, node_shard_uuid)) self.assertTrue( node_folder.exists(), msg="The Node's repository folder should still exist in the export file" ) # Removing the Node's repository folder from the export file shutil.rmtree( - folder.get_subfolder(os.path.join(NODES_EXPORT_SUBFOLDER, node_top_folder)).abspath, ignore_errors=True + archive.folder.get_subfolder(os.path.join(NODES_EXPORT_SUBFOLDER, node_top_folder)).abspath, + ignore_errors=True ) self.assertFalse( node_folder.exists(), @@ -256,8 +253,7 @@ def test_missing_node_repo_folder_import(self, temp_dir): ) filename_corrupt = os.path.join(temp_dir, 'export_corrupt.tar.gz') - with tarfile.open(filename_corrupt, 'w:gz', format=tarfile.PAX_FORMAT, dereference=True) as tar: - tar.add(folder.abspath, arcname='') + archive.repack(filename_corrupt) # Try to import, check it raises and check the raise message with self.assertRaises(exceptions.CorruptArchive) as exc: @@ -267,7 +263,6 @@ def test_missing_node_repo_folder_import(self, temp_dir): 'Unable to find the repository folder for Node with UUID={}'.format(node_uuid), str(exc.exception) ) - @unittest.skip('Reenable when issue #3199 is solve (PR #3242): Fix `extract_tree`') @with_temp_dir def test_empty_repo_folder_export(self, temp_dir): """Check a Node's empty repository folder is exported properly""" @@ -331,30 +326,29 @@ def test_import_folder(self): It is important to check that the source directory or any of its contents are not deleted after import. """ - from aiida.common.folders import SandboxFolder from aiida.backends.tests.utils.archives import get_archive_file - from aiida.tools.importexport.common.archive import extract_zip + from aiida.tools.importexport import Archive - archive = get_archive_file('arithmetic.add.aiida', filepath='calcjob') + archive_filepath = get_archive_file('arithmetic.add.aiida', filepath='calcjob') - with SandboxFolder() as temp_dir: - extract_zip(archive, temp_dir, silent=True) + with Archive(archive_filepath, silent=True) as archive: + archive.unpack() - # Make sure the JSON files and the nodes subfolder was correctly extracted (is present), + # Make sure the JSON files and the nodes subfolder exists and are correctly extracted, # then try to import it by passing the extracted folder to the import function. 
for name in {'metadata.json', 'data.json', 'nodes'}: - self.assertTrue(os.path.exists(os.path.join(temp_dir.abspath, name))) + self.assertTrue(os.path.exists(os.path.join(archive.folder.abspath, name))) # Get list of all folders in extracted archive org_folders = [] - for dirpath, dirnames, _ in os.walk(temp_dir.abspath): + for dirpath, dirnames, _ in os.walk(archive.folder.abspath): org_folders += [os.path.join(dirpath, dirname) for dirname in dirnames] - import_data(temp_dir.abspath, silent=True) + import_data(archive.folder.abspath, silent=True) # Check nothing from the source was deleted src_folders = [] - for dirpath, dirnames, _ in os.walk(temp_dir.abspath): + for dirpath, dirnames, _ in os.walk(archive.folder.abspath): src_folders += [os.path.join(dirpath, dirname) for dirname in dirnames] self.maxDiff = None # pylint: disable=invalid-name self.assertListEqual(org_folders, src_folders) diff --git a/aiida/backends/tests/utils/archives.py b/aiida/backends/tests/utils/archives.py index e1329ed3a0..79aa089340 100644 --- a/aiida/backends/tests/utils/archives.py +++ b/aiida/backends/tests/utils/archives.py @@ -10,14 +10,8 @@ """Test utility to import, inspect, or migrate AiiDA export archives.""" import os -import io -import tarfile -import zipfile -from aiida.common import json -from aiida.common.exceptions import NotExistent -from aiida.tools.importexport.common.archive import extract_tar, extract_zip -from aiida.common.folders import SandboxFolder +from aiida.tools.importexport.common.archive import Archive def get_archive_file(archive, filepath=None, external_module=None): @@ -83,82 +77,15 @@ def import_archive(archive, filepath=None, external_module=None): import_data(dirpath_archive, silent=True) -def get_json_files(archive, silent=True, filepath=None, external_module=None): - """Get metadata.json and data.json from an exported AiiDA archive +class NoContextArchive(Archive): + """Test class for :py:class:`aiida.tools.importexport.common.archive.Archive` that breaks rule of context""" - :param archive: the relative filename of the archive - :param silent: Whether or not the extraction should be silent - :param filepath: str of directories of where to find archive (starting "/"s are irrelevant) - :param external_module: string with name of external module, where archive can be found - """ - # Get archive - dirpath_archive = get_archive_file(archive, filepath=filepath, external_module=external_module) + def __init__(self, filepath='_test_filename.aiida', metadata=None, data=None, unpacked=True, **kwargs): + super(NoContextArchive, self).__init__(filepath, **kwargs) + self._silent = True + self._meta_data = metadata + self._data = data + self._unpacked = unpacked - # Unpack archive - with SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(dirpath_archive): - extract_zip(dirpath_archive, folder, silent=silent) - elif tarfile.is_tarfile(dirpath_archive): - extract_tar(dirpath_archive, folder, silent=silent) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data = json.load(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata = json.load(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) - - # Return metadata.json and data.json - return metadata, data - - -def migrate_archive(input_file, 
output_file, silent=True): - """Migrate contents using `migrate_recursively` - This is essentially similar to `verdi export migrate`. - However, since this command may be disabled, this function simulates it and keeps the tests working. - - :param input_file: filename with full path for archive to be migrated - :param output_file: filename with full path for archive to be created after migration - """ - from aiida.tools.importexport.migration import migrate_recursively - - # Unpack archive, migrate, and re-pack archive - with SandboxFolder(sandbox_in_repo=False) as folder: - if zipfile.is_zipfile(input_file): - extract_zip(input_file, folder, silent=silent) - elif tarfile.is_tarfile(input_file): - extract_tar(input_file, folder, silent=silent) - else: - raise ValueError('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data = json.load(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata = json.load(fhandle) - except IOError: - raise NotExistent('export archive does not contain the required file {}'.format(fhandle.filename)) - - # Migrate - migrate_recursively(metadata, data, folder) - - # Write json files - with io.open(folder.get_abs_path('data.json'), 'wb') as fhandle: - json.dump(data, fhandle, indent=4) - - with io.open(folder.get_abs_path('metadata.json'), 'wb') as fhandle: - json.dump(metadata, fhandle, indent=4) - - # Pack archive - compression = zipfile.ZIP_DEFLATED - with zipfile.ZipFile(output_file, mode='w', compression=compression, allowZip64=True) as archive: - src = folder.abspath - for dirpath, dirnames, filenames in os.walk(src): - relpath = os.path.relpath(dirpath, src) - for filename in dirnames + filenames: - real_src = os.path.join(dirpath, filename) - real_dest = os.path.join(relpath, filename) - archive.write(real_src, real_dest) + def _ensure_within_context(self): + """Do not raise if not within context""" diff --git a/aiida/cmdline/commands/cmd_export.py b/aiida/cmdline/commands/cmd_export.py index 7d972f5232..68a64b7ed2 100644 --- a/aiida/cmdline/commands/cmd_export.py +++ b/aiida/cmdline/commands/cmd_export.py @@ -10,7 +10,6 @@ # pylint: disable=too-many-arguments,import-error,too-many-locals """`verdi export` command.""" -import io import os import click @@ -143,63 +142,22 @@ def create( @verdi_export.command('migrate') @arguments.INPUT_FILE() @arguments.OUTPUT_FILE() -@options.ARCHIVE_FORMAT() @options.FORCE(help='overwrite output file if it already exists') @options.SILENT() -def migrate(input_file, output_file, force, silent, archive_format): +def migrate(input_file, output_file, force, silent): # pylint: disable=too-many-locals,too-many-statements,too-many-branches """ Migrate an old export archive file to the most recent format. 
""" - import tarfile - import zipfile - - from aiida.common import json - from aiida.common.folders import SandboxFolder - from aiida.tools.importexport import migration, extract_zip, extract_tar + from aiida.tools.importexport import migrate_archive, ArchiveMigrationError if os.path.exists(output_file) and not force: echo.echo_critical('the output file already exists') - with SandboxFolder(sandbox_in_repo=False) as folder: - - if zipfile.is_zipfile(input_file): - extract_zip(input_file, folder, silent=silent) - elif tarfile.is_tarfile(input_file): - extract_tar(input_file, folder, silent=silent) - else: - echo.echo_critical('invalid file format, expected either a zip archive or gzipped tarball') - - try: - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data = json.load(fhandle) - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata = json.load(fhandle) - except IOError: - echo.echo_critical('export archive does not contain the required file {}'.format(fhandle.filename)) - - old_version = migration.verify_metadata_version(metadata) - new_version = migration.migrate_recursively(metadata, data, folder) - - with io.open(folder.get_abs_path('data.json'), 'wb') as fhandle: - json.dump(data, fhandle, indent=4) - - with io.open(folder.get_abs_path('metadata.json'), 'wb') as fhandle: - json.dump(metadata, fhandle) - - if archive_format in ['zip', 'zip-uncompressed']: - compression = zipfile.ZIP_DEFLATED if archive_format == 'zip' else zipfile.ZIP_STORED - with zipfile.ZipFile(output_file, mode='w', compression=compression, allowZip64=True) as archive: - src = folder.abspath - for dirpath, dirnames, filenames in os.walk(src): - relpath = os.path.relpath(dirpath, src) - for filename in dirnames + filenames: - real_src = os.path.join(dirpath, filename) - real_dest = os.path.join(relpath, filename) - archive.write(real_src, real_dest) - elif archive_format == 'tar.gz': - with tarfile.open(output_file, 'w:gz', format=tarfile.PAX_FORMAT, dereference=True) as archive: - archive.add(folder.abspath, arcname='') - + try: + old_version, new_version = migrate_archive(input_file, output_file, overwrite=force, silent=silent) + except ArchiveMigrationError as why: + echo.echo_critical('An error occurred while migrating {}: {}'.format(input_file, why)) + else: if not silent: echo.echo_success('migrated the archive from version {} to {}'.format(old_version, new_version)) diff --git a/aiida/tools/importexport/__init__.py b/aiida/tools/importexport/__init__.py index 1c47570718..587f71698d 100644 --- a/aiida/tools/importexport/__init__.py +++ b/aiida/tools/importexport/__init__.py @@ -19,5 +19,6 @@ from .dbexport import * from .dbimport import * from .common import * +from .migration import * -__all__ = (dbexport.__all__ + dbimport.__all__ + common.__all__) +__all__ = (dbexport.__all__ + dbimport.__all__ + common.__all__ + migration.__all__) diff --git a/aiida/tools/importexport/common/archive.py b/aiida/tools/importexport/common/archive.py index 14b66db85e..5b2ebaf4b4 100644 --- a/aiida/tools/importexport/common/archive.py +++ b/aiida/tools/importexport/common/archive.py @@ -7,6 +7,7 @@ # For further information on the license, see the LICENSE.txt file # # For further information please visit http://www.aiida.net # ########################################################################### +# pylint: disable=fixme """Utility functions and classes to interact with AiiDA export archives.""" import io @@ -18,12 +19,32 @@ from wrapt import 
decorator from aiida.common import json -from aiida.common.exceptions import ContentNotExistent, InvalidOperation from aiida.common.folders import SandboxFolder from aiida.tools.importexport.common.config import NODES_EXPORT_SUBFOLDER -from aiida.tools.importexport.common.exceptions import CorruptArchive +from aiida.tools.importexport.common.exceptions import CorruptArchive, InvalidArchiveOperation, ArchiveOperationError -__all__ = ('Archive', 'extract_zip', 'extract_tar', 'extract_tree') +__all__ = ('Archive',) + +FILENAME_DATA = 'data.json' +FILENAME_METADATA = 'metadata.json' +JSON_FILES = [FILENAME_DATA, FILENAME_METADATA] + + +@decorator +def ensure_within_context(wrapped, instance, args, kwargs): + """Decorator to ensure that the instance is called within a context manager.""" + if instance: + instance._ensure_within_context() # pylint: disable=protected-access + return wrapped(*args, **kwargs) + + +@decorator +def ensure_unpacked(wrapped, instance, args, kwargs): + """Decorator to ensure that the archive is unpacked before entering the decorated function.""" + if instance and not instance.unpacked: + instance.unpack() + + return wrapped(*args, **kwargs) class Archive: @@ -37,60 +58,135 @@ class Archive: archive.version """ + # TODO: These sets should be determined from config.py - or similar - when the export function is updated to do the + # same. This will make it easy to update from a single place in the future, if needed. + MANDATORY_DATA_KEYS = {'node_attributes', 'node_extras', 'export_data', 'links_uuid', 'groups_uuid'} + MANDATORY_METADATA_KEYS = {'aiida_version', 'export_version', 'all_fields_info', 'unique_identifiers'} - FILENAME_DATA = 'data.json' - FILENAME_METADATA = 'metadata.json' - - def __init__(self, filepath): + def __init__(self, filepath, silent=True): self._filepath = filepath + self._silent = silent self._folder = None self._unpacked = False + self._keep = False self._data = None self._meta_data = None + self._archive_format = None def __enter__(self): """Instantiate a SandboxFolder into which the archive can be lazily unpacked.""" - self._folder = SandboxFolder() + self._folder = SandboxFolder(sandbox_in_repo=True) return self def __exit__(self, exc_type, exc_value, traceback): - """Clean the sandbox folder if it was instatiated.""" - if self.folder: + """Clean the sandbox folder if it was instantiated.""" + if not self.keep and self.folder: self.folder.erase() - @decorator - def ensure_within_context(wrapped, instance, args, kwargs): # pylint: disable=no-self-argument - """Decorator to ensure that the instance is called within a context manager.""" - if instance and not instance.folder: - raise InvalidOperation('the Archive class should be used within a context') - - return wrapped(*args, **kwargs) # pylint: disable=not-callable - - @decorator - def ensure_unpacked(wrapped, instance, args, kwargs): # pylint: disable=no-self-argument - """Decorator to ensure that the archive is unpacked before entering the decorated function.""" - if instance and not instance.unpacked: - instance.unpack() - - return wrapped(*args, **kwargs) # pylint: disable=not-callable + def _ensure_within_context(self): + """Overridable function that processes decorator ``@ensure_within_context``""" + if not self.folder: + raise InvalidArchiveOperation('the Archive class should be used within a context') @ensure_within_context def unpack(self): """Unpack the archive and store the contents in a sandbox.""" - if os.path.isdir(self.filepath): - extract_tree(self.filepath, self.folder) - elif 
tarfile.is_tarfile(self.filepath): - extract_tar(self.filepath, self.folder, silent=True, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) - elif zipfile.is_zipfile(self.filepath): - extract_zip(self.filepath, self.folder, silent=True, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) - else: - raise CorruptArchive('unrecognized archive format') + try: + self._archive_format = 'zip' + if os.path.isdir(self.filepath): + self._redirect_archive_folder() + # Update self.folder to be a Folder() + # Since we are now dealing with source data, we do not want to delete the folder upon exit. + # self._folder = get_tree(self.filepath) + self._keep = True + elif tarfile.is_tarfile(self.filepath): + extract_tar( + self.filepath, self.folder, silent=self._silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER + ) + self._archive_format = 'tar' + elif zipfile.is_zipfile(self.filepath): + extract_zip( + self.filepath, self.folder, silent=self._silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER + ) + else: + raise CorruptArchive('unrecognized archive format') + except Exception as why: + raise CorruptArchive('Error during unpacking of {}: {}'.format(self.filepath, why)) if not self.folder.get_content_list(): - raise ContentNotExistent('the provided archive {} is empty'.format(self.filepath)) + raise CorruptArchive('the provided archive {} is empty'.format(self.filepath)) self._unpacked = True + @ensure_unpacked + @ensure_within_context + def repack(self, output_filepath=None, overwrite=False): + """Repack the archive. + + Set new filepath name for repacked archive (if requested), write current data and meta_data dictionaries + to folder. + + :param output_filepath: the filename, if an absolute path is not provided, + it will be created in the current working directory. + :type output_filepath: str + :param overwrite: whether or not to overwrite output_filepath, if it already exists (default: False). + :type overwrite: bool + """ + import six + + # Try to create an output_filepath automatically (based on time and date), if none has been specified + if not output_filepath: + output_filepath = self._new_output_filename(overwrite) + elif not isinstance(output_filepath, six.string_types): + raise InvalidArchiveOperation( + 'output_filepath must be a string, instead {} was given'.format(type(output_filepath)) + ) + + if os.path.exists(output_filepath) and not overwrite: + raise ArchiveOperationError('output file for repacking already exists') + + self._write_json_file(FILENAME_DATA, self.data) + self._write_json_file(FILENAME_METADATA, self.meta_data) + + if self._archive_format == 'tar': + with tarfile.open(output_filepath, 'w:gz', format=tarfile.PAX_FORMAT, dereference=True) as out_file: + out_file.add(self.folder.abspath, arcname='') + elif self._archive_format == 'zip': + with zipfile.ZipFile( + output_filepath, mode='w', compression=zipfile.ZIP_DEFLATED, allowZip64=True + ) as out_file: + for dirpath, dirnames, filenames in os.walk(self.folder.abspath): + relpath = os.path.relpath(dirpath, self.folder.abspath) + for filename in dirnames + filenames: + real_src = os.path.join(dirpath, filename) + real_dest = os.path.join(relpath, filename) + out_file.write(real_src, real_dest) + else: + raise ArchiveOperationError('archive_format must be set prior to exit, when Archive should be kept') + + def _new_output_filename(self, overwrite): + """Create and return new filename based on self.filepath to repack into. + + :param overwrite: whether or not to overwrite existing file, if necessary. 
+ :type overwrite: bool + """ + from aiida.common import timezone + + filepath_split = os.path.basename(self.filepath).split('.') + unique_name = timezone.localtime(timezone.now()).strftime('%Y%m%d-%H%M%S') + new_filename = '.'.join([ + '{}_migrated-{}'.format('.'.join(filepath_split[:-1]), unique_name), filepath_split[-1] + ]) + i = 0 + while os.path.exists(new_filename) and not overwrite: + i += 1 + basename = new_filename.split('.') + new_filename = '.'.join([basename[0] + '_{}'.format(i), basename[1]]) + if i == 100: + raise ArchiveOperationError('an output file for repacking cannot be created') + + return new_filename + @property def filepath(self): """Return the filepath of the archive @@ -101,12 +197,19 @@ def filepath(self): @property def folder(self): - """Return the sandbox folder + """Return the folder - :return: sandbox folder :class:`aiida.common.folders.SandboxFolder` + :return: folder + :rtype: :py:class:`~aiida.common.folders.SandboxFolder`, :py:class:`~aiida.common.folders.Folder` """ return self._folder + @property + @ensure_within_context + def keep(self): + """Return whether to keep the archive upon exit.""" + return self._keep + @property @ensure_within_context def data(self): @@ -115,7 +218,7 @@ def data(self): :return: dictionary with contents of data file """ if self._data is None: - self._data = self._read_json_file(self.FILENAME_DATA) + self._data = self._read_json_file(FILENAME_DATA) return self._data @@ -127,7 +230,7 @@ def meta_data(self): :return: dictionary with contents of meta data file """ if self._meta_data is None: - self._meta_data = self._read_json_file(self.FILENAME_METADATA) + self._meta_data = self._read_json_file(FILENAME_METADATA) return self._meta_data @@ -159,8 +262,8 @@ def get_data_statistics(self): :return: a dictionary with basic details """ - export_data = self.data.get('export_data', {}) - links_data = self.data.get('links_uuid', {}) + export_data = self.get_data_member('export_data') + links_data = self.get_data_member('links_uuid') computers = export_data.get('Computer', {}) groups = export_data.get('Group', {}) @@ -184,7 +287,7 @@ def version_aiida(self): :return: version number """ - return self.meta_data['aiida_version'] + return self.get_meta_data_member('aiida_version') @property @ensure_within_context @@ -193,7 +296,7 @@ def version_format(self): :return: version number """ - return self.meta_data['export_version'] + return self.get_meta_data_member('export_version') @property @ensure_within_context @@ -202,10 +305,7 @@ def conversion_info(self): :return: list of conversion notifications """ - try: - return self.meta_data['conversion_info'] - except KeyError: - return None + return self.get_meta_data_member('conversion_info') @ensure_within_context @ensure_unpacked @@ -214,9 +314,104 @@ def _read_json_file(self, filename): :param filename: the filename relative to the sandbox folder :return: a dictionary with the loaded JSON content + + :raises ~aiida.tools.importexport.common.exceptions.CorruptArchive: if there was an error reading the JSON file """ - with io.open(self.folder.get_abs_path(filename), 'r', encoding='utf8') as fhandle: - return json.load(fhandle) + try: + with io.open(self.folder.get_abs_path(filename), 'r', encoding='utf8') as fhandle: + return json.load(fhandle) + except OSError as error: + raise CorruptArchive('Error reading {}: {}'.format(filename, error)) + + @ensure_within_context + @ensure_unpacked + def _read_json_file_content(self, json_filename, key): + """Return value in JSON file + + :param json_filename: 
Valid archive JSON file name.
+        :type json_filename: str
+        :param key: Key matching the value to be returned.
+        :type key: str
+
+        :return: Value of key in JSON file
+        :rtype: dict, str, list
+
+        :raises ~aiida.tools.importexport.common.exceptions.CorruptArchive: if the key cannot be found in JSON file
+        """
+        if json_filename == FILENAME_DATA:
+            json_content = self.data
+            mandatory_keys = self.MANDATORY_DATA_KEYS
+        elif json_filename == FILENAME_METADATA:
+            json_content = self.meta_data
+            mandatory_keys = self.MANDATORY_METADATA_KEYS
+        else:
+            raise CorruptArchive('{} is not a valid archive JSON file'.format(json_filename))
+
+        try:
+            return json_content[key]
+        except KeyError:
+            if key not in mandatory_keys:
+                return {}
+
+            raise CorruptArchive('{} is missing the mandatory key "{}"'.format(json_filename, key))
+
+    @ensure_within_context
+    def get_data_member(self, member):
+        """Return value in data.json
+
+        :param member: Member to be returned, i.e., the dictionary key matching the value to be returned
+        :type member: str
+        """
+        return self._read_json_file_content(FILENAME_DATA, member)
+
+    @ensure_within_context
+    def get_meta_data_member(self, member):
+        """Return value in metadata.json
+
+        :param member: Member to be returned, i.e., the dictionary key matching the value to be returned
+        :type member: str
+        """
+        return self._read_json_file_content(FILENAME_METADATA, member)
+
+    @ensure_within_context
+    @ensure_unpacked
+    def _write_json_file(self, filename, data):
+        """Write the contents of a dictionary to a JSON file in the unpacked archive.
+
+        :param filename: the filename relative to the sandbox folder
+        :type filename: str
+        :param data: the data to be written to a JSON file
+        :type data: dict
+
+        :raises ~aiida.tools.importexport.common.exceptions.ArchiveOperationError: if there was an error writing the JSON file
+        """
+        try:
+            with io.open(self.folder.get_abs_path(filename), 'wb') as fhandle:
+                json.dump(data, fhandle)
+
+            self._keep = True
+        except OSError as error:
+            raise ArchiveOperationError('Error writing {}: {}'.format(filename, error))
+
+    @ensure_within_context
+    def _redirect_archive_folder(self):
+        """Return new Folder, pointing to path, which will not erase path upon exit.
+
+        Erase SandboxFolder, if it was instantiated (it shouldn't have been).
+        Redirect self.folder to point at self.filepath, since this should be a valid unpacked archive.
+        Before this, however, do a superficial check of the folder's contents.
+ """ + from aiida.common.folders import Folder + + if self.folder and isinstance(self.folder, SandboxFolder): + self.folder.erase() + + # Make sure necessary top-level files and folder exists in the filepath + for required_file_or_folder in JSON_FILES + [NODES_EXPORT_SUBFOLDER]: + if not os.path.exists(os.path.join(self.filepath, required_file_or_folder)): + raise CorruptArchive('required file or folder `{}` is not included'.format(required_file_or_folder)) + + self._folder = Folder(self.filepath) def extract_zip(infile, folder, nodes_export_subfolder=None, silent=False): @@ -252,18 +447,14 @@ def extract_zip(infile, folder, nodes_export_subfolder=None, silent=False): try: with zipfile.ZipFile(infile, 'r', allowZip64=True) as handle: - if not handle.namelist(): + if not handle.infolist(): raise CorruptArchive('no files detected') - try: - handle.extract(path=folder.abspath, member='metadata.json') - except KeyError: - raise CorruptArchive('required file `metadata.json` is not included') - - try: - handle.extract(path=folder.abspath, member='data.json') - except KeyError: - raise CorruptArchive('required file `data.json` is not included') + for json_file in JSON_FILES: + try: + handle.extract(path=folder.abspath, member=json_file) + except KeyError: + raise CorruptArchive('required file `{}` is not included'.format(json_file)) if not silent: print('EXTRACTING NODE DATA...') @@ -286,7 +477,7 @@ def extract_tar(infile, folder, nodes_export_subfolder=None, silent=False): :param infile: file path :type infile: str - :param folder: a temporary fodler used to extract the file tree + :param folder: a temporary folder used to extract the file tree :type folder: :py:class:`~aiida.common.folders.SandboxFolder` :param nodes_export_subfolder: name of the subfolder for AiiDA nodes @@ -312,15 +503,11 @@ def extract_tar(infile, folder, nodes_export_subfolder=None, silent=False): try: with tarfile.open(infile, 'r:*', format=tarfile.PAX_FORMAT) as handle: - try: - handle.extract(path=folder.abspath, member=handle.getmember('metadata.json')) - except KeyError: - raise CorruptArchive('required file `metadata.json` is not included') - - try: - handle.extract(path=folder.abspath, member=handle.getmember('data.json')) - except KeyError: - raise CorruptArchive('required file `data.json` is not included') + for json_file in JSON_FILES: + try: + handle.extract(path=folder.abspath, member=handle.getmember(json_file)) + except KeyError: + raise CorruptArchive('required file `{}` is not included'.format(json_file)) if not silent: print('EXTRACTING NODE DATA...') @@ -342,22 +529,4 @@ def extract_tar(infile, folder, nodes_export_subfolder=None, silent=False): continue handle.extract(path=folder.abspath, member=member) except tarfile.ReadError: - raise ValueError('The input file format for import is not valid (1)') - - -def extract_tree(infile, folder): - """Prepare to import nodes from plain file system tree by copying in the given sandbox folder. - - .. note:: the contents of the unpacked archive directory are copied into the sandbox folder, because the files will - anyway haven to be copied to the repository at some point. By copying the contents of the source directory now - and continuing operation only on the sandbox folder, we do not risk to modify the source files accidentally. - During import then, the node files from the sandbox can be moved to the repository, so they won't have to be - copied again in any case. 
-
-    :param infile: absolute filepath point to the unpacked archive directory
-    :type infile: str
-
-    :param folder: a temporary folder to which the archive contents are copied
-    :type folder: :py:class:`~aiida.common.folders.SandboxFolder`
-    """
-    folder.replace_with_folder(infile, move=False, overwrite=True)
+        raise ValueError('The input file format for import is not valid (not a compressed tar file)')
diff --git a/aiida/tools/importexport/common/exceptions.py b/aiida/tools/importexport/common/exceptions.py
index 3e641f1139..141b81f100 100644
--- a/aiida/tools/importexport/common/exceptions.py
+++ b/aiida/tools/importexport/common/exceptions.py
@@ -16,9 +16,10 @@ from aiida.common.exceptions import AiidaException

 __all__ = (
-    'ExportImportException', 'ArchiveExportError', 'ArchiveImportError', 'CorruptArchive',
-    'IncompatibleArchiveVersionError', 'ExportValidationError', 'ImportUniquenessError', 'ImportValidationError',
-    'ArchiveMigrationError', 'MigrationValidationError', 'DanglingLinkError'
+    'ExportImportException', 'ArchiveExportError', 'ArchiveImportError', 'IncompatibleArchiveVersionError',
+    'CorruptArchive', 'InvalidArchiveOperation', 'ArchiveOperationError', 'ExportValidationError',
+    'ImportUniquenessError', 'ImportValidationError', 'ArchiveMigrationError', 'MigrationValidationError',
+    'DanglingLinkError'
 )


@@ -34,12 +35,21 @@ class ArchiveImportError(ExportImportException):
     """Base class for all AiiDA import exceptions."""


+class IncompatibleArchiveVersionError(ExportImportException):
+    """Raised when trying to import an export archive with an incompatible schema version."""
+
+
 class CorruptArchive(ExportImportException):
     """Raised when an operation is applied to a corrupt export archive, e.g. missing files or invalid formats."""


-class IncompatibleArchiveVersionError(ExportImportException):
-    """Raised when trying to import an export archive with an incompatible schema version."""
+class InvalidArchiveOperation(ExportImportException):
+    """Invalid operation in the class :py:class:`~aiida.tools.importexport.common.archive.Archive`"""
+
+
+class ArchiveOperationError(InvalidArchiveOperation):
+    """Operation error (e.g., OSError or IOError)
+    in the class :py:class:`~aiida.tools.importexport.common.archive.Archive`"""


 class ExportValidationError(ArchiveExportError):
diff --git a/aiida/tools/importexport/dbimport/backends/django/__init__.py b/aiida/tools/importexport/dbimport/backends/django/__init__.py
index 56760b305c..8f3e2926d3 100644
--- a/aiida/tools/importexport/dbimport/backends/django/__init__.py
+++ b/aiida/tools/importexport/dbimport/backends/django/__init__.py
@@ -24,7 +24,7 @@ from aiida.orm.utils.repository import Repository
 from aiida.orm import QueryBuilder, Node, Group
 from aiida.tools.importexport.common import exceptions
-from aiida.tools.importexport.common.archive import extract_tree, extract_tar, extract_zip
+from aiida.tools.importexport.common.archive import Archive
 from aiida.tools.importexport.common.config import DUPL_SUFFIX, IMPORTGROUP_TYPE, EXPORT_VERSION, NODES_EXPORT_SUBFOLDER
 from aiida.tools.importexport.common.config import (
     NODE_ENTITY_NAME, GROUP_ENTITY_NAME, COMPUTER_ENTITY_NAME, USER_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME
@@ -117,45 +117,15 @@ def import_data_dj(
     ################
     # EXTRACT DATA #
     ################
-    # The sandbox has to remain open until the end
-    with SandboxFolder() as folder:
-        if os.path.isdir(in_path):
-            extract_tree(in_path, folder)
-        else:
-            if tarfile.is_tarfile(in_path):
-                extract_tar(in_path, folder,
silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) - elif zipfile.is_zipfile(in_path): - try: - extract_zip(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) - except ValueError as exc: - print('The following problem occured while processing the provided file: {}'.format(exc)) - return - else: - raise exceptions.ImportValidationError( - 'Unable to detect the input file format, it is neither a ' - '(possibly compressed) tar file, nor a zip file.' - ) - - if not folder.get_content_list(): - raise exceptions.CorruptArchive('The provided file/folder ({}) is empty'.format(in_path)) - try: - with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: - metadata = json.load(fhandle) - - with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: - data = json.load(fhandle) - except IOError as error: - raise exceptions.CorruptArchive( - 'Unable to find the file {} in the import file or folder'.format(error.filename) - ) - + # The archive has to remain open until the end (it is a sandbox) + with Archive(in_path, silent=silent) as archive: ###################### # PRELIMINARY CHECKS # ###################### - export_version = StrictVersion(str(metadata['export_version'])) + export_version = StrictVersion(str(archive.meta_data['export_version'])) if export_version != expected_export_version: msg = 'Export file version is {}, can import only version {}'\ - .format(metadata['export_version'], expected_export_version) + .format(archive.meta_data['export_version'], expected_export_version) if export_version < expected_export_version: msg += "\nUse 'verdi export migrate' to update this export file." else: @@ -166,11 +136,11 @@ def import_data_dj( ########################################################################## # CREATE UUID REVERSE TABLES AND CHECK IF I HAVE ALL NODES FOR THE LINKS # ########################################################################## - linked_nodes = set(chain.from_iterable((l['input'], l['output']) for l in data['links_uuid'])) - group_nodes = set(chain.from_iterable(data['groups_uuid'].values())) + linked_nodes = set(chain.from_iterable((l['input'], l['output']) for l in archive.data['links_uuid'])) + group_nodes = set(chain.from_iterable(archive.data['groups_uuid'].values())) - if NODE_ENTITY_NAME in data['export_data']: - import_nodes_uuid = set(v['uuid'] for v in data['export_data'][NODE_ENTITY_NAME].values()) + if NODE_ENTITY_NAME in archive.data['export_data']: + import_nodes_uuid = set(v['uuid'] for v in archive.data['export_data'][NODE_ENTITY_NAME].values()) else: import_nodes_uuid = set() @@ -195,7 +165,7 @@ def import_data_dj( COMMENT_ENTITY_NAME ) - for import_field_name in metadata['all_fields_info']: + for import_field_name in archive.meta_data['all_fields_info']: if import_field_name not in model_order: raise exceptions.ImportValidationError( "You are trying to import an unknown model '{}'!".format(import_field_name) @@ -203,7 +173,7 @@ def import_data_dj( for idx, model_name in enumerate(model_order): dependencies = [] - for field in metadata['all_fields_info'][model_name].values(): + for field in archive.meta_data['all_fields_info'][model_name].values(): try: dependencies.append(field['requires']) except KeyError: @@ -219,11 +189,11 @@ def import_data_dj( # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS # ################################################### import_unique_ids_mappings = {} - for model_name, import_data in data['export_data'].items(): - if model_name in 
metadata['unique_identifiers']: + for model_name, import_data in archive.data['export_data'].items(): + if model_name in archive.meta_data['unique_identifiers']: # I have to reconvert the pk to integer import_unique_ids_mappings[model_name] = { - int(k): v[metadata['unique_identifiers'][model_name]] for k, v in import_data.items() + int(k): v[archive.meta_data['unique_identifiers'][model_name]] for k, v in import_data.items() } ############### @@ -239,8 +209,8 @@ def import_data_dj( for model_name in model_order: cls_signature = entity_names_to_signatures[model_name] model = get_object_from_string(cls_signature) - fields_info = metadata['all_fields_info'].get(model_name, {}) - unique_identifier = metadata['unique_identifiers'].get(model_name, None) + fields_info = archive.meta_data['all_fields_info'].get(model_name, {}) + unique_identifier = archive.meta_data['unique_identifiers'].get(model_name, None) new_entries[model_name] = {} existing_entries[model_name] = {} @@ -248,11 +218,13 @@ def import_data_dj( foreign_ids_reverse_mappings[model_name] = {} # Not necessarily all models are exported - if model_name in data['export_data']: + if model_name in archive.data['export_data']: # skip nodes that are already present in the DB if unique_identifier is not None: - import_unique_ids = set(v[unique_identifier] for v in data['export_data'][model_name].values()) + import_unique_ids = set( + v[unique_identifier] for v in archive.data['export_data'][model_name].values() + ) relevant_db_entries_result = model.objects.filter( **{'{}__in'.format(unique_identifier): import_unique_ids} @@ -263,7 +235,7 @@ def import_data_dj( } foreign_ids_reverse_mappings[model_name] = {k: v.pk for k, v in relevant_db_entries.items()} - for key, value in data['export_data'][model_name].items(): + for key, value in archive.data['export_data'][model_name].items(): if value[unique_identifier] in relevant_db_entries.keys(): # Already in DB existing_entries[model_name][key] = value @@ -271,7 +243,7 @@ def import_data_dj( # To be added new_entries[model_name][key] = value else: - new_entries[model_name] = data['export_data'][model_name].copy() + new_entries[model_name] = archive.data['export_data'][model_name].copy() # Show Comment mode if not silent if not silent: @@ -281,8 +253,8 @@ def import_data_dj( for model_name in model_order: cls_signature = entity_names_to_signatures[model_name] model = get_object_from_string(cls_signature) - fields_info = metadata['all_fields_info'].get(model_name, {}) - unique_identifier = metadata['unique_identifiers'].get(model_name, None) + fields_info = archive.meta_data['all_fields_info'].get(model_name, {}) + unique_identifier = archive.meta_data['unique_identifiers'].get(model_name, None) # EXISTING ENTRIES for import_entry_pk, entry_data in existing_entries[model_name].items(): @@ -382,7 +354,7 @@ def import_data_dj( # Before storing entries in the DB, I store the files (if these are nodes). # Note: only for new entries! 
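# A minimal usage sketch of the lazy `Archive` context manager the importer above now
# relies on; the archive path below is a placeholder, not part of the changeset. The
# first access to a JSON member triggers `unpack()`, after which `archive.folder`
# points at the extracted file tree that the repository files are copied from.
from aiida.tools.importexport.common.archive import Archive

with Archive('/tmp/example_export.aiida', silent=True) as archive:
    print(archive.version_format)              # lazily unpacks and reads metadata.json
    print(archive.folder.get_content_list())   # e.g. ['data.json', 'metadata.json', 'nodes']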
- subfolder = folder.get_subfolder( + subfolder = archive.folder.get_subfolder( os.path.join(NODES_EXPORT_SUBFOLDER, export_shard_uuid(import_entry_uuid)) ) if not subfolder.exists(): @@ -392,8 +364,13 @@ def import_data_dj( ) destdir = RepositoryFolder(section=Repository._section_name, uuid=import_entry_uuid) # Replace the folder, possibly destroying existing previous folders, and move the files - # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder) - destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True) + # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder + # - for extract_tar and extract_zip). + # If extract_tree, do not move, i.e. do not destroy the source. + if issubclass(subfolder.__class__, SandboxFolder): + destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True) + else: + destdir.replace_with_folder(subfolder.abspath, move=False, overwrite=True) # For DbNodes, we also have to store its attributes if not silent: @@ -401,7 +378,7 @@ def import_data_dj( # Get attributes from import file try: - object_.attributes = data['node_attributes'][str(import_entry_pk)] + object_.attributes = archive.data['node_attributes'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find attribute info for Node with UUID={}'.format(import_entry_uuid) @@ -414,7 +391,7 @@ def import_data_dj( # Get extras from import file try: - extras = data['node_extras'][str(import_entry_pk)] + extras = archive.data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}'.format(import_entry_uuid) @@ -450,7 +427,7 @@ def import_data_dj( # Get extras from import file try: - extras = data['node_extras'][str(import_entry_pk)] + extras = archive.data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for ode with UUID={}'.format(import_entry_uuid) @@ -499,7 +476,7 @@ def import_data_dj( if not silent: print('STORING NODE LINKS...') - import_links = data['links_uuid'] + import_links = archive.data['links_uuid'] links_to_store = [] # Needed, since QueryBuilder does not yet work for recently saved Nodes @@ -636,7 +613,7 @@ def import_data_dj( if not silent: print('STORING GROUP ELEMENTS...') - import_groups = data['groups_uuid'] + import_groups = archive.data['groups_uuid'] for groupuuid, groupnodes in import_groups.items(): # TODO: cache these to avoid too many queries group_ = models.DbGroup.objects.get(uuid=groupuuid) diff --git a/aiida/tools/importexport/dbimport/backends/sqla/__init__.py b/aiida/tools/importexport/dbimport/backends/sqla/__init__.py index f5100ae2a8..8a9f7d1fa6 100644 --- a/aiida/tools/importexport/dbimport/backends/sqla/__init__.py +++ b/aiida/tools/importexport/dbimport/backends/sqla/__init__.py @@ -26,7 +26,7 @@ from aiida.orm.utils.repository import Repository from aiida.tools.importexport.common import exceptions -from aiida.tools.importexport.common.archive import extract_tree, extract_tar, extract_zip +from aiida.tools.importexport.common.archive import Archive from aiida.tools.importexport.common.config import DUPL_SUFFIX, IMPORTGROUP_TYPE, EXPORT_VERSION, NODES_EXPORT_SUBFOLDER from aiida.tools.importexport.common.config import ( NODE_ENTITY_NAME, GROUP_ENTITY_NAME, COMPUTER_ENTITY_NAME, USER_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME @@ -124,40 +124,14 @@ def import_data_sqla( # EXTRACT DATA # 
################ # The sandbox has to remain open until the end - with SandboxFolder() as folder: - if os.path.isdir(in_path): - extract_tree(in_path, folder) - else: - if tarfile.is_tarfile(in_path): - extract_tar(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) - elif zipfile.is_zipfile(in_path): - extract_zip(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) - else: - raise exceptions.ImportValidationError( - 'Unable to detect the input file format, it is neither a ' - '(possibly compressed) tar file, nor a zip file.' - ) - - if not folder.get_content_list(): - raise exceptions.CorruptArchive('The provided file/folder ({}) is empty'.format(in_path)) - try: - with io.open(folder.get_abs_path('metadata.json'), encoding='utf8') as fhandle: - metadata = json.load(fhandle) - - with io.open(folder.get_abs_path('data.json'), encoding='utf8') as fhandle: - data = json.load(fhandle) - except IOError as error: - raise exceptions.CorruptArchive( - 'Unable to find the file {} in the import file or folder'.format(error.filename) - ) - + with Archive(in_path, silent=silent) as archive: ###################### # PRELIMINARY CHECKS # ###################### - export_version = StrictVersion(str(metadata['export_version'])) + export_version = StrictVersion(str(archive.meta_data['export_version'])) if export_version != expected_export_version: msg = 'Export file version is {}, can import only version {}'\ - .format(metadata['export_version'], expected_export_version) + .format(archive.meta_data['export_version'], expected_export_version) if export_version < expected_export_version: msg += "\nUse 'verdi export migrate' to update this export file." else: @@ -169,15 +143,15 @@ def import_data_sqla( # CREATE UUID REVERSE TABLES AND CHECK IF # # I HAVE ALL NODES FOR THE LINKS # ################################################################### - linked_nodes = set(chain.from_iterable((l['input'], l['output']) for l in data['links_uuid'])) - group_nodes = set(chain.from_iterable(data['groups_uuid'].values())) + linked_nodes = set(chain.from_iterable((l['input'], l['output']) for l in archive.data['links_uuid'])) + group_nodes = set(chain.from_iterable(archive.data['groups_uuid'].values())) # Check that UUIDs are valid linked_nodes = set(x for x in linked_nodes if validate_uuid(x)) group_nodes = set(x for x in group_nodes if validate_uuid(x)) import_nodes_uuid = set() - for value in data['export_data'].get(NODE_ENTITY_NAME, {}).values(): + for value in archive.data['export_data'].get(NODE_ENTITY_NAME, {}).values(): import_nodes_uuid.add(value['uuid']) unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid @@ -203,7 +177,7 @@ def import_data_sqla( # I make a new list that contains the entity names: # eg: ['User', 'Computer', 'Node', 'Group'] all_entity_names = [signatures_to_entity_names[entity_sig] for entity_sig in entity_sig_order] - for import_field_name in metadata['all_fields_info']: + for import_field_name in archive.meta_data['all_fields_info']: if import_field_name not in all_entity_names: raise exceptions.ImportValidationError( "You are trying to import an unknown model '{}'!".format(import_field_name) @@ -213,7 +187,7 @@ def import_data_sqla( dependencies = [] entity_name = signatures_to_entity_names[entity_sig] # for every field, I checked the dependencies given as value for key requires - for field in metadata['all_fields_info'][entity_name].values(): + for field in archive.meta_data['all_fields_info'][entity_name].values(): try: 
dependencies.append(field['requires']) except KeyError: @@ -238,12 +212,12 @@ def import_data_sqla( # } import_unique_ids_mappings = {} # Export data since v0.3 contains the keys entity_name - for entity_name, import_data in data['export_data'].items(): + for entity_name, import_data in archive.data['export_data'].items(): # Again I need the entity_name since that's what's being stored since 0.3 - if entity_name in metadata['unique_identifiers']: + if entity_name in archive.meta_data['unique_identifiers']: # I have to reconvert the pk to integer import_unique_ids_mappings[entity_name] = { - int(k): v[metadata['unique_identifiers'][entity_name]] for k, v in import_data.items() + int(k): v[archive.meta_data['unique_identifiers'][entity_name]] for k, v in import_data.items() } ############### # IMPORT DATA # @@ -263,7 +237,7 @@ def import_data_sqla( entity_name = signatures_to_entity_names[entity_sig] entity = entity_names_to_entities[entity_name] # I get the unique identifier, since v0.3 stored under entity_name - unique_identifier = metadata['unique_identifiers'].get(entity_name, None) + unique_identifier = archive.meta_data['unique_identifiers'].get(entity_name, None) # so, new_entries. Also, since v0.3 it makes more sense to use the entity_name new_entries[entity_name] = {} @@ -271,10 +245,12 @@ def import_data_sqla( foreign_ids_reverse_mappings[entity_name] = {} # Not necessarily all models are exported - if entity_name in data['export_data']: + if entity_name in archive.data['export_data']: if unique_identifier is not None: - import_unique_ids = set(v[unique_identifier] for v in data['export_data'][entity_name].values()) + import_unique_ids = set( + v[unique_identifier] for v in archive.data['export_data'][entity_name].values() + ) relevant_db_entries = dict() if import_unique_ids: @@ -297,7 +273,7 @@ def import_data_sqla( } imported_comp_names = set() - for key, value in data['export_data'][entity_name].items(): + for key, value in archive.data['export_data'][entity_name].items(): if entity_name == GROUP_ENTITY_NAME: # Check if there is already a group with the same name, # and if so, recreate the name @@ -363,7 +339,7 @@ def import_data_sqla( new_entries[entity_name][key] = value else: # Why the copy: - new_entries[entity_name] = data['export_data'][entity_name].copy() + new_entries[entity_name] = archive.data['export_data'][entity_name].copy() # Show Comment mode if not silent if not silent: @@ -373,8 +349,8 @@ def import_data_sqla( for entity_sig in entity_sig_order: entity_name = signatures_to_entity_names[entity_sig] entity = entity_names_to_entities[entity_name] - fields_info = metadata['all_fields_info'].get(entity_name, {}) - unique_identifier = metadata['unique_identifiers'].get(entity_name, '') + fields_info = archive.meta_data['all_fields_info'].get(entity_name, {}) + unique_identifier = archive.meta_data['unique_identifiers'].get(entity_name, '') # EXISTING ENTRIES for import_entry_pk, entry_data in existing_entries[entity_name].items(): @@ -463,7 +439,7 @@ def import_data_sqla( # Before storing entries in the DB, I store the files (if these are nodes). # Note: only for new entries! 
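# A short sketch of the guarded accessors added to `Archive` (placeholder archive path):
# mandatory keys raise `CorruptArchive` when absent, while optional keys such as
# 'conversion_info' fall back to an empty dict instead of raising.
from aiida.tools.importexport import CorruptArchive
from aiida.tools.importexport.common.archive import Archive

with Archive('/tmp/example_export.aiida', silent=True) as archive:
    try:
        links = archive.get_data_member('links_uuid')             # mandatory key
        notes = archive.get_meta_data_member('conversion_info')   # optional key, {} if missing
        print(len(links), notes)
    except CorruptArchive as exc:
        print('archive is not usable: {}'.format(exc))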
- subfolder = folder.get_subfolder( + subfolder = archive.folder.get_subfolder( os.path.join(NODES_EXPORT_SUBFOLDER, export_shard_uuid(import_entry_uuid)) ) if not subfolder.exists(): @@ -473,13 +449,18 @@ def import_data_sqla( ) destdir = RepositoryFolder(section=Repository._section_name, uuid=import_entry_uuid) # Replace the folder, possibly destroying existing previous folders, and move the files - # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder) - destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True) + # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder + # - for extract_tar and extract_zip) + # If extract_tree, do not move, i.e. do not destroy the source. + if issubclass(subfolder.__class__, SandboxFolder): + destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True) + else: + destdir.replace_with_folder(subfolder.abspath, move=False, overwrite=True) # For Nodes, we also have to store Attributes! # Get attributes from import file try: - object_.attributes = data['node_attributes'][str(import_entry_pk)] + object_.attributes = archive.data['node_attributes'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find attribute info for Node with UUID={}'.format(import_entry_uuid) @@ -491,7 +472,7 @@ def import_data_sqla( if not silent: print('STORING NEW NODE EXTRAS...') try: - extras = data['node_extras'][str(import_entry_pk)] + extras = archive.data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}'.format(import_entry_uuid) @@ -526,7 +507,7 @@ def import_data_sqla( # Get extras from import file try: - extras = data['node_extras'][str(import_entry_pk)] + extras = archive.data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}'.format(import_entry_uuid) @@ -582,7 +563,7 @@ def import_data_sqla( if not silent: print('STORING NODE LINKS...') - import_links = data['links_uuid'] + import_links = archive.data['links_uuid'] for link in import_links: # Check for dangling Links within the, supposed, self-consistent archive @@ -626,7 +607,7 @@ def import_data_sqla( if not silent: print('STORING GROUP ELEMENTS...') - import_groups = data['groups_uuid'] + import_groups = archive.data['groups_uuid'] for groupuuid, groupnodes in import_groups.items(): # # TODO: cache these to avoid too many queries qb_group = QueryBuilder().append(Group, filters={'uuid': {'==': groupuuid}}) diff --git a/aiida/tools/importexport/migration/__init__.py b/aiida/tools/importexport/migration/__init__.py index e5772c1f8f..51f3a74aea 100644 --- a/aiida/tools/importexport/migration/__init__.py +++ b/aiida/tools/importexport/migration/__init__.py @@ -10,9 +10,9 @@ """Migration export files from old export versions to the newest, used by `verdi export migrate` command.""" from aiida.cmdline.utils import echo -from aiida.tools.importexport.common.exceptions import DanglingLinkError +from aiida.tools.importexport.common import exceptions -from .utils import verify_metadata_version +from .utils import verify_archive_version from .v01_to_v02 import migrate_v1_to_v2 from .v02_to_v03 import migrate_v2_to_v3 from .v03_to_v04 import migrate_v3_to_v4 @@ -21,7 +21,7 @@ from .v06_to_v07 import migrate_v6_to_v7 from .v07_to_v08 import migrate_v7_to_v8 -__all__ = ('migrate_recursively', 'verify_metadata_version') +__all__ = 
('migrate_archive', 'verify_archive_version') MIGRATE_FUNCTIONS = { '0.1': migrate_v1_to_v2, @@ -34,34 +34,66 @@ } -def migrate_recursively(metadata, data, folder): +def migrate_recursively(archive): """ Recursive migration of export files from v0.1 to newest version, See specific migration functions for detailed descriptions. - :param metadata: the content of an export archive metadata.json file - :param data: the content of an export archive data.json file - :param folder: SandboxFolder in which the archive has been unpacked (workdir) + :param archive: The export archive to be migrated. + :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` """ - from aiida.tools.importexport import EXPORT_VERSION as newest_version + from aiida.tools.importexport.common.config import EXPORT_VERSION as newest_version - old_version = verify_metadata_version(metadata) + old_version = archive.version_format - try: - if old_version == newest_version: - echo.echo_critical('Your export file is already at the newest export version {}'.format(newest_version)) - elif old_version in MIGRATE_FUNCTIONS: - MIGRATE_FUNCTIONS[old_version](metadata, data, folder) - else: - echo.echo_critical('Cannot migrate from version {}'.format(old_version)) - except ValueError as exception: - echo.echo_critical(exception) - except DanglingLinkError: - echo.echo_critical('Export file is invalid because it contains dangling links') - - new_version = verify_metadata_version(metadata) - - if new_version < newest_version: - new_version = migrate_recursively(metadata, data, folder) + if old_version == newest_version: + raise exceptions.MigrationValidationError( + 'Your export file is already at the newest export version {}'.format(newest_version) + ) + elif old_version in MIGRATE_FUNCTIONS: + MIGRATE_FUNCTIONS[old_version](archive) + else: + raise exceptions.MigrationValidationError('Cannot migrate from version {}'.format(old_version)) + + new_version = archive.version_format + + if new_version: + if new_version < newest_version: + new_version = migrate_recursively(archive) + else: + raise exceptions.MigrationValidationError('Archive version could not be determined') return new_version + + +def migrate_archive(source, output=None, overwrite=False, silent=False): + """Unpack, migrate, and repack an export archive + + :param source: Path to source archive to be migrated. + :type source: str + :param output: Path to newly migrated archive. + :type output: str + :param overwrite: Whether or not to overwrite the newly migrated archive, if it already exists. + :type overwrite: bool + :param silent: Whether or not to unpack archive and migrate silently. 
+    :type silent: bool
+
+    :return: The export version the archive had before migration and the version it was migrated to
+    :rtype: tuple
+    """
+    import os
+    from aiida.tools.importexport.common.archive import Archive
+
+    if output and os.path.exists(output) and not overwrite:
+        raise exceptions.MigrationValidationError('The output file already exists')
+
+    try:
+        with Archive(source, silent=silent) as archive:
+            old_version = archive.version_format
+            new_version = migrate_recursively(archive)
+
+            archive.repack(output_filepath=output, overwrite=overwrite)
+    except Exception as why:
+        raise exceptions.ArchiveMigrationError(why)
+
+    return old_version, new_version
diff --git a/aiida/tools/importexport/migration/utils.py b/aiida/tools/importexport/migration/utils.py
index a1c5b7d2c9..64179a3c83 100644
--- a/aiida/tools/importexport/migration/utils.py
+++ b/aiida/tools/importexport/migration/utils.py
@@ -9,32 +9,24 @@
 ###########################################################################
 """Utility functions for migration of export-files."""

-from aiida.tools.importexport.common import exceptions

+def verify_archive_version(archive_version, version):
+    """Utility function to verify that the archive has the correct version number.

-def verify_metadata_version(metadata, version=None):
-    """Utility function to verify that the metadata has the correct version number.
-
-    If no version number is passed, it will just extract the version number and return it.
-
-    :param metadata: the content of an export archive metadata.json file
-    :param version: string version number that the metadata is expected to have
+    :param archive_version: the version from an export archive metadata.json file
+    :type archive_version: str
+    :param version: version number that the archive is expected to have
+    :type version: str
     """
-    try:
-        metadata_version = metadata['export_version']
-    except KeyError:
-        raise exceptions.ArchiveMigrationError("metadata is missing the 'export_version' key")
+    from aiida.tools.importexport.common.exceptions import MigrationValidationError

-    if version is None:
-        return metadata_version
-
-    if metadata_version != version:
-        raise exceptions.MigrationValidationError(
-            'expected export file with version {} but found version {}'.format(version, metadata_version)
+    if not isinstance(archive_version, str) or not isinstance(version, str):
+        raise MigrationValidationError('Only strings are accepted for "verify_archive_version"')
+    if archive_version != version:
+        raise MigrationValidationError(
+            'expected export file with version {} but found version {}'.format(version, archive_version)
         )

-    return None
-

 def update_metadata(metadata, version):
     """Update the metadata with a new version number and a notification of the conversion that was executed.
@@ -50,7 +42,6 @@ def update_metadata(metadata, version): conversion_message = 'Converted from version {} to {} with AiiDA v{}'.format(old_version, version, get_version()) conversion_info.append(conversion_message) - metadata['aiida_version'] = get_version() metadata['export_version'] = version metadata['conversion_info'] = conversion_info diff --git a/aiida/tools/importexport/migration/v01_to_v02.py b/aiida/tools/importexport/migration/v01_to_v02.py index 2b044d37f6..adf4f2966a 100644 --- a/aiida/tools/importexport/migration/v01_to_v02.py +++ b/aiida/tools/importexport/migration/v01_to_v02.py @@ -10,16 +10,16 @@ """Migration from v0.1 to v0.2, used by `verdi export migrate` command.""" # pylint: disable=unused-argument -from aiida.tools.importexport.migration.utils import verify_metadata_version, update_metadata +from aiida.tools.importexport.migration.utils import verify_archive_version, update_metadata -def migrate_v1_to_v2(metadata, data, *args): - """ - Migration of export files from v0.1 to v0.2, which means generalizing the - field names with respect to the database backend +def migrate_v1_to_v2(archive): + """Migration of export files from v0.1 to v0.2 + + Which means generalizing the field names with respect to the database backend. - :param metadata: the content of an export archive metadata.json file - :param data: the content of an export archive data.json file + :param archive: The export archive to be migrated. + :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` """ old_version = '0.1' new_version = '0.2' @@ -27,7 +27,10 @@ def migrate_v1_to_v2(metadata, data, *args): old_start = 'aiida.djsite' new_start = 'aiida.backends.djsite' - verify_metadata_version(metadata, old_version) + metadata = archive.meta_data + data = archive.data + + verify_archive_version(archive.version_format, old_version) update_metadata(metadata, new_version) def get_new_string(old_string): diff --git a/aiida/tools/importexport/migration/v02_to_v03.py b/aiida/tools/importexport/migration/v02_to_v03.py index ef3cfe62ff..163fec4920 100644 --- a/aiida/tools/importexport/migration/v02_to_v03.py +++ b/aiida/tools/importexport/migration/v02_to_v03.py @@ -13,17 +13,17 @@ import enum from aiida.tools.importexport.common.exceptions import DanglingLinkError -from aiida.tools.importexport.migration.utils import verify_metadata_version, update_metadata +from aiida.tools.importexport.migration.utils import verify_archive_version, update_metadata -def migrate_v2_to_v3(metadata, data, *args): - """ - Migration of export files from v0.2 to v0.3, which means adding the link - types to the link entries and making the entity key names backend agnostic +def migrate_v2_to_v3(archive): + """Migration of export files from v0.2 to v0.3 + + Which means adding the link types to the link entries and making the entity key names backend agnostic by effectively removing the prefix 'aiida.backends.djsite.db.models' - :param data: the content of an export archive data.json file - :param metadata: the content of an export archive metadata.json file + :param archive: The export archive to be migrated. 
+ :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` """ old_version = '0.2' @@ -56,7 +56,10 @@ class NodeType(enum.Enum): # pylint: disable=too-few-public-methods 'aiida.backends.djsite.db.models.DbAttribute': 'Attribute' } - verify_metadata_version(metadata, old_version) + metadata = archive.meta_data + data = archive.data + + verify_archive_version(archive.version_format, old_version) update_metadata(metadata, new_version) # Create a mapping from node uuid to node type diff --git a/aiida/tools/importexport/migration/v03_to_v04.py b/aiida/tools/importexport/migration/v03_to_v04.py index 7d60e9be00..5ced653469 100644 --- a/aiida/tools/importexport/migration/v03_to_v04.py +++ b/aiida/tools/importexport/migration/v03_to_v04.py @@ -30,7 +30,7 @@ import numpy as np from aiida.cmdline.utils import echo -from aiida.tools.importexport.migration.utils import verify_metadata_version, update_metadata, remove_fields +from aiida.tools.importexport.migration.utils import verify_archive_version, update_metadata, remove_fields def migration_base_data_plugin_type_string(data): @@ -431,18 +431,23 @@ def add_extras(data): data.update({'node_extras': node_extras, 'node_extras_conversion': node_extras_conversion}) -def migrate_v3_to_v4(metadata, data, folder, *args): # pylint: disable=unused-argument - """ - Migration of export files from v0.3 to v0.4 +def migrate_v3_to_v4(archive): + """Migration of export files from v0.3 to v0.4 Note concerning migration 0032 - REV. 1.0.32: Remove legacy workflow tables: DbWorkflow, DbWorkflowData, DbWorkflowStep These were (according to Antimo Marrazzo) never exported. + + :param archive: The export archive to be migrated. + :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` """ old_version = '0.3' new_version = '0.4' - verify_metadata_version(metadata, old_version) + metadata = archive.meta_data + data = archive.data + + verify_archive_version(archive.version_format, old_version) update_metadata(metadata, new_version) # Apply migrations in correct sequential order @@ -456,7 +461,7 @@ def migrate_v3_to_v4(metadata, data, folder, *args): # pylint: disable=unused-a migration_dbgroup_type_string_change_content(data) migration_calc_job_option_attribute_keys(data) migration_move_data_within_node_module(data) - migration_trajectory_symbols_to_attribute(data, folder) + migration_trajectory_symbols_to_attribute(data, archive.folder) migration_remove_node_prefix(data) migration_rename_parameter_data_to_dict(data) migration_dbnode_type_to_dbnode_node_type(metadata, data) diff --git a/aiida/tools/importexport/migration/v04_to_v05.py b/aiida/tools/importexport/migration/v04_to_v05.py index 6f2794c3fa..43f00700cb 100644 --- a/aiida/tools/importexport/migration/v04_to_v05.py +++ b/aiida/tools/importexport/migration/v04_to_v05.py @@ -25,7 +25,7 @@ """ # pylint: disable=invalid-name -from aiida.tools.importexport.migration.utils import verify_metadata_version, update_metadata, remove_fields +from aiida.tools.importexport.migration.utils import verify_archive_version, update_metadata, remove_fields def migration_drop_node_columns_nodeversion_public(metadata, data): @@ -48,16 +48,21 @@ def migration_drop_computer_transport_params(metadata, data): remove_fields(metadata, data, [entity], [field]) -def migrate_v4_to_v5(metadata, data, *args): # pylint: disable=unused-argument - """ - Migration of export files from v0.4 to v0.5 +def migrate_v4_to_v5(archive): + """Migration of export files from v0.4 to v0.5 + + This is from migration 0034 
(drop_node_columns_nodeversion_public) and onwards. - This is from migration 0034 (drop_node_columns_nodeversion_public) and onwards + :param archive: The export archive to be migrated. + :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` """ old_version = '0.4' new_version = '0.5' - verify_metadata_version(metadata, old_version) + metadata = archive.meta_data + data = archive.data + + verify_archive_version(archive.version_format, old_version) update_metadata(metadata, new_version) # Apply migrations diff --git a/aiida/tools/importexport/migration/v05_to_v06.py b/aiida/tools/importexport/migration/v05_to_v06.py index f2e4311448..7067a874fa 100644 --- a/aiida/tools/importexport/migration/v05_to_v06.py +++ b/aiida/tools/importexport/migration/v05_to_v06.py @@ -25,7 +25,7 @@ """ # pylint: disable=invalid-name -from aiida.tools.importexport.migration.utils import verify_metadata_version, update_metadata +from aiida.tools.importexport.migration.utils import verify_archive_version, update_metadata def migrate_deserialized_datetime(data, conversion): @@ -131,13 +131,19 @@ def migration_migrate_legacy_job_calculation_data(data): values['process_label'] = 'Legacy JobCalculation' -def migrate_v5_to_v6(metadata, data, *args): # pylint: disable=unused-argument - """Migration of export files from v0.5 to v0.6""" +def migrate_v5_to_v6(archive): + """Migration of export files from v0.5 to v0.6 + + :param archive: The export archive to be migrated. + :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` + """ old_version = '0.5' new_version = '0.6' - verify_metadata_version(metadata, old_version) - update_metadata(metadata, new_version) + data = archive.data + + verify_archive_version(archive.version_format, old_version) + update_metadata(archive.meta_data, new_version) # Apply migrations migration_serialize_datetime_objects(data) diff --git a/aiida/tools/importexport/migration/v06_to_v07.py b/aiida/tools/importexport/migration/v06_to_v07.py index 5a1cbe0582..a9185da856 100644 --- a/aiida/tools/importexport/migration/v06_to_v07.py +++ b/aiida/tools/importexport/migration/v06_to_v07.py @@ -25,7 +25,7 @@ """ # pylint: disable=invalid-name -from aiida.tools.importexport.migration.utils import verify_metadata_version, update_metadata +from aiida.tools.importexport.migration.utils import verify_archive_version, update_metadata def migration_data_migration_legacy_process_attributes(data): @@ -112,14 +112,20 @@ def remove_attribute_link_metadata(metadata): metadata[dictionary].pop(entity, None) -def migrate_v6_to_v7(metadata, data, *args): # pylint: disable=unused-argument - """Migration of export files from v0.6 to v0.7""" +def migrate_v6_to_v7(archive): + """Migration of export files from v0.6 to v0.7 + + :param archive: The export archive to be migrated. 
+ :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` + """ old_version = '0.6' new_version = '0.7' - verify_metadata_version(metadata, old_version) + metadata = archive.meta_data + + verify_archive_version(archive.version_format, old_version) update_metadata(metadata, new_version) # Apply migrations - migration_data_migration_legacy_process_attributes(data) + migration_data_migration_legacy_process_attributes(archive.data) remove_attribute_link_metadata(metadata) diff --git a/aiida/tools/importexport/migration/v07_to_v08.py b/aiida/tools/importexport/migration/v07_to_v08.py index 2049d1601d..cc8fb46434 100644 --- a/aiida/tools/importexport/migration/v07_to_v08.py +++ b/aiida/tools/importexport/migration/v07_to_v08.py @@ -25,7 +25,7 @@ """ # pylint: disable=invalid-name -from aiida.tools.importexport.migration.utils import verify_metadata_version, update_metadata +from aiida.tools.importexport.migration.utils import verify_archive_version, update_metadata def migration_default_link_label(data): @@ -38,12 +38,19 @@ def migration_default_link_label(data): link['label'] = 'result' -def migrate_v7_to_v8(metadata, data, *args): # pylint: disable=unused-argument - """Migration of export files from v0.7 to v0.8.""" +def migrate_v7_to_v8(archive): + """Migration of export files from v0.7 to v0.8. + + :param archive: The export archive to be migrated. + :type archive: :py:class:`~aiida.tools.importexport.common.archive.Archive` + """ old_version = '0.7' new_version = '0.8' - verify_metadata_version(metadata, old_version) + data = archive.data + metadata = archive.meta_data + + verify_archive_version(archive.version_format, old_version) update_metadata(metadata, new_version) # Apply migrations
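# A minimal end-to-end sketch (placeholder filenames) of the new entry point that drives
# the per-version migration functions above: `migrate_archive` opens an `Archive`, calls
# `migrate_recursively`, repacks the result, and returns the old and new export versions.
from aiida.tools.importexport import migrate_archive

old_version, new_version = migrate_archive('export_v0.5.aiida', output='export_migrated.aiida', silent=True)
print('migrated from {} to {}'.format(old_version, new_version))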