From b2c9a7bd8e4bc20e47b039b02fc2be8a1806fd5c Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 31 Jan 2024 10:10:37 +0000 Subject: [PATCH] feat: files missing in upload clearly marked as such --- viewer/download_structures.py | 47 ++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 4382491e..65f94efc 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -12,6 +12,7 @@ import uuid import zipfile from datetime import datetime, timedelta, timezone +from io import BytesIO from pathlib import Path from typing import Any, Dict @@ -139,21 +140,21 @@ def _add_file_to_zip(ziparchive, param, filepath): Returns: [boolean]: [True of record added] """ + logger.debug('+_add_file_to_zip: %s, %s', param, filepath) if not filepath: # Odd - assume success logger.error('No filepath value') return True fullpath = os.path.join(settings.MEDIA_ROOT, filepath) - + cleaned_filename = clean_filename(filepath) + archive_path = os.path.join(_ZIP_FILEPATHS[param], cleaned_filename) if os.path.isfile(fullpath): - cleaned_filename = clean_filename(filepath) - ziparchive.write( - fullpath, os.path.join(_ZIP_FILEPATHS[param], cleaned_filename) - ) + ziparchive.write(fullpath, archive_path) return True else: logger.warning('filepath "%s" is not a file', filepath) + _add_empty_file(ziparchive, archive_path) return False @@ -165,6 +166,17 @@ def _is_mol_or_sdf(path): return Path(path).suffix.lower() in ('.sdf', '.mol') +def _add_empty_file(ziparchive, archive_path): + """When file is missing, add an empty file to the archive. + + + Used to send an explicit signal to the downloader that the file is + missing. + """ + logger.debug('+_add_empty_file: %s', archive_path) + ziparchive.writestr(f'{archive_path}_FILE_NOT_IN_UPLOAD', BytesIO(b'').getvalue()) + + def _read_and_patch_molecule_name(path, molecule_name=None): """Patches the source file (expected to be a MOL or SDF file) by adding the molecule name (the basename of the file) and returning the @@ -218,6 +230,7 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): Returns: [boolean]: [True of record added to archive] """ + logger.debug('+_add_file_to_zip_aligned: %s, %s', code, filepath) if not filepath: # Odd - assume success logger.error('No filepath value') @@ -229,9 +242,9 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): except AttributeError: filepath = str(Path(settings.MEDIA_ROOT).joinpath(filepath)) + # strip off the leading parts of path + archive_path = str(Path(*Path(filepath).parts[7:])) if Path(filepath).is_file(): - # strip off the leading parts of path - archive_path = str(Path(*Path(filepath).parts[7:])) if _is_mol_or_sdf(filepath): # It's a MOL or SD file. # Read and (potentially) adjust the file @@ -244,6 +257,7 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): return True else: logger.warning('filepath "%s" is not a file', filepath) + _add_empty_file(ziparchive, archive_path) return False @@ -370,16 +384,19 @@ def _trans_matrix_files_zip(ziparchive, target): experiment_upload.reference_structure_transforms, ) for tmf in trans_matrix_files: - if tmf: + filepath = Path(settings.MEDIA_ROOT).joinpath(str(tmf)) + archive_path = os.path.join( + _ZIP_FILEPATHS['trans_matrix_info'], + Path(str(tmf)).name, + ) + if filepath.is_file(): ziparchive.write( - Path(settings.MEDIA_ROOT).joinpath(str(tmf)), - os.path.join( - _ZIP_FILEPATHS['trans_matrix_info'], - Path(str(tmf)).name, - ), + filepath, + archive_path, ) else: - logger.info('File %s does not exist', Path(str(tmf)).name) + logger.warning('File %s does not exist', Path(str(tmf)).name) + _add_empty_file(ziparchive, archive_path) def _extra_files_zip(ziparchive, target): @@ -561,7 +578,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host errors += 1 logger.warning('After _add_file_to_zip() errors=%s', errors) - if zip_contents['metadata_info']: + if zip_contents['trans_matrix_info']: _trans_matrix_files_zip(ziparchive, target) _extra_files_zip(ziparchive, target)