Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix second upload file paths (issue 1492) #650

Merged
merged 4 commits
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 60 additions & 68 deletions viewer/download_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,39 +528,36 @@ def _extra_files_zip(ziparchive, target):

num_processed = 0
num_extra_dir = 0
for experiment_upload in target.experimentupload_set.order_by('commit_datetime'):
extra_files = (
Path(settings.MEDIA_ROOT)
.joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY)
.joinpath(experiment_upload.task_id)
)
# taking the latest upload for now

# taking the latest upload for now
# add unpacked zip directory
extra_files = [d for d in list(extra_files.glob("*")) if d.is_dir()][0]

# add upload_[d] dir
extra_files = next(extra_files.glob("upload_*"))
extra_files = extra_files.joinpath('extra_files')

logger.debug('extra_files path 2: %s', extra_files)
logger.info('Processing extra files (%s)...', extra_files)

if extra_files.is_dir():
num_extra_dir = num_extra_dir + 1
for dirpath, _, files in os.walk(extra_files):
for file in files:
filepath = os.path.join(dirpath, file)
logger.info('Adding extra file "%s"...', filepath)
ziparchive.write(
filepath,
os.path.join(
f'{_ZIP_FILEPATHS["extra_files"]}_{num_extra_dir}', file
),
)
num_processed += 1
else:
logger.info('Directory does not exist (%s)...', extra_files)
experiment_upload = target.experimentupload_set.order_by('commit_datetime').last()
extra_files = (
Path(settings.MEDIA_ROOT)
.joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY)
.joinpath(target.zip_archive.name)
.joinpath(experiment_upload.upload_data_dir)
)

extra_files = extra_files.joinpath('extra_files')

logger.debug('extra_files path 2: %s', extra_files)
logger.info('Processing extra files (%s)...', extra_files)

if extra_files.is_dir():
num_extra_dir = num_extra_dir + 1
for dirpath, _, files in os.walk(extra_files):
for file in files:
filepath = os.path.join(dirpath, file)
logger.info('Adding extra file "%s"...', filepath)
ziparchive.write(
filepath,
os.path.join(
f'{_ZIP_FILEPATHS["extra_files"]}_{num_extra_dir}', file
),
)
num_processed += 1
else:
logger.info('Directory does not exist (%s)...', extra_files)

if num_processed == 0:
logger.info('No extra files found')
Expand All @@ -571,44 +568,39 @@ def _extra_files_zip(ziparchive, target):
def _yaml_files_zip(ziparchive, target, transforms_requested: bool = False) -> None:
"""Add all yaml files (except transforms) from upload to ziparchive"""

for experiment_upload in target.experimentupload_set.order_by('commit_datetime'):
yaml_paths = (
Path(settings.MEDIA_ROOT)
.joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY)
.joinpath(experiment_upload.task_id)
experiment_upload = target.experimentupload_set.order_by('commit_datetime').last()
yaml_paths = (
Path(settings.MEDIA_ROOT)
.joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY)
.joinpath(target.zip_archive.name)
.joinpath(experiment_upload.upload_data_dir)
)

transforms = [
Path(f.name).name
for f in (
experiment_upload.conformer_site_transforms,
experiment_upload.neighbourhood_transforms,
experiment_upload.reference_structure_transforms,
)
]

transforms = [
Path(f.name).name
for f in (
experiment_upload.neighbourhood_transforms,
experiment_upload.neighbourhood_transforms,
experiment_upload.neighbourhood_transforms,
)
]
# taking the latest upload for now
# add unpacked zip directory
yaml_paths = [d for d in list(yaml_paths.glob("*")) if d.is_dir()][0]

# add upload_[d] dir
yaml_paths = next(yaml_paths.glob("upload_*"))

archive_path = Path('yaml_files').joinpath(yaml_paths.parts[-1])

yaml_files = [
f
for f in list(yaml_paths.glob("*.yaml"))
if f.is_file() and f.name not in transforms
]

logger.info('Processing yaml files (%s)...', yaml_files)

for file in yaml_files:
logger.info('Adding yaml file "%s"...', file)
if not transforms_requested and file.name == 'neighbourhoods.yaml':
# don't add this file if transforms are not requested
continue
ziparchive.write(file, str(Path(archive_path).joinpath(file.name)))
archive_path = Path('yaml_files').joinpath(yaml_paths.parts[-1])

yaml_files = [
f
for f in list(yaml_paths.glob("*.yaml"))
if f.is_file() and f.name not in transforms
]

logger.info('Processing yaml files (%s)...', yaml_files)

for file in yaml_files:
logger.info('Adding yaml file "%s"...', file)
if not transforms_requested and file.name == 'neighbourhoods.yaml':
# don't add this file if transforms are not requested
continue
ziparchive.write(file, str(Path(archive_path).joinpath(file.name)))


def _document_file_zip(ziparchive, download_path, original_search, host):
Expand Down
53 changes: 35 additions & 18 deletions viewer/target_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import logging
import math
import os
import shutil
import tarfile
import uuid
from collections.abc import Callable
from dataclasses import dataclass, field
from enum import Enum
Expand Down Expand Up @@ -44,7 +44,7 @@
XtalformQuatAssembly,
XtalformSite,
)
from viewer.utils import alphanumerator
from viewer.utils import alphanumerator, sanitize_directory_name

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -475,29 +475,23 @@ def __init__(
)

# work out where the data finally lands
# path = Path(settings.MEDIA_ROOT).joinpath(TARGET_LOADER_DATA)
path = Path(TARGET_LOADER_MEDIA_DIRECTORY)

# give each upload a unique directory. since I already have
# task_id, why not reuse it
# give each upload a unique directory
# update: resolving issue 1311 introduced a bug, where
# subsequent uploads overwrote file paths and files appeared
# to be missing. changing the directory structure so this
# wouldn't be an issue, the new structure is
# target_loader_data/target_title/upload_(n)/...
if task:
path = path.joinpath(str(task.request.id))
self.experiment_upload.task_id = task.request.id
else:
# unless of course I don't have task..
# TODO: i suspect this will never be used.
path_uuid = uuid.uuid4().hex
path = path.joinpath(path_uuid)
self.experiment_upload.task_id = path_uuid

# figure out absolute and relative paths to final
# location. relative path is added to db field, this will be
# used in url requests to retrieve the file. absolute path is
# for moving the file to the final location
self._final_path = path.joinpath(self.bundle_name)
self._abs_final_path = (
Path(settings.MEDIA_ROOT).joinpath(path).joinpath(self.bundle_name)
)
self._final_path = path
self._abs_final_path = Path(settings.MEDIA_ROOT).joinpath(path)
# but don't create now, this comes later

# to be used in logging messages, if no task, means invoked
Expand Down Expand Up @@ -1501,6 +1495,21 @@ def process_bundle(self):
display_name=self.target_name,
)

if target_created:
# mypy thinks target and target_name are None
target_dir = sanitize_directory_name(self.target_name, self.abs_final_path) # type: ignore [arg-type]
self.target.zip_archive = target_dir # type: ignore [attr-defined]
self.target.save() # type: ignore [attr-defined]
else:
# NB! using existing field zip_archive to point to the
# location of the archives, not the archives
# themselves. The field was unused, and because of the
# versioned uploads, there's no single archive anymore
target_dir = str(self.target.zip_archive) # type: ignore [attr-defined]

self._final_path = self._final_path.joinpath(target_dir)
self._abs_final_path = self._abs_final_path.joinpath(target_dir)

# TODO: original target loader's function get_create_projects
# seems to handle more cases. adopt or copy
visit = self.proposal_ref.split()[0]
Expand Down Expand Up @@ -2200,8 +2209,16 @@ def load_target(

def _move_and_save_target_experiment(target_loader):
# Move the uploaded file to its final location
target_loader.abs_final_path.mkdir(parents=True)
target_loader.raw_data.rename(target_loader.abs_final_path)
try:
target_loader.abs_final_path.mkdir(parents=True)
except FileExistsError:
# subsequent upload, directory already exists
pass

shutil.move(
str(target_loader.raw_data.joinpath(target_loader.version_dir)),
str(target_loader.abs_final_path),
)
Path(target_loader.bundle_path).rename(
target_loader.abs_final_path.parent.joinpath(target_loader.data_bundle)
)
Expand Down
36 changes: 36 additions & 0 deletions viewer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import logging
import os
import re
import shutil
import string
import tempfile
Expand Down Expand Up @@ -530,3 +531,38 @@ def email_task_completion(
send_mail(subject, message, email_from, recipient_list, fail_silently=True)
logger.info('- email_notify_task_completion')
return


def sanitize_directory_name(name: str, path: Path | None = None) -> str:
    """
    Sanitize a string so it only contains characters allowed in UNIX directory names.

    Characters outside ``[a-zA-Z0-9._-]`` are replaced with underscores and
    runs of consecutive underscores are collapsed to one. When *path* is
    given, a numeric suffix (``_2``, ``_3``, ...) is appended until the
    result does not clash with an existing subdirectory of *path*.

    Parameters:
        name: The input string to sanitize.
        path (optional): the parent directory where the directory would reside, to check if unique

    Returns:
        str: A sanitized string with only allowed characters.
    """
    # Substitute every disallowed character with '_', then squeeze
    # consecutive underscores down to a single one.
    candidate = re.sub(r'[^a-zA-Z0-9._-]', '_', name.strip())
    candidate = re.sub(r'_{2,}', '_', candidate)
    logger.debug('sanitized name: %s', candidate)

    if path:
        # Names of directories already present under the parent path.
        existing = [entry.name for entry in path.glob("*") if entry.is_dir()]
        logger.debug('target dirs: %s', existing)

        unique_name = candidate
        counter = 1
        while unique_name in existing:
            # First collision yields '<name>_2', then '<name>_3', ...
            counter += 1
            unique_name = f'{candidate}_{counter}'
            logger.debug('looping suffix: %s', unique_name)

        candidate = unique_name

    return candidate