From a4665f62699fec45d8b6d6ea7ed95645fe9dafdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20M=C3=B6nch?= Date: Fri, 5 Jul 2024 11:41:49 +0200 Subject: [PATCH 1/2] fix: strip complete base directory from tarfile paths Strip the complete input base directory from the paths that are used in the study-visit tar-files. This commit fixes issue --- bin/make_studyvisit_archive | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/make_studyvisit_archive b/bin/make_studyvisit_archive index fe52b53..d7bf875 100755 --- a/bin/make_studyvisit_archive +++ b/bin/make_studyvisit_archive @@ -85,6 +85,7 @@ def write_archive( # adjust properties to make archive builds reproducible tinfo = normalize_tarinfo( tinfo, + input_base_dir, archive_content_base_dir, # go with the reported timestamp from DICOM or with default content[p] or default_timestamp, @@ -94,9 +95,10 @@ def write_archive( tar.addfile(tinfo, fp) -def normalize_tarinfo(tinfo, archive_path, timestamp): +def normalize_tarinfo(tinfo, input_base_dir, archive_path, timestamp): # strip first level and replace with generated archive root dir name - tinfo.name = str(Path(archive_path, *Path(tinfo.name).parts[1:])) + input_path = (Path('/') / tinfo.name).relative_to(input_base_dir) + tinfo.name = str(Path(archive_path) / input_path) # be safe tinfo.uid = 0 tinfo.gid = 0 From 035df9cb16a97b1304c5c61ab3b2257aaff11297 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Fri, 5 Jul 2024 13:15:31 +0200 Subject: [PATCH 2/2] enh(tests): add regression test for path handling --- tests/modules/__init__.py | 0 tests/modules/make_studyvisit_archive.py | 1 + tests/test_path_handling.py | 24 ++++++++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 tests/modules/__init__.py create mode 120000 tests/modules/make_studyvisit_archive.py create mode 100644 tests/test_path_handling.py diff --git a/tests/modules/__init__.py b/tests/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff
--git a/tests/modules/make_studyvisit_archive.py b/tests/modules/make_studyvisit_archive.py new file mode 120000 index 0000000..b595cbf --- /dev/null +++ b/tests/modules/make_studyvisit_archive.py @@ -0,0 +1 @@ +../../bin/make_studyvisit_archive \ No newline at end of file diff --git a/tests/test_path_handling.py b/tests/test_path_handling.py new file mode 100644 index 0000000..f7529c4 --- /dev/null +++ b/tests/test_path_handling.py @@ -0,0 +1,24 @@ + +import sys +import tarfile +from pathlib import Path + +from .modules.make_studyvisit_archive import main + + +def test_path_handling(tmp_path): + base_dir = tmp_path / 'input' / 'd_1' / 'd_1_1' / 'd_1_1_1' + base_dir.mkdir(parents=True) + (base_dir / 'file1.txt').write_text('content 1') + (base_dir / 'file2.txt').write_text('content 2') + + output_base_dir = tmp_path / 'output' + + study_id, visit_id = 'study_1', 'visit_1' + main(str(base_dir), str(output_base_dir), study_id, visit_id) + + tar_file = tarfile.open(output_base_dir / study_id / f'{visit_id}_dicom.tar') + assert tar_file.getnames() == [ + f'{study_id}_{visit_id}/file1.txt', + f'{study_id}_{visit_id}/file2.txt', + ]