Skip to content

Commit a860bfc

Browse files
authored
Merge pull request #547 from xchem/m2ms-1326-download-zip-api
Updates to api/download_structures endpoint (issue 1326)
2 parents 49f13cb + 5f268d7 commit a860bfc

6 files changed

+141
-144
lines changed

viewer/download_structures.py

+102-129
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
)
3232
from viewer.utils import clean_filename
3333

34+
from .serializers import DownloadStructuresSerializer
35+
3436
logger = logging.getLogger(__name__)
3537

3638
# Length of time to keep records of dynamic links.
@@ -41,18 +43,23 @@
4143
# the protein code subdirectory of the aligned directory
4244
# (as for the target upload).
4345
_ZIP_FILEPATHS = {
44-
'apo_file': ('aligned'),
45-
'bound_file': ('aligned'),
46-
'cif_info': ('aligned'),
47-
'mtz_info': ('aligned'),
48-
'map_info': ('aligned'),
49-
'sigmaa_file': ('aligned'),
50-
'diff_file': ('aligned'),
51-
'event_file': ('aligned'),
52-
'sdf_info': ('aligned'),
46+
'apo_file': ('aligned'), # SiteObservation: apo_file
47+
'apo_solv_file': ('aligned'), # SiteObservation: apo_solv_file
48+
'apo_desolv_file': ('aligned'), # SiteObservation: apo_desolv_file
49+
'bound_file': ('aligned'), # SiteObservation: bound_file
50+
'sdf_info': ('aligned'), # SiteObservation: ligand_mol_file (indirectly)
51+
'ligand_pdb': ('aligned'), # SiteObservation: ligand_pdb
52+
'smiles_info': (''), # SiteObservation: smiles_info (indirectly)
53+
# those above are all controlled by serializer's all_aligned_structures flag
54+
'sigmaa_file': ('aligned'), # SiteObservation: sigmaa_file
55+
'diff_file': ('aligned'), # SiteObservation: diff_file
56+
'event_file': ('aligned'), # SiteObservation: event_file
57+
'pdb_info': ('aligned'), # Experiment: pdb_info
58+
'cif_info': ('aligned'), # Experiment: cif_info
59+
'mtz_info': ('aligned'), # Experiment: mtz_info
60+
'map_info': ('aligned'), # Experiment: map_info (multiple files)
5361
'single_sdf_file': (''),
5462
'metadata_info': (''),
55-
'smiles_info': (''),
5663
'trans_matrix_info': (''),
5764
'extra_files': ('extra_files'),
5865
'readme': (''),
@@ -107,14 +114,18 @@ class ArchiveFile:
107114
# NB you may need to add a version number to this at some point...
108115
zip_template = {
109116
'proteins': {
110-
'apo_file': {}, # from experiment
111-
'bound_file': {}, # x
112-
'cif_info': {}, # from experiment
113-
'mtz_info': {}, # from experiment
114-
'map_info': {}, # from experiment
115-
'event_file': {}, # x
117+
'apo_file': {},
118+
'apo_solv_file': {},
119+
'apo_desolv_file': {},
120+
'bound_file': {},
121+
'pdb_info': {},
122+
'cif_info': {},
123+
'mtz_info': {},
124+
'map_info': {},
125+
'event_file': {},
116126
'diff_file': {},
117127
'sigmaa_file': {},
128+
'ligand_pdb': {},
118129
},
119130
'molecules': {
120131
'sdf_files': {},
@@ -133,36 +144,6 @@ class ArchiveFile:
133144
_METADATA_FILE = 'metadata.csv'
134145

135146

136-
def _add_file_to_zip(ziparchive, param, filepath):
137-
"""Add the requested file to the zip archive.
138-
139-
Args:
140-
ziparchive: Handle of zip archive
141-
param: parameter of filelist
142-
filepath: filepath from record
143-
144-
Returns:
145-
[boolean]: [True of record added]
146-
"""
147-
logger.debug('+_add_file_to_zip: %s, %s', param, filepath)
148-
if not filepath:
149-
# Odd - assume success
150-
logger.error('No filepath value')
151-
return True
152-
153-
fullpath = os.path.join(settings.MEDIA_ROOT, filepath)
154-
cleaned_filename = clean_filename(filepath)
155-
archive_path = os.path.join(_ZIP_FILEPATHS[param], cleaned_filename)
156-
if os.path.isfile(fullpath):
157-
ziparchive.write(fullpath, archive_path)
158-
return True
159-
else:
160-
logger.warning('filepath "%s" is not a file', filepath)
161-
_add_empty_file(ziparchive, archive_path)
162-
163-
return False
164-
165-
166147
def _is_mol_or_sdf(path):
167148
"""Returns True if the file and path look like a MOL or SDF file.
168149
It does this by simply checking the file's extension.
@@ -220,6 +201,27 @@ def _read_and_patch_molecule_name(path, molecule_name=None):
220201
return content
221202

222203

204+
def _patch_molecule_name(site_observation):
    """Return the ligand MOL/SDF text with a blank title line filled in.

    Alternative to _read_and_patch_molecule_name, which operates on files.
    The ligand_mol_file attribute of the site observation holds the MOL/SDF
    content as text, so it is patched in memory and returned as a string.

    Args:
        site_observation: object providing ``ligand_mol_file`` (MOL/SDF
            content as text) and ``longcode`` (used as the molecule name).

    Returns:
        str: the content unchanged if its first line is non-blank,
        otherwise the content with its first line replaced by ``longcode``.
    """
    logger.debug('Patching MOL/SDF of "%s"', site_observation)

    # Split the stored text into lines and check the first one,
    # setting it to the molecule name if it's blank.
    lines = site_observation.ligand_mol_file.split('\n')
    if not lines[0].strip():
        # Spelled 'longcode' to match how this module reads the attribute
        # from site observations when building archive paths.
        # NOTE(review): confirm the spelling against the SiteObservation model.
        lines[0] = site_observation.longcode
    return '\n'.join(lines)
223+
224+
223225
def _add_file_to_zip_aligned(ziparchive, code, archive_file):
224226
"""Add the requested file to the zip archive.
225227
@@ -253,10 +255,11 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file):
253255
ziparchive.write(filepath, archive_file.archive_path)
254256
return True
255257
elif archive_file.site_observation:
256-
# NB! this bypasses _read_and_patch_molecule_name. problem?
257258
ziparchive.writestr(
258-
archive_file.archive_path, archive_file.site_observation.ligand_mol_file
259+
archive_file.archive_path,
260+
_patch_molecule_name(archive_file.site_observation),
259261
)
262+
return True
260263
else:
261264
logger.warning('filepath "%s" is not a file', filepath)
262265
_add_empty_file(ziparchive, archive_file.archive_path)
@@ -274,17 +277,14 @@ def _add_file_to_sdf(combined_sdf_file, archive_file):
274277
Returns:
275278
[boolean]: [True if record added]
276279
"""
277-
media_root = settings.MEDIA_ROOT
278-
279280
if not archive_file.path:
280281
# Odd - assume success
281282
logger.error('No filepath value')
282283
return True
283284

284-
fullpath = os.path.join(media_root, archive_file.path)
285-
if os.path.isfile(fullpath):
285+
if archive_file.path and archive_file.path != 'None':
286286
with open(combined_sdf_file, 'a', encoding='utf-8') as f_out:
287-
patched_sdf_content = _read_and_patch_molecule_name(fullpath)
287+
patched_sdf_content = _patch_molecule_name(archive_file.site_observation)
288288
f_out.write(patched_sdf_content)
289289
return True
290290
else:
@@ -304,8 +304,9 @@ def _protein_files_zip(zip_contents, ziparchive, error_file):
304304

305305
for prot, prot_file in files.items():
306306
for f in prot_file:
307+
# f is an ArchiveFile object
307308
if not _add_file_to_zip_aligned(ziparchive, prot, f):
308-
error_file.write(f'{param},{prot},{f}\n')
309+
error_file.write(f'{param},{prot},{f.archive_path}\n')
309310
prot_errors += 1
310311

311312
return prot_errors
@@ -499,7 +500,7 @@ def _extra_files_zip(ziparchive, target):
499500
logger.info('Processed %s extra files', num_processed)
500501

501502

502-
def _yaml_files_zip(ziparchive, target):
503+
def _yaml_files_zip(ziparchive, target, transforms_requested: bool = False) -> None:
503504
"""Add all yaml files (except transforms) from upload to ziparchive"""
504505

505506
for experiment_upload in target.experimentupload_set.order_by('commit_datetime'):
@@ -536,6 +537,9 @@ def _yaml_files_zip(ziparchive, target):
536537

537538
for file in yaml_files:
538539
logger.info('Adding yaml file "%s"...', file)
540+
if not transforms_requested and file.name == 'neighbourhoods.yaml':
541+
# don't add this file if transforms are not requested
542+
continue
539543
ziparchive.write(file, str(Path(archive_path).joinpath(file.name)))
540544

541545

@@ -659,16 +663,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host
659663
if zip_contents['molecules']['smiles_info']:
660664
_smiles_files_zip(zip_contents, ziparchive, download_path)
661665

662-
# Add the metadata file from the target
663-
# if zip_contents['metadata_info'] and not _add_file_to_zip(
664-
# ziparchive, 'metadata_info', zip_contents['metadata_info']
665-
# ):
666-
# error_file.write(
667-
# f"metadata_info,{target},{zip_contents['metadata_info']}\n"
668-
# )
669-
# errors += 1
670-
# logger.warning('After _add_file_to_zip() errors=%s', errors)
671-
666+
# compile and add metadata.csv
672667
if zip_contents['metadata_info']:
673668
_metadate_file_zip(ziparchive, target)
674669

@@ -677,7 +672,9 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host
677672

678673
_extra_files_zip(ziparchive, target)
679674

680-
_yaml_files_zip(ziparchive, target)
675+
_yaml_files_zip(
676+
ziparchive, target, transforms_requested=zip_contents['trans_matrix_info']
677+
)
681678

682679
_document_file_zip(ziparchive, download_path, original_search, host)
683680

@@ -739,29 +736,29 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
739736
afile = []
740737
for f in model_attr:
741738
# here the model_attr is already stringified
739+
apath = Path('crystallographic_files').joinpath(so.code)
742740
if model_attr and model_attr != 'None':
743741
archive_path = str(
744-
Path('crystallographic_files')
745-
.joinpath(so.code)
746-
.joinpath(
742+
apath.joinpath(
747743
Path(f)
748744
.parts[-1]
749745
.replace(so.experiment.code, so.code)
750746
)
751747
)
752748
else:
753-
archive_path = param
749+
archive_path = str(apath.joinpath(param))
754750
afile.append(ArchiveFile(path=f, archive_path=archive_path))
755751

756752
elif param in [
757753
'bound_file',
754+
'apo_file',
758755
'apo_solv_file',
759756
'apo_desolv_file',
760-
'apo_file',
761757
'sigmaa_file',
762758
'event_file',
763759
'artefacts_file',
764760
'pdb_header_file',
761+
'ligand_pdb',
765762
'diff_file',
766763
]:
767764
# siteobservation object
@@ -770,18 +767,17 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
770767
logger.debug(
771768
'Adding param to zip: %s, value: %s', param, model_attr
772769
)
770+
apath = Path('aligned_files').joinpath(so.code)
773771
if model_attr and model_attr != 'None':
774772
archive_path = str(
775-
Path('aligned_files')
776-
.joinpath(so.code)
777-
.joinpath(
773+
apath.joinpath(
778774
Path(model_attr.name)
779775
.parts[-1]
780776
.replace(so.longcode, so.code)
781777
)
782778
)
783779
else:
784-
archive_path = param
780+
archive_path = str(apath.joinpath(param))
785781

786782
afile = [
787783
ArchiveFile(
@@ -795,11 +791,8 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
795791

796792
zip_contents['proteins'][param][so.code] = afile
797793

798-
if other_params['single_sdf_file'] is True:
799-
zip_contents['molecules']['single_sdf_file'] = True
800-
801-
if other_params['sdf_info'] is True:
802-
zip_contents['molecules']['sdf_info'] = True
794+
zip_contents['molecules']['single_sdf_file'] = other_params['single_sdf_file']
795+
zip_contents['molecules']['sdf_info'] = other_params['sdf_info']
803796

804797
# sdf information is held as a file on the Molecule record.
805798
if other_params['sdf_info'] or other_params['single_sdf_file']:
@@ -866,55 +859,35 @@ def get_download_params(request):
866859
Returns:
867860
protein_params, other_params
868861
"""
869-
protein_param_flags = [
870-
'apo_file',
871-
'bound_file',
872-
'cif_info',
873-
'mtz_info',
874-
'map_info',
875-
'event_file',
876-
'sigmaa_file',
877-
'diff_file',
878-
]
879-
880-
other_param_flags = [
881-
'sdf_info',
882-
'single_sdf_file',
883-
'metadata_info',
884-
'smiles_info',
885-
'trans_matrix_info',
886-
]
887-
888-
# protein_params = {'pdb_info': request.data['pdb_info'],
889-
# 'bound_info': request.data['bound_info'],
890-
# 'cif_info': request.data['cif_info'],
891-
# 'mtz_info': request.data['mtz_info'],
892-
# 'diff_info': request.data['diff_info'],
893-
# 'event_info': request.data['event_info'],
894-
# 'sigmaa_info': request.data['sigmaa_info'],
895-
# 'trans_matrix_info':
896-
# request.data['trans_matrix_info']}
897-
protein_params = {}
898-
for param in protein_param_flags:
899-
protein_params[param] = False
900-
if param in request.data and request.data[param] in [True, 'true']:
901-
protein_params[param] = True
902-
903-
# other_params = {'sdf_info': request.data['sdf_info'],
904-
# 'single_sdf_file': request.data['single_sdf_file'],
905-
# 'metadata_info': request.data['metadata_info'],
906-
# 'smiles_info': request.data['smiles_info']}
907-
other_params = {}
908-
for param in other_param_flags:
909-
other_params[param] = False
910-
if param in request.data and request.data[param] in [True, 'true']:
911-
other_params[param] = True
912-
913-
static_link = False
914-
if 'static_link' in request.data and (
915-
request.data['static_link'] is True or request.data['static_link'] == 'true'
916-
):
917-
static_link = True
862+
863+
serializer = DownloadStructuresSerializer(data=request.data)
864+
serializer.is_valid()
865+
logger.debug('serializer data: %s', serializer.validated_data)
866+
867+
protein_params = {
868+
'pdb_info': serializer.validated_data['pdb_info'],
869+
'apo_file': serializer.validated_data['all_aligned_structures'],
870+
'bound_file': serializer.validated_data['all_aligned_structures'],
871+
'apo_solv_file': serializer.validated_data['all_aligned_structures'],
872+
'apo_desolv_file': serializer.validated_data['all_aligned_structures'],
873+
'ligand_pdb': serializer.validated_data['all_aligned_structures'],
874+
'cif_info': serializer.validated_data['cif_info'],
875+
'mtz_info': serializer.validated_data['mtz_info'],
876+
'map_info': serializer.validated_data['map_info'],
877+
'event_file': serializer.validated_data['event_file'],
878+
'sigmaa_file': serializer.validated_data['sigmaa_file'],
879+
'diff_file': serializer.validated_data['diff_file'],
880+
}
881+
882+
other_params = {
883+
'sdf_info': serializer.validated_data['all_aligned_structures'],
884+
'single_sdf_file': serializer.validated_data['single_sdf_file'],
885+
'metadata_info': serializer.validated_data['metadata_info'],
886+
'smiles_info': serializer.validated_data['all_aligned_structures'],
887+
'trans_matrix_info': serializer.validated_data['trans_matrix_info'],
888+
}
889+
890+
static_link = serializer.validated_data['static_link']
918891

919892
return protein_params, other_params, static_link
920893

0 commit comments

Comments
 (0)