31
31
)
32
32
from viewer .utils import clean_filename
33
33
34
+ from .serializers import DownloadStructuresSerializer
35
+
34
36
logger = logging .getLogger (__name__ )
35
37
36
38
# Length of time to keep records of dynamic links.
41
43
# the protein code subdirectory of the aligned directory
42
44
# (as for the target upload).
43
45
_ZIP_FILEPATHS = {
44
- 'apo_file' : ('aligned' ),
45
- 'bound_file' : ('aligned' ),
46
- 'cif_info' : ('aligned' ),
47
- 'mtz_info' : ('aligned' ),
48
- 'map_info' : ('aligned' ),
49
- 'sigmaa_file' : ('aligned' ),
50
- 'diff_file' : ('aligned' ),
51
- 'event_file' : ('aligned' ),
52
- 'sdf_info' : ('aligned' ),
46
+ 'apo_file' : ('aligned' ), # SiteObservation: apo_file
47
+ 'apo_solv_file' : ('aligned' ), # SiteObservation: apo_solv_file
48
+ 'apo_desolv_file' : ('aligned' ), # SiteObservation: apo_desolv_file
49
+ 'bound_file' : ('aligned' ), # SiteObservation: bound_file
50
+ 'sdf_info' : ('aligned' ), # SiteObservation: ligand_mol_file (indirectly)
51
+ 'ligand_pdb' : ('aligned' ), # SiteObservation: ligand_pdb
52
+ 'smiles_info' : ('' ), # SiteObservation: smiles_info (indirectly)
53
+ # those above are all controlled by serializer's all_aligned_structures flag
54
+ 'sigmaa_file' : ('aligned' ), # SiteObservation: sigmaa_file
55
+ 'diff_file' : ('aligned' ), # SiteObservation: diff_file
56
+ 'event_file' : ('aligned' ), # SiteObservation: event_file
57
+ 'pdb_info' : ('aligned' ), # Experiment: pdb_info
58
+ 'cif_info' : ('aligned' ), # Experiment: cif_info
59
+ 'mtz_info' : ('aligned' ), # Experiment: mtz_info
60
+ 'map_info' : ('aligned' ), # Experiment: map_info (multiple files)
53
61
'single_sdf_file' : ('' ),
54
62
'metadata_info' : ('' ),
55
- 'smiles_info' : ('' ),
56
63
'trans_matrix_info' : ('' ),
57
64
'extra_files' : ('extra_files' ),
58
65
'readme' : ('' ),
@@ -107,14 +114,18 @@ class ArchiveFile:
107
114
# NB you may need to add a version number to this at some point...
108
115
zip_template = {
109
116
'proteins' : {
110
- 'apo_file' : {}, # from experiment
111
- 'bound_file' : {}, # x
112
- 'cif_info' : {}, # from experiment
113
- 'mtz_info' : {}, # from experiment
114
- 'map_info' : {}, # from experiment
115
- 'event_file' : {}, # x
117
+ 'apo_file' : {},
118
+ 'apo_solv_file' : {},
119
+ 'apo_desolv_file' : {},
120
+ 'bound_file' : {},
121
+ 'pdb_info' : {},
122
+ 'cif_info' : {},
123
+ 'mtz_info' : {},
124
+ 'map_info' : {},
125
+ 'event_file' : {},
116
126
'diff_file' : {},
117
127
'sigmaa_file' : {},
128
+ 'ligand_pdb' : {},
118
129
},
119
130
'molecules' : {
120
131
'sdf_files' : {},
@@ -133,36 +144,6 @@ class ArchiveFile:
133
144
_METADATA_FILE = 'metadata.csv'
134
145
135
146
136
- def _add_file_to_zip (ziparchive , param , filepath ):
137
- """Add the requested file to the zip archive.
138
-
139
- Args:
140
- ziparchive: Handle of zip archive
141
- param: parameter of filelist
142
- filepath: filepath from record
143
-
144
- Returns:
145
- [boolean]: [True of record added]
146
- """
147
- logger .debug ('+_add_file_to_zip: %s, %s' , param , filepath )
148
- if not filepath :
149
- # Odd - assume success
150
- logger .error ('No filepath value' )
151
- return True
152
-
153
- fullpath = os .path .join (settings .MEDIA_ROOT , filepath )
154
- cleaned_filename = clean_filename (filepath )
155
- archive_path = os .path .join (_ZIP_FILEPATHS [param ], cleaned_filename )
156
- if os .path .isfile (fullpath ):
157
- ziparchive .write (fullpath , archive_path )
158
- return True
159
- else :
160
- logger .warning ('filepath "%s" is not a file' , filepath )
161
- _add_empty_file (ziparchive , archive_path )
162
-
163
- return False
164
-
165
-
166
147
def _is_mol_or_sdf (path ):
167
148
"""Returns True if the file and path look like a MOL or SDF file.
168
149
It does this by simply checking the file's extension.
@@ -220,6 +201,27 @@ def _read_and_patch_molecule_name(path, molecule_name=None):
220
201
return content
221
202
222
203
204
def _patch_molecule_name(site_observation):
    """Return the site observation's MOL/SDF text with a molecule name set.

    Works on the text held in ``site_observation.ligand_mol_file`` rather
    than on a file on disk, which is what distinguishes it from
    ``_read_and_patch_molecule_name``. When the first line of that text is
    blank it is replaced by ``site_observation.long_code``; every other
    line is returned unchanged.

    Args:
        site_observation: object exposing ``ligand_mol_file`` (MOL/SDF
            content as text) and ``long_code``.

    Returns:
        str: the (possibly patched) content, or an empty string when the
        observation carries no MOL/SDF text.
    """
    logger.debug('Patching MOL/SDF of "%s"', site_observation)

    content = site_observation.ligand_mol_file
    if not content:
        # Callers hand the result straight to writestr()/write(), which
        # need a string; a missing value must not surface as an
        # AttributeError half way through building the archive.
        logger.warning('No MOL/SDF content for "%s"', site_observation)
        return ''

    # Only the first line can need patching, so split it off on its own
    # instead of splitting and re-joining the whole document.
    first_line, newline, remainder = content.partition('\n')
    if first_line.strip():
        return content
    return f'{site_observation.long_code}{newline}{remainder}'
223
+
224
+
223
225
def _add_file_to_zip_aligned (ziparchive , code , archive_file ):
224
226
"""Add the requested file to the zip archive.
225
227
@@ -253,10 +255,11 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file):
253
255
ziparchive .write (filepath , archive_file .archive_path )
254
256
return True
255
257
elif archive_file .site_observation :
256
- # NB! this bypasses _read_and_patch_molecule_name. problem?
257
258
ziparchive .writestr (
258
- archive_file .archive_path , archive_file .site_observation .ligand_mol_file
259
+ archive_file .archive_path ,
260
+ _patch_molecule_name (archive_file .site_observation ),
259
261
)
262
+ return True
260
263
else :
261
264
logger .warning ('filepath "%s" is not a file' , filepath )
262
265
_add_empty_file (ziparchive , archive_file .archive_path )
@@ -274,17 +277,14 @@ def _add_file_to_sdf(combined_sdf_file, archive_file):
274
277
Returns:
275
278
[boolean]: [True if record added]
276
279
"""
277
- media_root = settings .MEDIA_ROOT
278
-
279
280
if not archive_file .path :
280
281
# Odd - assume success
281
282
logger .error ('No filepath value' )
282
283
return True
283
284
284
- fullpath = os .path .join (media_root , archive_file .path )
285
- if os .path .isfile (fullpath ):
285
+ if archive_file .path and archive_file .path != 'None' :
286
286
with open (combined_sdf_file , 'a' , encoding = 'utf-8' ) as f_out :
287
- patched_sdf_content = _read_and_patch_molecule_name ( fullpath )
287
+ patched_sdf_content = _patch_molecule_name ( archive_file . site_observation )
288
288
f_out .write (patched_sdf_content )
289
289
return True
290
290
else :
@@ -304,8 +304,9 @@ def _protein_files_zip(zip_contents, ziparchive, error_file):
304
304
305
305
for prot , prot_file in files .items ():
306
306
for f in prot_file :
307
+ # memo to self: f is ArchiveFile object
307
308
if not _add_file_to_zip_aligned (ziparchive , prot , f ):
308
- error_file .write (f'{ param } ,{ prot } ,{ f } \n ' )
309
+ error_file .write (f'{ param } ,{ prot } ,{ f . archive_path } \n ' )
309
310
prot_errors += 1
310
311
311
312
return prot_errors
@@ -499,7 +500,7 @@ def _extra_files_zip(ziparchive, target):
499
500
logger .info ('Processed %s extra files' , num_processed )
500
501
501
502
502
- def _yaml_files_zip (ziparchive , target ) :
503
+ def _yaml_files_zip (ziparchive , target , transforms_requested : bool = False ) -> None :
503
504
"""Add all yaml files (except transforms) from upload to ziparchive; neighbourhoods.yaml is added only when transforms_requested is true"""
504
505
505
506
for experiment_upload in target .experimentupload_set .order_by ('commit_datetime' ):
@@ -536,6 +537,9 @@ def _yaml_files_zip(ziparchive, target):
536
537
537
538
for file in yaml_files :
538
539
logger .info ('Adding yaml file "%s"...' , file )
540
+ if not transforms_requested and file .name == 'neighbourhoods.yaml' :
541
+ # don't add this file if transforms are not requested
542
+ continue
539
543
ziparchive .write (file , str (Path (archive_path ).joinpath (file .name )))
540
544
541
545
@@ -659,16 +663,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host
659
663
if zip_contents ['molecules' ]['smiles_info' ]:
660
664
_smiles_files_zip (zip_contents , ziparchive , download_path )
661
665
662
- # Add the metadata file from the target
663
- # if zip_contents['metadata_info'] and not _add_file_to_zip(
664
- # ziparchive, 'metadata_info', zip_contents['metadata_info']
665
- # ):
666
- # error_file.write(
667
- # f"metadata_info,{target},{zip_contents['metadata_info']}\n"
668
- # )
669
- # errors += 1
670
- # logger.warning('After _add_file_to_zip() errors=%s', errors)
671
-
666
+ # compile and add metadata.csv
672
667
if zip_contents ['metadata_info' ]:
673
668
_metadate_file_zip (ziparchive , target )
674
669
@@ -677,7 +672,9 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host
677
672
678
673
_extra_files_zip (ziparchive , target )
679
674
680
- _yaml_files_zip (ziparchive , target )
675
+ _yaml_files_zip (
676
+ ziparchive , target , transforms_requested = zip_contents ['trans_matrix_info' ]
677
+ )
681
678
682
679
_document_file_zip (ziparchive , download_path , original_search , host )
683
680
@@ -739,29 +736,29 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
739
736
afile = []
740
737
for f in model_attr :
741
738
# here the model_attr is already stringified
739
+ apath = Path ('crystallographic_files' ).joinpath (so .code )
742
740
if model_attr and model_attr != 'None' :
743
741
archive_path = str (
744
- Path ('crystallographic_files' )
745
- .joinpath (so .code )
746
- .joinpath (
742
+ apath .joinpath (
747
743
Path (f )
748
744
.parts [- 1 ]
749
745
.replace (so .experiment .code , so .code )
750
746
)
751
747
)
752
748
else :
753
- archive_path = param
749
+ archive_path = str ( apath . joinpath ( param ))
754
750
afile .append (ArchiveFile (path = f , archive_path = archive_path ))
755
751
756
752
elif param in [
757
753
'bound_file' ,
754
+ 'apo_file' ,
758
755
'apo_solv_file' ,
759
756
'apo_desolv_file' ,
760
- 'apo_file' ,
761
757
'sigmaa_file' ,
762
758
'event_file' ,
763
759
'artefacts_file' ,
764
760
'pdb_header_file' ,
761
+ 'ligand_pdb' ,
765
762
'diff_file' ,
766
763
]:
767
764
# siteobservation object
@@ -770,18 +767,17 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
770
767
logger .debug (
771
768
'Adding param to zip: %s, value: %s' , param , model_attr
772
769
)
770
+ apath = Path ('aligned_files' ).joinpath (so .code )
773
771
if model_attr and model_attr != 'None' :
774
772
archive_path = str (
775
- Path ('aligned_files' )
776
- .joinpath (so .code )
777
- .joinpath (
773
+ apath .joinpath (
778
774
Path (model_attr .name )
779
775
.parts [- 1 ]
780
776
.replace (so .longcode , so .code )
781
777
)
782
778
)
783
779
else :
784
- archive_path = param
780
+ archive_path = str ( apath . joinpath ( param ))
785
781
786
782
afile = [
787
783
ArchiveFile (
@@ -795,11 +791,8 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
795
791
796
792
zip_contents ['proteins' ][param ][so .code ] = afile
797
793
798
- if other_params ['single_sdf_file' ] is True :
799
- zip_contents ['molecules' ]['single_sdf_file' ] = True
800
-
801
- if other_params ['sdf_info' ] is True :
802
- zip_contents ['molecules' ]['sdf_info' ] = True
794
+ zip_contents ['molecules' ]['single_sdf_file' ] = other_params ['single_sdf_file' ]
795
+ zip_contents ['molecules' ]['sdf_info' ] = other_params ['sdf_info' ]
803
796
804
797
# sdf information is held as a file on the Molecule record.
805
798
if other_params ['sdf_info' ] or other_params ['single_sdf_file' ]:
@@ -866,55 +859,35 @@ def get_download_params(request):
866
859
Returns:
867
860
protein_params, other_params
868
861
"""
869
- protein_param_flags = [
870
- 'apo_file' ,
871
- 'bound_file' ,
872
- 'cif_info' ,
873
- 'mtz_info' ,
874
- 'map_info' ,
875
- 'event_file' ,
876
- 'sigmaa_file' ,
877
- 'diff_file' ,
878
- ]
879
-
880
- other_param_flags = [
881
- 'sdf_info' ,
882
- 'single_sdf_file' ,
883
- 'metadata_info' ,
884
- 'smiles_info' ,
885
- 'trans_matrix_info' ,
886
- ]
887
-
888
- # protein_params = {'pdb_info': request.data['pdb_info'],
889
- # 'bound_info': request.data['bound_info'],
890
- # 'cif_info': request.data['cif_info'],
891
- # 'mtz_info': request.data['mtz_info'],
892
- # 'diff_info': request.data['diff_info'],
893
- # 'event_info': request.data['event_info'],
894
- # 'sigmaa_info': request.data['sigmaa_info'],
895
- # 'trans_matrix_info':
896
- # request.data['trans_matrix_info']}
897
- protein_params = {}
898
- for param in protein_param_flags :
899
- protein_params [param ] = False
900
- if param in request .data and request .data [param ] in [True , 'true' ]:
901
- protein_params [param ] = True
902
-
903
- # other_params = {'sdf_info': request.data['sdf_info'],
904
- # 'single_sdf_file': request.data['single_sdf_file'],
905
- # 'metadata_info': request.data['metadata_info'],
906
- # 'smiles_info': request.data['smiles_info']}
907
- other_params = {}
908
- for param in other_param_flags :
909
- other_params [param ] = False
910
- if param in request .data and request .data [param ] in [True , 'true' ]:
911
- other_params [param ] = True
912
-
913
- static_link = False
914
- if 'static_link' in request .data and (
915
- request .data ['static_link' ] is True or request .data ['static_link' ] == 'true'
916
- ):
917
- static_link = True
862
+
863
+ serializer = DownloadStructuresSerializer (data = request .data )
864
+ serializer .is_valid ()
865
+ logger .debug ('serializer data: %s' , serializer .validated_data )
866
+
867
+ protein_params = {
868
+ 'pdb_info' : serializer .validated_data ['pdb_info' ],
869
+ 'apo_file' : serializer .validated_data ['all_aligned_structures' ],
870
+ 'bound_file' : serializer .validated_data ['all_aligned_structures' ],
871
+ 'apo_solv_file' : serializer .validated_data ['all_aligned_structures' ],
872
+ 'apo_desolv_file' : serializer .validated_data ['all_aligned_structures' ],
873
+ 'ligand_pdb' : serializer .validated_data ['all_aligned_structures' ],
874
+ 'cif_info' : serializer .validated_data ['cif_info' ],
875
+ 'mtz_info' : serializer .validated_data ['mtz_info' ],
876
+ 'map_info' : serializer .validated_data ['map_info' ],
877
+ 'event_file' : serializer .validated_data ['event_file' ],
878
+ 'sigmaa_file' : serializer .validated_data ['sigmaa_file' ],
879
+ 'diff_file' : serializer .validated_data ['diff_file' ],
880
+ }
881
+
882
+ other_params = {
883
+ 'sdf_info' : serializer .validated_data ['all_aligned_structures' ],
884
+ 'single_sdf_file' : serializer .validated_data ['single_sdf_file' ],
885
+ 'metadata_info' : serializer .validated_data ['metadata_info' ],
886
+ 'smiles_info' : serializer .validated_data ['all_aligned_structures' ],
887
+ 'trans_matrix_info' : serializer .validated_data ['trans_matrix_info' ],
888
+ }
889
+
890
+ static_link = serializer .validated_data ['static_link' ]
918
891
919
892
return protein_params , other_params , static_link
920
893
0 commit comments