@@ -102,10 +102,9 @@ def save_pdb_zip(
102
102
test_pdb_code = f'{ code } #{ rand_str } '
103
103
zfile_hashvals [code ] = rand_str
104
104
105
- fn = test_pdb_code + '.pdb'
106
-
105
+ fn = f'{ test_pdb_code } .pdb'
107
106
pdb_path = default_storage .save (
108
- 'tmp/' + fn , ContentFile (zf .read (filename ))
107
+ f 'tmp/{ fn } ' , ContentFile (zf .read (filename ))
109
108
)
110
109
zfile [test_pdb_code ] = pdb_path
111
110
@@ -148,7 +147,7 @@ def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservatio
148
147
pdb_fp = zfile [pdb_code ]
149
148
pdb_fn = zfile [pdb_code ].split ('/' )[- 1 ]
150
149
151
- new_filename = settings .MEDIA_ROOT + ' pdbs/' + pdb_fn
150
+ new_filename = f' { settings .MEDIA_ROOT } pdbs/{ pdb_fn } '
152
151
old_filename = settings .MEDIA_ROOT + pdb_fp
153
152
shutil .copy (old_filename , new_filename )
154
153
@@ -162,13 +161,13 @@ def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservatio
162
161
if created :
163
162
target_obj = Target .objects .get (title = target )
164
163
site_obvs .target_id = target_obj
165
- site_obvs .pdb_info = 'pdbs/' + pdb_fn
164
+ site_obvs .pdb_info = f 'pdbs/{ pdb_fn } '
166
165
site_obvs .save ()
167
166
168
167
return site_obvs
169
168
170
169
# use zfile object for pdb files uploaded in zip
171
- def get_prot (
170
+ def get_site_observation (
172
171
self , mol , target , compound_set , zfile , zfile_hashvals
173
172
) -> Optional [SiteObservation ]:
174
173
# The returned SiteObservation object may be None
@@ -185,32 +184,45 @@ def get_prot(
185
184
zfile_hashvals = zfile_hashvals ,
186
185
)
187
186
else :
188
- name = compound_set .target .title + '-' + pdb_fn
189
-
190
- # try to get single exact match
191
- # name.split(':')[0].split('_')[0]
187
+ name = f'{ compound_set .target .title } -{ pdb_fn } '
192
188
try :
193
189
site_obvs = SiteObservation .objects .get (code__contains = name )
194
190
except SiteObservation .DoesNotExist :
195
- # SiteObservation lookup failed.
191
+ # Initial SiteObservation lookup failed.
196
192
logger .warning (
197
193
'Failed to get SiteObservation object (target=%s name=%s)' ,
198
194
compound_set .target .title ,
199
195
name ,
200
196
)
201
- # Try an alternative .
202
- # If all else fails then the prot_obj will be 'None'
197
+ # Try alternatives .
198
+ # If all else fails then the site_obvs will be 'None'
203
199
qs = SiteObservation .objects .filter (code__contains = name )
204
- if not qs .exists ():
205
- qs = SiteObservation .objects .filter (
206
- code__contains = name .split (':' )[0 ].split ('_' )[0 ]
200
+ if qs .exists ():
201
+ logger .info (
202
+ 'Found SiteObservation containing name=%s qs=%s' ,
203
+ name ,
204
+ qs ,
207
205
)
206
+ else :
207
+ alt_name = name .split (':' )[0 ].split ('_' )[0 ]
208
+ qs = SiteObservation .objects .filter (code__contains = alt_name )
209
+ if qs .exists ():
210
+ logger .info (
211
+ 'Found SiteObservation containing alternative name=%s qs=%s' ,
212
+ alt_name ,
213
+ qs ,
214
+ )
208
215
if qs .count () > 0 :
216
+ logger .debug (
217
+ 'Found alternative (target=%s name=%s)' ,
218
+ compound_set .target .title ,
219
+ name ,
220
+ )
209
221
site_obvs = qs [0 ]
210
222
211
223
if not site_obvs :
212
224
logger .warning (
213
- 'No SiteObservation object (target=%s pdb_fn=%s)' ,
225
+ 'No SiteObservation found (target=%s pdb_fn=%s)' ,
214
226
compound_set .target .title ,
215
227
pdb_fn ,
216
228
)
@@ -226,12 +238,7 @@ def create_mol(self, inchi, long_inchi=None, name=None) -> Compound:
226
238
cpd = Compound .objects .filter (inchi = inchi )
227
239
sanitized_mol = Chem .MolFromInchi (inchi , sanitize = True )
228
240
229
- if len (cpd ) != 0 :
230
- new_mol = cpd [0 ]
231
- elif len (cpd ) == 0 :
232
- # add molecule and return the object
233
- new_mol = Compound ()
234
-
241
+ new_mol = cpd [0 ] if len (cpd ) != 0 else Compound ()
235
242
new_mol .smiles = Chem .MolToSmiles (sanitized_mol )
236
243
new_mol .inchi = inchi
237
244
if long_inchi :
@@ -258,7 +265,7 @@ def create_mol(self, inchi, long_inchi=None, name=None) -> Compound:
258
265
259
266
return new_mol
260
267
261
- def set_props (self , cpd , props , compound_set ) -> ScoreDescription :
268
+ def set_props (self , cpd , props , compound_set ) -> List [ ScoreDescription ] :
262
269
if 'ref_mols' and 'ref_pdb' not in list (props .keys ()):
263
270
raise Exception ('ref_mols and ref_pdb not set!' )
264
271
set_obj = ScoreDescription .objects .filter (computed_set = compound_set )
@@ -337,88 +344,94 @@ def set_mol(
337
344
338
345
_ = mol .GetProp ('original SMILES' )
339
346
340
- # Try to get the protein object .
347
+ # Try to get the SiteObservation .
341
348
# This may fail.
342
- prot = self .get_prot (
349
+ prot = self .get_site_observation (
343
350
mol , target , compound_set , zfile , zfile_hashvals = zfile_hashvals
344
351
)
345
352
if not prot :
346
- logger .warning ('get_prot() failed to return a Protein object' )
353
+ logger .warning ('get_prot() failed to return a SiteObservation object' )
347
354
348
- # need to add Compound before saving
349
- # see if anything exists already
350
- existing = ComputedMolecule .objects .filter (
355
+ # Need a ComputedMolecule before saving.
356
+ # Check if anything exists already...
357
+ existing_computed_molecules = ComputedMolecule .objects .filter (
351
358
name = name , smiles = smiles , computed_set = compound_set
352
359
)
353
360
354
- if len (existing ) == 1 :
355
- computed_molecule : ComputedMolecule = existing [0 ]
356
- elif len (existing ) > 1 :
357
- for exist in existing :
361
+ computed_molecule : Optional [ComputedMolecule ] = None
362
+ if len (existing_computed_molecules ) == 1 :
363
+ logger .info (
364
+ 'Using existing ComputedMolecule %s' , existing_computed_molecules [0 ]
365
+ )
366
+ computed_molecule = existing_computed_molecules [0 ]
367
+ elif len (existing_computed_molecules ) > 1 :
368
+ logger .warning ('Deleting existing ComputedMolecules (more than 1 found' )
369
+ for exist in existing_computed_molecules :
370
+ logger .info ('Deleting ComputedMolecule %s' , exist )
358
371
exist .delete ()
359
372
computed_molecule = ComputedMolecule ()
360
- elif len (existing ) == 0 :
373
+ if not computed_molecule :
374
+ logger .info ('Creating new ComputedMolecule' )
361
375
computed_molecule = ComputedMolecule ()
362
376
377
+ assert computed_molecule
363
378
computed_molecule .compound = ref_cpd
364
379
computed_molecule .computed_set = compound_set
365
380
computed_molecule .sdf_info = mol_block
366
381
computed_molecule .name = name
367
382
computed_molecule .smiles = smiles
383
+ # Extract possible reference URL and Rationale
384
+ computed_molecule .ref_url = (
385
+ mol .GetProp ('ref_url' ) if mol .HasProp ('ref_url' ) else None
386
+ )
387
+ computed_molecule .rationale = (
388
+ mol .GetProp ('rationale' ) if mol .HasProp ('rationale' ) else None
389
+ )
368
390
# To avoid the error
369
391
# needs to have a value for field "id"
370
392
# before this many-to-many relationship can be used.
371
393
# We must save this ComputedMolecule before adding inspirations
372
394
computed_molecule .save ()
373
395
for insp_frag in insp_frags :
374
396
computed_molecule .computed_inspirations .add (insp_frag )
397
+ # Done
375
398
computed_molecule .save ()
376
399
377
400
return computed_molecule
378
401
379
402
def get_submission_info(self, description_mol) -> ComputedSetSubmitter:
    """Return the ComputedSetSubmitter described by the description molecule.

    The ``submitter_name``, ``method``, ``submitter_email``,
    ``submitter_institution`` and ``generation_date`` properties are read
    from *description_mol* and used to fetch the matching record, which is
    created when no match exists.

    ``generation_date`` is split on '-' and its first three fields are
    interpreted as year, month and day.
    """
    date_fields = description_mol.GetProp('generation_date').split('-')

    # The dictionary entries are evaluated top to bottom, so the properties
    # are read (and the date fields converted) in the same order as they
    # would be when written directly as keyword arguments.
    submitter_fields = {
        'name': description_mol.GetProp('submitter_name'),
        'method': description_mol.GetProp('method'),
        'email': description_mol.GetProp('submitter_email'),
        'institution': description_mol.GetProp('submitter_institution'),
        'generation_date': datetime.date(
            int(date_fields[0]), int(date_fields[1]), int(date_fields[2])
        ),
    }

    # get_or_create() hands back an (object, created) pair; only the object
    # is of interest to the caller.
    submitter, _created = ComputedSetSubmitter.objects.get_or_create(
        **submitter_fields
    )
    return submitter
392
412
def process_mol(
    self, mol, target, compound_set, filename, zfile=None, zfile_hashvals=None
) -> List[ScoreDescription]:
    """Store one molecule and record its properties against the set.

    The molecule is first passed to ``set_mol``. Its result, together with
    the molecule's full property dictionary, is then passed to
    ``set_props``, whose return value is handed back unchanged.
    """
    stored_molecule = self.set_mol(
        mol, target, compound_set, filename, zfile, zfile_hashvals
    )
    # The properties are read only after set_mol() has completed.
    return self.set_props(stored_molecule, mol.GetPropsAsDict(), compound_set)
401
- def set_descriptions ( self , filename , computed_set : ComputedSet ) -> List [str ]:
419
+ def set_descriptions (
420
+ self , filename , computed_set : ComputedSet
421
+ ) -> List [Chem . rdchem . Mol ]:
402
422
suppl = Chem .SDMolSupplier (str (filename ))
403
423
description_mol = suppl [0 ]
404
424
405
- mols = []
406
-
407
- for i in range (1 , len (suppl )):
408
- mols .append (suppl [i ])
409
-
425
+ mols = [suppl [i ] for i in range (1 , len (suppl ))]
410
426
descriptions_needed = list (
411
- set (
412
- [
413
- item
414
- for sublist in [list (m .GetPropsAsDict ().keys ()) for m in mols ]
415
- for item in sublist
416
- ]
417
- )
427
+ {
428
+ item
429
+ for sublist in [list (m .GetPropsAsDict ().keys ()) for m in mols ]
430
+ for item in sublist
431
+ }
418
432
)
419
433
420
434
submitter = self .get_submission_info (description_mol )
421
-
422
435
description_dict = description_mol .GetPropsAsDict ()
423
436
version = description_mol .GetProp ('_Name' )
424
437
computed_set .spec_version = version .split ('_' )[- 1 ]
@@ -472,6 +485,7 @@ def task(self) -> ComputedSet:
472
485
f' (unique_name="{ unique_name } " len_existing={ len_existing } )'
473
486
)
474
487
else :
488
+ logger .info ('Creating new ComputedSet' )
475
489
computed_set = ComputedSet ()
476
490
477
491
text_scores = TextScoreValues .objects .filter (score__computed_set = computed_set )
@@ -498,15 +512,17 @@ def task(self) -> ComputedSet:
498
512
# Here the ComputedSet owner will take on a default (anonymous) value.
499
513
assert settings .AUTHENTICATE_UPLOAD is False
500
514
computed_set .save ()
515
+ logger .info ('%s' , computed_set )
501
516
502
517
# set descriptions and get all other mols back
503
518
mols_to_process = self .set_descriptions (
504
519
filename = sdf_filename , computed_set = computed_set
505
520
)
506
521
507
522
# process every other mol
523
+ logger .info ('%s mols_to_process=%s' , computed_set , len (mols_to_process ))
508
524
for i in range (len (mols_to_process )):
509
- self .process_mol (
525
+ _ = self .process_mol (
510
526
mols_to_process [i ],
511
527
self .target ,
512
528
computed_set ,
@@ -515,17 +531,15 @@ def task(self) -> ComputedSet:
515
531
self .zfile_hashvals ,
516
532
)
517
533
518
- # check that molecules have been added to the compound set
519
- _ = ComputedMolecule .objects .filter (computed_set = computed_set )
520
-
521
534
# check compound set folder exists.
522
535
cmp_set_folder = os .path .join (settings .MEDIA_ROOT , 'compound_sets' )
523
536
if not os .path .isdir (cmp_set_folder ):
537
+ logger .info ('Making ComputedSet folder (%s)' , cmp_set_folder )
524
538
os .mkdir (cmp_set_folder )
525
539
526
540
# move and save the compound set
527
541
new_filename = (
528
- settings .MEDIA_ROOT + ' compound_sets/' + sdf_filename .split ('/' )[- 1 ]
542
+ f' { settings .MEDIA_ROOT } compound_sets/' + sdf_filename .split ('/' )[- 1 ]
529
543
)
530
544
shutil .copy (sdf_filename , new_filename )
531
545
# os.renames(sdf_filename, new_filename)
@@ -537,6 +551,7 @@ def task(self) -> ComputedSet:
537
551
# print(list(set(old_mols)))
538
552
539
553
for old_mol in old_mols :
554
+ logger .info ('Deleting old molecule %s' , old_mol )
540
555
old_mol .delete ()
541
556
542
557
return computed_set
0 commit comments