Skip to content

Commit 701f30a

Browse files
authored Dec 8, 2023
Merge pull request #460 from xchem/m2ms-1202-achristie
Adds support for ref-url and rationale in RHS uploaded ComputedMolecules
2 parents 8c1e967 + a883211 commit 701f30a

File tree

4 files changed

+118
-63
lines changed

4 files changed

+118
-63
lines changed
 

‎README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ for the viewer's model run the following: -
210210

211211
docker-compose -f docker-compose-migrate.yml up -d
212212

213-
# Then enter the backend container with: -
213+
Then enter the backend container with: -
214214

215215
Then from within the backend container make the migrations
216216
(in this case for the `viewer`)...

‎viewer/cset_upload.py

+77-62
Original file line number | Diff line number | Diff line change
@@ -102,10 +102,9 @@ def save_pdb_zip(
102102
test_pdb_code = f'{code}#{rand_str}'
103103
zfile_hashvals[code] = rand_str
104104

105-
fn = test_pdb_code + '.pdb'
106-
105+
fn = f'{test_pdb_code}.pdb'
107106
pdb_path = default_storage.save(
108-
'tmp/' + fn, ContentFile(zf.read(filename))
107+
f'tmp/{fn}', ContentFile(zf.read(filename))
109108
)
110109
zfile[test_pdb_code] = pdb_path
111110

@@ -148,7 +147,7 @@ def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservatio
148147
pdb_fp = zfile[pdb_code]
149148
pdb_fn = zfile[pdb_code].split('/')[-1]
150149

151-
new_filename = settings.MEDIA_ROOT + 'pdbs/' + pdb_fn
150+
new_filename = f'{settings.MEDIA_ROOT}pdbs/{pdb_fn}'
152151
old_filename = settings.MEDIA_ROOT + pdb_fp
153152
shutil.copy(old_filename, new_filename)
154153

@@ -162,13 +161,13 @@ def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservatio
162161
if created:
163162
target_obj = Target.objects.get(title=target)
164163
site_obvs.target_id = target_obj
165-
site_obvs.pdb_info = 'pdbs/' + pdb_fn
164+
site_obvs.pdb_info = f'pdbs/{pdb_fn}'
166165
site_obvs.save()
167166

168167
return site_obvs
169168

170169
# use zfile object for pdb files uploaded in zip
171-
def get_prot(
170+
def get_site_observation(
172171
self, mol, target, compound_set, zfile, zfile_hashvals
173172
) -> Optional[SiteObservation]:
174173
# The returned protein object may be None
@@ -185,32 +184,45 @@ def get_prot(
185184
zfile_hashvals=zfile_hashvals,
186185
)
187186
else:
188-
name = compound_set.target.title + '-' + pdb_fn
189-
190-
# try to get single exact match
191-
# name.split(':')[0].split('_')[0]
187+
name = f'{compound_set.target.title}-{pdb_fn}'
192188
try:
193189
site_obvs = SiteObservation.objects.get(code__contains=name)
194190
except SiteObservation.DoesNotExist:
195-
# SiteObservation lookup failed.
191+
# Initial SiteObservation lookup failed.
196192
logger.warning(
197193
'Failed to get SiteObservation object (target=%s name=%s)',
198194
compound_set.target.title,
199195
name,
200196
)
201-
# Try an alternative.
202-
# If all else fails then the prot_obj will be 'None'
197+
# Try alternatives.
198+
# If all else fails then the site_obvs will be 'None'
203199
qs = SiteObservation.objects.filter(code__contains=name)
204-
if not qs.exists():
205-
qs = SiteObservation.objects.filter(
206-
code__contains=name.split(':')[0].split('_')[0]
200+
if qs.exists():
201+
logger.info(
202+
'Found SiteObservation containing name=%s qs=%s',
203+
name,
204+
qs,
207205
)
206+
else:
207+
alt_name = name.split(':')[0].split('_')[0]
208+
qs = SiteObservation.objects.filter(code__contains=alt_name)
209+
if qs.exists():
210+
logger.info(
211+
'Found SiteObservation containing alternative name=%s qs=%s',
212+
alt_name,
213+
qs,
214+
)
208215
if qs.count() > 0:
216+
logger.debug(
217+
'Found alternative (target=%s name=%s)',
218+
compound_set.target.title,
219+
name,
220+
)
209221
site_obvs = qs[0]
210222

211223
if not site_obvs:
212224
logger.warning(
213-
'No SiteObservation object (target=%s pdb_fn=%s)',
225+
'No SiteObservation found (target=%s pdb_fn=%s)',
214226
compound_set.target.title,
215227
pdb_fn,
216228
)
@@ -226,12 +238,7 @@ def create_mol(self, inchi, long_inchi=None, name=None) -> Compound:
226238
cpd = Compound.objects.filter(inchi=inchi)
227239
sanitized_mol = Chem.MolFromInchi(inchi, sanitize=True)
228240

229-
if len(cpd) != 0:
230-
new_mol = cpd[0]
231-
elif len(cpd) == 0:
232-
# add molecule and return the object
233-
new_mol = Compound()
234-
241+
new_mol = cpd[0] if len(cpd) != 0 else Compound()
235242
new_mol.smiles = Chem.MolToSmiles(sanitized_mol)
236243
new_mol.inchi = inchi
237244
if long_inchi:
@@ -258,7 +265,7 @@ def create_mol(self, inchi, long_inchi=None, name=None) -> Compound:
258265

259266
return new_mol
260267

261-
def set_props(self, cpd, props, compound_set) -> ScoreDescription:
268+
def set_props(self, cpd, props, compound_set) -> List[ScoreDescription]:
262269
if 'ref_mols' and 'ref_pdb' not in list(props.keys()):
263270
raise Exception('ref_mols and ref_pdb not set!')
264271
set_obj = ScoreDescription.objects.filter(computed_set=compound_set)
@@ -337,88 +344,94 @@ def set_mol(
337344

338345
_ = mol.GetProp('original SMILES')
339346

340-
# Try to get the protein object.
347+
# Try to get the SiteObservation.
341348
# This may fail.
342-
prot = self.get_prot(
349+
prot = self.get_site_observation(
343350
mol, target, compound_set, zfile, zfile_hashvals=zfile_hashvals
344351
)
345352
if not prot:
346-
logger.warning('get_prot() failed to return a Protein object')
353+
logger.warning('get_prot() failed to return a SiteObservation object')
347354

348-
# need to add Compound before saving
349-
# see if anything exists already
350-
existing = ComputedMolecule.objects.filter(
355+
# Need a ComputedMolecule before saving.
356+
# Check if anything exists already...
357+
existing_computed_molecules = ComputedMolecule.objects.filter(
351358
name=name, smiles=smiles, computed_set=compound_set
352359
)
353360

354-
if len(existing) == 1:
355-
computed_molecule: ComputedMolecule = existing[0]
356-
elif len(existing) > 1:
357-
for exist in existing:
361+
computed_molecule: Optional[ComputedMolecule] = None
362+
if len(existing_computed_molecules) == 1:
363+
logger.info(
364+
'Using existing ComputedMolecule %s', existing_computed_molecules[0]
365+
)
366+
computed_molecule = existing_computed_molecules[0]
367+
elif len(existing_computed_molecules) > 1:
368+
logger.warning('Deleting existing ComputedMolecules (more than 1 found')
369+
for exist in existing_computed_molecules:
370+
logger.info('Deleting ComputedMolecule %s', exist)
358371
exist.delete()
359372
computed_molecule = ComputedMolecule()
360-
elif len(existing) == 0:
373+
if not computed_molecule:
374+
logger.info('Creating new ComputedMolecule')
361375
computed_molecule = ComputedMolecule()
362376

377+
assert computed_molecule
363378
computed_molecule.compound = ref_cpd
364379
computed_molecule.computed_set = compound_set
365380
computed_molecule.sdf_info = mol_block
366381
computed_molecule.name = name
367382
computed_molecule.smiles = smiles
383+
# Extract possible reference URL and Rationale
384+
computed_molecule.ref_url = (
385+
mol.GetProp('ref_url') if mol.HasProp('ref_url') else None
386+
)
387+
computed_molecule.rationale = (
388+
mol.GetProp('rationale') if mol.HasProp('rationale') else None
389+
)
368390
# To avoid the error
369391
# needs to have a value for field "id"
370392
# before this many-to-many relationship can be used.
371393
# We must save this ComputedMolecule before adding inspirations
372394
computed_molecule.save()
373395
for insp_frag in insp_frags:
374396
computed_molecule.computed_inspirations.add(insp_frag)
397+
# Done
375398
computed_molecule.save()
376399

377400
return computed_molecule
378401

379402
def get_submission_info(self, description_mol) -> ComputedSetSubmitter:
380403
y_m_d = description_mol.GetProp('generation_date').split('-')
381-
382-
submitter = ComputedSetSubmitter.objects.get_or_create(
404+
return ComputedSetSubmitter.objects.get_or_create(
383405
name=description_mol.GetProp('submitter_name'),
384406
method=description_mol.GetProp('method'),
385407
email=description_mol.GetProp('submitter_email'),
386408
institution=description_mol.GetProp('submitter_institution'),
387409
generation_date=datetime.date(int(y_m_d[0]), int(y_m_d[1]), int(y_m_d[2])),
388410
)[0]
389411

390-
return submitter
391-
392412
def process_mol(
393413
self, mol, target, compound_set, filename, zfile=None, zfile_hashvals=None
394-
) -> ScoreDescription:
414+
) -> List[ScoreDescription]:
395415
cpd = self.set_mol(mol, target, compound_set, filename, zfile, zfile_hashvals)
396416
other_props = mol.GetPropsAsDict()
397-
score_description = self.set_props(cpd, other_props, compound_set)
417+
return self.set_props(cpd, other_props, compound_set)
398418

399-
return score_description
400-
401-
def set_descriptions(self, filename, computed_set: ComputedSet) -> List[str]:
419+
def set_descriptions(
420+
self, filename, computed_set: ComputedSet
421+
) -> List[Chem.rdchem.Mol]:
402422
suppl = Chem.SDMolSupplier(str(filename))
403423
description_mol = suppl[0]
404424

405-
mols = []
406-
407-
for i in range(1, len(suppl)):
408-
mols.append(suppl[i])
409-
425+
mols = [suppl[i] for i in range(1, len(suppl))]
410426
descriptions_needed = list(
411-
set(
412-
[
413-
item
414-
for sublist in [list(m.GetPropsAsDict().keys()) for m in mols]
415-
for item in sublist
416-
]
417-
)
427+
{
428+
item
429+
for sublist in [list(m.GetPropsAsDict().keys()) for m in mols]
430+
for item in sublist
431+
}
418432
)
419433

420434
submitter = self.get_submission_info(description_mol)
421-
422435
description_dict = description_mol.GetPropsAsDict()
423436
version = description_mol.GetProp('_Name')
424437
computed_set.spec_version = version.split('_')[-1]
@@ -472,6 +485,7 @@ def task(self) -> ComputedSet:
472485
f' (unique_name="{unique_name}" len_existing={len_existing})'
473486
)
474487
else:
488+
logger.info('Creating new ComputedSet')
475489
computed_set = ComputedSet()
476490

477491
text_scores = TextScoreValues.objects.filter(score__computed_set=computed_set)
@@ -498,15 +512,17 @@ def task(self) -> ComputedSet:
498512
# Here the ComputedSet owner will take on a default (anonymous) value.
499513
assert settings.AUTHENTICATE_UPLOAD is False
500514
computed_set.save()
515+
logger.info('%s', computed_set)
501516

502517
# set descriptions and get all other mols back
503518
mols_to_process = self.set_descriptions(
504519
filename=sdf_filename, computed_set=computed_set
505520
)
506521

507522
# process every other mol
523+
logger.info('%s mols_to_process=%s', computed_set, len(mols_to_process))
508524
for i in range(len(mols_to_process)):
509-
self.process_mol(
525+
_ = self.process_mol(
510526
mols_to_process[i],
511527
self.target,
512528
computed_set,
@@ -515,17 +531,15 @@ def task(self) -> ComputedSet:
515531
self.zfile_hashvals,
516532
)
517533

518-
# check that molecules have been added to the compound set
519-
_ = ComputedMolecule.objects.filter(computed_set=computed_set)
520-
521534
# check compound set folder exists.
522535
cmp_set_folder = os.path.join(settings.MEDIA_ROOT, 'compound_sets')
523536
if not os.path.isdir(cmp_set_folder):
537+
logger.info('Making ComputedSet folder (%s)', cmp_set_folder)
524538
os.mkdir(cmp_set_folder)
525539

526540
# move and save the compound set
527541
new_filename = (
528-
settings.MEDIA_ROOT + 'compound_sets/' + sdf_filename.split('/')[-1]
542+
f'{settings.MEDIA_ROOT}compound_sets/' + sdf_filename.split('/')[-1]
529543
)
530544
shutil.copy(sdf_filename, new_filename)
531545
# os.renames(sdf_filename, new_filename)
@@ -537,6 +551,7 @@ def task(self) -> ComputedSet:
537551
# print(list(set(old_mols)))
538552

539553
for old_mol in old_mols:
554+
logger.info('Deleting old molecule %s', old_mol)
540555
old_mol.delete()
541556

542557
return computed_set
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
# Generated by Django 3.2.20 on 2023-12-08 10:45
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
dependencies = [
8+
('viewer', '0021_fix_computedset_table'),
9+
]
10+
11+
operations = [
12+
migrations.AddField(
13+
model_name='computedmolecule',
14+
name='rationale',
15+
field=models.TextField(
16+
blank=True,
17+
help_text='An optional rationale for this molecule',
18+
null=True,
19+
),
20+
),
21+
migrations.AddField(
22+
model_name='computedmolecule',
23+
name='ref_url',
24+
field=models.TextField(
25+
blank=True,
26+
help_text='An optional url linking to the reference for this molecule',
27+
null=True,
28+
),
29+
),
30+
]

‎viewer/models.py

+10
Original file line number | Diff line number | Diff line change
@@ -950,6 +950,16 @@ class ComputedMolecule(models.Model):
950950
name = models.CharField(max_length=50)
951951
smiles = models.CharField(max_length=255)
952952
computed_inspirations = models.ManyToManyField(SiteObservation, blank=True)
953+
ref_url = models.TextField(
954+
null=True,
955+
blank=True,
956+
help_text="An optional url linking to the reference for this molecule",
957+
)
958+
rationale = models.TextField(
959+
null=True,
960+
blank=True,
961+
help_text="An optional rationale for this molecule",
962+
)
953963

954964
def __str__(self) -> str:
955965
return f"{self.smiles}"

0 commit comments

Comments
 (0)