Skip to content

Commit e16909a

Browse files
authored
Merge pull request #500 from xchem/m2ms-1290-upload-failures
Fixed 2 bugs in target loader
2 parents f0417ed + 2ea75ff commit e16909a

File tree

3 files changed

+92
-73
lines changed

3 files changed

+92
-73
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 3.2.23 on 2024-01-24 11:35
2+
3+
import django.db.models.deletion
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
8+
dependencies = [
9+
('viewer', '0032_auto_20240119_1501'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='siteobservation',
15+
name='cmpd',
16+
field=models.ForeignKey(
17+
null=True,
18+
on_delete=django.db.models.deletion.CASCADE,
19+
to='viewer.compound',
20+
),
21+
),
22+
]

viewer/models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ def __repr__(self) -> str:
416416
class SiteObservation(models.Model):
417417
code = models.TextField()
418418
experiment = models.ForeignKey(Experiment, on_delete=models.CASCADE)
419-
cmpd = models.ForeignKey(Compound, on_delete=models.CASCADE)
419+
cmpd = models.ForeignKey(Compound, null=True, on_delete=models.CASCADE)
420420
xtalform_site = models.ForeignKey(XtalformSite, on_delete=models.CASCADE)
421421
canon_site_conf = models.ForeignKey(CanonSiteConf, on_delete=models.CASCADE)
422422
bound_file = models.FileField(

viewer/target_loader.py

+69-72
Original file line numberDiff line numberDiff line change
@@ -670,11 +670,12 @@ def process_compound(
670670
else "ligand_cif"
671671
)
672672
self.report.log(
673-
Level.FATAL,
673+
Level.WARNING,
674674
"{} missing from {} in '{}' experiment section".format(
675675
exc, smiles, protein_name
676676
),
677677
)
678+
return None
678679

679680
fields = {
680681
"smiles": smiles,
@@ -796,7 +797,7 @@ def process_xtalform_quatassembly(
796797
"""
797798
del kwargs
798799
assert item_data
799-
xtalform_id, _, _, data = item_data
800+
xtalform_id, _, assembly_id, data = item_data
800801

801802
# hm.. doesn't reflect the fact that it's from a different
802803
# file.. and the message should perhaps be a bit different
@@ -809,10 +810,10 @@ def process_xtalform_quatassembly(
809810

810811
xtalform = xtalforms[xtalform_id].instance
811812

812-
assembly_id = extract(key="assembly")
813+
quat_assembly_id = extract(key="assembly")
813814

814815
# TODO: need to key check these as well..
815-
assembly = quat_assemblies[assembly_id].instance
816+
assembly = quat_assemblies[quat_assembly_id].instance
816817

817818
fields = {
818819
"assembly_id": assembly_id,
@@ -1041,7 +1042,12 @@ def process_site_observation(
10411042
code = f"{experiment.code}_{chain}_{str(ligand)}_{str(idx)}"
10421043
key = f"{experiment.code}/{chain}/{str(ligand)}"
10431044

1044-
compound = compounds[experiment_id].instance
1045+
try:
1046+
compound = compounds[experiment_id].instance
1047+
except KeyError:
1048+
# compound missing, fine
1049+
compound = None
1050+
10451051
canon_site_conf = canon_site_confs[idx].instance
10461052
xtalform_site = xtalform_sites[key]
10471053

@@ -1121,25 +1127,56 @@ def process_site_observation(
11211127
key=key,
11221128
)
11231129

1124-
def process_metadata(
1125-
self,
1126-
upload_root: Path,
1127-
):
1128-
"""Extract model instances from metadata file and save them to db."""
1129-
# TODO: this method is quite long and should perhaps be broken
1130-
# apart. then again, it just keeps calling other methods to
1131-
# create model instances, so logically it's just doing the
1132-
# same thing.
1130+
def process_bundle(self):
1131+
"""Resolves subdirs in uploaded data bundle.
1132+
1133+
If called from task, takes task as a parameter for status updates.
1134+
"""
1135+
1136+
# by now I should have archive unpacked, get target name from
1137+
# config.yaml
1138+
up_iter = self.raw_data.glob("upload_*")
1139+
try:
1140+
upload_dir = next(up_iter)
1141+
except StopIteration as exc:
1142+
msg = "Upload directory missing from uploaded file"
1143+
self.report.log(Level.FATAL, msg)
1144+
# what do you mean unused?!
1145+
raise StopIteration(
1146+
msg
1147+
) from exc # pylint: disable=# pylint: disable=protected-access
1148+
1149+
try:
1150+
upload_dir = next(up_iter)
1151+
self.report.log(Level.WARNING, "Multiple upload directories in archive")
1152+
except StopIteration:
1153+
# just a warning, ignoring the second one
1154+
pass
1155+
1156+
# now that target name is not included in path, I don't need
1157+
# it here, need it just before creating target object. Also,
1158+
# there's probably no need to throw a fatal here, I can
1159+
# reasonably well deduce it from meta (I think)
1160+
config_it = upload_dir.glob(CONFIG_FILE)
1161+
try:
1162+
config_file = next(config_it)
1163+
except StopIteration as exc:
1164+
msg = f"config file missing from {str(upload_dir)}"
1165+
self.report.log(Level.FATAL, msg)
1166+
raise StopIteration() from exc
11331167

1134-
# update_task(task, "PROCESSING", "Processing metadata")
1135-
logger.info("%sProcessing %s", self.report.task_id, upload_root)
1168+
config = self._load_yaml(config_file)
1169+
logger.debug("config: %s", config)
1170+
1171+
try:
1172+
self.target_name = config["target_name"]
1173+
except KeyError as exc:
1174+
raise KeyError("target_name missing in config file") from exc
11361175

11371176
# moved this bit from init
11381177
self.target, target_created = Target.objects.get_or_create(
11391178
title=self.target_name,
1140-
defaults={
1141-
"display_name": self.target_name,
1142-
},
1179+
display_name=self.target_name,
11431180
)
11441181

11451182
# TODO: original target loader's function get_create_projects
@@ -1158,7 +1195,7 @@ def process_metadata(
11581195
# remove uploaded file
11591196
Path(self.bundle_path).unlink()
11601197
msg = f"{self.bundle_name} already uploaded, skipping."
1161-
self.report.final(msg, upload_state=UploadState.CANCELED)
1198+
self.report.log(Level.INFO, msg)
11621199
raise FileExistsError(msg)
11631200

11641201
if project_created and committer.pk == settings.ANONYMOUS_USER:
@@ -1170,7 +1207,7 @@ def process_metadata(
11701207
assert self.target
11711208
self.target.project_id.add(self.project)
11721209

1173-
meta = self._load_yaml(Path(upload_root).joinpath(METADATA_FILE))
1210+
meta = self._load_yaml(Path(upload_dir).joinpath(METADATA_FILE))
11741211

11751212
# collect top level info
11761213
self.version_number = meta["version_number"]
@@ -1208,11 +1245,21 @@ def process_metadata(
12081245
)
12091246
self.experiment_upload.save()
12101247

1248+
xtalforms_yaml = self._load_yaml(Path(upload_dir).joinpath(XTALFORMS_FILE))
1249+
1250+
# this is the last file to load. if any of the files missing, don't continue
1251+
if not meta or not config or not xtalforms_yaml:
1252+
self.report.final(
1253+
"Missing files in uploaded data, aborting",
1254+
Level.FATAL,
1255+
)
1256+
return
1257+
12111258
( # pylint: disable=unbalanced-tuple-unpacking
12121259
assemblies,
12131260
xtalform_assemblies,
12141261
) = self._get_yaml_blocks(
1215-
yaml_data=self._load_yaml(Path(upload_root).joinpath(XTALFORMS_FILE)),
1262+
yaml_data=xtalforms_yaml,
12161263
blocks=("assemblies", "xtalforms"),
12171264
)
12181265

@@ -1338,56 +1385,6 @@ def process_metadata(
13381385
].instance
13391386
val.instance.save()
13401387

1341-
def process_bundle(self):
1342-
"""Resolves subdirs in uploaded data bundle.
1343-
1344-
If called from task, takes task as a parameter for status updates.
1345-
"""
1346-
1347-
# by now I should have archive unpacked, get target name from
1348-
# config.yaml
1349-
up_iter = self.raw_data.glob("upload_*")
1350-
try:
1351-
upload_dir = next(up_iter)
1352-
except StopIteration as exc:
1353-
msg = "Upload directory missing from uploaded file"
1354-
self.report.log(Level.FATAL, msg)
1355-
# what do you mean unused?!
1356-
raise StopIteration(
1357-
msg
1358-
) from exc # pylint: disable=# pylint: disable=protected-access
1359-
1360-
try:
1361-
upload_dir = next(up_iter)
1362-
self.report.log(Level.WARNING, "Multiple upload directories in archive")
1363-
except StopIteration:
1364-
# just a warning, ignoring the second one
1365-
pass
1366-
1367-
# now that target name is not included in path, I don't need
1368-
# it here, need it just before creating target object. Also,
1369-
# there's probably no need to throw a fatal here, I can
1370-
# reasonably well deduce it from meta (I think)
1371-
config_it = upload_dir.glob(CONFIG_FILE)
1372-
try:
1373-
config_file = next(config_it)
1374-
except StopIteration as exc:
1375-
msg = f"config file missing from {str(upload_dir)}"
1376-
self.report.log(Level.FATAL, msg)
1377-
raise StopIteration() from exc
1378-
1379-
config = self._load_yaml(config_file)
1380-
logger.debug("config: %s", config)
1381-
1382-
try:
1383-
self.target_name = config["target_name"]
1384-
except KeyError as exc:
1385-
raise KeyError("target_name missing in config file") from exc
1386-
1387-
self.process_metadata(
1388-
upload_root=upload_dir,
1389-
)
1390-
13911388
def _load_yaml(self, yaml_file: Path) -> dict:
13921389
try:
13931390
with open(yaml_file, "r", encoding="utf-8") as file:

0 commit comments

Comments
 (0)