From b5c3dc9e8848272991bc80634bb33b2d7f094cdf Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 25 Apr 2024 09:13:29 +0100 Subject: [PATCH 1/5] fix: deeper nesting level to site observation in meta_aligner.yaml Data loads successfully but actual v2 upload has not been tested --- viewer/migrations/0051_auto_20240425_0748.py | 23 ++++++++++++++++++++ viewer/models.py | 1 + viewer/target_loader.py | 8 ++++--- 3 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 viewer/migrations/0051_auto_20240425_0748.py diff --git a/viewer/migrations/0051_auto_20240425_0748.py b/viewer/migrations/0051_auto_20240425_0748.py new file mode 100644 index 00000000..0c7b4f7a --- /dev/null +++ b/viewer/migrations/0051_auto_20240425_0748.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.24 on 2024-04-25 07:48 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0050_auto_20240412_0930'), + ] + + operations = [ + migrations.AddField( + model_name='historicalsiteobservation', + name='version', + field=models.PositiveSmallIntegerField(default=1), + ), + migrations.AddField( + model_name='siteobservation', + name='version', + field=models.PositiveSmallIntegerField(default=1), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index 72c0c8f8..5aa960dd 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -430,6 +430,7 @@ def __repr__(self) -> str: class SiteObservation(models.Model): code = models.TextField(null=True) longcode = models.TextField(null=True) + version = models.PositiveSmallIntegerField(null=False, default=1) experiment = models.ForeignKey(Experiment, on_delete=models.CASCADE) cmpd = models.ForeignKey(Compound, null=True, on_delete=models.CASCADE) xtalform_site = models.ForeignKey(XtalformSite, on_delete=models.CASCADE) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index ba4aaa19..3da0a646 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1231,16 
+1231,17 @@ def process_xtalform_site( index_data=index_data, ) - @create_objects(depth=5) + @create_objects(depth=6) def process_site_observation( self, experiments: dict[int | str, MetadataObject], compounds: dict[int | str, MetadataObject], xtalform_sites: dict[str, Model], canon_site_confs: dict[int | str, MetadataObject], - item_data: tuple[str, str, str, int | str, int | str, dict] | None = None, + item_data: tuple[str, str, str, int | str, int, int | str, dict] | None = None, # chain: str, # ligand: str, + # version: int, # idx: int | str, # data: dict, validate_files: bool = True, @@ -1265,7 +1266,7 @@ def process_site_observation( del kwargs assert item_data try: - experiment_id, _, chain, ligand, idx, data = item_data + experiment_id, _, chain, ligand, version, idx, data = item_data except ValueError: # wrong data item return None @@ -1367,6 +1368,7 @@ def process_site_observation( fields = { # Code for this protein (e.g. Mpro_Nterm-x0029_A_501_0) "longcode": longcode, + "version": version, "experiment": experiment, "cmpd": compound, "xtalform_site": xtalform_site, From 9da3b35fbd8fe884f7fbadf672120cda9f045364 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 25 Apr 2024 11:03:13 +0100 Subject: [PATCH 2/5] fix: renamed panddas_event_files to ligand_binding_events in meta_al --- viewer/target_loader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 3da0a646..820ee45f 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -764,14 +764,14 @@ def process_experiment( ) try: - panddas_files = data["crystallographic_files"]["panddas_event_files"] + event_files = data["crystallographic_files"]["ligand_binding_events"] except KeyError: - panddas_files = [] + event_files = [] map_info_files = self.validate_map_files( - key="panddas_event_files", + key="ligand_binding_events", obj_identifier=experiment_name, - file_struct=panddas_files, + file_struct=event_files, 
validate_files=validate_files, ) From 6813102bb9037d83036ae6e4654929440b4edf1c Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 29 Apr 2024 15:58:11 +0100 Subject: [PATCH 3/5] stashing Added sorting keys for versioned key. V1 data loading, waiting for fixes in conf site presentation to continue with v2 --- viewer/target_loader.py | 75 ++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 13 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 820ee45f..447de218 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -97,6 +97,8 @@ class MetadataObject: """ instance: Model + key: str + versioned_key: str index_data: dict = field(default_factory=dict) new: bool = False @@ -118,7 +120,7 @@ class ProcessedObject: key: str defaults: dict = field(default_factory=dict) index_data: dict = field(default_factory=dict) - identifier: Optional[str] = "" + versioned_key: Optional[str] = "" @dataclass @@ -298,10 +300,11 @@ def alphanumerator(start_from: str = "") -> Generator[str, None, None]: return generator -def strip_version(s: str) -> str: +def strip_version(s: str, separator: str = "/") -> str: # format something like XX01ZVNS2B-x0673/B/501/1 # remove tailing '/1' - return s[0 : s.rfind('/')] + return s[0 : s.rfind(separator)] + # return s def create_objects(func=None, *, depth=math.inf): @@ -410,7 +413,11 @@ def wrapper_create_objects( ) m = MetadataObject( - instance=obj, index_data=instance_data.index_data, new=new + instance=obj, + key=instance_data.key, + versioned_key=instance_data.versioned_key, + index_data=instance_data.index_data, + new=new, ) # index data here probs result[instance_data.key] = m @@ -1082,7 +1089,9 @@ def process_canon_site( """ del kwargs assert item_data - canon_site_id, data = item_data + v_canon_site_id, data = item_data + + canon_site_id = strip_version(v_canon_site_id, separator="+") extract = functools.partial( self._extract, @@ -1099,7 +1108,9 @@ def process_canon_site( 
} conf_sites_ids = extract(key="conformer_site_ids", return_type=list) + # conf_sites_ids = set([strip_version(k, separator="+") for k in conf_sites_ids]) ref_conf_site_id = extract(key="reference_conformer_site_id") + # ref_conf_site_id = strip_version(ref_conf_site_id, separator="+") index_data = { "ref_conf_site": ref_conf_site_id, @@ -1112,6 +1123,7 @@ def process_canon_site( fields=fields, index_data=index_data, key=canon_site_id, + versioned_key=v_canon_site_id, ) @create_objects(depth=1) @@ -1134,8 +1146,10 @@ def process_canon_site_conf( """ del kwargs assert item_data - conf_site_name, data = item_data - canon_site = canon_sites[conf_site_name] + v_conf_site_name, data = item_data + conf_site_name = strip_version(v_conf_site_name, separator="+") + + canon_site = canon_sites[v_conf_site_name] extract = functools.partial( self._extract, @@ -1154,6 +1168,8 @@ def process_canon_site_conf( } members = extract(key="members") + # members = set([strip_version(k) for k in members]) + ref_ligands = extract(key="reference_ligand_id") index_fields = { @@ -1166,6 +1182,7 @@ def process_canon_site_conf( fields=fields, index_data=index_fields, key=conf_site_name, + versioned_key=v_conf_site_name, ) @create_objects(depth=1) @@ -1189,7 +1206,8 @@ def process_xtalform_site( """ del kwargs assert item_data - xtalform_site_name, data = item_data + v_xtalform_site_name, data = item_data + xtalform_site_name = strip_version(v_xtalform_site_name) extract = functools.partial( self._extract, @@ -1201,6 +1219,7 @@ def process_xtalform_site( xtalform_id = extract(key="xtalform_id") canon_site_id = extract(key="canonical_site_id") + # canon_site_id = strip_version(canon_site_id, separator="+") xtalform = xtalforms[xtalform_id].instance canon_site = canon_sites[canon_site_id] @@ -1228,6 +1247,7 @@ def process_xtalform_site( fields=fields, defaults=defaults, key=xtalform_site_name, + versioned_key=v_xtalform_site_name, index_data=index_data, ) @@ -1238,7 +1258,7 @@ def 
process_site_observation( compounds: dict[int | str, MetadataObject], xtalform_sites: dict[str, Model], canon_site_confs: dict[int | str, MetadataObject], - item_data: tuple[str, str, str, int | str, int, int | str, dict] | None = None, + item_data: tuple[str, str, str, int | str, int, str, dict] | None = None, # chain: str, # ligand: str, # version: int, @@ -1266,11 +1286,12 @@ def process_site_observation( del kwargs assert item_data try: - experiment_id, _, chain, ligand, version, idx, data = item_data + experiment_id, _, chain, ligand, version, v_idx, data = item_data except ValueError: # wrong data item return None + idx = strip_version(v_idx, separator="+") extract = functools.partial( self._extract, data=data, @@ -1283,6 +1304,7 @@ def process_site_observation( longcode = f"{experiment.code}_{chain}_{str(ligand)}_{str(idx)}" key = f"{experiment.code}/{chain}/{str(ligand)}" + v_key = f"{experiment.code}/{chain}/{str(ligand)}/{version}" smiles = extract(key="ligand_smiles_string") @@ -1399,6 +1421,7 @@ def process_site_observation( fields=fields, defaults=defaults, key=key, + versioned_key=v_key, ) def process_bundle(self): @@ -1641,12 +1664,25 @@ def process_bundle(self): val.instance.xtalform_site_num = next(xtnum) val.instance.save() + # reindex conf sites by versioned tag + canon_site_conf_versioned = {} + for val in canon_site_conf_objects.values(): # pylint: disable=no-member + for k in val.index_data["members"]: + # strip the version number from tag + canon_site_conf_versioned[val.versioned_key] = val.instance + # now can update CanonSite with ref_conf_site # also, fill the canon_site_num field + # TODO: ref_conf_site is with version, object's key isn't + # for val in canon_site_objects.values(): # pylint: disable=no-member + # val.instance.ref_conf_site = canon_site_conf_objects[ + # val.index_data["reference_conformer_site_id"] + # ].instance + # val.instance.save() for val in canon_site_objects.values(): # pylint: disable=no-member - 
val.instance.ref_conf_site = canon_site_conf_objects[ + val.instance.ref_conf_site = canon_site_conf_versioned[ val.index_data["reference_conformer_site_id"] - ].instance + ] val.instance.save() # canon site instances are now complete @@ -1704,6 +1740,7 @@ def process_bundle(self): # 2024-03-04, if you need to check for so in so_group.filter(code__isnull=True): + logger.debug("processing so: %s", so.longcode) if so.experiment.type == 1: # manual. code is pdb code code = f"{so.experiment.code}-{next(suffix)}" @@ -1740,11 +1777,22 @@ def process_bundle(self): so.code = code so.save() + site_observations_versioned = {} + for val in site_observation_objects.values(): # pylint: disable=no-member + site_observations_versioned[val.versioned_key] = val.instance + # final remaining fk, attach reference site observation to canon_site_conf for val in canon_site_conf_objects.values(): # pylint: disable=no-member val.instance.ref_site_observation = site_observation_objects[ strip_version(val.index_data["reference_ligands"]) ].instance + logger.debug("attaching canon_site_conf: %r", val.instance) + logger.debug( + "attaching canon_site_conf: %r", + site_observation_objects[ + strip_version(val.index_data["reference_ligands"]) + ].instance.longcode, + ) val.instance.save() logger.debug("data read and processed, adding tags") @@ -1770,7 +1818,8 @@ def process_bundle(self): ) tag = val.instance.name.split('+')[0] so_list = [ - site_observation_objects[strip_version(k)].instance + # site_observation_objects[k].instance for k in val.index_data["members"] + site_observations_versioned[k] for k in val.index_data["members"] ] self._tag_observations(tag, prefix, "ConformerSites", so_list) From 8de2dfac3295a73ac6e37995a1df2b5b25b1a7d3 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 2 May 2024 09:32:27 +0100 Subject: [PATCH 4/5] feat: fully functional versioned data Reads and processes upload_2+ data where version numbers are given in suffix --- viewer/target_loader.py | 82 
+++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 447de218..9f05e830 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -400,7 +400,12 @@ def wrapper_create_objects( self.report.log(logging.ERROR, msg) failed = failed + 1 - if not obj: + if obj: + # update any additional fields + instance_qs = instance_data.model_class.objects.filter(pk=obj.pk) + instance_qs.update(**instance_data.defaults) + obj.refresh_from_db() + else: # create fake object so I can just push the upload # through and compile report for user feedback obj = instance_data.model_class( @@ -420,7 +425,7 @@ def wrapper_create_objects( new=new, ) # index data here probs - result[instance_data.key] = m + result[instance_data.versioned_key] = m msg = "{} {} objects processed, {} created, {} fetched from database".format( created + existing + failed, @@ -862,6 +867,7 @@ def process_experiment( model_class=Experiment, fields=fields, key=experiment_name, + versioned_key=experiment_name, defaults=defaults, index_data=index_fields, ) @@ -923,6 +929,7 @@ def process_compound( fields={}, defaults=defaults, key=protein_name, + versioned_key=protein_name, ) @create_objects(depth=1) @@ -976,6 +983,7 @@ def process_xtalform( model_class=Xtalform, fields=fields, key=xtalform_name, + versioned_key=xtalform_name, defaults=defaults, ) @@ -1017,6 +1025,7 @@ def process_quat_assembly( model_class=QuatAssembly, fields=fields, key=assembly_name, + versioned_key=assembly_name, ) @create_objects(depth=3) @@ -1066,6 +1075,7 @@ def process_xtalform_quatassembly( model_class=XtalformQuatAssembly, fields=fields, key=xtalform_id, + versioned_key=xtalform_id, ) @create_objects(depth=1) @@ -1316,7 +1326,6 @@ def process_site_observation( # the first. 
try: logger.debug('exp: %s, %s', experiment, experiments[experiment_id].new) - # logger.debug('exp compounds: %s', experiment.compounds) compound = experiment.compounds.get( smiles=experiments[experiment_id].index_data["smiles"] ) @@ -1337,7 +1346,7 @@ def process_site_observation( f"Multiple compounds for experiment {experiment.code}", ) - canon_site_conf = canon_site_confs[idx].instance + canon_site_conf = canon_site_confs[v_idx].instance xtalform_site = xtalform_sites[key] ( # pylint: disable=unbalanced-tuple-unpacking @@ -1664,25 +1673,13 @@ def process_bundle(self): val.instance.xtalform_site_num = next(xtnum) val.instance.save() - # reindex conf sites by versioned tag - canon_site_conf_versioned = {} - for val in canon_site_conf_objects.values(): # pylint: disable=no-member - for k in val.index_data["members"]: - # strip the version number from tag - canon_site_conf_versioned[val.versioned_key] = val.instance - # now can update CanonSite with ref_conf_site # also, fill the canon_site_num field # TODO: ref_conf_site is with version, object's key isn't - # for val in canon_site_objects.values(): # pylint: disable=no-member - # val.instance.ref_conf_site = canon_site_conf_objects[ - # val.index_data["reference_conformer_site_id"] - # ].instance - # val.instance.save() for val in canon_site_objects.values(): # pylint: disable=no-member - val.instance.ref_conf_site = canon_site_conf_versioned[ + val.instance.ref_conf_site = canon_site_conf_objects[ val.index_data["reference_conformer_site_id"] - ] + ].instance val.instance.save() # canon site instances are now complete @@ -1704,12 +1701,18 @@ def process_bundle(self): ) values = ["experiment"] - qs = ( - SiteObservation.objects.values(*values) - .order_by(*values) - .annotate(obvs=ArrayAgg("id")) - .values_list("obvs", flat=True) - ) + # fmt: off + qs = SiteObservation.objects.filter( + experiment__experiment_upload__target=self.target, + code__isnull=True, + ).values( + *values, + ).order_by( + *values, + 
).annotate( + obvs=ArrayAgg("id"), + ).values_list("obvs", flat=True) + # fmt: on for elem in qs: # fmt: off @@ -1764,12 +1767,17 @@ def process_bundle(self): # TODO: this should ideally be solved by db engine, before # rushing to write the trigger, have think about the # loader concurrency situations - if SiteObservation.objects.filter( + code_qs = SiteObservation.objects.filter( experiment__experiment_upload__target=self.target, code=code, - ).exists(): + ) + # if code exists and the experiment is new + logger.debug( + 'checking code uniq: %s, %s', code, so.experiment.status + ) + if code_qs.exists() and so.experiment.status == 0: msg = ( - f"short code {code} already exists for this target; " + f"short code {code} already exists for this target; " + "specify a code_prefix to resolve this conflict" ) self.report.log(logging.ERROR, msg) @@ -1777,20 +1785,22 @@ def process_bundle(self): so.code = code so.save() - site_observations_versioned = {} - for val in site_observation_objects.values(): # pylint: disable=no-member - site_observations_versioned[val.versioned_key] = val.instance + # site_observations_versioned = {} + # for val in site_observation_objects.values(): # pylint: disable=no-member + # site_observations_versioned[val.versioned_key] = val.instance # final remaining fk, attach reference site observation to canon_site_conf for val in canon_site_conf_objects.values(): # pylint: disable=no-member val.instance.ref_site_observation = site_observation_objects[ - strip_version(val.index_data["reference_ligands"]) + # strip_version(val.index_data["reference_ligands"]) + val.index_data["reference_ligands"] ].instance logger.debug("attaching canon_site_conf: %r", val.instance) logger.debug( "attaching canon_site_conf: %r", site_observation_objects[ - strip_version(val.index_data["reference_ligands"]) + # strip_version(val.index_data["reference_ligands"]) + val.index_data["reference_ligands"] ].instance.longcode, ) val.instance.save() @@ -1818,9 +1828,10 @@ def 
process_bundle(self): ) tag = val.instance.name.split('+')[0] so_list = [ - # site_observation_objects[k].instance for k in val.index_data["members"] - site_observations_versioned[k] + site_observation_objects[k].instance for k in val.index_data["members"] + # site_observations_versioned[k] + # for k in val.index_data["members"] ] self._tag_observations(tag, prefix, "ConformerSites", so_list) @@ -1855,7 +1866,8 @@ def process_bundle(self): ) tag = f"{val.instance.xtalform.name} - {val.instance.xtalform_site_id}" so_list = [ - site_observation_objects[strip_version(k)].instance + # site_observation_objects[strip_version(k)].instance + site_observation_objects[k].instance for k in val.index_data["residues"] ] self._tag_observations(tag, prefix, "CrystalformSites", so_list) From 597e1d7d01b0903795f9b1ad3fe0b13c0e9964d9 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 2 May 2024 09:50:58 +0100 Subject: [PATCH 5/5] fix: removed some dead code --- viewer/target_loader.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 9f05e830..6df1f026 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -302,9 +302,8 @@ def alphanumerator(start_from: str = "") -> Generator[str, None, None]: def strip_version(s: str, separator: str = "/") -> str: # format something like XX01ZVNS2B-x0673/B/501/1 - # remove tailing '/1' + # remove tailing '1' return s[0 : s.rfind(separator)] - # return s def create_objects(func=None, *, depth=math.inf): @@ -1118,9 +1117,7 @@ def process_canon_site( } conf_sites_ids = extract(key="conformer_site_ids", return_type=list) - # conf_sites_ids = set([strip_version(k, separator="+") for k in conf_sites_ids]) ref_conf_site_id = extract(key="reference_conformer_site_id") - # ref_conf_site_id = strip_version(ref_conf_site_id, separator="+") index_data = { "ref_conf_site": ref_conf_site_id, @@ -1178,7 +1175,6 @@ def process_canon_site_conf( } members = 
extract(key="members") - # members = set([strip_version(k) for k in members]) ref_ligands = extract(key="reference_ligand_id") @@ -1229,7 +1225,6 @@ def process_xtalform_site( xtalform_id = extract(key="xtalform_id") canon_site_id = extract(key="canonical_site_id") - # canon_site_id = strip_version(canon_site_id, separator="+") xtalform = xtalforms[xtalform_id].instance canon_site = canon_sites[canon_site_id] @@ -1347,7 +1342,7 @@ def process_site_observation( ) canon_site_conf = canon_site_confs[v_idx].instance - xtalform_site = xtalform_sites[key] + xtalform_site = xtalform_sites[v_key] ( # pylint: disable=unbalanced-tuple-unpacking bound_file, @@ -1689,8 +1684,7 @@ def process_bundle(self): xtalform_site_by_tag = {} for val in xtalform_sites_objects.values(): # pylint: disable=no-member for k in val.index_data["residues"]: - # strip the version number from tag - xtalform_site_by_tag[strip_version(k)] = val.instance + xtalform_site_by_tag[k] = val.instance site_observation_objects = self.process_site_observation( yaml_data=crystals, @@ -1792,14 +1786,12 @@ def process_bundle(self): # final remaining fk, attach reference site observation to canon_site_conf for val in canon_site_conf_objects.values(): # pylint: disable=no-member val.instance.ref_site_observation = site_observation_objects[ - # strip_version(val.index_data["reference_ligands"]) val.index_data["reference_ligands"] ].instance logger.debug("attaching canon_site_conf: %r", val.instance) logger.debug( "attaching canon_site_conf: %r", site_observation_objects[ - # strip_version(val.index_data["reference_ligands"]) val.index_data["reference_ligands"] ].instance.longcode, ) @@ -1866,9 +1858,7 @@ def process_bundle(self): ) tag = f"{val.instance.xtalform.name} - {val.instance.xtalform_site_id}" so_list = [ - # site_observation_objects[strip_version(k)].instance - site_observation_objects[k].instance - for k in val.index_data["residues"] + site_observation_objects[k].instance for k in 
val.index_data["residues"] ] self._tag_observations(tag, prefix, "CrystalformSites", so_list)