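Cleanup: normalize string quoting and line wrapping, remove commented-out code and stale TODO notes, and re-enable report deletion after the yield in the loaded_reports test fixture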

elewis2 · elewis2 · commit 32173c65dbf2 · 2024-11-08T08:57:20.000-08:00
diff --git a/pori_python/ipr/annotate.py b/pori_python/ipr/annotate.py
@@ -84,7 +84,10 @@ def get_ipr_statements_from_variants(
     ]
 
     for ipr_row in convert_statements_to_alterations(
-        graphkb_conn, inferred_statements, disease_name, convert_to_rid_set(inferred_matches)
+        graphkb_conn,
+        inferred_statements,
+        disease_name,
+        convert_to_rid_set(inferred_matches),
     ):
         ipr_row["kbData"]["inferred"] = True
         rows.append(ipr_row)
@@ -122,16 +125,16 @@ def annotate_expression_variants(
             continue
         try:
             matches = gkb_match.match_expression_variant(graphkb_conn, gene, variant)
-            for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
+            for matched_stmt in get_ipr_statements_from_variants(
+                graphkb_conn, matches, disease_name
+            ):
                 ipr_row = {
-                    'variant': row['key'],
-                    'variantType': row.get('variantType', 'exp'),
-                    'kbVariantId': matched_stmt['kbVariantId'],
-                    'kbVariant': matched_stmt['kbVariant'],
-                    'kbMatchedStatements': [matched_stmt]
+                    "variant": row["key"],
+                    "variantType": row.get("variantType", "exp"),
+                    "kbVariantId": matched_stmt["kbVariantId"],
+                    "kbVariant": matched_stmt["kbVariant"],
+                    "kbMatchedStatements": [matched_stmt],
                 }
-                #ipr_row["variant"] = row["key"]
-                #ipr_row["variantType"] = row.get("variantType", "exp")
                 alterations.append(ipr_row)
         except FeatureNotFoundError as err:
             problem_genes.add(gene)
@@ -182,21 +185,17 @@ def annotate_copy_variants(
             continue
         try:
             matches = gkb_match.match_copy_variant(graphkb_conn, gene, variant)
-            #for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
-            #    ipr_row["variant"] = row["key"]
-            #    ipr_row["variantType"] = row.get("variantType", "cnv")
-            #    alterations.append(ipr_row)
-            for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
+            for matched_stmt in get_ipr_statements_from_variants(
+                graphkb_conn, matches, disease_name
+            ):
                 ipr_row = {
-                    'variant': row['key'],
-                    'variantType': row.get('variantType', 'cnv'),
-                    'kbVariantId': matched_stmt['kbVariantId'],
-                    'kbVariant': matched_stmt['kbVariant'],
-                    'kbMatchedStatements': [matched_stmt]
+                    "variant": row["key"],
+                    "variantType": row.get("variantType", "cnv"),
+                    "kbVariantId": matched_stmt["kbVariantId"],
+                    "kbVariant": matched_stmt["kbVariant"],
+                    "kbMatchedStatements": [matched_stmt],
                 }
                 alterations.append(ipr_row)
-                #ipr_row["variant"] = row["key"]
-                #ipr_row["variantType"] = row.get("variantType", "exp")
         except FeatureNotFoundError as err:
             problem_genes.add(gene)
             logger.debug(f"Unrecognized gene ({gene} {variant}): {err}")
@@ -270,23 +269,17 @@ def annotate_positional_variants(
                         matches = gkb_match.match_positional_variant(graphkb_conn, variant)
                     else:
                         raise parse_err
-                for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
+                for matched_stmt in get_ipr_statements_from_variants(
+                    graphkb_conn, matches, disease_name
+                ):
                     ipr_row = {
-                        'variant': row['key'],
-                        'variantType': row.get("variantType", "mut" if row.get("gene") else "sv"),
-                        'kbVariant': matched_stmt['kbVariant'],
-                        'kbVariantId': matched_stmt['kbVariantId'],
-                        'kbMatchedStatements': [matched_stmt]
+                        "variant": row["key"],
+                        "variantType": row.get("variantType", "mut" if row.get("gene") else "sv"),
+                        "kbVariant": matched_stmt["kbVariant"],
+                        "kbVariantId": matched_stmt["kbVariantId"],
+                        "kbMatchedStatements": [matched_stmt],
                     }
                     alterations.append(Hashabledict(ipr_row))
-                #for ipr_row in get_ipr_statements_from_variants(
-                #    graphkb_conn, matches, disease_name
-                #):
-                #    ipr_row["variant"] = row["key"]
-                #    ipr_row["variantType"] = row.get(
-                #        "variantType", "mut" if row.get("gene") else "sv"
-                #    )
-                #    alterations.append(Hashabledict(ipr_row))
 
             except FeatureNotFoundError as err:
                 logger.debug(f"failed to match positional variants ({variant}): {err}")
@@ -344,7 +337,10 @@ def annotate_msi(
             "target": {
                 "target": "CategoryVariant",
                 "filters": {
-                    "reference1": {"target": "Signature", "filters": {"name": msi_category}}
+                    "reference1": {
+                        "target": "Signature",
+                        "filters": {"name": msi_category},
+                    }
                 },
             },
             "queryType": "similarTo",
@@ -353,24 +349,24 @@ def annotate_msi(
     )
     if msi_categories:
         msi_variants = [cast(Variant, var) for var in msi_categories]
-        for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, msi_variants, disease_name):
+        for matched_stmt in get_ipr_statements_from_variants(
+            graphkb_conn, msi_variants, disease_name
+        ):
             ipr_row = {
-                'variant': msi_category,
-                'variantType': 'msi',
-                'kbVariantId': matched_stmt['kbVariantId'],
-                'kbVariant': matched_stmt['kbVariant'],
-                'kbMatchedStatements': [matched_stmt]
+                "variant": msi_category,
+                "variantType": "msi",
+                "kbVariantId": matched_stmt["kbVariantId"],
+                "kbVariant": matched_stmt["kbVariant"],
+                "kbMatchedStatements": [matched_stmt],
             }
             gkb_matches.append(ipr_row)
-        #for ipr_row in get_ipr_statements_from_variants(graphkb_conn, msi_variants, disease_name):
-        #    ipr_row["variant"] = msi_category
-        #    ipr_row["variantType"] = "msi"
-        #    gkb_matches.append(ipr_row)
     return gkb_matches
 
 
 def annotate_tmb(
-    graphkb_conn: GraphKBConnection, disease_name: str = "cancer", category: str = TMB_HIGH_CATEGORY
+    graphkb_conn: GraphKBConnection,
+    disease_name: str = "cancer",
+    category: str = TMB_HIGH_CATEGORY,
 ) -> List[KbMatch]:
     """Annotate Tumour Mutation Burden (tmb) categories from GraphKB in the IPR alterations format.
 
@@ -401,17 +397,15 @@ def annotate_tmb(
     )
     if categories:
         cat_variants = [cast(Variant, var) for var in categories]
-        for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, cat_variants, disease_name):
+        for matched_stmt in get_ipr_statements_from_variants(
+            graphkb_conn, cat_variants, disease_name
+        ):
             ipr_row = {
-                'variant': category,
-                'variantType': 'tmb',
-                'kbVariantId': matched_stmt['kbVariantId'],
-                'kbVariant': matched_stmt['kbVariant'],
-                'kbMatchedStatements': [matched_stmt]
+                "variant": category,
+                "variantType": "tmb",
+                "kbVariantId": matched_stmt["kbVariantId"],
+                "kbVariant": matched_stmt["kbVariant"],
+                "kbMatchedStatements": [matched_stmt],
             }
             gkb_matches.append(ipr_row)
-        #for ipr_row in get_ipr_statements_from_variants(graphkb_conn, cat_variants, disease_name):
-        #    ipr_row["variant"] = category
-        #    ipr_row["variantType"] = "tmb"
-        #    gkb_matches.append(ipr_row)
     return gkb_matches
diff --git a/pori_python/ipr/ipr.py b/pori_python/ipr/ipr.py
@@ -245,7 +245,7 @@ def select_expression_plots(
     selected_variants = {
         (match["variantType"], match["variant"])
         for match in kb_matches
-        if "therapeutic" in [stmt['category'] for stmt in match['kbMatchedStatements']]
+        if "therapeutic" in [stmt["category"] for stmt in match["kbMatchedStatements"]]
     }
     images_by_gene: Dict[str, ImageDefinition] = {}
     selected_genes = set()
@@ -282,15 +282,9 @@ def create_key_alterations(
         variant_type = kb_match["variantType"]
         variant_key = kb_match["variant"]
 
-        # TODO nb need to support multiple categories now.
-        # need to check whether there are consequences from possibly having an 'unknown'
-        # element in the list since here we are only checking whether the list only includes
-        # unknown elements
-        match_categories = [item['category'] for item in kb_match['kbMatchedStatements']]
-        if list(set(match_categories)) == ['unknown']:
+        match_categories = [item["category"] for item in kb_match["kbMatchedStatements"]]
+        if list(set(match_categories)) == ["unknown"]:
             continue
-        #if kb_match["category"] == "unknown":
-        #    continue
 
         if variant_type not in type_mapping.keys():
             if variant_type not in skipped_variant_types:
@@ -312,9 +306,6 @@ def create_key_alterations(
             alterations.append(f'{variant.get("gene","")} ({variant.get("expressionState")})')
         elif variant_type == "cnv":
             alterations.append(f'{variant.get("gene","")} ({variant.get("cnvState")})')
-        # only show germline if relevant
-        #elif kb_match["category"] in GERMLINE_BASE_TERMS and variant.get("germline"):
-        #update the germline check to look for any in a list
         elif any(item in GERMLINE_BASE_TERMS for item in match_categories):
             alterations.append(f"germline {variant['variant']}")
         else:
@@ -335,7 +326,9 @@ def create_key_alterations(
 
 
 def germline_kb_matches(
-    kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant], assume_somatic: bool = True
+    kb_matches: List[Hashabledict],
+    all_variants: Sequence[IprVariant],
+    assume_somatic: bool = True,
 ) -> List[Hashabledict]:
     """Filter kb_matches for matching to germline or somatic events using the 'germline' optional property.
 
diff --git a/pori_python/ipr/summary.py b/pori_python/ipr/summary.py
@@ -11,7 +11,14 @@
 from pori_python.graphkb.util import convert_to_rid_list
 from pori_python.graphkb.vocab import get_term_tree
 from pori_python.ipr.inputs import create_graphkb_sv_notation
-from pori_python.types import Hashabledict, IprVariant, KbMatch, Ontology, Record, Statement
+from pori_python.types import (
+    Hashabledict,
+    IprVariant,
+    KbMatch,
+    Ontology,
+    Record,
+    Statement,
+)
 
 from .util import (
     convert_to_rid_set,
@@ -264,7 +271,9 @@ def create_section_html(
     for statement_id, sentence in sentences_by_statement_id.items():
         relevance = statements[statement_id]["relevance"]["@rid"]
         category = categorize_relevance(
-            graphkb_conn, relevance, RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])]
+            graphkb_conn,
+            relevance,
+            RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])],
         )
         sentence_categories[sentence] = category
 
@@ -274,7 +283,12 @@ def create_section_html(
             "target": "Feature",
             "filters": {
                 "AND": [
-                    {"source": {"target": "Source", "filters": {"name": "entrez gene"}}},
+                    {
+                        "source": {
+                            "target": "Source",
+                            "filters": {"name": "entrez gene"},
+                        }
+                    },
                     {"name": gene_name},
                     {"biotype": "gene"},
                 ]
@@ -311,7 +325,14 @@ def create_section_html(
         {
             s
             for (s, v) in sentence_categories.items()
-            if v not in ["diagnostic", "biological", "therapeutic", "prognostic", "resistance"]
+            if v
+            not in [
+                "diagnostic",
+                "biological",
+                "therapeutic",
+                "prognostic",
+                "resistance",
+            ]
         },
         {s for (s, v) in sentence_categories.items() if v == "resistance"},
     ]:
@@ -341,8 +362,7 @@ def section_statements_by_genes(
 
     return genes
 
-# TODO can bandaid this to work but will need some more thought to actually
-# be sure it makes sense for multivariant statement matches
+
 def auto_analyst_comments(
     graphkb_conn: GraphKBConnection,
     matches: Sequence[KbMatch] | Sequence[Hashabledict],
@@ -356,16 +376,14 @@ def auto_analyst_comments(
     variant_keys_by_statement_ids: Dict[str, Set[str]] = {}
 
     for match in matches:
-        for stmt in match['kbMatchedStatements']:
-            rid = stmt['kbStatementId']
-            exp_variant = match['variant']
-            # is it possible this already handles multiple variants for a single rid?
+        for stmt in match["kbMatchedStatements"]:
+            rid = stmt["kbStatementId"]
+            exp_variant = match["variant"]
             variant_keys_by_statement_ids.setdefault(rid, set()).add(exp_variant)
 
     exp_variants_by_statements: Dict[str, List[IprVariant]] = {}
     for rid, keys in variant_keys_by_statement_ids.items():
         try:
-            # preserves multiple variant matches?
             exp_variants_by_statements[rid] = [variants_by_keys[key] for key in keys]
         except KeyError as err:
             logger.warning(f"No specific variant matched for {rid}:{keys} - {err}")
@@ -377,7 +395,7 @@ def auto_analyst_comments(
 
     # get details for statements
     for match in matches:
-        for stmt in match['kbMatchedStatements']:
+        for stmt in match["kbMatchedStatements"]:
             rid = stmt["kbStatementId"].replace("#", "")
             result = graphkb_conn.request(f"/statements/{rid}?neighbors=1")["result"]
 
diff --git a/pori_python/ipr/therapeutic_options.py b/pori_python/ipr/therapeutic_options.py
@@ -31,12 +31,10 @@ def create_therapeutic_options(
 
     for match in kb_matches:
         row_type = "therapeutic"
-        # check multiple cateogires
-        for stmt in match['kbMatchedStatements']:
+        for stmt in match["kbMatchedStatements"]:
             if stmt["category"] != "therapeutic" or stmt["relevance"] == "eligibility":
                 continue
 
-            # check multiple relevances
             if stmt["kbRelevanceId"] in resistance_markers:
                 row_type = "chemoresistance"
 
diff --git a/tests/test_ipr/test_upload.py b/tests/test_ipr/test_upload.py
@@ -140,9 +140,7 @@ def loaded_reports(tmp_path_factory) -> Generator:
         "async": (async_patient_id, async_loaded_report),
     }
     yield loaded_reports_result
-    return
 
-    # TODO restore this - not deleting them for now, in order to view in client
     ipr_conn.delete(uri=f"reports/{loaded_report['reports'][0]['ident']}")
     ipr_conn.delete(uri=f"reports/{async_loaded_report['reports'][0]['ident']}")
 

Original file line number	Diff line number	Diff line change
`@@ -140,9 +140,7 @@ def loaded_reports(tmp_path_factory) -> Generator:`
`140`	`140`	`"async": (async_patient_id, async_loaded_report),`
`141`	`141`	`}`
`142`	`142`	`yield loaded_reports_result`
`143`		`- return`
`144`	`143`
`145`		`- # TODO restore this - not deleting them for now, in order to view in client`
`146`	`144`	`ipr_conn.delete(uri=f"reports/{loaded_report['reports'][0]['ident']}")`
`147`	`145`	`ipr_conn.delete(uri=f"reports/{async_loaded_report['reports'][0]['ident']}")`
`148`	`146`