Skip to content

Commit 32173c6

Browse files
committed
cleanup
1 parent 03c9245 commit 32173c6

File tree

5 files changed

+88
-87
lines changed

5 files changed

+88
-87
lines changed

pori_python/ipr/annotate.py

+51-57
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,10 @@ def get_ipr_statements_from_variants(
8484
]
8585

8686
for ipr_row in convert_statements_to_alterations(
87-
graphkb_conn, inferred_statements, disease_name, convert_to_rid_set(inferred_matches)
87+
graphkb_conn,
88+
inferred_statements,
89+
disease_name,
90+
convert_to_rid_set(inferred_matches),
8891
):
8992
ipr_row["kbData"]["inferred"] = True
9093
rows.append(ipr_row)
@@ -122,16 +125,16 @@ def annotate_expression_variants(
122125
continue
123126
try:
124127
matches = gkb_match.match_expression_variant(graphkb_conn, gene, variant)
125-
for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
128+
for matched_stmt in get_ipr_statements_from_variants(
129+
graphkb_conn, matches, disease_name
130+
):
126131
ipr_row = {
127-
'variant': row['key'],
128-
'variantType': row.get('variantType', 'exp'),
129-
'kbVariantId': matched_stmt['kbVariantId'],
130-
'kbVariant': matched_stmt['kbVariant'],
131-
'kbMatchedStatements': [matched_stmt]
132+
"variant": row["key"],
133+
"variantType": row.get("variantType", "exp"),
134+
"kbVariantId": matched_stmt["kbVariantId"],
135+
"kbVariant": matched_stmt["kbVariant"],
136+
"kbMatchedStatements": [matched_stmt],
132137
}
133-
#ipr_row["variant"] = row["key"]
134-
#ipr_row["variantType"] = row.get("variantType", "exp")
135138
alterations.append(ipr_row)
136139
except FeatureNotFoundError as err:
137140
problem_genes.add(gene)
@@ -182,21 +185,17 @@ def annotate_copy_variants(
182185
continue
183186
try:
184187
matches = gkb_match.match_copy_variant(graphkb_conn, gene, variant)
185-
#for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
186-
# ipr_row["variant"] = row["key"]
187-
# ipr_row["variantType"] = row.get("variantType", "cnv")
188-
# alterations.append(ipr_row)
189-
for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
188+
for matched_stmt in get_ipr_statements_from_variants(
189+
graphkb_conn, matches, disease_name
190+
):
190191
ipr_row = {
191-
'variant': row['key'],
192-
'variantType': row.get('variantType', 'cnv'),
193-
'kbVariantId': matched_stmt['kbVariantId'],
194-
'kbVariant': matched_stmt['kbVariant'],
195-
'kbMatchedStatements': [matched_stmt]
192+
"variant": row["key"],
193+
"variantType": row.get("variantType", "cnv"),
194+
"kbVariantId": matched_stmt["kbVariantId"],
195+
"kbVariant": matched_stmt["kbVariant"],
196+
"kbMatchedStatements": [matched_stmt],
196197
}
197198
alterations.append(ipr_row)
198-
#ipr_row["variant"] = row["key"]
199-
#ipr_row["variantType"] = row.get("variantType", "exp")
200199
except FeatureNotFoundError as err:
201200
problem_genes.add(gene)
202201
logger.debug(f"Unrecognized gene ({gene} {variant}): {err}")
@@ -270,23 +269,17 @@ def annotate_positional_variants(
270269
matches = gkb_match.match_positional_variant(graphkb_conn, variant)
271270
else:
272271
raise parse_err
273-
for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
272+
for matched_stmt in get_ipr_statements_from_variants(
273+
graphkb_conn, matches, disease_name
274+
):
274275
ipr_row = {
275-
'variant': row['key'],
276-
'variantType': row.get("variantType", "mut" if row.get("gene") else "sv"),
277-
'kbVariant': matched_stmt['kbVariant'],
278-
'kbVariantId': matched_stmt['kbVariantId'],
279-
'kbMatchedStatements': [matched_stmt]
276+
"variant": row["key"],
277+
"variantType": row.get("variantType", "mut" if row.get("gene") else "sv"),
278+
"kbVariant": matched_stmt["kbVariant"],
279+
"kbVariantId": matched_stmt["kbVariantId"],
280+
"kbMatchedStatements": [matched_stmt],
280281
}
281282
alterations.append(Hashabledict(ipr_row))
282-
#for ipr_row in get_ipr_statements_from_variants(
283-
# graphkb_conn, matches, disease_name
284-
#):
285-
# ipr_row["variant"] = row["key"]
286-
# ipr_row["variantType"] = row.get(
287-
# "variantType", "mut" if row.get("gene") else "sv"
288-
# )
289-
# alterations.append(Hashabledict(ipr_row))
290283

291284
except FeatureNotFoundError as err:
292285
logger.debug(f"failed to match positional variants ({variant}): {err}")
@@ -344,7 +337,10 @@ def annotate_msi(
344337
"target": {
345338
"target": "CategoryVariant",
346339
"filters": {
347-
"reference1": {"target": "Signature", "filters": {"name": msi_category}}
340+
"reference1": {
341+
"target": "Signature",
342+
"filters": {"name": msi_category},
343+
}
348344
},
349345
},
350346
"queryType": "similarTo",
@@ -353,24 +349,24 @@ def annotate_msi(
353349
)
354350
if msi_categories:
355351
msi_variants = [cast(Variant, var) for var in msi_categories]
356-
for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, msi_variants, disease_name):
352+
for matched_stmt in get_ipr_statements_from_variants(
353+
graphkb_conn, msi_variants, disease_name
354+
):
357355
ipr_row = {
358-
'variant': msi_category,
359-
'variantType': 'msi',
360-
'kbVariantId': matched_stmt['kbVariantId'],
361-
'kbVariant': matched_stmt['kbVariant'],
362-
'kbMatchedStatements': [matched_stmt]
356+
"variant": msi_category,
357+
"variantType": "msi",
358+
"kbVariantId": matched_stmt["kbVariantId"],
359+
"kbVariant": matched_stmt["kbVariant"],
360+
"kbMatchedStatements": [matched_stmt],
363361
}
364362
gkb_matches.append(ipr_row)
365-
#for ipr_row in get_ipr_statements_from_variants(graphkb_conn, msi_variants, disease_name):
366-
# ipr_row["variant"] = msi_category
367-
# ipr_row["variantType"] = "msi"
368-
# gkb_matches.append(ipr_row)
369363
return gkb_matches
370364

371365

372366
def annotate_tmb(
373-
graphkb_conn: GraphKBConnection, disease_name: str = "cancer", category: str = TMB_HIGH_CATEGORY
367+
graphkb_conn: GraphKBConnection,
368+
disease_name: str = "cancer",
369+
category: str = TMB_HIGH_CATEGORY,
374370
) -> List[KbMatch]:
375371
"""Annotate Tumour Mutation Burden (tmb) categories from GraphKB in the IPR alterations format.
376372
@@ -401,17 +397,15 @@ def annotate_tmb(
401397
)
402398
if categories:
403399
cat_variants = [cast(Variant, var) for var in categories]
404-
for matched_stmt in get_ipr_statements_from_variants(graphkb_conn, cat_variants, disease_name):
400+
for matched_stmt in get_ipr_statements_from_variants(
401+
graphkb_conn, cat_variants, disease_name
402+
):
405403
ipr_row = {
406-
'variant': category,
407-
'variantType': 'tmb',
408-
'kbVariantId': matched_stmt['kbVariantId'],
409-
'kbVariant': matched_stmt['kbVariant'],
410-
'kbMatchedStatements': [matched_stmt]
404+
"variant": category,
405+
"variantType": "tmb",
406+
"kbVariantId": matched_stmt["kbVariantId"],
407+
"kbVariant": matched_stmt["kbVariant"],
408+
"kbMatchedStatements": [matched_stmt],
411409
}
412410
gkb_matches.append(ipr_row)
413-
#for ipr_row in get_ipr_statements_from_variants(graphkb_conn, cat_variants, disease_name):
414-
# ipr_row["variant"] = category
415-
# ipr_row["variantType"] = "tmb"
416-
# gkb_matches.append(ipr_row)
417411
return gkb_matches

pori_python/ipr/ipr.py

+6-13
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ def select_expression_plots(
245245
selected_variants = {
246246
(match["variantType"], match["variant"])
247247
for match in kb_matches
248-
if "therapeutic" in [stmt['category'] for stmt in match['kbMatchedStatements']]
248+
if "therapeutic" in [stmt["category"] for stmt in match["kbMatchedStatements"]]
249249
}
250250
images_by_gene: Dict[str, ImageDefinition] = {}
251251
selected_genes = set()
@@ -282,15 +282,9 @@ def create_key_alterations(
282282
variant_type = kb_match["variantType"]
283283
variant_key = kb_match["variant"]
284284

285-
# TODO nb need to support multiple categories now.
286-
# need to check whether there are consequences from possibly having an 'unknown'
287-
# element in the list since here we are only checking whether the list only includes
288-
# unknown elements
289-
match_categories = [item['category'] for item in kb_match['kbMatchedStatements']]
290-
if list(set(match_categories)) == ['unknown']:
285+
match_categories = [item["category"] for item in kb_match["kbMatchedStatements"]]
286+
if list(set(match_categories)) == ["unknown"]:
291287
continue
292-
#if kb_match["category"] == "unknown":
293-
# continue
294288

295289
if variant_type not in type_mapping.keys():
296290
if variant_type not in skipped_variant_types:
@@ -312,9 +306,6 @@ def create_key_alterations(
312306
alterations.append(f'{variant.get("gene","")} ({variant.get("expressionState")})')
313307
elif variant_type == "cnv":
314308
alterations.append(f'{variant.get("gene","")} ({variant.get("cnvState")})')
315-
# only show germline if relevant
316-
#elif kb_match["category"] in GERMLINE_BASE_TERMS and variant.get("germline"):
317-
#update the germline check to look for any in a list
318309
elif any(item in GERMLINE_BASE_TERMS for item in match_categories):
319310
alterations.append(f"germline {variant['variant']}")
320311
else:
@@ -335,7 +326,9 @@ def create_key_alterations(
335326

336327

337328
def germline_kb_matches(
338-
kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant], assume_somatic: bool = True
329+
kb_matches: List[Hashabledict],
330+
all_variants: Sequence[IprVariant],
331+
assume_somatic: bool = True,
339332
) -> List[Hashabledict]:
340333
"""Filter kb_matches for matching to germline or somatic events using the 'germline' optional property.
341334

pori_python/ipr/summary.py

+30-12
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,14 @@
1111
from pori_python.graphkb.util import convert_to_rid_list
1212
from pori_python.graphkb.vocab import get_term_tree
1313
from pori_python.ipr.inputs import create_graphkb_sv_notation
14-
from pori_python.types import Hashabledict, IprVariant, KbMatch, Ontology, Record, Statement
14+
from pori_python.types import (
15+
Hashabledict,
16+
IprVariant,
17+
KbMatch,
18+
Ontology,
19+
Record,
20+
Statement,
21+
)
1522

1623
from .util import (
1724
convert_to_rid_set,
@@ -264,7 +271,9 @@ def create_section_html(
264271
for statement_id, sentence in sentences_by_statement_id.items():
265272
relevance = statements[statement_id]["relevance"]["@rid"]
266273
category = categorize_relevance(
267-
graphkb_conn, relevance, RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])]
274+
graphkb_conn,
275+
relevance,
276+
RELEVANCE_BASE_TERMS + [("resistance", ["no sensitivity"])],
268277
)
269278
sentence_categories[sentence] = category
270279

@@ -274,7 +283,12 @@ def create_section_html(
274283
"target": "Feature",
275284
"filters": {
276285
"AND": [
277-
{"source": {"target": "Source", "filters": {"name": "entrez gene"}}},
286+
{
287+
"source": {
288+
"target": "Source",
289+
"filters": {"name": "entrez gene"},
290+
}
291+
},
278292
{"name": gene_name},
279293
{"biotype": "gene"},
280294
]
@@ -311,7 +325,14 @@ def create_section_html(
311325
{
312326
s
313327
for (s, v) in sentence_categories.items()
314-
if v not in ["diagnostic", "biological", "therapeutic", "prognostic", "resistance"]
328+
if v
329+
not in [
330+
"diagnostic",
331+
"biological",
332+
"therapeutic",
333+
"prognostic",
334+
"resistance",
335+
]
315336
},
316337
{s for (s, v) in sentence_categories.items() if v == "resistance"},
317338
]:
@@ -341,8 +362,7 @@ def section_statements_by_genes(
341362

342363
return genes
343364

344-
# TODO can bandaid this to work but will need some more thought to actually
345-
# be sure it makes sense for multivariant statement matches
365+
346366
def auto_analyst_comments(
347367
graphkb_conn: GraphKBConnection,
348368
matches: Sequence[KbMatch] | Sequence[Hashabledict],
@@ -356,16 +376,14 @@ def auto_analyst_comments(
356376
variant_keys_by_statement_ids: Dict[str, Set[str]] = {}
357377

358378
for match in matches:
359-
for stmt in match['kbMatchedStatements']:
360-
rid = stmt['kbStatementId']
361-
exp_variant = match['variant']
362-
# is it possible this already handles multiple variants for a single rid?
379+
for stmt in match["kbMatchedStatements"]:
380+
rid = stmt["kbStatementId"]
381+
exp_variant = match["variant"]
363382
variant_keys_by_statement_ids.setdefault(rid, set()).add(exp_variant)
364383

365384
exp_variants_by_statements: Dict[str, List[IprVariant]] = {}
366385
for rid, keys in variant_keys_by_statement_ids.items():
367386
try:
368-
# preserves multiple variant matches?
369387
exp_variants_by_statements[rid] = [variants_by_keys[key] for key in keys]
370388
except KeyError as err:
371389
logger.warning(f"No specific variant matched for {rid}:{keys} - {err}")
@@ -377,7 +395,7 @@ def auto_analyst_comments(
377395

378396
# get details for statements
379397
for match in matches:
380-
for stmt in match['kbMatchedStatements']:
398+
for stmt in match["kbMatchedStatements"]:
381399
rid = stmt["kbStatementId"].replace("#", "")
382400
result = graphkb_conn.request(f"/statements/{rid}?neighbors=1")["result"]
383401

pori_python/ipr/therapeutic_options.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -31,12 +31,10 @@ def create_therapeutic_options(
3131

3232
for match in kb_matches:
3333
row_type = "therapeutic"
34-
# check multiple cateogires
35-
for stmt in match['kbMatchedStatements']:
34+
for stmt in match["kbMatchedStatements"]:
3635
if stmt["category"] != "therapeutic" or stmt["relevance"] == "eligibility":
3736
continue
3837

39-
# check multiple relevances
4038
if stmt["kbRelevanceId"] in resistance_markers:
4139
row_type = "chemoresistance"
4240

tests/test_ipr/test_upload.py

-2
Original file line number | Diff line number | Diff line change
@@ -140,9 +140,7 @@ def loaded_reports(tmp_path_factory) -> Generator:
140140
"async": (async_patient_id, async_loaded_report),
141141
}
142142
yield loaded_reports_result
143-
return
144143

145-
# TODO restore this - not deleting them for now, in order to view in client
146144
ipr_conn.delete(uri=f"reports/{loaded_report['reports'][0]['ident']}")
147145
ipr_conn.delete(uri=f"reports/{async_loaded_report['reports'][0]['ident']}")
148146

0 commit comments

Comments
 (0)