Skip to content

Commit d604c77

Browse files
committed
add comment
1 parent 3b0d00e commit d604c77

File tree

1 file changed

+97
-67
lines changed

1 file changed

+97
-67
lines changed

pori_python/ipr/ipr.py

+97-67
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,13 @@ def filter_structural_variants(
4444
Filter structural variants to remove non-high quality events unless they are matched/annotated or
4545
they involve a gene that is a known fusion partner
4646
"""
47-
matched_svs = {match["variant"] for match in kb_matches if match["variantType"] == "sv"}
47+
matched_svs = {
48+
match["variant"] for match in kb_matches if match["variantType"] == "sv"
49+
}
4850
fusion_genes = {
49-
gene["name"] for gene in gene_annotations if gene.get("knownFusionPartner", False)
51+
gene["name"]
52+
for gene in gene_annotations
53+
if gene.get("knownFusionPartner", False)
5054
}
5155

5256
result = []
@@ -84,7 +88,9 @@ def get_evidencelevel_mapping(graphkb_conn: GraphKBConnection) -> Dict[str, str]
8488

8589
# Filter IPR EvidenceLevel and map each outgoing CrossReferenceOf to displayName
8690
ipr_source_rid = graphkb_conn.get_source("ipr")["@rid"]
87-
ipr_evidence_levels = filter(lambda d: d.get("source") == ipr_source_rid, evidence_levels)
91+
ipr_evidence_levels = filter(
92+
lambda d: d.get("source") == ipr_source_rid, evidence_levels
93+
)
8894
cross_references_mapping: Dict[str, str] = dict()
8995
ipr_rids_to_displayname: Dict[str, str] = dict()
9096
for level in ipr_evidence_levels:
@@ -132,7 +138,9 @@ def convert_statements_to_alterations(
132138
"""
133139
disease_matches = {
134140
r["@rid"]
135-
for r in gkb_vocab.get_term_tree(graphkb_conn, disease_name, ontology_class="Disease")
141+
for r in gkb_vocab.get_term_tree(
142+
graphkb_conn, disease_name, ontology_class="Disease"
143+
)
136144
}
137145

138146
if not disease_matches:
@@ -145,7 +153,9 @@ def convert_statements_to_alterations(
145153

146154
# get the recruitment status for any trial associated with a statement
147155
clinical_trials = [
148-
s["subject"]["@rid"] for s in statements if s["subject"]["@class"] == "ClinicalTrial"
156+
s["subject"]["@rid"]
157+
for s in statements
158+
if s["subject"]["@class"] == "ClinicalTrial"
149159
]
150160
recruitment_statuses = {}
151161
if clinical_trials:
@@ -162,7 +172,9 @@ def convert_statements_to_alterations(
162172

163173
for statement in statements:
164174
variants = [
165-
cast(Variant, c) for c in statement["conditions"] if c["@class"] in VARIANT_CLASSES
175+
cast(Variant, c)
176+
for c in statement["conditions"]
177+
if c["@class"] in VARIANT_CLASSES
166178
]
167179
diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"]
168180
disease_match = len(diseases) == 1 and diseases[0]["@rid"] in disease_matches
@@ -183,8 +195,12 @@ def convert_statements_to_alterations(
183195

184196
evidence_level_str = display_evidence_levels(statement)
185197
evidence_levels = statement.get("evidenceLevel") or []
186-
ipr_evidence_levels = [ev_map[el.get("@rid", "")] for el in evidence_levels if el]
187-
ipr_evidence_levels_str = ";".join(sorted(set([el for el in ipr_evidence_levels])))
198+
ipr_evidence_levels = [
199+
ev_map[el.get("@rid", "")] for el in evidence_levels if el
200+
]
201+
ipr_evidence_levels_str = ";".join(
202+
sorted(set([el for el in ipr_evidence_levels]))
203+
)
188204

189205
for variant in variants:
190206
if variant["@rid"] not in variant_matches:
@@ -194,10 +210,16 @@ def convert_statements_to_alterations(
194210
"approvedTherapy": approved_therapy or False,
195211
"category": ipr_section or "unknown",
196212
"context": (
197-
statement["subject"]["displayName"] if statement["subject"] else ""
213+
statement["subject"]["displayName"]
214+
if statement["subject"]
215+
else ""
216+
),
217+
"kbContextId": (
218+
statement["subject"]["@rid"] if statement["subject"] else ""
219+
),
220+
"disease": ";".join(
221+
sorted(d.get("displayName", "") for d in diseases)
198222
),
199-
"kbContextId": (statement["subject"]["@rid"] if statement["subject"] else ""),
200-
"disease": ";".join(sorted(d.get("displayName", "") for d in diseases)),
201223
"evidenceLevel": evidence_level_str or "",
202224
"iprEvidenceLevel": ipr_evidence_levels_str or "",
203225
"kbStatementId": statement["@rid"],
@@ -259,7 +281,9 @@ def select_expression_plots(
259281
gene = str(variant.get("gene", ""))
260282
hist = str(variant.get("histogramImage", ""))
261283
if hist:
262-
images_by_gene[gene] = ImageDefinition({"key": f"expDensity.{gene}", "path": hist})
284+
images_by_gene[gene] = ImageDefinition(
285+
{"key": f"expDensity.{gene}", "path": hist}
286+
)
263287
return [images_by_gene[gene] for gene in selected_genes if gene in images_by_gene]
264288

265289

@@ -302,7 +326,9 @@ def create_key_alterations(
302326
counts[type_mapping[variant_type]].add(variant_key)
303327

304328
if variant_type == "exp":
305-
alterations.append(f'{variant.get("gene","")} ({variant.get("expressionState")})')
329+
alterations.append(
330+
f'{variant.get("gene","")} ({variant.get("expressionState")})'
331+
)
306332
elif variant_type == "cnv":
307333
alterations.append(f'{variant.get("gene","")} ({variant.get("cnvState")})')
308334
# only show germline if relevant
@@ -326,7 +352,9 @@ def create_key_alterations(
326352

327353

328354
def germline_kb_matches(
329-
kb_matches: List[Hashabledict], all_variants: Sequence[IprVariant], assume_somatic: bool = True
355+
kb_matches: List[Hashabledict],
356+
all_variants: Sequence[IprVariant],
357+
assume_somatic: bool = True,
330358
) -> List[Hashabledict]:
331359
"""Filter kb_matches for matching to germline or somatic events using the 'germline' optional property.
332360
@@ -377,23 +405,27 @@ def germline_kb_matches(
377405
# Remove any matches to germline events
378406
for alt in somatic_alts:
379407
var_list = [v for v in all_variants if v["key"] == alt["variant"]]
380-
somatic_var_list = [v for v in var_list if not v.get("germline", not assume_somatic)]
408+
somatic_var_list = [
409+
v for v in var_list if not v.get("germline", not assume_somatic)
410+
]
381411
if var_list and not somatic_var_list:
382412
logger.debug(
383413
f"Dropping germline match to somatic statement kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}"
384414
)
385415
elif somatic_var_list:
386416
ret_list.append(alt) # match to somatic variant
387417
else:
388-
ret_list.append(alt) # alteration not in any specific keys matches to check.
418+
ret_list.append(
419+
alt
420+
) # alteration not in any specific keys matches to check.
389421

390422
return ret_list
391423

392424

393425
def multi_variant_filtering(
394426
graphkb_conn: GraphKBConnection,
395427
gkb_matches: List[KbMatch],
396-
excludedTypes: List[str] = ['wildtype'],
428+
excludedTypes: List[str] = ["wildtype"],
397429
) -> List[KbMatch]:
398430
"""Filters out GraphKB matches that doesn't match to all required variants on multi-variant statements
399431
@@ -413,8 +445,8 @@ def multi_variant_filtering(
413445
filtered list of KbMatch statements
414446
"""
415447
# All matching statements & variants (GKB RIDs)
416-
matching_statement_rids = {match['kbStatementId'] for match in gkb_matches}
417-
matching_variant_rids = {match['kbVariantId'] for match in gkb_matches}
448+
matching_statement_rids = {match["kbStatementId"] for match in gkb_matches}
449+
matching_variant_rids = {match["kbVariantId"] for match in gkb_matches}
418450

419451
# Get conditions detail on all matching statements
420452
res = graphkb_conn.post(
@@ -423,7 +455,7 @@ def multi_variant_filtering(
423455
"target": "Statement",
424456
"filters": {
425457
"@rid": list(matching_statement_rids),
426-
"operator": 'IN',
458+
"operator": "IN",
427459
},
428460
"history": True,
429461
"returnProperties": [
@@ -434,21 +466,21 @@ def multi_variant_filtering(
434466
],
435467
},
436468
)
437-
statements = res['result']
469+
statements = res["result"]
438470

439471
# Get set of excluded Vocabulary RIDs for variant types
440472
excluded = {}
441-
if len(excludedTypes) != 0 and excludedTypes[0] != '':
473+
if len(excludedTypes) != 0 and excludedTypes[0] != "":
442474
excluded = gkb_vocab.get_terms_set(graphkb_conn, excludedTypes)
443475

444476
# Mapping statements to their conditional variants
445477
# (discarding non-variant conditions & variant conditions from excluded types)
446478
statement_to_variants = {}
447479
for statement in statements:
448-
statement_to_variants[statement['@rid']] = {
449-
el['@rid']
450-
for el in statement['conditions']
451-
if (el['@class'] in VARIANT_CLASSES and el.get('type', '') not in excluded)
480+
statement_to_variants[statement["@rid"]] = {
481+
el["@rid"]
482+
for el in statement["conditions"]
483+
if (el["@class"] in VARIANT_CLASSES and el.get("type", "") not in excluded)
452484
}
453485

454486
# Set of statements with complete matching
@@ -460,56 +492,54 @@ def multi_variant_filtering(
460492

461493
# Filtering out incompleted matches of gkb_matches
462494
return [
463-
match for match in gkb_matches if match['kbStatementId'] in complete_matching_statements
495+
match
496+
for match in gkb_matches
497+
if match["kbStatementId"] in complete_matching_statements
464498
]
465499

# Tags naming the kb-matches table a statement is displayed in; written to
# kbData["kbmatchTag"] by assign_kb_match_tables().
THERAPEUTIC = "therapeutic"
BEST_THERAPEUTIC = "best_therapeutic"
PCP = "pcp"
DIAGNOSTIC = "diagnostic"
PROGNOSTIC = "prognostic"
BIOLOGICAL = "biological"
OTHER = "other"


def assign_kb_match_tables(gkb_matches: "List[KbMatch]") -> "List[KbMatch]":
    """
    Adds property kbmatchTag to kbData for kb statements. This property is used to
    determine which table in the kbmatches section the statement will be displayed in.

    NB the value of approvedTherapy is ignored if the category is not therapeutic.

    Each match is modified in place; the same list object is returned.

    Params:
        gkb_matches: KbMatch statements to be tagged. Every item is read for
            'category' and must already carry a 'kbData' mapping; therapeutic items
            are also read for 'approvedTherapy', 'matchedCancer' and
            'iprEvidenceLevel', and pharmacogenomic / cancer-predisposition items
            for 'germline'.
    Returns:
        list of KbMatch statements, each with kbData['kbmatchTag'] set
    """
    for item in gkb_matches:
        category = item["category"]
        if category == "therapeutic":
            if (
                item["approvedTherapy"]
                and item["matchedCancer"]
                and item["iprEvidenceLevel"] in ("IPR-A", "IPR-B")
            ):
                kbmatch_tag = BEST_THERAPEUTIC
            else:
                # Without this else the tag above was unconditionally overwritten,
                # so no statement could ever be tagged BEST_THERAPEUTIC.
                kbmatch_tag = THERAPEUTIC
        elif category in ("pharmacogenomic", "cancer-predisposition"):
            # Only germline events are tagged PCP; non-germline ones go to OTHER.
            kbmatch_tag = PCP if item["germline"] else OTHER
        elif category == "diagnostic":
            kbmatch_tag = DIAGNOSTIC
        elif category == "prognostic":
            kbmatch_tag = PROGNOSTIC
        elif category == "biological":
            kbmatch_tag = BIOLOGICAL
        else:  # category == 'unknown' or 'novel'
            kbmatch_tag = OTHER
        item["kbData"]["kbmatchTag"] = kbmatch_tag
    return gkb_matches

0 commit comments

Comments
 (0)