Skip to content

Commit 0c8a66f

Browse files
committed
lint
1 parent f5377d3 commit 0c8a66f

File tree

4 files changed

+51
-156
lines changed

4 files changed

+51
-156
lines changed

pori_python/ipr/ipr.py

+19-59
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,9 @@ def filter_structural_variants(
4949
Filter structural variants to remove non-high quality events unless they are matched/annotated or
5050
they involve a gene that is a known fusion partner
5151
"""
52-
matched_svs = {
53-
match["variant"] for match in kb_matches if match["variantType"] == "sv"
54-
}
52+
matched_svs = {match["variant"] for match in kb_matches if match["variantType"] == "sv"}
5553
fusion_genes = {
56-
gene["name"]
57-
for gene in gene_annotations
58-
if gene.get("knownFusionPartner", False)
54+
gene["name"] for gene in gene_annotations if gene.get("knownFusionPartner", False)
5955
}
6056

6157
result = []
@@ -93,9 +89,7 @@ def get_evidencelevel_mapping(graphkb_conn: GraphKBConnection) -> Dict[str, str]
9389

9490
# Filter IPR EvidenceLevel and map each outgoing CrossReferenceOf to displayName
9591
ipr_source_rid = graphkb_conn.get_source("ipr")["@rid"]
96-
ipr_evidence_levels = filter(
97-
lambda d: d.get("source") == ipr_source_rid, evidence_levels
98-
)
92+
ipr_evidence_levels = filter(lambda d: d.get("source") == ipr_source_rid, evidence_levels)
9993
cross_references_mapping: Dict[str, str] = dict()
10094
ipr_rids_to_displayname: Dict[str, str] = dict()
10195
for level in ipr_evidence_levels:
@@ -144,9 +138,7 @@ def convert_statements_to_alterations(
144138
"""
145139
disease_matches = {
146140
r["@rid"]
147-
for r in gkb_vocab.get_term_tree(
148-
graphkb_conn, disease_name, ontology_class="Disease"
149-
)
141+
for r in gkb_vocab.get_term_tree(graphkb_conn, disease_name, ontology_class="Disease")
150142
}
151143

152144
if not disease_matches:
@@ -159,9 +151,7 @@ def convert_statements_to_alterations(
159151

160152
# get the recruitment status for any trial associated with a statement
161153
clinical_trials = [
162-
s["subject"]["@rid"]
163-
for s in statements
164-
if s["subject"]["@class"] == "ClinicalTrial"
154+
s["subject"]["@rid"] for s in statements if s["subject"]["@class"] == "ClinicalTrial"
165155
]
166156
recruitment_statuses = {}
167157
if clinical_trials:
@@ -178,9 +168,7 @@ def convert_statements_to_alterations(
178168

179169
for statement in statements:
180170
variants = [
181-
cast(Variant, c)
182-
for c in statement["conditions"]
183-
if c["@class"] in VARIANT_CLASSES
171+
cast(Variant, c) for c in statement["conditions"] if c["@class"] in VARIANT_CLASSES
184172
]
185173
diseases = [c for c in statement["conditions"] if c["@class"] == "Disease"]
186174
disease_match = len(diseases) == 1 and diseases[0]["@rid"] in disease_matches
@@ -201,12 +189,8 @@ def convert_statements_to_alterations(
201189

202190
evidence_level_str = display_evidence_levels(statement)
203191
evidence_levels = statement.get("evidenceLevel") or []
204-
ipr_evidence_levels = [
205-
ev_map[el.get("@rid", "")] for el in evidence_levels if el
206-
]
207-
ipr_evidence_levels_str = ";".join(
208-
sorted(set([el for el in ipr_evidence_levels]))
209-
)
192+
ipr_evidence_levels = [ev_map[el.get("@rid", "")] for el in evidence_levels if el]
193+
ipr_evidence_levels_str = ";".join(sorted(set([el for el in ipr_evidence_levels])))
210194

211195
for variant in variants:
212196
if variant["@rid"] not in variant_matches:
@@ -216,16 +200,10 @@ def convert_statements_to_alterations(
216200
"approvedTherapy": approved_therapy or False,
217201
"category": ipr_section or "unknown",
218202
"context": (
219-
statement["subject"]["displayName"]
220-
if statement["subject"]
221-
else ""
222-
),
223-
"kbContextId": (
224-
statement["subject"]["@rid"] if statement["subject"] else ""
225-
),
226-
"disease": ";".join(
227-
sorted(d.get("displayName", "") for d in diseases)
203+
statement["subject"]["displayName"] if statement["subject"] else ""
228204
),
205+
"kbContextId": (statement["subject"]["@rid"] if statement["subject"] else ""),
206+
"disease": ";".join(sorted(d.get("displayName", "") for d in diseases)),
229207
"evidenceLevel": evidence_level_str or "",
230208
"iprEvidenceLevel": ipr_evidence_levels_str or "",
231209
"kbStatementId": statement["@rid"],
@@ -288,9 +266,7 @@ def select_expression_plots(
288266
gene = str(variant.get("gene", ""))
289267
hist = str(variant.get("histogramImage", ""))
290268
if hist:
291-
images_by_gene[gene] = ImageDefinition(
292-
{"key": f"expDensity.{gene}", "path": hist}
293-
)
269+
images_by_gene[gene] = ImageDefinition({"key": f"expDensity.{gene}", "path": hist})
294270
return [images_by_gene[gene] for gene in selected_genes if gene in images_by_gene]
295271

296272

@@ -333,9 +309,7 @@ def create_key_alterations(
333309
counts[type_mapping[variant_type]].add(variant_key)
334310

335311
if variant_type == "exp":
336-
alterations.append(
337-
f'{variant.get("gene","")} ({variant.get("expressionState")})'
338-
)
312+
alterations.append(f'{variant.get("gene","")} ({variant.get("expressionState")})')
339313
elif variant_type == "cnv":
340314
alterations.append(f'{variant.get("gene","")} ({variant.get("cnvState")})')
341315
# only show germline if relevant
@@ -412,19 +386,15 @@ def germline_kb_matches(
412386
# Remove any matches to germline events
413387
for alt in somatic_alts:
414388
var_list = [v for v in all_variants if v["key"] == alt["variant"]]
415-
somatic_var_list = [
416-
v for v in var_list if not v.get("germline", not assume_somatic)
417-
]
389+
somatic_var_list = [v for v in var_list if not v.get("germline", not assume_somatic)]
418390
if var_list and not somatic_var_list:
419391
logger.debug(
420392
f"Dropping germline match to somatic statement kbStatementId:{alt['kbStatementId']}: {alt['kbVariant']} {alt['category']}"
421393
)
422394
elif somatic_var_list:
423395
ret_list.append(alt) # match to somatic variant
424396
else:
425-
ret_list.append(
426-
alt
427-
) # alteration not in any specific keys matches to check.
397+
ret_list.append(alt) # alteration not in any specific keys matches to check.
428398

429399
return ret_list
430400

@@ -499,9 +469,7 @@ def multi_variant_filtering(
499469

500470
# Filtering out incomplete matches of gkb_matches
501471
return [
502-
match
503-
for match in gkb_matches
504-
if match["kbStatementId"] in complete_matching_statements
472+
match for match in gkb_matches if match["kbStatementId"] in complete_matching_statements
505473
]
506474

507475

@@ -547,9 +515,7 @@ def get_kb_matched_statements(
547515
for item in gkb_matches:
548516
stmt = copy(item)
549517
stmt["requiredKbMatches"].sort()
550-
kbs = KbMatchedStatement(
551-
{key: val for (key, val) in stmt.items() if key in kbs_keys}
552-
)
518+
kbs = KbMatchedStatement({key: val for (key, val) in stmt.items() if key in kbs_keys})
553519
dict_key = str(kbs)
554520
kbMatchedStatements[dict_key] = kbs
555521
return [*kbMatchedStatements.values()]
@@ -591,11 +557,7 @@ def get_kb_statement_matched_conditions(
591557
kbMatchedStatementConditions = {}
592558

593559
for kbStmt in kbMatchedStatements:
594-
stmts = [
595-
item
596-
for item in gkb_matches
597-
if item["kbStatementId"] == kbStmt["kbStatementId"]
598-
]
560+
stmts = [item for item in gkb_matches if item["kbStatementId"] == kbStmt["kbStatementId"]]
599561

600562
requirements = {}
601563
for requirement in stmts[0]["requiredKbMatches"]:
@@ -616,9 +578,7 @@ def get_kb_statement_matched_conditions(
616578

617579
# remove empty sets from requirements if allowing partial matches
618580
if allow_partial_matches:
619-
requirements = {
620-
key: val for (key, val) in requirements.items() if len(val) > 0
621-
}
581+
requirements = {key: val for (key, val) in requirements.items() if len(val) > 0}
622582

623583
variantConditionSets = list(product(*requirements.values()))
624584
conditionSets = [

pori_python/ipr/main.py

+17-50
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@
5353

5454
def file_path(path: str) -> str:
5555
if not os.path.exists(path):
56-
raise argparse.ArgumentTypeError(
57-
f"{repr(path)} is not a valid filename. does not exist"
58-
)
56+
raise argparse.ArgumentTypeError(f"{repr(path)} is not a valid filename. does not exist")
5957
return path
6058

6159

@@ -72,9 +70,7 @@ def command_interface() -> None:
7270
default=os.environ.get("USER"),
7371
help="username to use connecting to graphkb/ipr",
7472
)
75-
req.add_argument(
76-
"--password", required=True, help="password to use connecting to graphkb/ipr"
77-
)
73+
req.add_argument("--password", required=True, help="password to use connecting to graphkb/ipr")
7874
req.add_argument(
7975
"-c", "--content", required=True, type=file_path, help="Report Content as JSON"
8076
)
@@ -168,9 +164,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict
168164
and "genesCreate" in ipr_spec["components"]["schemas"].keys()
169165
and "properties" in ipr_spec["components"]["schemas"]["genesCreate"].keys()
170166
):
171-
genes_spec = ipr_spec["components"]["schemas"]["genesCreate"][
172-
"properties"
173-
].keys()
167+
genes_spec = ipr_spec["components"]["schemas"]["genesCreate"]["properties"].keys()
174168

175169
# check what ipr report upload expects and adjust contents to match
176170
for old_name, new_name in RENAMED_GENE_PROPERTIES.items():
@@ -205,9 +199,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict
205199
removed_keys[key] = 1
206200
gene.pop(key)
207201
for key, count in removed_keys.items():
208-
logger.warning(
209-
f"IPR unsupported property '{key}' removed from {count} genes."
210-
)
202+
logger.warning(f"IPR unsupported property '{key}' removed from {count} genes.")
211203

212204
drop_columns = ["variant", "variantType", "histogramImage"]
213205
# DEVSU-2034 - use a 'displayName'
@@ -224,9 +216,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict
224216
for variant in upload_content.get(variant_list_section, []):
225217
if not variant.get("displayName"):
226218
variant["displayName"] = (
227-
variant.get("variant")
228-
or variant.get("kbCategory")
229-
or variant.get("key", "")
219+
variant.get("variant") or variant.get("kbCategory") or variant.get("key", "")
230220
)
231221
if variant_list_section == "probeResults":
232222
# currently probeResults will error if they do NOT have a 'variant' column.
@@ -255,9 +245,7 @@ def clean_unsupported_content(upload_content: Dict, ipr_spec: Dict = {}) -> Dict
255245

256246

257247
def create_report(**kwargs) -> Dict:
258-
logger.warning(
259-
"Deprecated function 'create_report' called - use ipr_report instead"
260-
)
248+
logger.warning("Deprecated function 'create_report' called - use ipr_report instead")
261249
return ipr_report(**kwargs)
262250

263251

@@ -329,22 +317,16 @@ def ipr_report(
329317
try:
330318
validate_report_content(content)
331319
except jsonschema.exceptions.ValidationError as err:
332-
logger.error(
333-
"Failed schema check - report variants may be corrupted or unmatched."
334-
)
320+
logger.error("Failed schema check - report variants may be corrupted or unmatched.")
335321
logger.error(f"Failed schema check: {err}")
336322

337323
kb_disease_match = content["kbDiseaseMatch"]
338324

339325
# validate the input variants
340326
small_mutations = preprocess_small_mutations(content.get("smallMutations", []))
341-
structural_variants = preprocess_structural_variants(
342-
content.get("structuralVariants", [])
343-
)
327+
structural_variants = preprocess_structural_variants(content.get("structuralVariants", []))
344328
copy_variants = preprocess_copy_variants(content.get("copyVariants", []))
345-
expression_variants = preprocess_expression_variants(
346-
content.get("expressionVariants", [])
347-
)
329+
expression_variants = preprocess_expression_variants(content.get("expressionVariants", []))
348330
if expression_variants:
349331
check_comparators(content, expression_variants)
350332

@@ -393,9 +375,7 @@ def ipr_report(
393375
tmb["kbCategory"] = TMB_HIGH_CATEGORY
394376

395377
# GERO-296 - try matching to graphkb
396-
tmb_matches = annotate_tmb(
397-
graphkb_conn, kb_disease_match, TMB_HIGH_CATEGORY
398-
)
378+
tmb_matches = annotate_tmb(graphkb_conn, kb_disease_match, TMB_HIGH_CATEGORY)
399379
if tmb_matches:
400380
tmb_variant["kbCategory"] = TMB_HIGH_CATEGORY # type: ignore
401381
tmb_variant["variant"] = TMB_HIGH_CATEGORY
@@ -404,9 +384,7 @@ def ipr_report(
404384
logger.info(
405385
f"GERO-296 '{TMB_HIGH_CATEGORY}' matches {len(tmb_matches)} statements."
406386
)
407-
gkb_matches.extend(
408-
[Hashabledict(tmb_statement) for tmb_statement in tmb_matches]
409-
)
387+
gkb_matches.extend([Hashabledict(tmb_statement) for tmb_statement in tmb_matches])
410388
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")
411389

412390
# MATCHING MSI
@@ -429,9 +407,7 @@ def ipr_report(
429407
msi_variant["variant"] = msi_cat
430408
msi_variant["key"] = msi_cat
431409
msi_variant["variantType"] = "msi"
432-
logger.info(
433-
f"GERO-295 '{msi_cat}' matches {len(msi_matches)} msi statements."
434-
)
410+
logger.info(f"GERO-295 '{msi_cat}' matches {len(msi_matches)} msi statements.")
435411
gkb_matches.extend([Hashabledict(msi) for msi in msi_matches])
436412
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")
437413

@@ -496,20 +472,15 @@ def ipr_report(
496472
# verify germline kb statements matched germline observed variants, not somatic variants
497473
org_len = len(gkb_matches)
498474
gkb_matches = [
499-
Hashabledict(match)
500-
for match in germline_kb_matches(gkb_matches, all_variants)
475+
Hashabledict(match) for match in germline_kb_matches(gkb_matches, all_variants)
501476
]
502477
num_removed = org_len - len(gkb_matches)
503478
if num_removed:
504-
logger.info(
505-
f"Removing {num_removed} germline events without medical matches."
506-
)
479+
logger.info(f"Removing {num_removed} germline events without medical matches.")
507480

508481
if custom_kb_match_filter:
509482
logger.info(f"custom_kb_match_filter on {len(gkb_matches)} variants")
510-
gkb_matches = [
511-
Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)
512-
]
483+
gkb_matches = [Hashabledict(match) for match in custom_kb_match_filter(gkb_matches)]
513484
logger.info(f"\t custom_kb_match_filter left {len(gkb_matches)} variants")
514485

515486
# TODO: can probably be removed with change to kbmatch processing, but double check
@@ -576,9 +547,7 @@ def ipr_report(
576547
# remove after testing
577548
# "kbMatches": [trim_empty_values(a) for a in gkb_matches], # type: ignore
578549
"copyVariants": [
579-
trim_empty_values(c)
580-
for c in copy_variants
581-
if c["gene"] in genes_with_variants
550+
trim_empty_values(c) for c in copy_variants if c["gene"] in genes_with_variants
582551
],
583552
"smallMutations": [trim_empty_values(s) for s in small_mutations],
584553
"expressionVariants": [
@@ -602,9 +571,7 @@ def ipr_report(
602571
"therapeuticTarget": targets,
603572
}
604573
)
605-
output.setdefault("images", []).extend(
606-
select_expression_plots(gkb_matches, all_variants)
607-
)
574+
output.setdefault("images", []).extend(select_expression_plots(gkb_matches, all_variants))
608575

609576
output = clean_unsupported_content(output, ipr_spec)
610577
ipr_result = None

0 commit comments

Comments
 (0)