Skip to content

Commit 4996001

Browse files
committed
commit to save
1 parent dbadfb2 commit 4996001

File tree

6 files changed

+241
-129
lines changed

6 files changed

+241
-129
lines changed

pori_python/ipr/annotate.py

+53-47
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ def get_second_pass_variants(
4747
}
4848

4949
for reference1, variant_type in inferred_variants:
50-
variants = gkb_match.match_category_variant(graphkb_conn, reference1, variant_type)
50+
variants = gkb_match.match_category_variant(
51+
graphkb_conn, reference1, variant_type
52+
)
5153

5254
for variant in variants:
5355
all_inferred_matches[variant["@rid"]] = variant
@@ -80,7 +82,8 @@ def get_ipr_statements_from_variants(
8082
inferred_statements = [
8183
s
8284
for s in get_statements_from_variants(graphkb_conn, inferred_matches)
83-
if s["@rid"] not in existing_statements # do not duplicate if non-inferred match
85+
if s["@rid"]
86+
not in existing_statements # do not duplicate if non-inferred match
8487
]
8588

8689
for ipr_row in convert_statements_to_alterations(
@@ -125,16 +128,15 @@ def annotate_expression_variants(
125128
continue
126129
try:
127130
matches = gkb_match.match_expression_variant(graphkb_conn, gene, variant)
128-
for matched_stmt in get_ipr_statements_from_variants(
131+
for ipr_row in get_ipr_statements_from_variants(
129132
graphkb_conn, matches, disease_name
130133
):
131-
ipr_row = {
132-
"variant": row["key"],
133-
"variantType": row.get("variantType", "exp"),
134-
"kbVariantId": matched_stmt["kbVariantId"],
135-
"kbVariant": matched_stmt["kbVariant"],
136-
"kbMatchedStatements": [matched_stmt],
137-
}
134+
ipr_row["variant"] = row["key"]
135+
ipr_row["variantType"] = row.get("variantType", "exp")
136+
# "kbVariantId": matched_stmt["kbVariantId"],
137+
# "kbVariant": matched_stmt["kbVariant"],
138+
# "kbMatchedStatements": [matched_stmt],
139+
138140
alterations.append(ipr_row)
139141
except FeatureNotFoundError as err:
140142
problem_genes.add(gene)
@@ -181,20 +183,20 @@ def annotate_copy_variants(
181183
if variant not in REPORTED_COPY_VARIANTS:
182184
# https://www.bcgsc.ca/jira/browse/GERO-77
183185
skipped += 1
184-
logger.debug(f"Dropping {gene} copy change '{variant}' - not in REPORTED_COPY_VARIANTS")
186+
logger.debug(
187+
f"Dropping {gene} copy change '{variant}' - not in REPORTED_COPY_VARIANTS"
188+
)
185189
continue
186190
try:
187191
matches = gkb_match.match_copy_variant(graphkb_conn, gene, variant)
188-
for matched_stmt in get_ipr_statements_from_variants(
192+
for ipr_row in get_ipr_statements_from_variants(
189193
graphkb_conn, matches, disease_name
190194
):
191-
ipr_row = {
192-
"variant": row["key"],
193-
"variantType": row.get("variantType", "cnv"),
194-
"kbVariantId": matched_stmt["kbVariantId"],
195-
"kbVariant": matched_stmt["kbVariant"],
196-
"kbMatchedStatements": [matched_stmt],
197-
}
195+
ipr_row["variant"] = row["key"]
196+
ipr_row["variantType"] = row.get("variantType", "cnv")
197+
# "kbVariantId": matched_stmt["kbVariantId"],
198+
# "kbVariant": matched_stmt["kbVariant"],
199+
# "kbMatchedStatements": [matched_stmt],
198200
alterations.append(ipr_row)
199201
except FeatureNotFoundError as err:
200202
problem_genes.add(gene)
@@ -208,7 +210,9 @@ def annotate_copy_variants(
208210
)
209211
if problem_genes:
210212
logger.error(f"gene finding failures for copy variants {sorted(problem_genes)}")
211-
logger.error(f"gene finding failure for {len(problem_genes)} copy variant genes")
213+
logger.error(
214+
f"gene finding failure for {len(problem_genes)} copy variant genes"
215+
)
212216
logger.info(
213217
f"matched {len(variants)} copy category variants to {len(alterations)} graphkb annotations"
214218
)
@@ -266,19 +270,21 @@ def annotate_positional_variants(
266270
f"Assuming malformed deletion variant {variant} is {variant[:-2] + 'del'}"
267271
)
268272
variant = variant[:-2] + "del"
269-
matches = gkb_match.match_positional_variant(graphkb_conn, variant)
273+
matches = gkb_match.match_positional_variant(
274+
graphkb_conn, variant
275+
)
270276
else:
271277
raise parse_err
272-
for matched_stmt in get_ipr_statements_from_variants(
278+
for ipr_row in get_ipr_statements_from_variants(
273279
graphkb_conn, matches, disease_name
274280
):
275-
ipr_row = {
276-
"variant": row["key"],
277-
"variantType": row.get("variantType", "mut" if row.get("gene") else "sv"),
278-
"kbVariant": matched_stmt["kbVariant"],
279-
"kbVariantId": matched_stmt["kbVariantId"],
280-
"kbMatchedStatements": [matched_stmt],
281-
}
281+
ipr_row["variant"] = row["key"]
282+
ipr_row["variantType"] = row.get(
283+
"variantType", "mut" if row.get("gene") else "sv"
284+
)
285+
# "kbVariant": matched_stmt["kbVariant"],
286+
# "kbVariantId": matched_stmt["kbVariantId"],
287+
# "kbMatchedStatements": [matched_stmt],
282288
alterations.append(Hashabledict(ipr_row))
283289

284290
except FeatureNotFoundError as err:
@@ -301,7 +307,9 @@ def annotate_positional_variants(
301307

302308
if problem_genes:
303309
logger.error(f"gene finding failures for {sorted(problem_genes)}")
304-
logger.error(f"{len(problem_genes)} gene finding failures for positional variants")
310+
logger.error(
311+
f"{len(problem_genes)} gene finding failures for positional variants"
312+
)
305313
if errors:
306314
logger.error(f"skipped {errors} positional variants due to errors")
307315

@@ -349,16 +357,14 @@ def annotate_msi(
349357
)
350358
if msi_categories:
351359
msi_variants = [cast(Variant, var) for var in msi_categories]
352-
for matched_stmt in get_ipr_statements_from_variants(
360+
for ipr_row in get_ipr_statements_from_variants(
353361
graphkb_conn, msi_variants, disease_name
354362
):
355-
ipr_row = {
356-
"variant": msi_category,
357-
"variantType": "msi",
358-
"kbVariantId": matched_stmt["kbVariantId"],
359-
"kbVariant": matched_stmt["kbVariant"],
360-
"kbMatchedStatements": [matched_stmt],
361-
}
363+
ipr_row["variant"] = msi_category
364+
ipr_row["variantType"] = "msi"
365+
# "kbVariantId": matched_stmt["kbVariantId"],
366+
# "kbVariant": matched_stmt["kbVariant"],
367+
# "kbMatchedStatements": [matched_stmt],
362368
gkb_matches.append(ipr_row)
363369
return gkb_matches
364370

@@ -387,7 +393,9 @@ def annotate_tmb(
387393
"filters": {
388394
"reference1": {
389395
"target": "Signature",
390-
"filters": {"OR": [{"name": category}, {"displayName": category}]},
396+
"filters": {
397+
"OR": [{"name": category}, {"displayName": category}]
398+
},
391399
}
392400
},
393401
},
@@ -397,15 +405,13 @@ def annotate_tmb(
397405
)
398406
if categories:
399407
cat_variants = [cast(Variant, var) for var in categories]
400-
for matched_stmt in get_ipr_statements_from_variants(
408+
for ipr_row in get_ipr_statements_from_variants(
401409
graphkb_conn, cat_variants, disease_name
402410
):
403-
ipr_row = {
404-
"variant": category,
405-
"variantType": "tmb",
406-
"kbVariantId": matched_stmt["kbVariantId"],
407-
"kbVariant": matched_stmt["kbVariant"],
408-
"kbMatchedStatements": [matched_stmt],
409-
}
411+
ipr_row["variant"] = category
412+
ipr_row["variantType"] = "tmb"
413+
# "kbVariantId": matched_stmt["kbVariantId"],
414+
# "kbVariant": matched_stmt["kbVariant"],
415+
# "kbMatchedStatements": [matched_stmt],
410416
gkb_matches.append(ipr_row)
411417
return gkb_matches

0 commit comments

Comments
 (0)