Skip to content

Commit b20acc2

Browse files
committed
Slight speed increase from appending variants to a plain list and heapifying it once, instead of pushing each variant onto a heap with heappush
1 parent 2b2b55c commit b20acc2

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

pgscatalog.match/src/pgscatalog/match/cli/intersect_cli.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def run_intersect():
1919
logger.debug("Verbose logging enabled")
2020

2121
# Process & sort reference variants
22-
logger.info("Reading & sorting REFERENCE variants: {}".format(args.reference))
22+
logger.info("Reading REFERENCE variants: {}".format(args.reference))
2323
with xopen('reference_variants.txt', 'wt') as outf:
2424
outf.write('CHR:POS:A0:A1\tID_REF\tREF_REF\tIS_INDEL\tSTRANDAMB\tIS_MA_REF\n')
2525
ref_heap = []
@@ -35,7 +35,10 @@ def run_intersect():
3535

3636
IS_INDEL = (len(v['REF']) > 1) | (len(ALT) > 1)
3737
STRANDAMB = (v['REF'] == allele_complement(ALT))
38-
heapq.heappush(ref_heap, ([key, v['ID'], v['REF']],[IS_INDEL, STRANDAMB, IS_MA_REF]))
38+
ref_heap.append(([key, v['ID'], v['REF']], [IS_INDEL, STRANDAMB, IS_MA_REF]))
39+
40+
logger.info("Sorting REFERENCE variants (heapify)")
41+
heapq.heapify(ref_heap)
3942

4043
# Output the sorted reference variants
4144
logger.info("Outputting REFERENCE variants -> reference_variants.txt")
@@ -51,7 +54,7 @@ def run_intersect():
5154
outf.write('CHR:POS:A0:A1\tID_TARGET\tREF_TARGET\tIS_MA_TARGET\tMAF\tF_MISS_DOSAGE\n')
5255
target_heap = []
5356
for path in args.target:
54-
logger.info("Reading & sorting TARGET variants: {}".format(path))
57+
logger.info("Reading TARGET variants: {}".format(path))
5558
pvar = read_var_general(path, chrom=None) # essential not to filter if it is target (messes up common line indexing)
5659

5760
loc_afreq = path.replace('.pvar.zst', '.afreq.gz')
@@ -75,7 +78,10 @@ def run_intersect():
7578
# outf.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(key, v['ID'], v['REF'], str(IS_MA_TARGET), ALT_FREQS[i],
7679
# F_MISS_DOSAGE))
7780
MAF = AAF2MAF(ALT_FREQS[i])
78-
heapq.heappush(target_heap, ([key, v['ID'], v['REF']], [IS_MA_TARGET, MAF,F_MISS_DOSAGE]))
81+
target_heap.append(([key, v['ID'], v['REF']], [IS_MA_TARGET, MAF,F_MISS_DOSAGE]))
82+
83+
logger.info("Sorting TARGET variants (heapify)")
84+
heapq.heapify(target_heap)
7985

8086
# Output the sorted target variants
8187
logger.info("Outputting TARGET variants -> target_variants.txt")

0 commit comments

Comments
 (0)