Skip to content

Commit 95730be

Browse files
committed
fix(ids): Fix the case where canonical IDs are not supplied.
1 parent d1530c6 commit 95730be

File tree

4 files changed

+15
-5
lines changed

4 files changed

+15
-5
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -154,3 +154,4 @@ node_modules/
154154
docs/*.rst
155155
!docs/index.rst
156156

157+
/test

gtdb_itol_decorate/__main__.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -7,21 +7,27 @@
77
from gtdb_itol_decorate.itol import get_phylum_colours, write_color_datastrip, \
88
get_internal_nodes_with_labels, write_internal_node_labels, write_tree_colours, write_collapse_file, \
99
write_popup_file
10-
from gtdb_itol_decorate.newick import load_newick_to_tree, validate_dendropy_namespace, \
10+
from gtdb_itol_decorate.newick import load_newick_to_tree, assert_no_duplicate_taxa, \
1111
get_canonical_mapping, validate_sets, strip_tree_labels, set_node_desc_taxa, set_taxon_label_for_internal_nodes
1212
from gtdb_itol_decorate.util import log
1313

1414

1515
def main(tree_path: Path, tax_path: Path, out_dir: Path):
16+
17+
# Create the output directory
1618
log(f'Creating output directory: {out_dir}')
1719
out_dir.mkdir(exist_ok=True)
1820

21+
# Read and validate the tree
1922
log(f'Reading tree from: {tree_path}')
2023
tree = load_newick_to_tree(str(tree_path))
2124
log(f'Found {len(tree.leaf_nodes()):,} leaf nodes in the tree.')
22-
validate_dendropy_namespace((x.label for x in tree.taxon_namespace))
25+
assert_no_duplicate_taxa((x.label for x in tree.taxon_namespace))
26+
27+
# Create a mapping from the canonical genome ID to the tree
2328
d_canonical_to_gid = get_canonical_mapping((x.label for x in tree.taxon_namespace))
2429

30+
# Read and validate the taxonomy file
2531
log(f'Reading taxonomy from: {tax_path}')
2632
d_tax = load_taxonomy_file(str(tax_path), set(d_canonical_to_gid.keys()))
2733
log(f'Read the taxonomy for {len(d_tax):,} genomes.')

gtdb_itol_decorate/gtdb.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ def load_taxonomy_file(path: str, limit_to_gids: set):
1515
with open(path) as f:
1616
for line in f.readlines():
1717
gid, tax = line.strip().split('\t')
18-
gid = canonical_gid(gid)
18+
# gid = canonical_gid(gid)
1919
if gid not in limit_to_gids:
2020
continue
2121
if gid in out:

gtdb_itol_decorate/newick.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
from collections import Counter, deque, defaultdict
24

35
import dendropy
@@ -27,7 +29,7 @@ def load_newick_to_tree(path: str) -> dendropy.Tree:
2729
preserve_underscores=True)
2830

2931

30-
def validate_dendropy_namespace(taxa):
32+
def assert_no_duplicate_taxa(taxa):
3133
taxa_count = Counter(taxa)
3234
duplicates = {k: v for k, v in taxa_count.items() if v > 1}
3335
if len(duplicates) > 0:
@@ -48,7 +50,8 @@ def get_lca_str(node: dendropy.Node):
4850
def get_canonical_mapping(gids):
4951
out = dict()
5052
for gid in gids:
51-
out[canonical_gid(gid)] = gid
53+
out[gid] = gid
54+
# out[canonical_gid(gid)] = gid
5255
return out
5356

5457

0 commit comments

Comments
 (0)