|
7 | 7 | from gtdb_itol_decorate.itol import get_phylum_colours, write_color_datastrip, \
|
8 | 8 | get_internal_nodes_with_labels, write_internal_node_labels, write_tree_colours, write_collapse_file, \
|
9 | 9 | write_popup_file
|
10 |
| -from gtdb_itol_decorate.newick import load_newick_to_tree, validate_dendropy_namespace, \ |
| 10 | +from gtdb_itol_decorate.newick import load_newick_to_tree, assert_no_duplicate_taxa, \ |
11 | 11 | get_canonical_mapping, validate_sets, strip_tree_labels, set_node_desc_taxa, set_taxon_label_for_internal_nodes
|
12 | 12 | from gtdb_itol_decorate.util import log
|
13 | 13 |
|
14 | 14 |
|
15 | 15 | def main(tree_path: Path, tax_path: Path, out_dir: Path):
|
| 16 | + |
| 17 | + # Create the output directory |
16 | 18 | log(f'Creating output directory: {out_dir}')
|
17 | 19 | out_dir.mkdir(exist_ok=True)
|
18 | 20 |
|
| 21 | + # Read and validate the tree |
19 | 22 | log(f'Reading tree from: {tree_path}')
|
20 | 23 | tree = load_newick_to_tree(str(tree_path))
|
21 | 24 | log(f'Found {len(tree.leaf_nodes()):,} leaf nodes in the tree.')
|
22 |
| - validate_dendropy_namespace((x.label for x in tree.taxon_namespace)) |
| 25 | + assert_no_duplicate_taxa((x.label for x in tree.taxon_namespace)) |
| 26 | + |
| 27 | + # Create a mapping from the canonical genome ID to the tree |
23 | 28 | d_canonical_to_gid = get_canonical_mapping((x.label for x in tree.taxon_namespace))
|
24 | 29 |
|
| 30 | + # Read and validate the taxonomy file |
25 | 31 | log(f'Reading taxonomy from: {tax_path}')
|
26 | 32 | d_tax = load_taxonomy_file(str(tax_path), set(d_canonical_to_gid.keys()))
|
27 | 33 | log(f'Read the taxonomy for {len(d_tax):,} genomes.')
|
|
0 commit comments