|
| 1 | +import os |
| 2 | +from pathlib import Path |
| 3 | + |
| 4 | +import dendropy |
| 5 | +import typer |
| 6 | + |
| 7 | +from gtdb_itol_decorate.decorate import decorate_tree |
| 8 | +from gtdb_itol_decorate.gtdb import load_taxonomy_file, get_taxon_to_phylum |
| 9 | +from gtdb_itol_decorate.itol import get_phylum_to_lca, get_phylum_colours, write_color_datastrip, \ |
| 10 | + get_internal_nodes_with_labels, write_internal_node_labels, write_tree_colours, write_collapse_file, \ |
| 11 | + write_popup_file |
| 12 | +from gtdb_itol_decorate.newick import load_newick_file, load_newick_to_tree, validate_dendropy_namespace, \ |
| 13 | + get_canonical_mapping, validate_sets, strip_tree_labels, set_node_desc_taxa, set_taxon_label_for_internal_nodes |
| 14 | +from gtdb_itol_decorate.util import log |
| 15 | + |
| 16 | + |
def main(tree_path: Path, tax_path: Path, out_dir: Path):
    """Write iTOL annotation files for a tree and its taxonomy.

    The tree is loaded from ``tree_path``, the taxonomy from ``tax_path``,
    and the following files are written into ``out_dir``:

    * ``itol_dataset_strip_phylum.txt``
    * ``<tree file name>_stripped`` (the tree re-written in Newick format
      after its labels have been stripped)
    * ``itol_labels.txt``
    * ``itol_tree_colours.txt``
    * ``itol_popup.txt``
    * ``itol_collapse_<rank>.txt`` for phylum, class, order, family, genus

    Args:
        tree_path: Path to the input tree file.
        tax_path: Path to the taxonomy file.
        out_dir: Directory that receives the output files. It is created,
            together with any missing parent directories, if absent.
    """
    log(f'Creating output directory: {out_dir}')
    # parents=True so that a nested output path does not fail when its
    # ancestors have not been created yet.
    out_dir.mkdir(parents=True, exist_ok=True)

    log(f'Reading tree from: {tree_path}')
    tree = load_newick_to_tree(str(tree_path))
    log(f'Found {len(tree.leaf_nodes()):,} leaf nodes in the tree.')
    # A fresh generator is built for each consumer: a generator can only be
    # iterated once.
    validate_dendropy_namespace((x.label for x in tree.taxon_namespace))
    d_canonical_to_gid = get_canonical_mapping((x.label for x in tree.taxon_namespace))

    log(f'Reading taxonomy from: {tax_path}')
    d_tax = load_taxonomy_file(str(tax_path), set(d_canonical_to_gid.keys()))
    log(f'Read the taxonomy for {len(d_tax):,} genomes.')
    validate_sets(set(d_canonical_to_gid.keys()), set(d_tax.keys()))

    log('Reverse mapping taxon to phylum')
    d_taxon_to_phylum = get_taxon_to_phylum(d_tax)

    log('Annotating internal nodes with descendant taxa')
    set_node_desc_taxa(tree)
    set_taxon_label_for_internal_nodes(tree, d_tax)

    log('Getting the last common ancestor of each phylum.')
    d_phylum_to_lca = get_phylum_to_lca(tree)
    d_phylum_palette = get_phylum_colours(d_phylum_to_lca)
    write_color_datastrip(d_phylum_to_lca, d_phylum_palette, out_dir / 'itol_dataset_strip_phylum.txt')

    log('Making tree compatible with iTOL (stripping labels)')
    strip_tree_labels(tree)
    path_tree_out = out_dir / f'{tree_path.name}_stripped'
    tree.write_to_path(path_tree_out, schema='newick', suppress_rooting=True, unquoted_underscores=True)

    log('Writing internal node labels')
    d_int_label_to_lca = get_internal_nodes_with_labels(tree)
    write_internal_node_labels(d_int_label_to_lca, out_dir / 'itol_labels.txt')

    log('Getting tree colour palette')
    write_tree_colours(tree, d_taxon_to_phylum, out_dir / 'itol_tree_colours.txt', d_phylum_palette)

    log('Writing popup information file')
    write_popup_file(tree, d_tax, out_dir / 'itol_popup.txt')

    log('Writing collapse files')
    # One collapse file per rank, written in the same order as before.
    collapse_ranks = (
        ('phylum', 'p__'),
        ('class', 'c__'),
        ('order', 'o__'),
        ('family', 'f__'),
        ('genus', 'g__'),
    )
    for rank_name, rank_prefix in collapse_ranks:
        write_collapse_file(d_int_label_to_lca, out_dir / f'itol_collapse_{rank_name}.txt', rank_prefix)
    log('Done.')
| 66 | + |
| 67 | + |
# Script entry point: typer.run(main) is only invoked when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    typer.run(main)
0 commit comments