Skip to content

Commit 0c4a71e

Browse files
committed
feat(initial release): First commit.
1 parent ac26dee commit 0c4a71e

14 files changed

+11223
-0
lines changed

.github/workflows/build-publish.yml

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# Release pipeline: on every push to main, compute the next semantic version,
# build the Python distributions, check that they install, then publish the
# release to GitHub and upload the distributions to PyPI.
name: Version, build, publish

on:
  push:
    branches:
      - main

jobs:

  # Runs semantic-release in dry-run mode to determine the next version.
  semantic_release_next_version:
    name: Next version
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          cache: npm
          node-version: 16
      - run: npm install
      - run: npx semantic-release --dry-run
        id: semantic_release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Hand the release-managed files to the build job. The package
      # __init__.py has its version rewritten by the verifyReleaseCmd
      # configured in .releaserc.
      - uses: actions/upload-artifact@v3
        with:
          name: semantic-release
          path: |
            gtdb_itol_decorate/__init__.py
            CHANGELOG.md
          retention-days: 1
    outputs:
      # Set by the `echo ::set-output name=version::...` in .releaserc's
      # verifyReleaseCmd.
      # NOTE(review): GitHub has deprecated the `::set-output` workflow
      # command in favour of appending to $GITHUB_OUTPUT - consider migrating.
      version: ${{ steps.semantic_release.outputs.version }}

  # Builds the sdist and wheel from the version-stamped sources.
  build_python:
    name: Python build
    runs-on: ubuntu-latest
    needs: semantic_release_next_version
    # No version output means there is nothing to release; jobs that `needs`
    # this one are skipped along with it.
    if: needs.semantic_release_next_version.outputs.version != ''
    steps:
      - uses: actions/checkout@v3
      # Presumably overlays the version-stamped files onto the fresh
      # checkout - TODO confirm the artifact unpacks to the original paths.
      - uses: actions/download-artifact@v3
        with:
          name: semantic-release
      - uses: actions/setup-python@v4
        with:
          python-version: '3.8'
      - run: |
          python -m pip install -U setuptools wheel
          python setup.py sdist bdist_wheel
      - uses: actions/upload-artifact@v3
        with:
          name: pypi
          path: dist/*
          retention-days: 1

  # Smoke test: the source distribution must install with pip.
  test_python_sdist:
    name: Python test *.tar.gz
    runs-on: ubuntu-latest
    needs: build_python
    steps:
      - uses: actions/download-artifact@v3
        with:
          name: pypi
          path: dist
      - uses: actions/setup-python@v4
        with:
          python-version: '3.8'
      - run: python -m pip install dist/*.tar.gz

  # Smoke test: the wheel must install with pip.
  test_python_wheel:
    name: Python test *.whl
    runs-on: ubuntu-latest
    needs: build_python
    steps:
      - uses: actions/download-artifact@v3
        with:
          name: pypi
          path: dist
      - uses: actions/setup-python@v4
        with:
          python-version: '3.8'
      - run: python -m pip install dist/*.whl

  # Real (non dry-run) semantic-release, gated on both install tests.
  semantic_release_publish:
    name: Publish to GitHub
    needs: [test_python_sdist, test_python_wheel]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          cache: npm
          node-version: 16
      - run: npm install
      - run: npx semantic-release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  # Uploads the distributions built by build_python, after the GitHub
  # publish job has run.
  upload_pypi:
    name: Publish to PyPI
    needs: [semantic_release_publish]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v3
        with:
          name: pypi
          path: dist
      - uses: actions/setup-python@v4
        with:
          python-version: '3.8'
      - run: |
          python -m pip install -U twine
          twine upload dist/*
        env:
          # Token-based authentication: the username is the literal
          # string __token__ and the password is the PYPI_TOKEN secret.
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}

.gitignore

+27
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ __pycache__/
99
# Distribution / packaging
1010
.Python
1111
build/
12+
_build/
13+
_static/
14+
_templates/
1215
develop-eggs/
1316
dist/
1417
downloads/
@@ -127,3 +130,27 @@ dmypy.json
127130

128131
# Pyre type checker
129132
.pyre/
133+
134+
# PyCharm
135+
.idea/
136+
137+
# OS
138+
.DS_Store
139+
140+
*.cpp
141+
*.c
142+
143+
docs/build/
144+
docs/source/generated/
145+
146+
/target
147+
148+
wheelhouse/
149+
150+
# Node modules
151+
node_modules/
152+
153+
# RST files
154+
docs/*.rst
155+
!docs/index.rst
156+

.releaserc

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"tagFormat": "v${version}",
3+
"branches": [
4+
"main"
5+
],
6+
"plugins": [
7+
"@semantic-release/commit-analyzer",
8+
"@semantic-release/release-notes-generator",
9+
[
10+
"@semantic-release/exec",
11+
{
12+
"verifyReleaseCmd": "sed -i \"s/.*__version__.*/__version__ = '${nextRelease.version}'/g\" gtdb_itol_decorate/__init__.py && echo ::set-output name=version::${nextRelease.version}"
13+
}
14+
],
15+
[
16+
"@semantic-release/changelog",
17+
{
18+
"changelogFile": "CHANGELOG.md"
19+
}
20+
],
21+
[
22+
"@semantic-release/git",
23+
{
24+
"assets": [
25+
"CHANGELOG.md",
26+
"gtdb_itol_decorate/__init__.py"
27+
],
28+
"message": "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}"
29+
}
30+
],
31+
[
32+
"@semantic-release/github"
33+
]
34+
]
35+
}

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
# gtdb-itol-decorate
2+
23
Creates iTOL files for tree decoration, given a set of GTDB genomes.

gtdb_itol_decorate/__init__.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Package metadata for gtdb_itol_decorate.
__title__ = 'gtdb_itol_decorate'
__description__ = 'Creates iTOL files for tree decoration, given a set of GTDB genomes.'
__url__ = 'https://github.com/Ecogenomics/gtdb-itol-decorate'
# The next line is rewritten at release time by a sed pattern in .releaserc
# that replaces every line containing the version dunder name; keep the
# assignment on a single line and do not spell that name in any comment.
__version__ = '1.0.0'
__author__ = 'Aaron Mussig'
__author_email__ = 'aaronmussig@gmail.com'
__license__ = 'GPL-3.0'
# Bug tracker, documentation and source links all point at the repository.
__bug_url__ = 'https://github.com/Ecogenomics/gtdb-itol-decorate'
__doc_url__ = 'https://github.com/Ecogenomics/gtdb-itol-decorate'
__src_url__ = 'https://github.com/Ecogenomics/gtdb-itol-decorate'

gtdb_itol_decorate/__main__.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
from pathlib import Path
3+
4+
import dendropy
5+
import typer
6+
7+
from gtdb_itol_decorate.decorate import decorate_tree
8+
from gtdb_itol_decorate.gtdb import load_taxonomy_file, get_taxon_to_phylum
9+
from gtdb_itol_decorate.itol import get_phylum_to_lca, get_phylum_colours, write_color_datastrip, \
10+
get_internal_nodes_with_labels, write_internal_node_labels, write_tree_colours, write_collapse_file, \
11+
write_popup_file
12+
from gtdb_itol_decorate.newick import load_newick_file, load_newick_to_tree, validate_dendropy_namespace, \
13+
get_canonical_mapping, validate_sets, strip_tree_labels, set_node_desc_taxa, set_taxon_label_for_internal_nodes
14+
from gtdb_itol_decorate.util import log
15+
16+
17+
def main(tree_path: Path, tax_path: Path, out_dir: Path):
    """Create iTOL decoration files for a Newick tree of GTDB genomes.

    Loads the tree and the taxonomy, annotates the internal nodes, and
    writes a label-stripped copy of the tree together with the iTOL
    annotation files (phylum colour strip, internal node labels, tree
    colours, popup information and per-rank collapse files) to ``out_dir``.

    Args:
        tree_path: Path to the input tree in Newick format.
        tax_path: Path to the tab-separated taxonomy file.
        out_dir: Directory that receives the output files. It is created,
            together with any missing parent directories, if absent.
    """
    log(f'Creating output directory: {out_dir}')
    # parents=True so that a nested output path does not fail when the
    # intermediate directories are missing.
    out_dir.mkdir(parents=True, exist_ok=True)

    log(f'Reading tree from: {tree_path}')
    tree = load_newick_to_tree(str(tree_path))
    log(f'Found {len(tree.leaf_nodes()):,} leaf nodes in the tree.')
    validate_dendropy_namespace((x.label for x in tree.taxon_namespace))
    d_canonical_to_gid = get_canonical_mapping((x.label for x in tree.taxon_namespace))
    canonical_gids = set(d_canonical_to_gid)

    log(f'Reading taxonomy from: {tax_path}')
    d_tax = load_taxonomy_file(str(tax_path), canonical_gids)
    log(f'Read the taxonomy for {len(d_tax):,} genomes.')
    validate_sets(canonical_gids, set(d_tax.keys()))

    log('Reverse mapping taxon to phylum')
    d_taxon_to_phylum = get_taxon_to_phylum(d_tax)

    log('Annotating internal nodes with descendant taxa')
    set_node_desc_taxa(tree)
    set_taxon_label_for_internal_nodes(tree, d_tax)

    log('Getting the last common ancestor of each phylum.')
    d_phylum_to_lca = get_phylum_to_lca(tree)
    d_phylum_palette = get_phylum_colours(d_phylum_to_lca)
    write_color_datastrip(d_phylum_to_lca, d_phylum_palette, out_dir / 'itol_dataset_strip_phylum.txt')

    log('Making tree compatible with iTOL (stripping labels)')
    strip_tree_labels(tree)
    path_tree_out = out_dir / f'{tree_path.name}_stripped'
    tree.write_to_path(path_tree_out, schema='newick', suppress_rooting=True, unquoted_underscores=True)

    log('Writing internal node labels')
    d_int_label_to_lca = get_internal_nodes_with_labels(tree)
    write_internal_node_labels(d_int_label_to_lca, out_dir / 'itol_labels.txt')

    log('Getting tree colour palette')
    write_tree_colours(tree, d_taxon_to_phylum, out_dir / 'itol_tree_colours.txt', d_phylum_palette)

    log('Writing popup information file')
    write_popup_file(tree, d_tax, out_dir / 'itol_popup.txt')

    log('Writing collapse files')
    # One collapse file per rank, phylum through genus.
    collapse_ranks = (
        ('phylum', 'p__'),
        ('class', 'c__'),
        ('order', 'o__'),
        ('family', 'f__'),
        ('genus', 'g__'),
    )
    for rank_name, rank_prefix in collapse_ranks:
        write_collapse_file(d_int_label_to_lca, out_dir / f'itol_collapse_{rank_name}.txt', rank_prefix)
    log('Done.')
66+
67+
68+
# Script entry point: hand main() to typer, which runs it as the
# command-line program.
if __name__ == "__main__":
    typer.run(main)

gtdb_itol_decorate/decorate.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from collections import defaultdict
2+
3+
4+
5+
def decorate_tree(tree, d_gid_to_tax):
    """Map tree nodes to the taxa whose genomes have that node as their MRCA.

    Args:
        tree: Tree object exposing ``mrca(taxon_labels=...)``; the nodes it
            returns must expose ``parent_node``.
        d_gid_to_tax: Mapping of genome id to a taxonomy object that has
            ``p``, ``c``, ``o``, ``f``, ``g`` and ``s`` attributes, each
            with a ``value``.

    Returns:
        A ``defaultdict(set)`` keyed by node, holding the names of the taxa
        whose most recent common ancestor is that node. Taxa whose MRCA is
        the seed node (no parent) are left out.
    """
    # Group the genome ids under every taxon (phylum to species) they belong to
    d_taxon_to_gids = defaultdict(set)
    for gid, taxonomy in d_gid_to_tax.items():
        for rank in ('p', 'c', 'o', 'f', 'g', 's'):
            d_taxon_to_gids[getattr(taxonomy, rank).value].add(gid)

    # Attach each taxon to the MRCA of all genomes assigned to it
    d_node_to_taxa = defaultdict(set)
    for taxon, gids in d_taxon_to_gids.items():
        mrca_node = tree.mrca(taxon_labels=gids)

        # This is the seed node, so we can't decorate it
        if mrca_node.parent_node is None:
            continue

        d_node_to_taxa[mrca_node].add(taxon)
    return d_node_to_taxa

gtdb_itol_decorate/gtdb.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from collections import defaultdict
2+
from typing import Dict
3+
4+
from gtdblib.taxon.rank import TaxonRank
5+
from gtdblib.taxon.taxon import Taxon
6+
from gtdblib.taxonomy.taxonomy import Taxonomy
7+
8+
from gtdb_itol_decorate.util import canonical_gid
9+
10+
11+
def load_taxonomy_file(path: str, limit_to_gids: set):
    """Load a tab-separated taxonomy file into Taxonomy objects.

    Every non-blank line must hold a genome id and a taxonomy string
    separated by a tab; the taxonomy string is seven semicolon-separated
    ranks, domain through species. Genome ids are passed through
    ``canonical_gid`` and rows whose canonical id is not in
    ``limit_to_gids`` are ignored.

    Args:
        path: Path to the taxonomy file.
        limit_to_gids: Canonical genome ids to keep.

    Returns:
        A dict mapping canonical genome id to its ``Taxonomy``.

    Raises:
        ValueError: If a non-blank line does not have exactly two
            tab-separated columns.
        Exception: If a genome id occurs more than once, or a taxonomy
            string does not have exactly seven ranks.
    """
    gtdb_ranks = [TaxonRank.DOMAIN, TaxonRank.PHYLUM, TaxonRank.CLASS,
                  TaxonRank.ORDER, TaxonRank.FAMILY, TaxonRank.GENUS,
                  TaxonRank.SPECIES]
    out = dict()
    with open(path) as f:
        # Stream the file instead of loading every line up front
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            # Tolerate blank lines, e.g. an empty line at the end of the file
            if not line:
                continue
            cols = line.split('\t')
            if len(cols) != 2:
                raise ValueError(f'Invalid line {line_no} in {path} '
                                 f'(expected 2 tab-separated columns): {line}')
            gid, tax = cols
            gid = canonical_gid(gid)
            if gid not in limit_to_gids:
                continue
            if gid in out:
                raise Exception(f'Duplicate genome id found: {gid}')
            ranks = tax.split(';')
            if len(ranks) != 7:
                raise Exception(f'Invalid taxonomy for {gid} (expected 7 ranks): {tax}')

            # Keyword arguments for Taxonomy are keyed by the first character
            # of each rank's value - presumably the one-letter rank code
            # (d, p, c, ...); confirm against gtdblib.
            d_rank_to_taxon = dict()
            for rank, taxon_rank in zip(ranks, gtdb_ranks):
                d_rank_to_taxon[taxon_rank.value[0]] = Taxon(taxon_rank, rank)
            out[gid] = Taxonomy(**d_rank_to_taxon)
    return out
35+
36+
37+
38+
def get_taxon_to_phylum(d_tax: Dict[str, Taxonomy]):
    """Build a lookup from each class-to-species taxon name to its phylum name.

    Args:
        d_tax: Mapping of genome id to its taxonomy.

    Returns:
        A dict mapping the ``value`` of every class, order, family, genus
        and species taxon found in ``d_tax`` to the ``value`` of the phylum
        of the genome it was read from. If a taxon name occurs under
        several genomes, the phylum of the last one iterated wins.
    """
    lower_ranks = ('c', 'o', 'f', 'g', 's')
    return {
        getattr(taxonomy, rank).value: taxonomy.p.value
        for taxonomy in d_tax.values()
        for rank in lower_ranks
    }
45+

0 commit comments

Comments
 (0)