generated from bokulich-lab/q2-plugin-template
-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathhmmer.py
71 lines (65 loc) · 2.53 KB
/
hmmer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# ----------------------------------------------------------------------------
# Copyright (c) 2022, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os
import tempfile
from functools import partial
from typing import Union
import pandas as pd
from q2_moshpit.eggnog.orthologs.common import (
_run_eggnog_search_pipeline, _create_symlinks,
_eggnog_search, _search_runner
)
from q2_moshpit.eggnog.types import EggnogHmmerIdmapDirectoryFmt
from q2_types.feature_data_mag import MAGSequencesDirFmt
from q2_types.genome_data import (
ProteinsDirectoryFormat, SeedOrthologDirFmt, LociDirectoryFormat
)
from q2_types.per_sample_sequences import (
ContigSequencesDirFmt, MultiMAGSequencesDirFmt
)
from q2_types.profile_hmms import PressedProfileHmmsDirectoryFmt
def _eggnog_hmmer_search(
    sequences: Union[
        ContigSequencesDirFmt,
        MultiMAGSequencesDirFmt,
        MAGSequencesDirFmt
    ],
    idmap: EggnogHmmerIdmapDirectoryFmt,
    pressed_hmm_db: PressedProfileHmmsDirectoryFmt,
    seed_alignments: ProteinsDirectoryFormat,
    num_cpus: int = 1,
    db_in_memory: bool = False
) -> (SeedOrthologDirFmt, pd.DataFrame, LociDirectoryFormat):
    """Stage the HMMER inputs in a scratch directory and run the search.

    A temporary directory is created and, inside it, a
    ``hmmer/<taxon_id>`` subdirectory into which ``pressed_hmm_db``,
    ``idmap`` and ``seed_alignments`` are handed to ``_create_symlinks``.
    A ``_search_runner`` partial preconfigured with the ``hmmer`` runner
    arguments is then passed to ``_eggnog_search`` together with
    ``sequences`` and the temporary directory.

    Parameters
    ----------
    sequences
        Input sequences, forwarded unchanged to ``_eggnog_search``.
    idmap
        ID-map directory. The name of the first entry returned by
        ``os.listdir(idmap.path)``, cut at its first ``"."``, is used as
        the taxon ID.
    pressed_hmm_db
        Pressed profile-HMM database directory to be linked.
    seed_alignments
        Seed alignments directory to be linked.
    num_cpus
        Forwarded to ``_search_runner`` as ``num_cpus``.
    db_in_memory
        Forwarded to ``_search_runner`` as ``db_in_memory``.

    Returns
    -------
    tuple
        The three values produced by ``_eggnog_search``, in the same
        order.
    """
    with tempfile.TemporaryDirectory() as output_loc:
        # The taxon ID is whatever precedes the first dot in the name of
        # the first directory entry.
        first_entry = os.listdir(idmap.path)[0]
        taxon_id = first_entry.partition(".")[0]

        # Staging location for the database files; passed to
        # _create_symlinks below. NOTE(review): presumably this mirrors
        # the layout expected under '--data_dir' - confirm.
        tmp_subdir = f"{output_loc}/hmmer/{taxon_id}"
        os.makedirs(tmp_subdir)
        staged_inputs = [pressed_hmm_db, idmap, seed_alignments]
        _create_symlinks(staged_inputs, tmp_subdir)

        hmmer_args = [
            'hmmer',
            '--data_dir', output_loc,
            '-d', taxon_id,
            # Set explicitly: the default is incompatible with HMMER.
            '--genepred', 'prodigal',
        ]
        search_runner = partial(
            _search_runner,
            output_loc=output_loc,
            num_cpus=num_cpus,
            db_in_memory=db_in_memory,
            runner_args=hmmer_args,
        )

        # Unpack inside the context so the search runs while the
        # temporary directory still exists.
        hits, feature_table, loci = _eggnog_search(
            sequences, search_runner, output_loc
        )
    return hits, feature_table, loci
def eggnog_hmmer_search(
    ctx, sequences, pressed_hmm_db, idmap, seed_alignments,
    num_cpus=1, db_in_memory=False, num_partitions=None
):
    """Run the eggNOG search pipeline using ``_eggnog_hmmer_search``.

    All arguments are handed to ``_run_eggnog_search_pipeline``, which is
    told to use the action named ``"_eggnog_hmmer_search"``.

    Parameters
    ----------
    ctx
        Context object, passed through to the pipeline helper.
    sequences
        Input sequences to search.
    pressed_hmm_db, idmap, seed_alignments
        HMMER database inputs. They are bundled into one list in the
        order ``[idmap, pressed_hmm_db, seed_alignments]``.
    num_cpus, db_in_memory, num_partitions
        Passed through to the pipeline helper unchanged.

    Returns
    -------
    tuple
        The three collated results returned by the pipeline helper, in
        the same order.
    """
    # NOTE(review): this order matches the parameter order of
    # _eggnog_hmmer_search after `sequences`; presumably the helper
    # relies on it - confirm before reordering.
    search_inputs = [idmap, pressed_hmm_db, seed_alignments]

    hits, tables, loci = _run_eggnog_search_pipeline(
        ctx,
        sequences,
        search_inputs,
        num_cpus,
        db_in_memory,
        num_partitions,
        "_eggnog_hmmer_search",
    )
    return hits, tables, loci