Skip to content

Commit 3aa03ca

Browse files
replacing binaries with conda libraries
1 parent 2261a57 commit 3aa03ca

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+73
-274119
lines changed

components/components_detection.py

+20-11
Original file line numberDiff line numberDiff line change
@@ -71,20 +71,27 @@ def _get_double_dna(self):
7171
self.dna = self.dna.upper()
7272

7373
def _mkvtree_command(self):
74-
cmd = "tools/vmatch/mkvtree -db new_input.fa -dna -pl -lcp -suf -tis -ois -bwt -bck -sti1"
75-
os.system(cmd)
74+
#cmd = "tools/vmatch/mkvtree -db new_input.fa -dna -pl -lcp -suf -tis -ois -bwt -bck -sti1"
75+
no_binary_cmd = "mkvtree -db new_input.fa -dna -pl -lcp -suf -tis -ois -bwt -bck -sti1"
76+
os.system(no_binary_cmd)
7677

7778
def _vmatch_command(self):
7879
l_flag_val = self.settings["vmatch -l"]
7980
e_flag_val = self.settings["vmatch -e"]
8081
e_value_flag_val = self.settings["vmatch -evalue"]
8182

82-
cmd = "tools/vmatch/vmatch " + "-l " + l_flag_val + " -evalue " + e_value_flag_val + " -e " + e_flag_val + \
83-
" -s leftseq " + " -absolute -nodist -noevalue -noscore -noidentity " + \
84-
"-sort ia -best 1000000 " + "new_input.fa" + " > " + \
85-
"vmatch_result_new.txt"
83+
#cmd = "tools/vmatch/vmatch " + "-l " + l_flag_val + " -evalue " + e_value_flag_val + " -e " + e_flag_val + \
84+
# " -s leftseq " + " -absolute -nodist -noevalue -noscore -noidentity " + \
85+
# "-sort ia -best 1000000 " + "new_input.fa" + " > " + \
86+
# "vmatch_result_new.txt"
87+
88+
no_binary_cmd = "vmatch " + "-l " + l_flag_val + " -evalue " + e_value_flag_val + " -e " + e_flag_val + \
89+
" -s leftseq " + " -absolute -nodist -noevalue -noscore -noidentity " + \
90+
"-sort ia -best 1000000 " + "new_input.fa" + " > " + \
91+
"vmatch_result_new.txt"
92+
93+
os.system(no_binary_cmd)
8694

87-
os.system(cmd)
8895

8996
def _take_vmatch_results_new(self):
9097
cutoff = 55
@@ -356,8 +363,9 @@ def _make_fasta_with_repeats(self):
356363
f.write(lines)
357364

358365
def _run_clustal_omega_repeats(self):
359-
cmd = self.absolute_path_to_tools + "clustalOmega/clustalo -i clustal_repeats.fa -o loc_align.txt"
360-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
366+
#cmd = self.absolute_path_to_tools + "clustalOmega/clustalo -i clustal_repeats.fa -o loc_align.txt"
367+
cmd_no_binary = "clustalo -i clustal_repeats.fa -o loc_align.txt"
368+
process = subprocess.Popen(cmd_no_binary, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
361369
process.communicate()
362370

363371
def _find_max_clustal_omega_repeat_results(self):
@@ -406,8 +414,9 @@ def _run_clustal_omega_min_max(self):
406414
f.write(">min\n")
407415
f.write(f"{self.min_seq}\n")
408416

409-
cmd = self.absolute_path_to_tools + "clustalOmega/clustalo -i min_max.fa -o min_max_align.txt"
410-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
417+
#cmd = self.absolute_path_to_tools + "clustalOmega/clustalo -i min_max.fa -o min_max_align.txt"
418+
cmd_no_binary = "clustalo -i min_max.fa -o min_max_align.txt"
419+
process = subprocess.Popen(cmd_no_binary, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
411420
process.communicate()
412421

413422
def _obtain_max_sequence(self):

components/components_evaluation.py

+53-32
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,12 @@ def _extract_orf_scores(self):
6565
f.write(crispr_seq)
6666
f.write("\n")
6767

68-
cmd = 'tools/prodigal/prodigal'
69-
cmd += ' -i file_for_prodigal.fa -o prodigal_result.txt -c -p meta'
70-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
68+
#cmd = 'tools/prodigal/prodigal'
69+
#cmd += ' -i file_for_prodigal.fa -o prodigal_result.txt -c -p meta'
70+
71+
no_binary_cmd = "prodigal -i file_for_prodigal.fa -o prodigal_result.txt -c -p meta"
72+
73+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
7174
process.communicate()
7275

7376
with open("prodigal_result.txt") as f:
@@ -207,14 +210,19 @@ def _extract_orf_scores_and_proteins(self):
207210
f.write(crispr_seq)
208211
f.write("\n")
209212

210-
cmd = 'tools/prodigal/prodigal'
211-
cmd += ' -i file_for_prodigal.fa -o prodigal_result.txt -c -p meta'
212-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
213+
#cmd = 'tools/prodigal/prodigal'
214+
#cmd += ' -i file_for_prodigal.fa -o prodigal_result.txt -c -p meta'
215+
216+
no_binary_cmd = "prodigal -i file_for_prodigal.fa -o prodigal_result.txt -c -p meta"
217+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
213218
process.communicate()
214219

215-
cmd = 'tools/prodigal/prodigal'
216-
cmd += ' -i file_for_prodigal.fa -p meta -a protein_results.fa'
217-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
220+
#cmd = 'tools/prodigal/prodigal'
221+
#cmd += ' -i file_for_prodigal.fa -p meta -a protein_results.fa'
222+
223+
no_binary_cmd = "prodigal -i file_for_prodigal.fa -p meta -a protein_results.fa"
224+
225+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
218226
process.communicate()
219227

220228
with open("prodigal_result.txt") as f:
@@ -290,8 +298,11 @@ def _extract_orf_scores_and_proteins(self):
290298
self.dict_final_orf_result[key] = list_scores
291299

292300
def _run_hmm_search(self):
293-
cmd = 'tools/hmm_search/hmmsearch --tblout result_hmm.out tools/hmm_search/models_tandem.hmm protein_results.fa'
294-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
301+
#cmd = 'tools/hmm_search/hmmsearch --tblout result_hmm.out tools/hmm_search/models_tandem.hmm protein_results.fa'
302+
303+
no_binary_cmd = "hmmsearch --tblout result_hmm.out tools/hmm_search/models_tandem.hmm protein_results.fa"
304+
305+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
295306
process.communicate()
296307

297308
def _extract_best_score_from_hmm(self):
@@ -386,24 +397,28 @@ def _extract_all_blast_scores(self):
386397

387398
db_file = 'Verified_repeats_dataset1.fa'
388399

389-
cmd = 'tools/blasting/blastn -query file_with_all_consensus.fa'
390-
cmd += ' -db tools/blasting/'
391-
cmd += db_file
392-
cmd += ' -word_size 6'
393-
cmd += ' -outfmt 6 -out output_fasta_bulk_extraction1'
400+
#cmd = 'tools/blasting/blastn -query file_with_all_consensus.fa'
401+
#cmd += ' -db tools/blasting/'
402+
#cmd += db_file
403+
#cmd += ' -word_size 6'
404+
#cmd += ' -outfmt 6 -out output_fasta_bulk_extraction1'
405+
406+
no_binary_cmd = f"blastn -query file_with_all_consensus.fa -db tools/blasting/{db_file} -word_size 6 -outfmt 6 -out output_fasta_bulk_extraction1"
394407

395-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
408+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
396409
process.communicate()
397410

398411
db_file = 'Verified_repeats_dataset2.fa'
399412

400-
cmd = 'tools/blasting/blastn -query file_with_all_consensus.fa'
401-
cmd += ' -db tools/blasting/'
402-
cmd += db_file
403-
cmd += ' -word_size 6'
404-
cmd += ' -outfmt 6 -out output_fasta_bulk_extraction2'
413+
#cmd = 'tools/blasting/blastn -query file_with_all_consensus.fa'
414+
#cmd += ' -db tools/blasting/'
415+
#cmd += db_file
416+
#cmd += ' -word_size 6'
417+
#cmd += ' -outfmt 6 -out output_fasta_bulk_extraction2'
418+
419+
no_binary_cmd = f"blastn -query file_with_all_consensus.fa -db tools/blasting/{db_file} -word_size 6 -outfmt 6 -out output_fasta_bulk_extraction2"
405420

406-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
421+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
407422
process.communicate()
408423

409424
with open("output_fasta_bulk_extraction1") as f:
@@ -495,9 +510,11 @@ def _extract_mfe_scores(self):
495510
f.write(consensus)
496511
f.write("\n")
497512

498-
cmd = "cat file_for_mfe.fa | tools/rna_fold/RNAfold --noLP --noPS > rna_fold_output.txt"
513+
#cmd = "cat file_for_mfe.fa | tools/rna_fold/RNAfold --noLP --noPS > rna_fold_output.txt"
499514

500-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
515+
no_binary_cmd = "cat file_for_mfe.fa | RNAfold --noLP --noPS > rna_fold_output.txt"
516+
517+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
501518
process.communicate()
502519

503520
with open("rna_fold_output.txt") as f:
@@ -867,18 +884,22 @@ def _create_file(self):
867884
f.write(self.crispr_seq)
868885

869886
def _call_prodigal(self):
870-
cmd = 'tools/prodigal/prodigal'
871-
cmd += ' -i fasta_to_do_hmm_{}.fa -p meta -a protein_{}.fa'.format(self.index, self.index)
887+
#cmd = 'tools/prodigal/prodigal'
888+
#cmd += ' -i fasta_to_do_hmm_{}.fa -p meta -a protein_{}.fa'.format(self.index, self.index)
872889

873-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
890+
no_binary_cmd = f"prodigal -i fasta_to_do_hmm_{self.index}.fa -p meta -a protein_{self.index}.fa"
891+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
874892
a, b = process.communicate()
875893

876894
def _run_hmm_search(self, hmm_model):
877895
if os.stat('protein_{}.fa'.format(self.index)).st_size != 0:
878-
cmd = 'tools/hmm_search/hmmsearch --tblout {} {} {}'.format('result_hmm_{}.out'.format(self.index),
879-
hmm_model,
880-
'protein_{}.fa'.format(self.index))
881-
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
896+
#cmd = 'tools/hmm_search/hmmsearch --tblout {} {} {}'.format('result_hmm_{}.out'.format(self.index),
897+
# hmm_model,
898+
# 'protein_{}.fa'.format(self.index))
899+
900+
# Use the PATH-resolved (conda-installed) hmmsearch, since the bundled tools/hmm_search/hmmsearch binary is removed, and interpolate the hmm_model argument as the HMM profile path.
no_binary_cmd = f"hmmsearch --tblout result_hmm_{self.index}.out {hmm_model} protein_{self.index}.fa"
901+
902+
process = subprocess.Popen(no_binary_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
882903
process.communicate()
883904
else:
884905
self.hmm_score = 0.0

tools/blasting/blastn

-40.2 MB
Binary file not shown.

tools/clustalOmega/clustalo

-4.69 MB
Binary file not shown.

tools/fasta/fasta36

-865 KB
Binary file not shown.

tools/hmm_search/hmmsearch

-849 KB
Binary file not shown.

0 commit comments

Comments
 (0)