Skip to content

Commit

Permalink
move write functionality of get_AA_sequences_for_PCs to new function
Browse files Browse the repository at this point in the history
  • Loading branch information
ozcan committed May 4, 2017
1 parent e3cda23 commit 1726476
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 19 deletions.
41 changes: 23 additions & 18 deletions anvio/dbops.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ def __init__(self, args, r=run, p=progress):
self.run.info('Pan DB', 'Initialized: %s (v. %s)' % (self.pan_db_path, anvio.__pan__version__))


def get_AA_sequences_for_PCs(self, pc_names=set([]), output_file_path=None, skip_alignments=False):
def get_AA_sequences_for_PCs(self, pc_names=set([]), skip_alignments=False):
"""Returns a dictionary of sequences (aligned or not) in a given protein cluster:
{
Expand Down Expand Up @@ -805,9 +805,6 @@ def get_AA_sequences_for_PCs(self, pc_names=set([]), output_file_path=None, skip
but there is not genomes storage is available to get it." \
% 'a PC' if len(pc_names) > 1 else '%d PCs' % len(pc_names))

if output_file_path:
filesnpaths.is_output_file_writable(output_file_path)

if not self.protein_clusters_initialized:
self.init_protein_clusters()

Expand All @@ -817,11 +814,8 @@ def get_AA_sequences_for_PCs(self, pc_names=set([]), output_file_path=None, skip
Here are some of the missing ones; %s" \
% (len(missing_pc_names), len(pc_names), ', '.join(missing_pc_names[0:5])))

if output_file_path:
output_file = open(output_file_path, 'w')

self.progress.new('Accessing protein cluster seqeunces')
sequence_counter = 0

for pc_name in pc_names:
self.progress.update("processing '%s' ..." % pc_name )
sequences[pc_name] = {}
Expand All @@ -835,23 +829,34 @@ def get_AA_sequences_for_PCs(self, pc_names=set([]), output_file_path=None, skip
sequence = utils.restore_alignment(sequence, alignment_summary)

sequences[pc_name][genome_name][gene_callers_id] = sequence
sequence_counter += 1

if output_file_path:
self.progress.end()

return sequences

def write_AA_sequences_to_file(self, pc_names=set([]), skip_alignments=False, output_file_path=None):
    """Write the amino acid sequences of the given protein clusters to a file.

    The sequences are obtained from `self.get_AA_sequences_for_PCs` and are
    written as two lines per sequence: a header line of the form

        >{counter:08d}|pc:{pc_name}|genome_name:{genome_name}|gene_callers_id:{id}

    followed by the sequence itself.

    Parameters
    ----------
    pc_names : iterable of protein cluster names to report.
    skip_alignments : passed through as-is to `get_AA_sequences_for_PCs`.
    output_file_path : path of the file to write. Required, despite the
        `None` default that is kept for backward compatibility of the
        signature.

    Raises
    ------
    ValueError
        If `output_file_path` is empty or None.
    """

    # Fail early with a clear message instead of letting `open(None, 'w')`
    # blow up with an unrelated-looking TypeError further down.
    if not output_file_path:
        raise ValueError("write_AA_sequences_to_file requires an output file path.")

    filesnpaths.is_output_file_writable(output_file_path)

    # Fetch the sequences BEFORE opening the output file: opening with 'w'
    # truncates the target, so a failure while collecting sequences must not
    # leave behind an empty file (or a dangling file handle).
    sequences = self.get_AA_sequences_for_PCs(pc_names=pc_names, skip_alignments=skip_alignments)

    self.progress.new('Writing protein cluster seqeunces to file')
    sequence_counter = 0

    try:
        # The context manager guarantees the file is closed even if a write fails.
        with open(output_file_path, 'w') as output_file:
            for pc_name in pc_names:
                for genome_name in sequences[pc_name]:
                    for gene_callers_id in sequences[pc_name][genome_name]:
                        output_file.write('>%08d|pc:%s|genome_name:%s|gene_callers_id:%d\n' % (sequence_counter,
                                                                                              pc_name,
                                                                                              genome_name,
                                                                                              gene_callers_id))
                        output_file.write('%s\n' % sequences[pc_name][genome_name][gene_callers_id])
                        sequence_counter += 1

                self.progress.update("processing '%s' ..." % pc_name)
    finally:
        # Always terminate the progress display, including on errors.
        self.progress.end()

    self.run.info('Output file', output_file_path, lc='green')

def init_protein_clusters_functions(self):
self.progress.new('Initializing functions for protein clusters')
Expand Down
2 changes: 1 addition & 1 deletion bin/anvi-export-pc-alignments
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def main(args):

run.info('Number of protein clusters to report', len(pc_ids))

pan.get_AA_sequences_for_PCs(pc_names=pc_ids, output_file_path=args.output_file)
pan.write_AA_sequences_to_file(pc_names=pc_ids, output_file_path=args.output_file)


if __name__ == '__main__':
Expand Down

0 comments on commit 1726476

Please sign in to comment.