Skip to content

Commit

Permalink
Merge pull request #2 from SyntenyBio/srgk26/concat_json_1
Browse files (browse the repository at this point in the history)
Refactoring temporary JSON file concatenation
  • Loading branch information
srgk26 authored Jan 3, 2023
2 parents 176b7a7 + dcdfd07 commit a0af51a
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 6 deletions.
10 changes: 4 additions & 6 deletions abstar/core/abstar.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import time
import traceback
import warnings
import shutil

from Bio import SeqIO

Expand Down Expand Up @@ -440,15 +441,13 @@ def concat_outputs(input_file, temp_output_file_dicts, output_dir, args):
if args.gzip:
ohandle = gzip.open(ofile + ".gz", 'wb')
else:
ohandle = open(ofile, 'w')
ohandle = open(ofile, 'wb')
with ohandle as out_file:
# JSON-formatted files don't have headers, so we don't worry about it
if output_type == 'json':
for temp_file in temp_files:
with open(temp_file) as f:
for line in f:
out_file.write(line)
out_file.write('\n')
with open(temp_file, "rb") as f:
shutil.copyfileobj(f, out_file, length=16 * 1024**2) # Increasing buffer size to 16MB for faster transfer
# For file formats with headers, only keep headers from the first file
if output_type in ['imgt', 'tabular', 'airr']:
for i, temp_file in enumerate(temp_files):
Expand All @@ -458,7 +457,6 @@ def concat_outputs(input_file, temp_output_file_dicts, output_dir, args):
out_file.write(line)
elif j >= 1:
out_file.write(line)
out_file.write('\n')
if args.parquet and output_type not in PARQUET_INCOMPATIBLE:
logger.info('Converting concatenated output to parquet format')
pname = oprefix + '.parquet'
Expand Down
1 change: 1 addition & 0 deletions abstar/utils/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ def write_output(output_dict, output_dir, output_prefix):
output_file = os.path.join(subdir, output_name)
with open(output_file, 'w') as f:
f.write('\n'.join(output_dict[fmt]))
f.write("\n")
output_file_dict[fmt] = output_file
return output_file_dict

Expand Down

0 comments on commit a0af51a

Please sign in to comment.