chipseq.bds

#!/usr/bin/env bds
#vim: syntax=java


// Command-line parameters of the chipseq pipeline.
// Each declaration gives the parameter's default value; the text after `help`
// is the description attached to that parameter.
// Most of these are passed through the get_conf_val*() helpers again in init_chipseq().
help == chipseq pipeline settings

pe 		:= false 	help Paired end data.
final_stage	:= "" 		help Final stage for pipeline (bam, filt_bam, tag, xcor and peak).
callpeak 	:= "spp,macs2"	help Peak calling method : spp and macs2, can choose both (default: 'spp,macs2').
true_rep   	:= false	help Call peaks on true replicates only.
ctl_depth_ratio := 1.2 		help Cut-off ratio of two control tagaligns for pooling (default: 1.2).
sigtrk 		:= ""		help (BETA) Signal track generation method : bam2bw (bamCoverage in deepTools) or tag2bw (align2rawsignal).
make_wig 	:= false	help Create wig (for '-sigtrk tag2bw' only).
idr_thresh 	:= "0.05"	help IDR threshold : -log_10(score) (default: 0.05).
nreads 		:= 15000000	help # reads to be subsampled for cross corr. analysis (default: 15000000).


help() // print help if no parameters are given

include "modules/input_fastq.bds"
include "modules/input_bam.bds"
include "modules/input_tagalign.bds"
include "modules/input_peak.bds"

include "modules/species.bds"

include "modules/report.bds"

include "modules/align_bwa.bds"
include "modules/postalign_bam.bds"
include "modules/postalign_bed.bds"

include "modules/callpeak_spp.bds"
include "modules/callpeak_macs2.bds"
include "modules/callpeak_etc.bds"

include "modules/idr.bds"

include "modules/signal.bds"


// system vars (not command-line parameters; both are set in init_chipseq())
input 		:= ""	// detected input data type: "fastq", "bam", "filt_bam", "tag" or "peak"
num_ctl 	:= 1	// upper bound of the ctl loops (ctl = 0..num_ctl); set to 0 when control_exists() is false


// Important file names are stored in global variables (usually a string map string{} keyed by replicate id and/or peak caller name)
// e.g. filt_bam{"0,1"} = filtered bam for replicate 1, peak_pr1{"spp,2"} = peak file for pseudo replicate 1 of replicate 2 generated from spp

string{} filt_bam // filtered bams: map with key "$ctl,$rep"
string{} tag, tag_pr1, tag_pr2 // tagaligns (true rep, pseudo rep 1, pseudo rep 2): map with key "$ctl,$rep"; tag also holds "0,pooled" and "1,pooled"
string tag_ppr1, tag_ppr2 // pooled pseudo replicate tagaligns
string{} peak, peak_pr1, peak_pr2, peak_pooled, peak_ppr1, peak_ppr2 // peaks: key "$pc,$rep" for peak/peak_pr1/peak_pr2, key "$pc" for peak_pooled/peak_ppr1/peak_ppr2 ($pc: peak caller name)
string peak_overlap // result of naive_overlap()

string{} tag_ctl, tag_ctl_label // key: replicate id or "pooled", value: actual control used for each replicate (according to control depth ratio) and its label

string{} idr_tr, idr_pr, idr_tr_png, idr_pr_png // IDR on true replicate pairs (key "$i,$j") and on pseudo replicates (key: replicate id)
string idr_ppr, idr_opt, idr_consv, idr_ppr_png // IDR on pooled pseudo replicates and the final results returned by _idr_final_qc()

string{} signal_trk 	 // signal tracks from bam2bw (deepTools) or tag2bw (align2rawsignal): map with key ("$ctl,$rep")
string{} signal_trk_pval, signal_trk_fc // signal track from macs2: map keyed by the peak calling label (e.g. "rep1", "pooled")

string{} flagstat_qc, dup_qc, flagstat_nodup_qc, pbc_qc, xcor_qc, xcor_plot // QC logs: map with key "$ctl,$rep"

string{} xcor_qc_pr1, xcor_qc_pr2 // cross-corr. QC logs for pseudo replicates: map with key "$ctl,$rep"
string xcor_qc_ppr1, xcor_qc_ppr2 // cross-corr. QC logs for pooled pseudo replicates
string idr_qc // final IDR QC log


main() // entry point: run the pipeline (main() is defined below)


// Top-level driver of the chipseq pipeline.
// The stages run strictly in the order listed; each stage decides by itself
// (from the input type and final_stage) whether it has anything to do.
void main() { // chipseq pipeline starts here

	init_chipseq() // read command line parameters or configuration file

	chk_input() // check input files are valid

	align() // align and postalign

	pool_tags() // make pooled tagaligns and choose appropriate control for each replicate

	call_peaks() // call peaks

	naive_overlap() // naive overlap

	do_idr() // IDR

	create_sig_trk() // signal tracks (only if -sigtrk is given)

	report() // HTML report
}

// Initialise the chipseq pipeline:
//  1) pass every chipseq parameter through the get_conf_val*() helpers,
//  2) detect the input data type,
//  3) disable the control loop when no control is given,
//  4) print the settings and create the output directory.
void init_chipseq() {

	pe		= get_conf_val_bool( pe, 		["pe"] )
	final_stage	= get_conf_val( final_stage, 		["final_stage"] )

	callpeak	= get_conf_val( callpeak, 		["callpeak"] )
	true_rep 	= get_conf_val_bool( true_rep, 		["true_rep"] )
	ctl_depth_ratio = get_conf_val_real( ctl_depth_ratio, 	["ctl_depth_ratio"])

	sigtrk 		= get_conf_val( sigtrk, 		["sigtrk"] )
	// make_wig is a regular chipseq parameter, so it is read the same way as the others
	make_wig 	= get_conf_val_bool( make_wig, 		["make_wig"] )

	idr_thresh 	= get_conf_val( idr_thresh, 		["idr_thresh"] )
	nreads 		= get_conf_val_int( nreads,		["nreads"] )

	// determine input type
	// The checks are not exclusive: a later match overrides an earlier one,
	// so the precedence is fastq > bam > filt_bam > tag > peak.
	if ( get_peak(1,0) != "" ) 	input = "peak"
	if ( get_tag(0,1) != "" ) 	input = "tag"
	if ( get_filt_bam(0,1) != "" ) 	input = "filt_bam"
	if ( get_bam(0,1) != "" ) 	input = "bam"
	if ( get_fastq(0,1,1) != "" ) 	input = "fastq"

	// without controls the "ctl" loops (0..num_ctl) only visit the replicates
	if ( !control_exists() ) num_ctl = 0

	print_chipseq()

	out_dir = mkdir( out_dir ) // create output directory and get absolute path for it
}

// Print the effective chipseq settings, one "label : value" row per setting.
void print_chipseq() {

	print( "\n\n== chipseq pipeline settings\n")
	print( "Final stage for ChIP-Seq\t: $final_stage\n" )

	// number of replicates as reported by get_num_rep()
	num_rep := get_num_rep()
	print( "# replicates \t\t\t: " + num_rep + "\n" )

	print( "Subsample # read for cross-corr. analysis \t: $nreads\n")
	print( "Input data type\t\t\t: $input\n")

	// peak calling related settings
	print( "Peak calling method\t\t: $callpeak\n" )
	print( "Peak calling for true reps only\t: $true_rep\n" )
	print( "Control rep. depth ratio\t: $ctl_depth_ratio\n" )

	// signal track related settings
	print( "Sig. trk. generation method\t: $sigtrk\n" )
	print( "Create wig\t\t\t: $make_wig\n" )

	print( "IDR threshold\t\t\t: $idr_thresh\n" )
}

// Validate the pipeline inputs before any job is started.
//  - runs the chk_*() checks of the modules that will be used,
//  - for peak input: reads every peak file into the global peak maps (stored under the "spp" keys) and returns,
//  - for all other input types: prints every input file, stops with error() if a file does not exist,
//    if mandatory data is missing or if two inputs share the same basename.
void chk_input() {

	print( "\n\n== checking chipseq inputs (data type = $input) ...\n\n" );

	if ( is_input_fastq() ) 	chk_align_bwa()
	if ( is_callpeak_macs2() ) 	chk_callpeak_macs2()
	if ( is_sigtrk_aln2rawsig() ) 	chk_signal_aln2rawsig()
	if ( is_final_stage_idr() ) 	chk_idr()
	if ( multimapping > 0 ) 	error("Multimapping is not available for chipseq pipeline!\n")

	print("\n")

	if ( is_input_peak() ) { // read peaks here

		if ( true_rep && get_num_rep() == 1 ) error("Cannot do IDR for true replicates with only one replicate!")

		for ( int rep=0; rep<=get_num_rep(); rep++) { // rep==0 : pooled

			if ( get_num_rep() == 1 && rep==0 ) continue // if only one replicate, skip reading pooled rep

			for (int pse=0; pse<=2; pse++) { // pse==0 : true rep

				if ( true_rep && pse > 0 ) continue // pseudo replicates are not used in true_rep mode

				peak_ := get_peak(rep,pse)
				// human readable name of this peak file, e.g. "pooled pseudo-replicate 1" or "replicate 2"
				suffix1 := rep==0 ? "replicate" : "replicate $rep"
				suffix2 := rep==0 ? "pseudo-replicate $pse" : "pseudo-replicate $pse for replicate $rep"
				prefix := (rep==0 ? "pooled " : "") + (pse==0 ? suffix1 : suffix2)
				
				print( "$prefix: \n\t$peak_"+"\n")
				if ( !path_exists( peak_ ) ) error("\t\tFile not found!\n")

				// input peaks are always registered under the "spp" key
				if ( rep == 0 ) {
					if ( pse == 0 )		peak_pooled{"spp"} 	= peak_
					else if ( pse == 1 )	peak_ppr1{"spp"} 	= peak_
					else if ( pse == 2 )	peak_ppr2{"spp"} 	= peak_
				}
				else {
					if ( pse == 0 )		peak{"spp,$rep"} 	= peak_
					else if ( pse == 1 )	peak_pr1{"spp,$rep"} 	= peak_
					else if ( pse == 2 )	peak_pr2{"spp,$rep"} 	= peak_
				}
			}
		}

		return
	}

	string[] data_all // every input file accepted so far (used for the duplicate filename check)

	for ( int ctl=0; ctl <= num_ctl; ctl++) { // iterate through replicates (0: not control, 1~: controls)

		for ( int rep=1; rep <= get_num_rep(); rep++) {

			string[] data // input file(s) of this replicate/control; a single "" means no data

			prefix := (ctl==1) ? "Control " : ""
			suffix := is_paired_end( ctl, rep ) ? " (PE)" : " (SE)"

			if ( is_input_fastq() ) {
				prefix = prefix + "Rep$rep fastq" + suffix
				fastqs := get_fastqs( ctl, rep )
				if ( fastqs.size()==0 ) {
					data.push( "" )
				}
				else {
					for ( string fastq : fastqs ) data.push( fastq )
				}
			}
			else if ( is_input_bam() ) {
				prefix = prefix +"Rep$rep bam" + suffix
				data.push( get_bam( ctl, rep ) )
			}
			else if ( is_input_filt_bam() ) {
				prefix = prefix +"Rep$rep filt_bam" + suffix
				data.push( get_filt_bam( ctl, rep ) )
			}
			else if ( is_input_tag() ) {
				prefix = prefix + "Rep$rep tagalign" + suffix
				data.push( get_tag( ctl, rep ) )
			}

			print("$prefix :\n")

			for ( string s : data ) {
				print("\t$s\n")
				if ( (s != "") && !path_exists(s) ) error("\t\tFile not found!\n")
			}

			// if data is missing
			// only a missing control for rep>=2 or a missing replicate 2 is tolerated (warning);
			// anything else stops the pipeline
			if ( data[0] == "" ) {
				if ( (rep>=2) && (ctl==1) ) \
					print( "\tWarning: $prefix missing! keep going... (using control 1 for calling peaks on replicate $rep)\n")
				else if ( (rep==2) && (ctl==0) ) \
					print( "\tWarning: $prefix missing! keep going... (peak calling for replicate 1 only)\n")
				else \
					error( "\t$prefix missing!\n")
				continue
			}

			// check any duplicate input filename
			// (basenames are compared against all files accepted in previous iterations)
			for ( string s : data ) {
				if ( is_in_array( get_basename( s ), get_basename( data_all ) ) ) \
					error( "\t$prefix has duplicate filename!\n")
			}

			data_all = concat( data_all, data )
		}
	}

	if ( final_stage != "" ) \
		print( "\n====== Final stage : $final_stage \n\n" );
}

// Align and post-align every available replicate and control.
// Does nothing for peak input. Threads (nth) are first split between the
// replicates/controls according to their input file size, then _align() is
// started for each of them (in parallel unless no_par is set).
void align() {

	if ( is_input_peak() ) return

	//// distribute # threads for each replicate/control

	int{} filesize // key: "$ctl,$rep", value: input size in bytes

	for ( int ctl=0; ctl <= num_ctl; ctl++) { // iterate through inputs (ctl==0 : replicate, ctl==1 : control)		

		for ( int rep=1; rep <= get_num_rep(); rep++) {
		
			if ( !is_data_available( ctl, rep ) ) continue

			// the input file size is used as the weight when distributing nth
			// among the replicates/controls (see distribute_nth() call below)

			key := "$ctl,$rep"  // key name for global output variable (map)

			// get file size in bytes
			if ( is_input_fastq() ) {

				fastqs := get_fastqs( ctl, rep )
				filesize{key} = (fastqs[0]).size()
				if ( fastqs.size() > 1) filesize{key} += (fastqs[1]).size()	// second fastq, if there is one
			}
			else if ( is_input_bam() ) {

				filesize{key} = (get_bam( ctl, rep )).size()
			}
			else if ( is_input_filt_bam() ) {

				filesize{key} = (get_filt_bam( ctl, rep )).size()
			}
			else if ( is_input_tag() ) {

				filesize{key} = (get_tag( ctl, rep )).size()
			}
		}
	}

	nth_rep := distribute_nth( nth, filesize ) // distribute_nth # threads according to file size

	//// align
	
	for ( int ctl=0; ctl <= num_ctl; ctl++) { // iterate through inputs (ctl==0 : replicate, ctl==1 : control)

		for ( int rep=1; rep <= get_num_rep(); rep++) {

			if ( !is_data_available( ctl, rep ) ) continue

			key := "$ctl,$rep"  // key name for global output variable (map)

			if ( no_par ) _align( ctl, rep, nth_rep{key} ) // serial: one replicate/control after another
			else	  par _align( ctl, rep, nth_rep{key} ) // parallel jobs for align() for each replicate and each control
		}
	}

	wait_clear_tids()

	print( "\n== Done align()\n" )
}

// Run the alignment flow of one replicate (ctl==0) or control (ctl==1)
// with nth_rep threads, choosing the paired-end or single-ended variant.
void _align( int ctl, int rep, int nth_rep ) {

	if ( !is_single_ended( ctl, rep ) ) {
		_align_PE( ctl, rep, nth_rep )
	}
	else {
		_align_SE( ctl, rep, nth_rep )
	}

	// monitor_par() is only called when parallel execution is enabled
	if ( no_par ) return

	monitor_par()
}

// Single-ended flow for one replicate (ctl==0) or control (ctl==1):
//   fastq -> bam (_bwa) -> filtered bam (_dedup_bam) -> tagalign (_bam_to_tag)
//   -> replicates only: subsampling, self pseudo replicates (_spr) and cross-correlation (_xcor).
// The flow starts at the stage matching the input data type and returns as soon
// as the stage named by final_stage is done. Output file names are stored in the
// global maps under key "$ctl,$rep".
void _align_SE( int ctl, int rep, int nth_rep ) {

	info 	:= get_info( ctl, rep )
	key 	:= "$ctl,$rep"  // key name for global output variable (map)

	aln_o_dir := mkdir( "$out_dir/align/$info" ) // create align output directory
	qc_o_dir  := mkdir( "$out_dir/qc/$info" ) // create qc output dir.

	string bam

	if ( is_input_fastq() ) {

		fastqs := get_fastqs( ctl, rep )

		( bam, flagstat_qc{key} ) = _bwa( fastqs[0], aln_o_dir, qc_o_dir, info, nth_rep )
		wait

		if ( is_final_stage_bam() ) return
	}

	string filt_bam_

	if ( is_input_bam() || is_input_fastq() ) {
	
		if ( is_input_bam() ) bam = get_bam( ctl, rep ) // start from the user's bam

		( filt_bam_, dup_qc{key}, flagstat_nodup_qc{key}, pbc_qc{key} ) \
			= _dedup_bam( bam, aln_o_dir, qc_o_dir, info, nth_rep )
		filt_bam{key} = filt_bam_
		wait

		if ( is_final_stage_filt_bam() ) return
	}

	string tag_

	if ( is_input_filt_bam() || is_input_bam() || is_input_fastq() ) {

		if ( is_input_filt_bam() ) { // start from the user's filtered bam
			filt_bam_ = get_filt_bam( ctl, rep )
			filt_bam{key} = filt_bam_
		}

		tag_ = _bam_to_tag( filt_bam_, aln_o_dir, info )
		wait

		tag{key} = tag_

		if ( is_final_stage_tag() ) return
	}

	if ( is_input_tag() || is_input_filt_bam() || is_input_bam() || is_input_fastq() ) {

		if ( is_input_tag() ) { // start from the user's tagalign (replicates and controls)
			tag_ = get_tag( ctl, rep )
			tag{key} = tag_
		}

		string xcor_qc_

		if ( ctl == 0 ) { // if replicate

			subsampled_tag := _subsample_tag( tag_, nreads, aln_o_dir, info )

			string subsampled_tag_pr1, subsampled_tag_pr2

			string qc_pr1_o_dir, qc_pr2_o_dir

			if ( !true_rep ) { // pseudo replicates

				aln_pr1_o_dir := mkdir( "$out_dir/align/pseudo_reps/$info/pr1" )
				aln_pr2_o_dir := mkdir( "$out_dir/align/pseudo_reps/$info/pr2" )

				qc_pr1_o_dir    = mkdir( "$out_dir/qc/pseudo_reps/$info/pr1" ) // create qc output dir.
				qc_pr2_o_dir    = mkdir( "$out_dir/qc/pseudo_reps/$info/pr2" ) // create qc output dir.

				string tag_pr1_, tag_pr2_
				(tag_pr1_, tag_pr2_ ) = _spr( tag_, aln_pr1_o_dir, aln_pr2_o_dir, info ) // make self pseudo replicate
				tag_pr1{key} = tag_pr1_
				tag_pr2{key} = tag_pr2_
				wait

				// subsample each pseudo replicate for its own cross-corr. analysis
				subsampled_tag_pr1 = _subsample_tag( tag_pr1_, nreads, aln_o_dir, info )
				subsampled_tag_pr2 = _subsample_tag( tag_pr2_, nreads, aln_o_dir, info )				
			}

			wait

			// distribute_nth nth_rep for xcor
			nth_xcor := distribute_nth( nth_rep, true_rep ? [1] : [2,1,1] ) // for [true-rep,pr1,pr2]

			// xcor for true rep
			( xcor_qc_, xcor_plot{key} ) = _xcor( subsampled_tag, qc_o_dir, info, nth_xcor[0], info )
			xcor_qc{key} = xcor_qc_

			if ( !true_rep ) { // pseudo replicates

				// xcor for pseudo replicates
				string tmp // second return value of _xcor() is not kept for pseudo replicates
				( xcor_qc_pr1{key}, tmp ) = _xcor( subsampled_tag_pr1, qc_pr1_o_dir, info+"-pr1", nth_xcor[1], "pseudo_reps/$info/pr1" )
				( xcor_qc_pr2{key}, tmp ) = _xcor( subsampled_tag_pr2, qc_pr2_o_dir, info+"-pr2", nth_xcor[2], "pseudo_reps/$info/pr2" )
			}
		}

		if ( is_final_stage_xcor() ) return // last stage of this function: returns either way
	}	
}

// Paired-end flow for one replicate (ctl==0) or control (ctl==1):
//   fastq -> bam (_bwa_PE) -> filtered bam (_dedup_bam_PE) -> tagalign (_bam_to_tag)
//   -> replicates only: subsampling, self pseudo replicates and cross-correlation (_xcor).
// The flow starts at the stage matching the input data type and returns as soon
// as the stage named by final_stage is done. Output file names are stored in the
// global maps under key "$ctl,$rep".
void _align_PE( int ctl, int rep, int nth_rep ) {

	info 	:= get_info( ctl, rep )
	key 	:= "$ctl,$rep"  // key name for global output variable (map)

	aln_o_dir := mkdir( "$out_dir/align/$info" ) // create align output directory
	qc_o_dir    := mkdir( "$out_dir/qc/$info" ) // create qc output dir.

	string bam

	if ( is_input_fastq() ) {

		fastqs := get_fastqs( ctl, rep )

		( bam, flagstat_qc{key} ) = _bwa_PE( fastqs[0], fastqs[1], aln_o_dir, qc_o_dir, info, nth_rep )
		wait

		if ( is_final_stage_bam() ) return
	}

	string filt_bam_

	if ( is_input_bam() || is_input_fastq() ) {

		if ( is_input_bam() ) bam = get_bam( ctl, rep ) // start from the user's bam

		( filt_bam_, dup_qc{key}, flagstat_nodup_qc{key}, pbc_qc{key} ) \
			= _dedup_bam_PE( bam, aln_o_dir, qc_o_dir, info, nth_rep )
		filt_bam{key} = filt_bam_
		wait

		if ( is_final_stage_filt_bam() ) return
	}

	string tag_

	if ( is_input_filt_bam() || is_input_bam() || is_input_fastq() ) {

		if ( is_input_filt_bam() ) { // start from the user's filtered bam
			filt_bam_ = get_filt_bam( ctl, rep )
			filt_bam{key} = filt_bam_
		}

		tag_ = _bam_to_tag( filt_bam_, aln_o_dir, info )
		wait
		tag{key} = tag_

		if ( is_final_stage_tag() ) return
	}

	string bedpe, subsampled_tag

	if ( is_input_tag() || is_input_filt_bam() || is_input_bam() || is_input_fastq() ) {

		// Register the user's tagalign for replicates AND controls (same as _align_SE()):
		// pool_tags() reads tag{"1,$rep"} for every available control.
		if ( is_input_tag() ) {

			tag_ = get_tag( ctl, rep )
			tag{key} = tag_
		}

		if ( ctl == 0 ) { // if replicate

			if ( is_input_tag() ) { // if starting from tag, we don't have bedpe file

				subsampled_tag = _subsample_tag_PE_xcor( tag_, nreads, aln_o_dir, info )
				wait
			}
			else {
				bedpe = _bam_to_bedpe( filt_bam_, aln_o_dir, info )
				wait

				subsampled_tag = _subsample_bedpe_to_tag_xcor( bedpe, nreads, aln_o_dir, info )
			}

			string tag_pr1_, tag_pr2_

			string qc_pr1_o_dir, qc_pr2_o_dir

			if ( !true_rep ) { // pseudo replicates

				aln_pr1_o_dir := mkdir( "$out_dir/align/pseudo_reps/$info/pr1" )
				aln_pr2_o_dir := mkdir( "$out_dir/align/pseudo_reps/$info/pr2" )

				qc_pr1_o_dir    = mkdir( "$out_dir/qc/pseudo_reps/$info/pr1" ) // create qc output dir.
				qc_pr2_o_dir    = mkdir( "$out_dir/qc/pseudo_reps/$info/pr2" ) // create qc output dir.

				if ( is_input_tag() ) { // if starting from tag, we don't have bedpe file
					( tag_pr1_, tag_pr2_ ) = _spr_tag_PE( tag_, aln_pr1_o_dir, aln_pr2_o_dir, info )
					tag_pr1{key} = tag_pr1_
					tag_pr2{key} = tag_pr2_
				}
				else {
					( tag_pr1_, tag_pr2_ ) = _spr_PE( bedpe, aln_pr1_o_dir, aln_pr2_o_dir, info )
					tag_pr1{key} = tag_pr1_
					tag_pr2{key} = tag_pr2_
				}
			}

			wait

			// distribute_nth nth_rep for xcor
			nth_xcor := distribute_nth( nth_rep, true_rep ? [1] : [2,1,1] ) // for [true-rep,pr1,pr2]

			// xcor for true rep
			string xcor_qc_
			( xcor_qc_, xcor_plot{key} ) = _xcor( subsampled_tag, qc_o_dir, info, nth_xcor[0], info )
			xcor_qc{key} = xcor_qc_

			if ( !true_rep ) { // pseudo replicates

				// xcor for pseudo replicates
				string tmp // second return value of _xcor() is not kept for pseudo replicates
				( xcor_qc_pr1{key}, tmp ) = _xcor( tag_pr1_, qc_pr1_o_dir, info+"-pr1", nth_xcor[1], "pseudo_reps/$info/pr1" )
				( xcor_qc_pr2{key}, tmp ) = _xcor( tag_pr2_, qc_pr2_o_dir, info+"-pr2", nth_xcor[2], "pseudo_reps/$info/pr2" )
			}
		}

		if ( is_final_stage_xcor() ) return // last stage of this function: returns either way
	}
}

// Pool tagaligns and choose the control used for each replicate.
// Runs only when the final stage is peak or idr and the input is not peak files.
//  1) pool the replicates (tag{"0,pooled"}) and, unless true_rep, the pseudo replicates
//     (tag_ppr1/tag_ppr2) including their cross-corr. analysis,
//  2) pool the controls (tag{"1,pooled"}) if there is more than one,
//  3) fill tag_ctl{}/tag_ctl_label{} (keys: replicate id and "pooled") with the control
//     to be used for peak calling, applying ctl_depth_ratio when several controls exist.
void pool_tags() {

	if ( !(is_final_stage_peak() || is_final_stage_idr()) ) return

	if ( is_input_peak() ) return

	////////////// pool replicates and controls, then create ppr (pooled pseudoreplicates) before peak calling

	string[] tags, tags_pr1, tags_pr2, tags_ctl
	int[] ids_ctl // replicate ids of the available controls (same order as tags_ctl)

	for ( int rep=1; rep<=get_num_rep(); rep++ ) {

		tags.add( tag{"0,$rep"} )

	 	if ( !true_rep ) {
			tags_pr1.add( tag_pr1{"0,$rep"} )
			tags_pr2.add( tag_pr2{"0,$rep"} )
		}

		if ( is_data_available( 1, rep ) ) { // if control exists
			tags_ctl.add( tag{"1,$rep"} )
			ids_ctl.add( rep )
		}
	}

	nth_xcor := distribute_nth( nth, [1,1] ) // for [ppr1,ppr2]

	if ( tags.size() > 1 ) {

	 	aln_pooled_o_dir := mkdir( "$out_dir/align/pooled_rep" )

		tag_pooled := _pool_tag( tags, aln_pooled_o_dir, "reps" )
		tag{"0,pooled"} = tag_pooled

		// graphviz, filetable for pooled tagalign
		string[] graph_in
		for ( int rep=1; rep<=get_num_rep(); rep++ ) graph_in.add("tagalign_(rep$rep)")

		_add_to_graphviz( graph_in, tags, ["tagalign_(pooled)"], [tag_pooled] )
		_add_to_filetable(["L1_align/pooled_rep/tagalign"], [tag_pooled] )
		//

	 	if ( !true_rep ) {
		 	aln_ppr1_o_dir   := mkdir( "$out_dir/align/pooled_pseudo_reps/ppr1" )
		 	aln_ppr2_o_dir   := mkdir( "$out_dir/align/pooled_pseudo_reps/ppr2" )

			qc_ppr1_o_dir    := mkdir( "$out_dir/qc/pooled_pseudo_reps/ppr1" ) // create qc output dir.
			qc_ppr2_o_dir    := mkdir( "$out_dir/qc/pooled_pseudo_reps/ppr2" ) // create qc output dir.

			tag_ppr1_ := _pool_tag( tags_pr1, aln_ppr1_o_dir, "reps-pr1" )
			tag_ppr2_ := _pool_tag( tags_pr2, aln_ppr2_o_dir, "reps-pr2" )
			tag_ppr1 = tag_ppr1_ // for thread safety
			tag_ppr2 = tag_ppr2_
			wait

			// graphviz, filetable for ppr
			string[] graph_in_pr1, graph_in_pr2
			for ( int rep=1; rep<=get_num_rep(); rep++ ) {
				graph_in_pr1.add("tagalign_(rep$rep-pr1)")
				graph_in_pr2.add("tagalign_(rep$rep-pr2)")
			}

			_add_to_graphviz( graph_in_pr1, tags_pr1, ["tagalign_(ppr1)"], [tag_ppr1] )
			_add_to_graphviz( graph_in_pr2, tags_pr2, ["tagalign_(ppr2)"], [tag_ppr2] )
			_add_to_filetable(["L1_align/pooled_pseudo_reps/ppr1/tagalign"], [tag_ppr1] )
			_add_to_filetable(["L1_align/pooled_pseudo_reps/ppr2/tagalign"], [tag_ppr2] )
			//

			// xcor for pooled pseudo replicates (second return value of _xcor() is not kept)
			string tmp
			(xcor_qc_ppr1, tmp) = _xcor( tag_ppr1_, qc_ppr1_o_dir, "ppr1", nth_xcor[0], "pooled_pseudo_reps/ppr1" )
			(xcor_qc_ppr2, tmp) = _xcor( tag_ppr2_, qc_ppr2_o_dir, "ppr2", nth_xcor[1], "pooled_pseudo_reps/ppr2" )
		}
	}

	if ( tags_ctl.size() > 1 ) {

		aln_pooled_ctl_o_dir := mkdir( "$out_dir/align/pooled_ctl" )

		tag_ctl_pooled := _pool_tag( tags_ctl, aln_pooled_ctl_o_dir, "ctls" )
		tag{"1,pooled"} = tag_ctl_pooled

		// graphviz, filetable for pooled control tagalign
		string[] graph_in
		for ( int ctl : ids_ctl ) graph_in.add("tagalign_(ctl$ctl)")

		_add_to_graphviz( graph_in, tags_ctl, ["tagalign_(ctl, pooled)"], [tag_ctl_pooled] )
		_add_to_filetable(["L1_align/pooled_ctl/tagalign"], [tag_ctl_pooled] )
		//
	}

	wait_clear_tids()

	/////////// choose appropriate control for each replicate

	if ( tags_ctl.size() == 0 ) { // if no control at all

		tag_ctl{"pooled"} 	= ""
		tag_ctl_label{"pooled"} = ""

		for ( int rep=1; rep<=get_num_rep(); rep++ ) {

			tag_ctl{rep} 		= ""
			tag_ctl_label{rep} 	= ""
		}
	}
	else if ( tags_ctl.size() == 1 ) { // if only one control, use it for all replicates

		tag_ctl{"pooled"} 	= tags_ctl[0]
		tag_ctl_label{"pooled"} = "ctl" + ids_ctl[0]

		for ( int rep=1; rep<=get_num_rep(); rep++ ) {

			tag_ctl{rep} 		= tags_ctl[0]
			tag_ctl_label{rep} 	= "ctl" + ids_ctl[0]
		}
	}
	else { // if multiple controls, check # of lines in replicate/control tagaligns and apply ctl_depth_ratio

		tag_ctl{"pooled"} 		= tag{"1,pooled"}
		tag_ctl_label{"pooled"} 	= "ctl, pooled"

		real{} nlines 		// # of lines in tagaligns, key: 0,rep for replicate, 1,rep for control
		real[] nlines_ctl 	// # of lines in control tagaligns

		for ( int rep=1; rep<=get_num_rep(); rep++ ) {

			nlines{"0,$rep"} = get_no_lines( tag{"0,$rep"} )

			if ( is_data_available( 1, rep ) ) { // if control exists

				nlines{"1,$rep"} = get_no_lines( tag{"1,$rep"} )
				nlines_ctl.add( nlines{"1,$rep"} )
			}
		}

		use_pooled_ctl := false

		// check # lines in every pair of control tagaligns;
		// both loops stop at the first pair exceeding ctl_depth_ratio, so the message is printed once
		for ( int i=0; i<nlines_ctl.size() && !use_pooled_ctl; i++ ) {
			for ( int j=i+1; j<nlines_ctl.size() && !use_pooled_ctl; j++ ) {

				if ( 	nlines_ctl[i] / nlines_ctl[j] > ctl_depth_ratio || \
					nlines_ctl[j] / nlines_ctl[i] > ctl_depth_ratio ) {

					use_pooled_ctl = true

					print("\nNumber of reads in controls differ by a factor of $ctl_depth_ratio. Using pooled controls.\n" )
				}
			}
		}

		for ( int rep=1; rep<=get_num_rep(); rep++ ) {

			if ( !use_pooled_ctl && is_data_available( 1, rep ) ) {

				// a control shallower than its replicate is replaced by the pooled control
				if ( nlines{"1,$rep"} < nlines{"0,$rep"} ) {

					print("\nFewer reads in control $rep than experiment replicate $rep. Using pooled controls for replicate $rep.\n")

					tag_ctl{rep} 		= tag{"1,pooled"}
					tag_ctl_label{rep} 	= "ctl, pooled"
				}
				else {
					tag_ctl{rep} 		= tag{"1,$rep"}
					tag_ctl_label{rep} 	= "ctl$rep"
				}
			}
			else { // pooled control is forced, or this replicate has no control of its own

				tag_ctl{rep} 		= tag{"1,pooled"}
				tag_ctl_label{rep} 	= "ctl, pooled"
			}
		}
	}

	wait_clear_tids()

	print( "\n== Done pool_tags()\n" )
}

// Start peak calling for every selected peak caller (spp and/or macs2).
// Skipped unless the final stage is peak or idr, and skipped for peak input.
void call_peaks() {

	if ( !is_final_stage_peak() && !is_final_stage_idr() ) return

	if ( is_input_peak() ) return

	use_spp   := is_callpeak_spp()
	use_macs2 := is_callpeak_macs2()

	// # threads per peak caller; when both run, spp gets priority over macs2 (4 vs 1)
	int{} nth_peak

	if ( use_spp && use_macs2 ) {
		( nth_peak{"spp"}, nth_peak{"macs2"} ) = distribute_nth( nth, [4,1] )
	}
	else if ( use_spp ) {
		nth_peak{"spp"} = nth
	}
	else if ( use_macs2 ) {
		nth_peak{"macs2"} = nth
	}
	else {
		error("No peak caller is specified (-callpeak)!\n")
	}

	// one _call_peaks() per peak caller, in parallel unless no_par is set
	if ( use_spp ) {
		if ( no_par ) _call_peaks( "spp", nth_peak{"spp"} )
		else 	  par _call_peaks( "spp", nth_peak{"spp"} )
	}

	if ( use_macs2 ) {
		if ( no_par ) _call_peaks( "macs2", nth_peak{"macs2"} )
		else 	  par _call_peaks( "macs2", nth_peak{"macs2"} )
	}

	wait_clear_tids()

	print( "\n== Done call_peaks()\n" )
}

// Call peaks with one peak caller on pooled replicates, pooled pseudo replicates,
// every true replicate and (unless true_rep) its pseudo replicates.
// Results are stored in the global peak maps (keys "$pc" or "$pc,$rep").
// spp is skipped entirely when there is no control.
void _call_peaks( string pc, int nth_peak ) { // pc: peak caller name ("spp" or "macs2"), nth_peak: # threads for this peak caller

	if ( pc == "spp" && !control_exists() ) return

	//// calculate fragment length (required for SPP and MACS2)

	string fraglen_ppr1, fraglen_ppr2, fraglen_mean
	string{} fraglen, fraglen_pr1, fraglen_pr2 // key = replicate id
	real fraglen_sum = 0.0

	for (int rep=1; rep<=get_num_rep(); rep++) {

		fraglen{rep} = get_fraglen( xcor_qc{"0,$rep"} ) // get fragment length of this replicate from its cross-corr. QC log

		if ( !true_rep ) {

			fraglen_pr1{rep} = get_fraglen( xcor_qc_pr1{"0,$rep"} )
			fraglen_pr2{rep} = get_fraglen( xcor_qc_pr2{"0,$rep"} )
		}

		fraglen_sum += fraglen{rep}.parseReal()
	}

	if ( !true_rep && get_num_rep() > 1 ) {
		fraglen_ppr1 = get_fraglen( xcor_qc_ppr1 )
		fraglen_ppr2 = get_fraglen( xcor_qc_ppr2 )
	}

	real num_rep_real = get_num_rep()
	fraglen_mean = round( fraglen_sum / num_rep_real ) // compute fraglen mean for pooled


	//// distribute # threads for each peak calling

	int nth_pooled, nth_ppr, nth_true, nth_pr

	if ( !true_rep ) {
		// only the first two shares of each distribution are kept; ppr1/ppr2 (pr1/pr2) use the same value
		( nth_pooled, nth_ppr ) = distribute_nth( nth_peak, [ 2, 1, 1 ] ) // pooled, ppr1, ppr2
		( nth_true, nth_pr ) 	= distribute_nth( nth_peak, [ 2, 1, 1 ] ) // for each replicate (true, pr1, pr2)
	}
	else {
		( nth_pooled, nth_true ) = distribute_nth( nth_peak, [ get_num_rep(), 1 ] ) // pooled, true
	}

	//// call peaks

	// create directories
	peak_o_dir 	:= mkdir( "$out_dir/peak/$pc") // peak directory structure
	sig_o_dir 	:= mkdir( "$out_dir/signal/$pc") // signal directory structure

	// call peaks on pooled replicates and pooled pseudo replicates
	if ( get_num_rep() > 1 ) {

		pooled_o_dir	:= mkdir( "$peak_o_dir/pooled_rep" )
		pooled_sig_o_dir:= mkdir( "$sig_o_dir/pooled_rep" )

		pooled_hrchy 	:= "pooled_rep" 

		// make_sig (5th argument) is true for true/pooled replicates only
		peak_pooled{"$pc"}= _call_peaks( pc, tag{"0,pooled"}, tag_ctl{"pooled"}, fraglen_mean, true, pooled_o_dir, pooled_sig_o_dir, \
							"pooled", tag_ctl_label{"pooled"}, nth_pooled, pooled_hrchy )
		sleep( 4 )

		if ( !true_rep ) {

			ppr1_o_dir 	:= mkdir( "$peak_o_dir/pooled_pseudo_reps/ppr1" )
			ppr1_sig_o_dir 	:= mkdir( "$sig_o_dir/pooled_pseudo_reps/ppr1" )
			ppr2_o_dir 	:= mkdir( "$peak_o_dir/pooled_pseudo_reps/ppr2" )
			ppr2_sig_o_dir 	:= mkdir( "$sig_o_dir/pooled_pseudo_reps/ppr2" )

			ppr1_hrchy 	:= "pooled_pseudo_reps/ppr1"
			ppr2_hrchy 	:= "pooled_pseudo_reps/ppr2"

			peak_ppr1{"$pc"} = _call_peaks( pc, tag_ppr1, tag_ctl{"pooled"}, fraglen_ppr1, false, ppr1_o_dir, ppr1_sig_o_dir, \
								"ppr1", tag_ctl_label{"pooled"}, nth_ppr, ppr1_hrchy )

			peak_ppr2{"$pc"} = _call_peaks( pc, tag_ppr2, tag_ctl{"pooled"}, fraglen_ppr2, false, ppr2_o_dir, ppr2_sig_o_dir, \
								"ppr2",	tag_ctl_label{"pooled"}, nth_ppr, ppr2_hrchy )
		}
	}

	// call peaks for each replicate and pseudo replicates for it
	for (int rep=1; rep<=get_num_rep(); rep++) {

		o_dir	:= mkdir( "$peak_o_dir/rep$rep" )
		sig_dir := mkdir( "$sig_o_dir/rep$rep" )

		hrchy 	:= "rep$rep"

		peak{"$pc,$rep"} = _call_peaks( pc, tag{"0,$rep"}, tag_ctl{rep}, fraglen{rep}, true, o_dir, sig_dir, \
							"rep$rep", tag_ctl_label{rep}, nth_true, hrchy )

		sleep( 4 )

		if ( !true_rep ) {

			pr1_o_dir 	:= mkdir( "$peak_o_dir/pseudo_reps/rep$rep/pr1" )
			pr1_sig_o_dir 	:= mkdir( "$sig_o_dir/pseudo_reps/rep$rep/pr1" )
			pr2_o_dir 	:= mkdir( "$peak_o_dir/pseudo_reps/rep$rep/pr2" )
			pr2_sig_o_dir 	:= mkdir( "$sig_o_dir/pseudo_reps/rep$rep/pr2" )

			pr1_hrchy 	:= "pseudo_reps/rep$rep/pr1"
			pr2_hrchy 	:= "pseudo_reps/rep$rep/pr2"

			peak_pr1{"$pc,$rep"} = _call_peaks( pc, tag_pr1{"0,$rep"}, tag_ctl{rep}, fraglen_pr1{rep}, false, pr1_o_dir, pr1_sig_o_dir, \
								"rep$rep-pr1", tag_ctl_label{rep}, nth_pr, pr1_hrchy )

			peak_pr2{"$pc,$rep"} = _call_peaks( pc, tag_pr2{"0,$rep"}, tag_ctl{rep}, fraglen_pr2{rep}, false, pr2_o_dir, pr2_sig_o_dir, \
								"rep$rep-pr2", tag_ctl_label{rep}, nth_pr, pr2_hrchy )
		}
	}	

	if ( !no_par ) monitor_par()	
}

// Call peaks on one tagalign with the given peak caller and return the peak file
// used downstream: the regionpeak for "spp", the narrowpeak for "macs2".
// For macs2 with make_sig set, the p-value and fold-change signal tracks are also
// registered in signal_trk_pval{label} / signal_trk_fc{label}.
string _call_peaks( string peakcaller, string tag1, string tag1_ctl, string fraglen, bool make_sig, \
			string peak_o_dir, string sig_o_dir, string label, string label_ctl, \
			int nth_peak, string hrchy ) {

	if ( peakcaller == "macs2" ) {

		string npeak, gpeak, bw_fc, bw_pval

		( npeak, gpeak, bw_fc, bw_pval ) \
			= _macs2( tag1, tag1_ctl, fraglen, make_sig, peak_o_dir, sig_o_dir, label, label_ctl, hrchy )

		// keep the signal tracks produced by macs2
		if ( make_sig ) {
			signal_trk_pval{label} 		= bw_pval
			signal_trk_fc{label} 		= bw_fc
		}

		return npeak
	}
	else if ( peakcaller == "spp" ) {

		string rpeak, score, pdf

		( rpeak, score, pdf ) \
			= _spp( tag1, tag1_ctl, fraglen, peak_o_dir, label, label_ctl, nth_peak, hrchy )

		// the regionpeak (not a narrowpeak) is the result for spp
		return rpeak
	}
}

// Naive overlap of the spp peaks; the result is stored in peak_overlap.
// Skipped unless the final stage is peak or idr, spp is selected and
// spp peaks exist for replicate 1.
void naive_overlap() {

	if ( !is_final_stage_peak() && !is_final_stage_idr() ) return

	if ( !is_callpeak_spp() ) return

	// no peaks from spp: nothing to overlap
	if ( !peak.hasKey("spp,1") ) return

	overlap_o_dir := mkdir( "$out_dir/peak/spp/overlap" )

	if ( get_num_rep() != 1 ) {

		// peaks from all true replicates
		string[] true_peaks
		for ( int rep=1; rep<=get_num_rep(); rep++ ) true_peaks.add( peak{"spp,$rep"} )

		// pooled pseudo replicate peaks are left empty in true_rep mode
		ppr1 := ""
		ppr2 := ""

		if ( !true_rep ) {
			ppr1 = peak_ppr1{"spp"}
			ppr2 = peak_ppr2{"spp"}
		}

		peak_overlap = _naive_overlap_peak( "regionPeak", peak_pooled{"spp"}, true_peaks, \
							ppr1, ppr2, overlap_o_dir, "peak", \
							"peak_spp", "spp/overlap")
	}
	else if ( !true_rep ) {

		// single replicate: overlap the replicate with its own pseudo replicates
		peak_overlap = _naive_overlap_peak( "regionPeak", peak{"spp,1"}, peak_pr1{"spp,1"}, peak_pr2{"spp,1"}, overlap_o_dir, "peak", \
							"peak_spp", "spp/overlap")
	}

	wait_clear_tids()

	print( "\n== Done naive_overlap()\n" )
}

// Run IDR on the spp peaks and compute the final IDR QC.
// Skipped unless the final stage is idr, spp is selected and spp peaks exist for replicate 1.
//  - true replicates: every pair (i,j) with i<j       -> idr_tr{"$i,$j"}
//  - pseudo replicates of each replicate (!true_rep)   -> idr_pr{i}
//  - pooled pseudo replicates (!true_rep, >1 replicate) -> idr_ppr
void do_idr() {

	if ( !is_final_stage_idr() ) return
	if ( !is_callpeak_spp() ) return

	if ( !peak.hasKey("spp,1") ) return // if no peaks from SPP exit

	idr_o_dir 	:= mkdir( "$out_dir/peak/idr" )

	// IDR on true replicates (on every pair of peaks from replicates)

	for ( int i=1; i<=get_num_rep(); i++ ) {

		for ( int j=i+1; j<=get_num_rep(); j++ ) {

			idr_true_o_dir 	:= mkdir( "$idr_o_dir/true_reps/rep$i-rep$j" )

			(idr_tr{"$i,$j"}, idr_tr_png{"$i,$j"} ) = _idr2( peak{"spp,$i"}, peak{"spp,$j"}, peak_pooled{"spp"}, idr_thresh, "signal.value", idr_true_o_dir, "rep$i-rep$j", \
									"peak_spp", ["rep$i","rep$j","pooled"], "true_reps/rep$i-rep$j" )
		}

		if ( !true_rep ) { // IDR on the two pseudo replicates of replicate i

			idr_pr_o_dir := mkdir( "$idr_o_dir/pseudo_reps/rep$i" )

			(idr_pr{i}, idr_pr_png{i}) = _idr2( peak_pr1{"spp,$i"}, peak_pr2{"spp,$i"}, peak{"spp,$i"}, idr_thresh, "signal.value", idr_pr_o_dir, "rep$i-pr", \
								"peak_spp", ["rep$i-pr1","rep$i-pr2","rep$i"], "pseudo_reps/rep$i" )
		}
	}

	if ( !true_rep && get_num_rep() > 1 ) { // IDR on pooled pseudo replicates

		idr_ppr_o_dir := mkdir( "$idr_o_dir/pooled_pseudo_reps" )

		(idr_ppr, idr_ppr_png) = _idr2( peak_ppr1{"spp"}, peak_ppr2{"spp"}, peak_pooled{"spp"}, idr_thresh, "signal.value", idr_ppr_o_dir, "ppr", \
							"peak_spp", ["ppr1","ppr2","pooled"], "pooled_pseudo_reps" )
	}

	wait_clear_tids() // wait for the _idr2() runs before the final QC

	qc_o_dir    := mkdir( "$out_dir/qc" ) // create qc output dir.

	// get final idr qc score, use idr final idr narrow peak files from true, pseudo and pooled pseudo reps
	(idr_qc, idr_opt, idr_consv) = _idr_final_qc( idr_tr, idr_pr, idr_ppr, idr_o_dir, qc_o_dir, "" )

	wait_clear_tids()

	print( "\n== Done do_idr()\n" )	
}

// Generate a signal track for every replicate.
// Does nothing unless one of the two signal track methods is selected.
void create_sig_trk() {

	if ( !( is_sigtrk_deeptools() || is_sigtrk_aln2rawsig() ) ) return

	// one _create_sig_trk() per replicate, in parallel unless no_par is set
	for ( int rep=1; rep <= get_num_rep(); rep++) {

		if ( !no_par ) 	par _create_sig_trk( rep )
		else		_create_sig_trk( rep )
	}

	wait_clear_tids()

	print( "\n== Done create_sig_trk()\n" )
}

// Generate the signal track of one replicate and store it in signal_trk{"0,$rep"}.
// Each method only runs when its input files are known for this replicate.
void _create_sig_trk( int rep ) {

	rep_key   := "0,$rep"  // key name for global output variable (map)
	rep_label := "rep$rep"

	// bam2bw: needs the filtered bam
	if ( is_sigtrk_deeptools() && filt_bam.hasKey(rep_key) ) {

		bw_o_dir := mkdir( "$out_dir/signal/bam2bw/$rep_label" )

		signal_trk{rep_key} = _bam_to_bw( filt_bam{rep_key}, bw_o_dir, rep_label, 1 )
	}

	// tag2bw: needs the tagalign and the cross-corr. QC log (for the fragment length)
	if ( is_sigtrk_aln2rawsig() && tag.hasKey(rep_key) && xcor_qc.hasKey(rep_key) ) {

		bw_o_dir := mkdir( "$out_dir/signal/tag2bw/$rep_label" )

		frag_len := get_fraglen( xcor_qc{rep_key} )
		signal_trk{rep_key} = _tag_to_bw( tag{rep_key}, frag_len, bw_o_dir, rep_label )

		// optional wig output
		if ( make_wig ) _tag_to_wig( tag{rep_key}, frag_len, bw_o_dir, rep_label )
	}
}

// Assemble the HTML report from its sections and hand it to report( string ).
void report() {

	wait_clear_tids()

	// sections are generated in the same order as they appear in the report
	sec_filetable := _html_filetable() 	// treeview for directory and file structure
	sec_tracks    := _html_chipseq_tracks() // epigenome browser tracks
	sec_graphviz  := _html_graphviz()	// graphviz workflow diagram
	sec_qc        := _html_chipseq_QC()	// QC tables and images

	report( sec_filetable + sec_tracks + sec_graphviz + sec_qc )

	print( "\n== Done report()\n" )
}

// Build the QC section of the HTML report.
// Collects the QC files recorded in the global maps (flagstat, dup, filtered flagstat, PBC, cross-corr.)
// for every available replicate/control, renders them as tables and appends IDR QC and IDR plots if present.
// Returns the HTML fragment wrapped in <div id='chipseq_qc'>.
string _html_chipseq_QC() {

	string[] flagstat_qcs, dup_qcs, flagstat_nodup_qcs, pbc_qcs, xcor_qcs, xcor_plots
	string[] flagstat_headers, dup_headers, flagstat_nodup_headers, pbc_headers, xcor_headers

	for ( int ctl=0; ctl <= num_ctl; ctl++) { // iterate through inputs (ctl==0 : replicate, ctl==1 : control)

		for ( int rep=1; rep <= get_num_rep(); rep++) {

			if ( !is_data_available( ctl, rep ) ) continue

			info := get_info( ctl, rep )
			key := "$ctl,$rep"

			// cross-correlation QC is collected for replicates only (not controls)
			if ( ctl == 0 && xcor_qc.hasKey( key ) ) {
				xcor_qcs 	+= xcor_qc{key}
				xcor_plots 	+= xcor_plot{key}
				xcor_headers 	+= info
			}

			// each QC file and its column header are added together so both lists stay aligned
			if ( flagstat_qc.hasKey( key ) ) {
				flagstat_qcs 		+= flagstat_qc{key}
				flagstat_headers 	+= info
			}
			if ( dup_qc.hasKey( key ) ) {
				dup_qcs 		+= dup_qc{key}
				dup_headers 		+= info
			}
			if ( flagstat_nodup_qc.hasKey( key ) ) {
				flagstat_nodup_qcs 	+= flagstat_nodup_qc{key}
				flagstat_nodup_headers 	+= info
			}
			if ( pbc_qc.hasKey( key ) ) {
				pbc_qcs			+= pbc_qc{key}
				pbc_headers		+= info
			}
		}
	}

	html := "<div id='chipseq_qc'>"

	html += _parse_flagstat_to_html( "all", 	flagstat_headers, flagstat_qcs, flagstat_headers )
	html += _parse_dup_to_html( "all", 		dup_headers, dup_qcs, dup_headers )
	html += _parse_flagstat_to_html( "all, filtered",flagstat_nodup_headers, flagstat_nodup_qcs, flagstat_nodup_headers )
	html += _parse_pbc_to_html( "all", 		pbc_headers, pbc_qcs, pbc_headers )
	html += _parse_xcor_to_html( "all", 		xcor_headers, xcor_qcs, xcor_plots, xcor_headers )

	// if idr qc's exists, add them to html
	if ( idr_qc != "" ) 		html += _parse_idr_to_html( "idr", idr_qc )

	// IDR plots; images are separated by a non-breaking space entity (terminated with ';' to be valid HTML)
	for ( int i=1; i<=get_num_rep(); i++ ) {

		for ( int j=i+1; j<=get_num_rep(); j++ ) {
			if ( idr_tr_png.hasKey("$i,$j") ) html += _html_img( idr_tr_png{"$i,$j"}, 800, "true reps (rep$i-rep$j)" ) + "&nbsp;"
		}

		if ( !true_rep ) {
			if ( idr_pr_png.hasKey(i) ) html += _html_img( idr_pr_png{i}, 800, "rep$i pseudo-reps" ) + "&nbsp;"
		}
	}

	if ( idr_ppr_png != "" ) 	html += _html_img( idr_ppr_png, 800, "pooled pseudo-reps" ) + "&nbsp;"

	html += "</div><br>"
	return html
}

// Build the epigenome browser section of the HTML report.
// Gathers every available track (pooled and per-replicate signal tracks, overlapping peaks, IDR peaks)
// into three parallel lists (file, type, name) and passes them to _html_epg_browser_viz().
string _html_chipseq_tracks() {

	string[] trk_files, trk_types, trk_names

	// pooled signal tracks
	if ( signal_trk_pval.hasKey( "pooled" ) ) {
		trk_files += signal_trk_pval{"pooled"}
		trk_types += "bigwig"
		trk_names += "$title pval (pooled)"
	}
	if ( signal_trk_fc.hasKey( "pooled" ) ) {
		trk_files += signal_trk_fc{"pooled"}
		trk_types += "bigwig"
		trk_names += "$title fc (pooled)"
	}

	// overlapping peaks
	if ( peak_overlap != "" ) {
		trk_files += _peak_to_hammock( peak_overlap )
		trk_types += "hammock"
		trk_names += "$title peak overlap"
	}

	// final IDR peak sets
	if ( idr_opt != "" ) {
		trk_files += _peak_to_hammock( _get_idr_peak_trk( idr_opt ) )
		trk_types += "hammock"
		trk_names += "$title peak idr (opt. set)"
	}
	if ( idr_consv != "" ) {
		trk_files += _peak_to_hammock( _get_idr_peak_trk( idr_consv ) )
		trk_types += "hammock"
		trk_names += "$title peak idr (cons. set)"
	}

	// per-replicate tracks
	for (int rep=1; rep<=get_num_rep(); rep++) {

		if ( signal_trk_pval.hasKey( "rep$rep" ) ) {
			trk_files += signal_trk_pval{"rep$rep"}
			trk_types += "bigwig"
			trk_names += "$title pval (rep$rep)"
		}
		if ( signal_trk_fc.hasKey( "rep$rep" ) ) {
			trk_files += signal_trk_fc{"rep$rep"}
			trk_types += "bigwig"
			trk_names += "$title fc (rep$rep)"
		}
		if ( idr_pr.hasKey(rep) ) {
			trk_files += _peak_to_hammock( _get_idr_peak_trk( idr_pr{rep} ) )
			trk_types += "hammock"
			trk_names += "$title peak idr (rep$rep-pr)"
		}
	}

	return _html_epg_browser_viz( trk_files, trk_types, trk_names )
}

// Print the help message and exit when is_cmd_line_arg_empty() is true; otherwise do nothing.
void help() {

	if ( !is_cmd_line_arg_empty() ) return

	printHelp()
	exit
}

// basic functions

// True if the peak calling method string ('-callpeak') contains "spp" (case-insensitive).
bool is_callpeak_spp() {

	callpeak_lc := callpeak.toLower()
	return callpeak_lc.indexOf("spp") >= 0
}

// True if the peak calling method string ('-callpeak') contains "macs" (case-insensitive).
bool is_callpeak_macs2() {

	callpeak_lc := callpeak.toLower()
	return callpeak_lc.indexOf("macs") >= 0
}

// True if the signal track method ('-sigtrk') selects tag2bw (align2rawsignal),
// i.e. it contains "aln", "align" or "tag" (case-insensitive).
bool is_sigtrk_aln2rawsig() {

	sigtrk_lc := sigtrk.toLower()

	if ( sigtrk_lc.indexOf( "aln" ) >= 0 ) return true
	if ( sigtrk_lc.indexOf( "align" ) >= 0 ) return true

	return sigtrk_lc.indexOf( "tag" ) >= 0
}

// True if the signal track method ('-sigtrk') selects bam2bw (deepTools),
// i.e. it contains "deep" or "bam" (case-insensitive).
bool is_sigtrk_deeptools() {

	sigtrk_lc := sigtrk.toLower()

	if ( sigtrk_lc.indexOf( "deep" ) >= 0 ) return true

	return sigtrk_lc.indexOf( "bam" ) >= 0
}

// True if the input data type is "fastq" (case-insensitive).
bool is_input_fastq() {

	input_lc := input.toLower()
	return input_lc == "fastq"
}

// True if the input data type is "bam" (case-insensitive).
bool is_input_bam() {

	input_lc := input.toLower()
	return input_lc == "bam"
}

// True if the input data type is "filt_bam" (case-insensitive).
bool is_input_filt_bam() {

	input_lc := input.toLower()
	return input_lc == "filt_bam"
}

// True if the input data type is "tag" or "tagalign" (case-insensitive).
bool is_input_tag() {

	input_lc := input.toLower()
	return input_lc == "tag" || input_lc == "tagalign"
}

// True if the input data type is "peak" (case-insensitive).
bool is_input_peak() {

	input_lc := input.toLower()
	return input_lc == "peak"
}

// Number of replicates, taken from the module matching the current input data type.
// Returns 0 if the input data type is not one of fastq, bam, filt_bam, tag(align) or peak.
int get_num_rep() {

	if ( is_input_fastq() )    return get_num_rep_fastq()
	if ( is_input_bam() )      return get_num_rep_bam()
	if ( is_input_filt_bam() ) return get_num_rep_filt_bam()
	if ( is_input_tag() )      return get_num_rep_tag()
	if ( is_input_peak() )     return get_num_rep_peak()

	// unknown input type
	return 0
}

// True if the final pipeline stage ('-final_stage') is "bam" (case-insensitive).
bool is_final_stage_bam() {

	stage_lc := final_stage.toLower()
	return stage_lc == "bam"
}

// True if the final pipeline stage ('-final_stage') is "filt_bam" (case-insensitive).
bool is_final_stage_filt_bam() {

	stage_lc := final_stage.toLower()
	return stage_lc == "filt_bam"
}

// True if the final pipeline stage ('-final_stage') is "tag" or "tagalign" (case-insensitive).
bool is_final_stage_tag() {

	stage_lc := final_stage.toLower()
	return stage_lc == "tag" || stage_lc == "tagalign"
}

// True if the final pipeline stage ('-final_stage') is "xcor" (case-insensitive).
bool is_final_stage_xcor() {

	stage_lc := final_stage.toLower()
	return stage_lc == "xcor"
}

// True if the final pipeline stage ('-final_stage') is "peak" (case-insensitive).
bool is_final_stage_peak() {

	stage_lc := final_stage.toLower()
	return stage_lc == "peak"
}

// True if the final pipeline stage ('-final_stage') is "idr" (case-insensitive)
// or is left empty, which is its default value.
bool is_final_stage_idr() {

	stage_lc := final_stage.toLower()

	if ( stage_lc == "idr" ) return true

	return stage_lc == ""
}

// True if the given replicate/control is paired end.
//   ctl : 0 for replicate, 1 for control
//   rep : replicate id
// For fastq input this is decided by the number of fastqs (exactly 2 means paired end);
// for any other input type the global '-pe' flag is used.
bool is_paired_end( int ctl, int rep ) {

	if ( !is_input_fastq() ) return pe

	return get_fastqs( ctl, rep ).size() == 2
}

// True if the given replicate/control is NOT paired end (see is_paired_end( ctl, rep )).
bool is_single_ended( int ctl, int rep ) {

	paired := is_paired_end( ctl, rep )
	return !paired
}

// True if the data set is paired end, judged from replicate 1 (ctl=0, rep=1).
bool is_paired_end() {

	paired := is_paired_end( 0, 1 )
	return paired
}

// True if the data set is NOT paired end (see is_paired_end()).
bool is_single_ended() {

	paired := is_paired_end()
	return !paired
}


// True if input data exists for the given replicate/control.
//   ctl : 0 for replicate, 1 for control
//   rep : replicate id
// The lookup depends on the input data type (fastq, bam, filt_bam or tag);
// any other input type yields false.
bool is_data_available( int ctl, int rep ) {

	// controls with replicate id above 2 are never reported as available
	if ( ctl == 1 && rep > 2 ) return false

	if ( is_input_fastq() )    return get_fastqs( ctl, rep ).size() > 0
	if ( is_input_bam() )      return get_bam( ctl, rep ) != ""
	if ( is_input_filt_bam() ) return get_filt_bam( ctl, rep ) != ""
	if ( is_input_tag() )      return get_tag( ctl, rep ) != ""

	return false
}

// True if control data (ctl=1) is available for at least one replicate id.
bool control_exists() {

	rep := 1

	while ( rep <= get_num_rep() ) {

		if ( is_data_available( 1, rep ) ) return true
		rep++
	}

	return false
}

// Short label for a replicate or control: "ctl<rep>" if ctl > 0, "rep<rep>" otherwise.
string get_info( int ctl, int rep ) {

	prefix := "rep"
	if ( ctl > 0 ) prefix = "ctl"

	return prefix + rep
}