diff --git a/DESCRIPTION b/DESCRIPTION
index a8273a6..88e7f9f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: fetchNOMe
 Type: Package
 Title: Package for fast and convenient retrieval of NOMe-seq data from BAM files
-Version: 0.4
+Version: 0.5
 Date: 2024-05-09
 Authors@R: person(given = "Evgeniy A.", family="Ozonov", email = "evgeniy.ozonov@fmi.ch", role = c("aut", "cre"))
 Description: This R package is tailored for fast and convenient retrieval of NOMe-seq data from BAM files aligned with QuasR, Bismark and BISCUIT. 
diff --git a/NAMESPACE b/NAMESPACE
index 64803a7..e35c380 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,4 +2,5 @@ useDynLib(fetchNOMe, .registration=TRUE)
 importFrom(Rcpp, evalCpp)
 export(get_data_matrix_from_bams)
 export(get_ctable_from_bams)
-export(get_protect_stats_from_bams)
\ No newline at end of file
+export(get_protect_stats_from_bams)
+export(get_molecule_data_list_from_bams)
\ No newline at end of file
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 0b285ec..c6b5c7d 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,15 +1,19 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-fetch_cooc_ctable_from_bams_cpp <- function(infiles, regionChr, regionStart, regionEnd, max_spac, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed) {
-    .Call(`_fetchNOMe_fetch_cooc_ctable_from_bams_cpp`, infiles, regionChr, regionStart, regionEnd, max_spac, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed)
+fetch_molec_data_list_from_bams_cpp <- function(whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens, data_as_rle) {
+    .Call(`_fetchNOMe_fetch_molec_data_list_from_bams_cpp`, whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens, data_as_rle)
 }
 
-fetch_data_matrix_from_bams_cpp <- function(whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed) {
-    .Call(`_fetchNOMe_fetch_data_matrix_from_bams_cpp`, whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed)
+fetch_cooc_ctable_from_bams_cpp <- function(infiles, regionChr, regionStart, regionEnd, max_spac, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens) {
+    .Call(`_fetchNOMe_fetch_cooc_ctable_from_bams_cpp`, infiles, regionChr, regionStart, regionEnd, max_spac, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens)
 }
 
-fetch_protect_stats_from_bams_cpp <- function(infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed) {
-    .Call(`_fetchNOMe_fetch_protect_stats_from_bams_cpp`, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed)
+fetch_data_matrix_from_bams_cpp <- function(whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens) {
+    .Call(`_fetchNOMe_fetch_data_matrix_from_bams_cpp`, whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens)
+}
+
+fetch_protect_stats_from_bams_cpp <- function(infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens) {
+    .Call(`_fetchNOMe_fetch_protect_stats_from_bams_cpp`, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens)
 }
 
diff --git a/R/get_ctable_from_bams.R b/R/get_ctable_from_bams.R
index 63c5d38..88be213 100644
--- a/R/get_ctable_from_bams.R
+++ b/R/get_ctable_from_bams.R
@@ -11,6 +11,8 @@
 #' @param regions \linkS4class{GRanges} object for regions of interest.
 #' @param genome \code{character} path to fasta file which was used as reference for alignment.
 #' @param alignerUsed \code{character} that defines which aligner was used to generate BAM files. Currently supported aligners are QuasR, Bismark and BISCUIT.
+#' @param min_frag_data_len \code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.
+#' @param min_frag_data_dens \code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.
 #' @param collapseBySample \code{logical} indicating whether to collapse counts for bam files with same \code{samplenames}. If \code{FALSE} prefix \code{s} followed by index is added to \code{samplenames}.
 #' @param max_spacing maximum spacing length for which to collect co-occurrence counts.
 #' @param remove_nonunique \code{logical} if \code{TRUE} only unique fragments are analyzed. Uniqueness is defined by fragments start, end and methylation states of all cytosines. 
@@ -22,12 +24,8 @@
 #' i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \code{min_bisC_size} are subject to filtering based on bisulfite conversion efficiency.
 #' @param mapqMin \code{integer} setting minimum MAPQ of reads to be included into analysis.
 #' @param mapqMax \code{integer} setting maximum MAPQ of reads to be included into analysis.
-#' @param absIsizeMin \code{integer} or \code{NULL} (default). For paired-end experiments, minimal absolute insert
-#'   size (TLEN field in SAM Spec v1.4) of alignments to be included.
-#' @param absIsizeMax \code{integer} or \code{NULL} (default). For paired-end experiments, maximal absolute insert
-#'   size (TLEN field in SAM Spec v1.4) of alignments to be included.
-#' @param min_read_size \code{integer}. minimum read length to be included.
-#' @param max_read_size \code{integer}. maximum read length to be included.
+
+#' @param max_read_size \code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.
 #' @param ncores number of cores to use.
 #'
 #' @return \code{tibble} where each row corresponds to sample - region combination. 
@@ -55,6 +53,8 @@ get_ctable_from_bams <- function(bamfiles,
                                  regions,
                                  genome,
                                  alignerUsed = c("QuasR","Bismark","BISCUIT"),
+                                 min_frag_data_len = 50L,
+                                 min_frag_data_dens = 0.05,
                                  collapseBySample = TRUE,
                                  max_spacing = 300,
                                  remove_nonunique = TRUE,
@@ -64,9 +64,6 @@ get_ctable_from_bams <- function(bamfiles,
                                  min_bisC_size = 10,
                                  mapqMin = 0,
                                  mapqMax = 255,
-                                 absIsizeMin = NULL,
-                                 absIsizeMax = NULL,
-                                 min_read_size = 25L,
                                  max_read_size = 1000L,
                                  ncores = 1L){
   
@@ -93,28 +90,17 @@ get_ctable_from_bams <- function(bamfiles,
   
   alignerUsed <- match.arg(alignerUsed)
   
-  if (is.null(absIsizeMin)) # no tlen filtering
-    absIsizeMin <- -1L
-  if (is.null(absIsizeMax))
-    absIsizeMax <- -1L
-  
-  
-  
   message(paste0("Collecting co-occurrence frequencies from BAM files generated by ",alignerUsed,"."))
   
   
   ## load refsequences for regions
   ## extend regions from left and right
-  if(absIsizeMax > 0)
-    ext_len <- absIsizeMax
-  else
-    ext_len <- max_read_size
   
   fa_index <- Rsamtools::scanFaIndex(genome)
   GenomeInfoDb::seqlevels(regions) <- GenomeInfoDb::seqlevels(fa_index)
   GenomeInfoDb::seqinfo(regions) <- suppressMessages(GenomeInfoDb::Seqinfo(seqnames = as.character(GenomeInfoDb::seqnames(fa_index)),
                                                                            seqlengths = GenomicRanges::width(fa_index)))
-  refseq_gr <- suppressWarnings(GenomicRanges::resize(regions,width = GenomicRanges::width(regions) + 2*ext_len,fix="center"))
+  refseq_gr <- suppressWarnings(GenomicRanges::resize(regions,width = GenomicRanges::width(regions) + 2*max_read_size,fix="center"))
   
   ## trim to remove parts out of chromosomes
   refseq_gr <- GenomicRanges::trim(refseq_gr)
@@ -150,11 +136,10 @@ get_ctable_from_bams <- function(bamfiles,
                                                                                                                               min_bisC_size = min_bisC_size,
                                                                                                                               mapqMin = mapqMin,
                                                                                                                               mapqMax = mapqMax,
-                                                                                                                              absIsizeMin = absIsizeMin,
-                                                                                                                              absIsizeMax = absIsizeMax,
-                                                                                                                              min_read_size = min_read_size,
-                                                                                                                              max_read_size = max_read_size,
-                                                                                                                              alignerUsed = alignerUsed)
+                                                                                                                              alignerUsed = alignerUsed,
+                                                                                                                              min_frag_data_len = min_frag_data_len,
+                                                                                                                              min_frag_data_dens = min_frag_data_dens
+                                                                                                                              )
                                                                                    
                                                                                    
                                                                                    #coocmatr <- outlist[["CoocCountTable"]]
diff --git a/R/get_data_matrix_from_bams.R b/R/get_data_matrix_from_bams.R
index 134a5b2..d0b4685 100644
--- a/R/get_data_matrix_from_bams.R
+++ b/R/get_data_matrix_from_bams.R
@@ -17,6 +17,8 @@
 #' \item{"allC"}{methylation states of all cytosines combined.}
 #' }
 #' @param alignerUsed \code{character} that defines which aligner was used to generate BAM files. Currently supported aligners are QuasR, Bismark and BISCUIT.
+#' @param min_frag_data_len \code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.
+#' @param min_frag_data_dens \code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.
 #' @param collapseBySample \code{logical} indicating whether to collapse counts for bam files with same \code{samplenames}. If \code{FALSE} prefix \code{s} followed by index is added to \code{samplenames}.
 #' @param remove_nonunique \code{logical} if \code{TRUE} only unique fragments are analyzed. Uniqueness is defined by fragments start, end and methylation states of all cytosines.
 #' @param clip_until_nbg \code{integer} controlling clipping partial footprints at both ends of fragments. Namely, protected states are omitted from each end until \code{clip_until_nbg} unprotected states are met.
@@ -27,12 +29,7 @@
 #' i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \code{min_bisC_size} are subject to filtering based on bisulfite conversion efficiency.
 #' @param mapqMin \code{integer} setting minimum MAPQ of reads to be included into analysis.
 #' @param mapqMax \code{integer} setting maximum MAPQ of reads to be included into analysis.
-#' @param absIsizeMin \code{integer} or \code{NULL} (default). For paired-end experiments, minimal absolute insert
-#'   size (TLEN field in SAM Spec v1.4) of alignments to be included.
-#' @param absIsizeMax \code{integer} or \code{NULL} (default). For paired-end experiments, maximal absolute insert
-#'   size (TLEN field in SAM Spec v1.4) of alignments to be included.
-#' @param min_read_size \code{integer}. minimum read length to be included.
-#' @param max_read_size \code{integer}. maximum read length to be included.
+#' @param max_read_size \code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.
 #' @param ncores number of cores to use.
 #' @return \code{tibble} where each row corresponds to sample - region combination.
 #' Columns of the returned \code{tibble} represent:
@@ -59,6 +56,8 @@ get_data_matrix_from_bams <- function(bamfiles,
 																			genome,
 																			whichContext = c("GCH","WCG","bisC","otherC", "allC"),
 																			alignerUsed = c("QuasR","Bismark","BISCUIT"),
+																			min_frag_data_len = 50L,
+																			min_frag_data_dens = 0.05,
 																			collapseBySample = TRUE,
 																			remove_nonunique = TRUE,
 																			clip_until_nbg  = 1L,
@@ -67,9 +66,6 @@ get_data_matrix_from_bams <- function(bamfiles,
 																			min_bisC_size = 10,
 																			mapqMin = 0,
 																			mapqMax = 255,
-																			absIsizeMin = NULL,
-																			absIsizeMax = NULL,
-																			min_read_size = 25L,
 																			max_read_size = 1000L,
 																			ncores = 1L){
 	if(!inherits(bamfiles,"character"))
@@ -96,11 +92,6 @@ get_data_matrix_from_bams <- function(bamfiles,
 
 	alignerUsed <- match.arg(alignerUsed)
 
-	if (is.null(absIsizeMin)) # no tlen filtering
-	  absIsizeMin <- -1L
-	if (is.null(absIsizeMax))
-	  absIsizeMax <- -1L
-	
 	message(paste0("Fetching data from BAM files generated by ",alignerUsed,"."))
 
 
@@ -110,15 +101,11 @@ get_data_matrix_from_bams <- function(bamfiles,
 	## load refsequences for regions
 	
 	## extend regions from left and right
-	if(absIsizeMax > 0)
-	  ext_len <- absIsizeMax
-	else
-	  ext_len <- max_read_size
 	fa_index <- Rsamtools::scanFaIndex(genome)
 	GenomeInfoDb::seqlevels(regions) <- GenomeInfoDb::seqlevels(fa_index)
 	GenomeInfoDb::seqinfo(regions) <- suppressMessages(GenomeInfoDb::Seqinfo(seqnames = as.character(GenomeInfoDb::seqnames(fa_index)),
 																														 seqlengths = GenomicRanges::width(fa_index)))
-	refseq_gr <- suppressWarnings(GenomicRanges::resize(regions,width = GenomicRanges::width(regions) + 2*ext_len,fix="center"))
+	refseq_gr <- suppressWarnings(GenomicRanges::resize(regions,width = GenomicRanges::width(regions) + 2*max_read_size,fix="center"))
 
 	## trim to remove parts out of chromosomes
 	refseq_gr <- GenomicRanges::trim(refseq_gr)
@@ -156,11 +143,9 @@ get_data_matrix_from_bams <- function(bamfiles,
 																																			 																						 min_bisC_size = min_bisC_size,
 																																			 																						 mapqMin = mapqMin,
 																																			 																						 mapqMax = mapqMax,
-																																			 																						 absIsizeMin = absIsizeMin,
-																																			 																						 absIsizeMax = absIsizeMax,
-																																			 																						 min_read_size = min_read_size,
-																																			 																						 max_read_size = max_read_size,
-																																			 																						 alignerUsed = alignerUsed)
+																																			 																						 alignerUsed = alignerUsed,
+																																			 																						 min_frag_data_len = min_frag_data_len,
+																																			 																						 min_frag_data_dens = min_frag_data_dens)
 
 																																			 	data_mat <- outlist[["DataMatrix"]]
 																																			 	# invert rows if strand is -
diff --git a/R/get_molecule_data_list_from_bams.R b/R/get_molecule_data_list_from_bams.R
new file mode 100644
index 0000000..885a315
--- /dev/null
+++ b/R/get_molecule_data_list_from_bams.R
@@ -0,0 +1,172 @@
+
+
+
+#' Fetch data for each molecule from NOMe-seq BAM files for a set of regions
+#'
+#'
+#'
+#' @param bamfiles \code{character} paths to bam files
+#' @param samplenames \code{character} names of samples. If \code{collapseBySample} is \code{TRUE} counts are aggregated across bam files with same sample name.
+#' @param regions \linkS4class{GRanges} object for regions of interest.
+#' @param genome \code{character} path to fasta file which was used as reference for alignment.
+#' @param whichContext \code{character} which can be one of "GCH","WCG","bisC", "otherC" or "allC" and sets which data to retrieve.
+#' \describe{
+#' \item{"GCH}{protection data;}
+#' \item{"WCG"}{endogenous CpG methylation data;}
+#' \item{"bisC}{methylation states of non-GCH and non-WCG cytosins used for estimating efficiency of bisulfite conversion;}
+#' \item{"otherC"}{methylation states of all other cytosines which do not fall into above categories.}
+#' \item{"allC"}{methylation states of all cytosines combined.}
+#' }
+#' @param alignerUsed \code{character} that defines which aligner was used to generate BAM files. Currently supported aligners are QuasR, Bismark and BISCUIT.
+#' @param min_frag_data_len \code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.
+#' @param min_frag_data_dens \code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.
+#' @param collapseBySample \code{logical} indicating whether to collapse counts for bam files with same \code{samplenames}. If \code{FALSE} prefix \code{s} followed by index is added to \code{samplenames}.
+#' @param remove_nonunique \code{logical} if \code{TRUE} only unique fragments are analyzed. Uniqueness is defined by fragments start, end and methylation states of all cytosines.
+#' @param clip_until_nbg \code{integer} controlling clipping partial footprints at both ends of fragments. Namely, protected states are omitted from each end until \code{clip_until_nbg} unprotected states are met.
+#' Set \code{clip_until_nbg=0L} to skip clipping pre-processing.
+#' @param noclip_protect_frac_above \code{numeric} controlling fragment clipping, i.e. fragments that have fraction of protected states higher than \code{max_protect_frac} escape clipping. This parameter allows to avoid removal of fully protected fragments during clipping process.
+#' @param max_bisC_meth \code{numeric} controlling removal of fragments with failed bisulfite conversion, i.e. fragments with fraction of methylated non-GCH, and non-WCG cytosines higher then \code{max_bisC_meth} are removed from the analysis to get rid of fragments for which bisulfite conversion failed.
+#' @param min_bisC_size \code{integer} setting minimum number non-GCH, and non-WCG cytosines for filtering according to bisulfite conversion efficiency,
+#' i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \code{min_bisC_size} are subject to filtering based on bisulfite conversion efficiency.
+#' @param mapqMin \code{integer} setting minimum MAPQ of reads to be included into analysis.
+#' @param mapqMax \code{integer} setting maximum MAPQ of reads to be included into analysis.
+#' @param max_read_size \code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.
+#' @param ncores number of cores to use.
+#' @param data_as_Rle \code{logical} if \code{TRUE} data vectors are \code{Rle} compressed.
+#' @return \code{tibble} where each row corresponds to sample - molecule combination.
+#' Please note that if \code{regions} contain overlapping ranges it is possible that same molecule to appear several times.
+#' 
+#' Columns of the returned \code{tibble} represent:
+#' \describe{
+#'   \item{SampleName}{Name of a sample as defined by \code{samplenames}.}
+#'   \item{seqnames, start, end}{seqnames (or chromosomes) and coordinates of molecules. Please note, that start and end are positions of the first and last data points and not positions of alignments.}
+#'   \item{R1strand}{strands of alignments for R1 reads}
+#'   \item{qname}{names of a molecules}
+#'   \item{nDataPoints}{numbers of informative data points within molecules}
+#'   \item{startDataPoint}{most-left data points}
+#'   \item{endDataPoint}{most-right data points}
+#'   \item{fragData_list}{vectors with molecule data
+#'   }
+#' }
+#' @export
+#'
+
+get_molecule_data_list_from_bams <- function(bamfiles,
+                                      samplenames,
+                                      regions,
+                                      genome,
+                                      whichContext = c("GCH","WCG","bisC","otherC", "allC"),
+                                      alignerUsed = c("QuasR","Bismark","BISCUIT"),
+                                      min_frag_data_len = 50L,
+                                      min_frag_data_dens = 0.05,
+                                      collapseBySample = TRUE,
+                                      remove_nonunique = TRUE,
+                                      clip_until_nbg  = 1L,
+                                      noclip_protect_frac_above = 0.90,
+                                      max_bisC_meth = 0.1,
+                                      min_bisC_size = 10,
+                                      mapqMin = 0,
+                                      mapqMax = 255,
+                                      max_read_size = 1000L,
+                                      ncores = 1L,
+                                      data_as_Rle = FALSE){
+  if(!inherits(bamfiles,"character"))
+    stop("bamfiles must be vector of character")
+  if(!inherits(samplenames,"character"))
+    stop("samplenames must be vector of character")
+  if(any(!file.exists(bamfiles))){
+    nonex <- which(!file.exists(bamfiles))
+    nonex <- paste0(bamfiles[nonex],collapse = "\n")
+    stop(paste0("Could not find following BAM files:\n",nonex))
+  }
+  
+  if(!inherits(regions,"GRanges"))
+    stop("regions must be Granges")
+  
+  
+  if(!inherits(genome,"character") & length(genome) !=1)
+    stop("genome must be character and contain only one element")
+  
+  if(!file.exists(genome))
+    stop("Could not find genome:", genome)
+  
+  whichContext <- match.arg(whichContext)
+  
+  alignerUsed <- match.arg(alignerUsed)
+  
+  message(paste0("Fetching data for every molecule from BAM files generated by ",alignerUsed,"."))
+  
+  
+  ## remove metadata from regions
+  GenomicRanges::mcols(regions) <- NULL
+  
+  ## load refsequences for regions
+  
+  ## extend regions from left and right
+  fa_index <- Rsamtools::scanFaIndex(genome)
+  GenomeInfoDb::seqlevels(regions) <- GenomeInfoDb::seqlevels(fa_index)
+  GenomeInfoDb::seqinfo(regions) <- suppressMessages(GenomeInfoDb::Seqinfo(seqnames = as.character(GenomeInfoDb::seqnames(fa_index)),
+                                                                           seqlengths = GenomicRanges::width(fa_index)))
+  refseq_gr <- suppressWarnings(GenomicRanges::resize(regions,width = GenomicRanges::width(regions) + 2*max_read_size,fix="center"))
+  
+  ## trim to remove parts out of chromosomes
+  refseq_gr <- GenomicRanges::trim(refseq_gr)
+  refseqs <- Rsamtools::scanFa(genome,refseq_gr)
+  
+  # 2. split vector by sample name. If collapseBySample is FALSE add to samplenames some suffixes
+  if(!collapseBySample){
+    samplenames <- paste0("s",seq_along(samplenames),"_",samplenames)
+  }
+  
+  bamfiles <- split(bamfiles, f=samplenames)
+  
+  
+  molec_data_list <- do.call(rbind,lapply(names(bamfiles),
+                                        function(bamgroupname){
+                                          ctbl <- do.call(rbind,parallel::mclapply(seq_along(regions),
+                                                                                   function(regi){
+                                                                                     
+                                                                                     regdf <- GenomicRanges::as.data.frame(regions[regi])
+                                                                                     regdf$seqnames <- as.character(regdf[,"seqnames"])
+                                                                                     seqgr <- as.data.frame(refseq_gr[regi])
+                                                                                     
+                                                                                     outlist <- fetch_molec_data_list_from_bams_cpp(whichContext = whichContext,
+                                                                                                                                    infiles = bamfiles[[bamgroupname]],
+                                                                                                                                    regionChr = regdf[1,"seqnames"],
+                                                                                                                                    regionStart = regdf[1,"start"] - 1, # convert to 0-based
+                                                                                                                                    regionEnd = regdf[1,"end"],
+                                                                                                                                    remove_nonunique = remove_nonunique,
+                                                                                                                                    seqstring = as.character(refseqs[regi]),
+                                                                                                                                    seqStart = seqgr[1,"start"] - 1,
+                                                                                                                                    seqEnd = seqgr[1,"end"],
+                                                                                                                                    clip_until_nbg  = clip_until_nbg,
+                                                                                                                                    max_protect_frac = noclip_protect_frac_above,
+                                                                                                                                    max_bisC_meth = max_bisC_meth,
+                                                                                                                                    min_bisC_size = min_bisC_size,
+                                                                                                                                    mapqMin = mapqMin,
+                                                                                                                                    mapqMax = mapqMax,
+                                                                                                                                    alignerUsed = alignerUsed,
+                                                                                                                                    min_frag_data_len = min_frag_data_len,
+                                                                                                                                    min_frag_data_dens = min_frag_data_dens,
+                                                                                                                                    data_as_rle = data_as_Rle)
+                                                                                     
+                                                                                     
+                                                                                     outtbl <- tibble::tibble("SampleName" = bamgroupname,
+                                                                                                              "seqnames" = regdf[1,"seqnames"],
+                                                                                                              "start" = outlist[["start"]],
+                                                                                                              "end" = outlist[["end"]],
+                                                                                                              "R1strand" = outlist[["R1strand"]],
+                                                                                                              "fragConfig" = outlist[["fragConfigs"]],
+                                                                                                              "qname" = outlist[["qnames"]],
+                                                                                                              "nDataPoints" = outlist[["nDataPoints"]],
+                                                                                                              "startDataPoint" = outlist[["startDataPoint"]],
+                                                                                                              "endDataPoint" = outlist[["endDataPoint"]],
+                                                                                                              "fragData_list" = outlist[["fragData_list"]])
+                                                                                     colnames(outtbl)[which(colnames(outtbl) == "fragData_list")] <- paste0(whichContext,"_fragData_list")
+                                                                                     
+                                                                                     return(outtbl)
+                                                                                   },mc.cores = ncores))
+                                          
+                                          return(ctbl)
+                                        }))
+}
\ No newline at end of file
diff --git a/R/get_protect_stats_from_bams.R b/R/get_protect_stats_from_bams.R
index e22b1fa..99cc3f6 100644
--- a/R/get_protect_stats_from_bams.R
+++ b/R/get_protect_stats_from_bams.R
@@ -9,6 +9,8 @@
 #' @param regions \linkS4class{GRanges} object for regions of interest.
 #' @param genome \code{character} path to fasta file which was used as reference for alignment.
 #' @param alignerUsed \code{character} that defines which aligner was used to generate BAM files. Currently supported aligners are QuasR, Bismark and BISCUIT.
+#' @param min_frag_data_len \code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.
+#' @param min_frag_data_dens \code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.
 #' @param collapseBySample \code{logical} indicating whether to collapse counts for bam files with same \code{samplenames}. If \code{FALSE} prefix \code{s} followed by index is added to \code{samplenames}.
 #' @param remove_nonunique \code{logical} if \code{TRUE} only unique fragments are analyzed. Uniqueness is defined by fragments start, end and methylation states of all cytosines. 
 #' @param clip_until_nbg \code{integer} controlling clipping partial footprints at both ends of fragments. Namely, protected states are erased from each end until \code{clip_until_nbg} unprotected states are met.
@@ -19,12 +21,7 @@
 #' i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \code{min_bisC_size} are subject to filtering based on bisulfite conversion efficiency.
 #' @param mapqMin \code{integer} setting minimum MAPQ of reads to be included into analysis.
 #' @param mapqMax \code{integer} setting maximum MAPQ of reads to be included into analysis.
-#' @param absIsizeMin \code{integer} or \code{NULL} (default). For paired-end experiments, minimal absolute insert
-#'   size (TLEN field in SAM Spec v1.4) of alignments to be included.
-#' @param absIsizeMax \code{integer} or \code{NULL} (default). For paired-end experiments, maximal absolute insert
-#'   size (TLEN field in SAM Spec v1.4) of alignments to be included.
-#' @param min_read_size \code{integer}. minimum read length to be included.
-#' @param max_read_size \code{integer}. maximum read length to be included.
+#' @param max_read_size \code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.
 #' @param ncores number of cores to use.
 #'
 #' @return \code{tibble} where each row corresponds to sample - region combination. 
@@ -56,6 +53,8 @@ get_protect_stats_from_bams <- function(bamfiles,
 																							regions,
 																							genome,
 																							alignerUsed = c("QuasR","Bismark","BISCUIT"),
+																							min_frag_data_len = 50L,
+																							min_frag_data_dens = 0.05,
 																							collapseBySample = TRUE,
 																							remove_nonunique = TRUE,
 																							clip_until_nbg  = 1L,
@@ -64,9 +63,6 @@ get_protect_stats_from_bams <- function(bamfiles,
 																							min_bisC_size = 10,
 																							mapqMin = 0,
 																							mapqMax = 255,
-																							absIsizeMin = NULL,
-																							absIsizeMax = NULL,
-																							min_read_size = 25L,
 																							max_read_size = 1000L,
 																							ncores = 1L){
 	if(!inherits(bamfiles,"character"))
@@ -89,24 +85,17 @@ get_protect_stats_from_bams <- function(bamfiles,
 		stop("Could not find genome:", genome)
 	
   alignerUsed <- match.arg(alignerUsed)
-  if (is.null(absIsizeMin)) # no tlen filtering
-    absIsizeMin <- -1L
-  if (is.null(absIsizeMax))
-    absIsizeMax <- -1L
+  
   message(paste0("Collecting protection statistics from BAM files generated by ",alignerUsed,"."))
   
   
 	## load refsequences for regions
 	## extend regions from left and right
-  if(absIsizeMax > 0)
-    ext_len <- absIsizeMax
-  else
-    ext_len <- max_read_size
-	fa_index <- Rsamtools::scanFaIndex(genome)
+  fa_index <- Rsamtools::scanFaIndex(genome)
 	GenomeInfoDb::seqlevels(regions) <- GenomeInfoDb::seqlevels(fa_index)
 	GenomeInfoDb::seqinfo(regions) <- suppressMessages(GenomeInfoDb::Seqinfo(seqnames = as.character(GenomeInfoDb::seqnames(fa_index)),
 																														 seqlengths = GenomicRanges::width(fa_index)))
-	refseq_gr <- suppressWarnings(GenomicRanges::resize(regions,width = GenomicRanges::width(regions) + 2*ext_len,fix="center"))
+	refseq_gr <- suppressWarnings(GenomicRanges::resize(regions,width = GenomicRanges::width(regions) + 2*max_read_size,fix="center"))
 	
 	## trim to remove parts out of chromosomes
 	refseq_gr <- GenomicRanges::trim(refseq_gr)
@@ -142,11 +131,9 @@ get_protect_stats_from_bams <- function(bamfiles,
 																																				 																							 min_bisC_size = min_bisC_size,
 																																				 																							 mapqMin = mapqMin,
 																																				 																							 mapqMax = mapqMax,
-																																				 																							 absIsizeMin = absIsizeMin,
-																																				 																							 absIsizeMax = absIsizeMax,
-																																				 																							 min_read_size = min_read_size,
-																																				 																							 max_read_size = max_read_size,
-																																				 																							 alignerUsed = alignerUsed)
+																																				 																							 alignerUsed = alignerUsed,
+																																				 																							 min_frag_data_len = min_frag_data_len,
+																																				 																							 min_frag_data_dens = min_frag_data_dens)
 																																				 	
 																																				 	data_mat <- outlist[["ProtectStats"]]
 																																				 	
diff --git a/man/get_ctable_from_bams.Rd b/man/get_ctable_from_bams.Rd
index 114627e..ba59c00 100644
--- a/man/get_ctable_from_bams.Rd
+++ b/man/get_ctable_from_bams.Rd
@@ -10,6 +10,8 @@ get_ctable_from_bams(
   regions,
   genome,
   alignerUsed = c("QuasR", "Bismark", "BISCUIT"),
+  min_frag_data_len = 50L,
+  min_frag_data_dens = 0.05,
   collapseBySample = TRUE,
   max_spacing = 300,
   remove_nonunique = TRUE,
@@ -19,9 +21,6 @@ get_ctable_from_bams(
   min_bisC_size = 10,
   mapqMin = 0,
   mapqMax = 255,
-  absIsizeMin = NULL,
-  absIsizeMax = NULL,
-  min_read_size = 25L,
   max_read_size = 1000L,
   ncores = 1L
 )
@@ -37,6 +36,10 @@ get_ctable_from_bams(
 
 \item{alignerUsed}{\code{character} that defines which aligner was used to generate BAM files. Currently supported aligners are QuasR, Bismark and BISCUIT.}
 
+\item{min_frag_data_len}{\code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.}
+
+\item{min_frag_data_dens}{\code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.}
+
 \item{collapseBySample}{\code{logical} indicating whether to collapse counts for bam files with same \code{samplenames}. If \code{FALSE} prefix \code{s} followed by index is added to \code{samplenames}.}
 
 \item{max_spacing}{maximum spacing length for which to collect co-occurrence counts.}
@@ -57,15 +60,7 @@ i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \c
 
 \item{mapqMax}{\code{integer} setting maximum MAPQ of reads to be included into analysis.}
 
-\item{absIsizeMin}{\code{integer} or \code{NULL} (default). For paired-end experiments, minimal absolute insert
-size (TLEN field in SAM Spec v1.4) of alignments to be included.}
-
-\item{absIsizeMax}{\code{integer} or \code{NULL} (default). For paired-end experiments, maximal absolute insert
-size (TLEN field in SAM Spec v1.4) of alignments to be included.}
-
-\item{min_read_size}{\code{integer}. minimum read length to be included.}
-
-\item{max_read_size}{\code{integer}. maximum read length to be included.}
+\item{max_read_size}{\code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.}
 
 \item{ncores}{number of cores to use.}
 }
diff --git a/man/get_data_matrix_from_bams.Rd b/man/get_data_matrix_from_bams.Rd
index aa37a89..a06ec22 100644
--- a/man/get_data_matrix_from_bams.Rd
+++ b/man/get_data_matrix_from_bams.Rd
@@ -11,6 +11,8 @@ get_data_matrix_from_bams(
   genome,
   whichContext = c("GCH", "WCG", "bisC", "otherC", "allC"),
   alignerUsed = c("QuasR", "Bismark", "BISCUIT"),
+  min_frag_data_len = 50L,
+  min_frag_data_dens = 0.05,
   collapseBySample = TRUE,
   remove_nonunique = TRUE,
   clip_until_nbg = 1L,
@@ -19,9 +21,6 @@ get_data_matrix_from_bams(
   min_bisC_size = 10,
   mapqMin = 0,
   mapqMax = 255,
-  absIsizeMin = NULL,
-  absIsizeMax = NULL,
-  min_read_size = 25L,
   max_read_size = 1000L,
   ncores = 1L
 )
@@ -46,6 +45,10 @@ get_data_matrix_from_bams(
 
 \item{alignerUsed}{\code{character} that defines which aligner was used to generate BAM files. Currently supported aligners are QuasR, Bismark and BISCUIT.}
 
+\item{min_frag_data_len}{\code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.}
+
+\item{min_frag_data_dens}{\code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.}
+
 \item{collapseBySample}{\code{logical} indicating whether to collapse counts for bam files with same \code{samplenames}. If \code{FALSE} prefix \code{s} followed by index is added to \code{samplenames}.}
 
 \item{remove_nonunique}{\code{logical} if \code{TRUE} only unique fragments are analyzed. Uniqueness is defined by fragments start, end and methylation states of all cytosines.}
@@ -64,15 +67,7 @@ i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \c
 
 \item{mapqMax}{\code{integer} setting maximum MAPQ of reads to be included into analysis.}
 
-\item{absIsizeMin}{\code{integer} or \code{NULL} (default). For paired-end experiments, minimal absolute insert
-size (TLEN field in SAM Spec v1.4) of alignments to be included.}
-
-\item{absIsizeMax}{\code{integer} or \code{NULL} (default). For paired-end experiments, maximal absolute insert
-size (TLEN field in SAM Spec v1.4) of alignments to be included.}
-
-\item{min_read_size}{\code{integer}. minimum read length to be included.}
-
-\item{max_read_size}{\code{integer}. maximum read length to be included.}
+\item{max_read_size}{\code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.}
 
 \item{ncores}{number of cores to use.}
 }
diff --git a/man/get_molecule_data_list_from_bams.Rd b/man/get_molecule_data_list_from_bams.Rd
new file mode 100644
index 0000000..76d9793
--- /dev/null
+++ b/man/get_molecule_data_list_from_bams.Rd
@@ -0,0 +1,93 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_molecule_data_list_from_bams.R
+\name{get_molecule_data_list_from_bams}
+\alias{get_molecule_data_list_from_bams}
+\title{Fetch data for each molecule from NOMe-seq BAM files for a set of regions}
+\usage{
+get_molecule_data_list_from_bams(
+  bamfiles,
+  samplenames,
+  regions,
+  genome,
+  whichContext = c("GCH", "WCG", "bisC", "otherC", "allC"),
+  alignerUsed = c("QuasR", "Bismark", "BISCUIT"),
+  min_frag_data_len = 50L,
+  min_frag_data_dens = 0.05,
+  collapseBySample = TRUE,
+  remove_nonunique = TRUE,
+  clip_until_nbg = 1L,
+  noclip_protect_frac_above = 0.9,
+  max_bisC_meth = 0.1,
+  min_bisC_size = 10,
+  mapqMin = 0,
+  mapqMax = 255,
+  max_read_size = 1000L,
+  ncores = 1L
+)
+}
+\arguments{
+\item{bamfiles}{\code{character} paths to bam files}
+
+\item{samplenames}{\code{character} names of samples. If \code{collapseBySample} is \code{TRUE} counts are aggregated across bam files with same sample name.}
+
+\item{regions}{\linkS4class{GRanges} object for regions of interest.}
+
+\item{genome}{\code{character} path to fasta file which was used as reference for alignment.}
+
+\item{whichContext}{\code{character} which can be one of "GCH","WCG","bisC", "otherC" or "allC" and sets which data to retrieve.
+\describe{
+\item{"GCH}{protection data;}
+\item{"WCG"}{endogenous CpG methylation data;}
+\item{"bisC}{methylation states of non-GCH and non-WCG cytosins used for estimating efficiency of bisulfite conversion;}
+\item{"otherC"}{methylation states of all other cytosines which do not fall into above categories.}
+\item{"allC"}{methylation states of all cytosines combined.}
+}}
+
+\item{alignerUsed}{\code{character} that defines which aligner was used to generate BAM files. Currently supported aligners are QuasR, Bismark and BISCUIT.}
+
+\item{min_frag_data_len}{\code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.}
+
+\item{min_frag_data_dens}{\code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.}
+
+\item{collapseBySample}{\code{logical} indicating whether to collapse counts for bam files with same \code{samplenames}. If \code{FALSE} prefix \code{s} followed by index is added to \code{samplenames}.}
+
+\item{remove_nonunique}{\code{logical} if \code{TRUE} only unique fragments are analyzed. Uniqueness is defined by fragments start, end and methylation states of all cytosines.}
+
+\item{clip_until_nbg}{\code{integer} controlling clipping partial footprints at both ends of fragments. Namely, protected states are omitted from each end until \code{clip_until_nbg} unprotected states are met.
+Set \code{clip_until_nbg=0L} to skip clipping pre-processing.}
+
+\item{noclip_protect_frac_above}{\code{numeric} controlling fragment clipping, i.e. fragments that have fraction of protected states higher than \code{max_protect_frac} escape clipping. This parameter allows to avoid removal of fully protected fragments during clipping process.}
+
+\item{max_bisC_meth}{\code{numeric} controlling removal of fragments with failed bisulfite conversion, i.e. fragments with fraction of methylated non-GCH, and non-WCG cytosines higher then \code{max_bisC_meth} are removed from the analysis to get rid of fragments for which bisulfite conversion failed.}
+
+\item{min_bisC_size}{\code{integer} setting minimum number non-GCH, and non-WCG cytosines for filtering according to bisulfite conversion efficiency,
+i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \code{min_bisC_size} are subject to filtering based on bisulfite conversion efficiency.}
+
+\item{mapqMin}{\code{integer} setting minimum MAPQ of reads to be included into analysis.}
+
+\item{mapqMax}{\code{integer} setting maximum MAPQ of reads to be included into analysis.}
+
+\item{max_read_size}{\code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.}
+
+\item{ncores}{number of cores to use.}
+}
+\value{
+\code{tibble} where each row corresponds to sample - molecule combination.
+Please note that if \code{regions} contain overlapping ranges it is possible that same molecule to appear several times.
+
+Columns of the returned \code{tibble} represent:
+\describe{
+  \item{SampleName}{Name of a sample as defined by \code{samplenames}.}
+  \item{seqnames, start, end}{seqnames (or chromosomes) and coordinates of molecules. Please note, that start and end are positions of the first and last data points and not positions of alignments.}
+  \item{R1strand}{strands of alignments for R1 reads}
+  \item{qname}{names of a molecules}
+  \item{nDataPoints}{numbers of informative data points within molecules}
+  \item{startDataPoint}{most-left data points}
+  \item{endDataPoint}{most-right data points}
+  \item{fragData_list}{vectors with molecule data
+  }
+}
+}
+\description{
+Fetch data for each molecule from NOMe-seq BAM files for a set of regions
+}
diff --git a/man/get_protect_stats_from_bams.Rd b/man/get_protect_stats_from_bams.Rd
index a201c5e..4b556bd 100644
--- a/man/get_protect_stats_from_bams.Rd
+++ b/man/get_protect_stats_from_bams.Rd
@@ -18,9 +18,6 @@ get_protect_stats_from_bams(
   min_bisC_size = 10,
   mapqMin = 0,
   mapqMax = 255,
-  absIsizeMin = NULL,
-  absIsizeMax = NULL,
-  min_read_size = 25L,
   max_read_size = 1000L,
   ncores = 1L
 )
@@ -54,17 +51,13 @@ i.e. only fragments with number of non-GCH, and non-WCG cytosines higher then \c
 
 \item{mapqMax}{\code{integer} setting maximum MAPQ of reads to be included into analysis.}
 
-\item{absIsizeMin}{\code{integer} or \code{NULL} (default). For paired-end experiments, minimal absolute insert
-size (TLEN field in SAM Spec v1.4) of alignments to be included.}
+\item{max_read_size}{\code{integer}. maximum read length which is expected in the data. This parameter controls only extension of regions for extracting reference sequence.}
 
-\item{absIsizeMax}{\code{integer} or \code{NULL} (default). For paired-end experiments, maximal absolute insert
-size (TLEN field in SAM Spec v1.4) of alignments to be included.}
-
-\item{min_read_size}{\code{integer}. minimum read length to be included.}
+\item{ncores}{number of cores to use.}
 
-\item{max_read_size}{\code{integer}. maximum read length to be included.}
+\item{min_frag_data_len}{\code{integer} ignore fragments that have genomic lengths from most-left to most-right data points less than \code{min_frag_data_len}.}
 
-\item{ncores}{number of cores to use.}
+\item{min_frag_data_dens}{\code{numeric} ignore fragments that have density of data-containing positions lower than \code{min_frag_data_dens}.}
 }
 \value{
 \code{tibble} where each row corresponds to sample - region combination. 
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index b1a19d8..6029094 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -11,9 +11,38 @@ Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
+// fetch_molec_data_list_from_bams_cpp
+Rcpp::List fetch_molec_data_list_from_bams_cpp(const Rcpp::CharacterVector& whichContext, const Rcpp::CharacterVector& infiles, const Rcpp::CharacterVector& regionChr, const Rcpp::IntegerVector& regionStart, const Rcpp::IntegerVector& regionEnd, const Rcpp::CharacterVector& seqstring, const Rcpp::IntegerVector& seqStart, const Rcpp::IntegerVector& seqEnd, const Rcpp::LogicalVector& remove_nonunique, const Rcpp::IntegerVector& clip_until_nbg, const Rcpp::NumericVector& max_protect_frac, const Rcpp::NumericVector& max_bisC_meth, const Rcpp::IntegerVector& min_bisC_size, const Rcpp::IntegerVector& mapqMin, const Rcpp::IntegerVector& mapqMax, const Rcpp::CharacterVector& alignerUsed, const Rcpp::IntegerVector& min_frag_data_len, const Rcpp::NumericVector& min_frag_data_dens, const Rcpp::LogicalVector& data_as_rle);
+RcppExport SEXP _fetchNOMe_fetch_molec_data_list_from_bams_cpp(SEXP whichContextSEXP, SEXP infilesSEXP, SEXP regionChrSEXP, SEXP regionStartSEXP, SEXP regionEndSEXP, SEXP seqstringSEXP, SEXP seqStartSEXP, SEXP seqEndSEXP, SEXP remove_nonuniqueSEXP, SEXP clip_until_nbgSEXP, SEXP max_protect_fracSEXP, SEXP max_bisC_methSEXP, SEXP min_bisC_sizeSEXP, SEXP mapqMinSEXP, SEXP mapqMaxSEXP, SEXP alignerUsedSEXP, SEXP min_frag_data_lenSEXP, SEXP min_frag_data_densSEXP, SEXP data_as_rleSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type whichContext(whichContextSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type infiles(infilesSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type regionChr(regionChrSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type regionStart(regionStartSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type regionEnd(regionEndSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type seqstring(seqstringSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type seqStart(seqStartSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type seqEnd(seqEndSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type remove_nonunique(remove_nonuniqueSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type clip_until_nbg(clip_until_nbgSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type max_protect_frac(max_protect_fracSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type max_bisC_meth(max_bisC_methSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_bisC_size(min_bisC_sizeSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMin(mapqMinSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMax(mapqMaxSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type alignerUsed(alignerUsedSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_frag_data_len(min_frag_data_lenSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type min_frag_data_dens(min_frag_data_densSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type data_as_rle(data_as_rleSEXP);
+    rcpp_result_gen = Rcpp::wrap(fetch_molec_data_list_from_bams_cpp(whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens, data_as_rle));
+    return rcpp_result_gen;
+END_RCPP
+}
 // fetch_cooc_ctable_from_bams_cpp
-Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles, const Rcpp::CharacterVector& regionChr, const Rcpp::IntegerVector& regionStart, const Rcpp::IntegerVector& regionEnd, const Rcpp::IntegerVector& max_spac, const Rcpp::CharacterVector& seqstring, const Rcpp::IntegerVector& seqStart, const Rcpp::IntegerVector& seqEnd, const Rcpp::LogicalVector& remove_nonunique, const Rcpp::IntegerVector& clip_until_nbg, const Rcpp::NumericVector& max_protect_frac, const Rcpp::NumericVector& max_bisC_meth, const Rcpp::IntegerVector& min_bisC_size, const Rcpp::IntegerVector& mapqMin, const Rcpp::IntegerVector& mapqMax, const Rcpp::IntegerVector& absIsizeMin, const Rcpp::IntegerVector& absIsizeMax, const Rcpp::IntegerVector& min_read_size, const Rcpp::IntegerVector& max_read_size, const Rcpp::CharacterVector& alignerUsed);
-RcppExport SEXP _fetchNOMe_fetch_cooc_ctable_from_bams_cpp(SEXP infilesSEXP, SEXP regionChrSEXP, SEXP regionStartSEXP, SEXP regionEndSEXP, SEXP max_spacSEXP, SEXP seqstringSEXP, SEXP seqStartSEXP, SEXP seqEndSEXP, SEXP remove_nonuniqueSEXP, SEXP clip_until_nbgSEXP, SEXP max_protect_fracSEXP, SEXP max_bisC_methSEXP, SEXP min_bisC_sizeSEXP, SEXP mapqMinSEXP, SEXP mapqMaxSEXP, SEXP absIsizeMinSEXP, SEXP absIsizeMaxSEXP, SEXP min_read_sizeSEXP, SEXP max_read_sizeSEXP, SEXP alignerUsedSEXP) {
+Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles, const Rcpp::CharacterVector& regionChr, const Rcpp::IntegerVector& regionStart, const Rcpp::IntegerVector& regionEnd, const Rcpp::IntegerVector& max_spac, const Rcpp::CharacterVector& seqstring, const Rcpp::IntegerVector& seqStart, const Rcpp::IntegerVector& seqEnd, const Rcpp::LogicalVector& remove_nonunique, const Rcpp::IntegerVector& clip_until_nbg, const Rcpp::NumericVector& max_protect_frac, const Rcpp::NumericVector& max_bisC_meth, const Rcpp::IntegerVector& min_bisC_size, const Rcpp::IntegerVector& mapqMin, const Rcpp::IntegerVector& mapqMax, const Rcpp::CharacterVector& alignerUsed, const Rcpp::IntegerVector& min_frag_data_len, const Rcpp::NumericVector& min_frag_data_dens);
+RcppExport SEXP _fetchNOMe_fetch_cooc_ctable_from_bams_cpp(SEXP infilesSEXP, SEXP regionChrSEXP, SEXP regionStartSEXP, SEXP regionEndSEXP, SEXP max_spacSEXP, SEXP seqstringSEXP, SEXP seqStartSEXP, SEXP seqEndSEXP, SEXP remove_nonuniqueSEXP, SEXP clip_until_nbgSEXP, SEXP max_protect_fracSEXP, SEXP max_bisC_methSEXP, SEXP min_bisC_sizeSEXP, SEXP mapqMinSEXP, SEXP mapqMaxSEXP, SEXP alignerUsedSEXP, SEXP min_frag_data_lenSEXP, SEXP min_frag_data_densSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -32,18 +61,16 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_bisC_size(min_bisC_sizeSEXP);
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMin(mapqMinSEXP);
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMax(mapqMaxSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type absIsizeMin(absIsizeMinSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type absIsizeMax(absIsizeMaxSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_read_size(min_read_sizeSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type max_read_size(max_read_sizeSEXP);
     Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type alignerUsed(alignerUsedSEXP);
-    rcpp_result_gen = Rcpp::wrap(fetch_cooc_ctable_from_bams_cpp(infiles, regionChr, regionStart, regionEnd, max_spac, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed));
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_frag_data_len(min_frag_data_lenSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type min_frag_data_dens(min_frag_data_densSEXP);
+    rcpp_result_gen = Rcpp::wrap(fetch_cooc_ctable_from_bams_cpp(infiles, regionChr, regionStart, regionEnd, max_spac, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens));
     return rcpp_result_gen;
 END_RCPP
 }
 // fetch_data_matrix_from_bams_cpp
-Rcpp::List fetch_data_matrix_from_bams_cpp(const Rcpp::CharacterVector& whichContext, const Rcpp::CharacterVector& infiles, const Rcpp::CharacterVector& regionChr, const Rcpp::IntegerVector& regionStart, const Rcpp::IntegerVector& regionEnd, const Rcpp::CharacterVector& seqstring, const Rcpp::IntegerVector& seqStart, const Rcpp::IntegerVector& seqEnd, const Rcpp::LogicalVector& remove_nonunique, const Rcpp::IntegerVector& clip_until_nbg, const Rcpp::NumericVector& max_protect_frac, const Rcpp::NumericVector& max_bisC_meth, const Rcpp::IntegerVector& min_bisC_size, const Rcpp::IntegerVector& mapqMin, const Rcpp::IntegerVector& mapqMax, const Rcpp::IntegerVector& absIsizeMin, const Rcpp::IntegerVector& absIsizeMax, const Rcpp::IntegerVector& min_read_size, const Rcpp::IntegerVector& max_read_size, const Rcpp::CharacterVector& alignerUsed);
-RcppExport SEXP _fetchNOMe_fetch_data_matrix_from_bams_cpp(SEXP whichContextSEXP, SEXP infilesSEXP, SEXP regionChrSEXP, SEXP regionStartSEXP, SEXP regionEndSEXP, SEXP seqstringSEXP, SEXP seqStartSEXP, SEXP seqEndSEXP, SEXP remove_nonuniqueSEXP, SEXP clip_until_nbgSEXP, SEXP max_protect_fracSEXP, SEXP max_bisC_methSEXP, SEXP min_bisC_sizeSEXP, SEXP mapqMinSEXP, SEXP mapqMaxSEXP, SEXP absIsizeMinSEXP, SEXP absIsizeMaxSEXP, SEXP min_read_sizeSEXP, SEXP max_read_sizeSEXP, SEXP alignerUsedSEXP) {
+Rcpp::List fetch_data_matrix_from_bams_cpp(const Rcpp::CharacterVector& whichContext, const Rcpp::CharacterVector& infiles, const Rcpp::CharacterVector& regionChr, const Rcpp::IntegerVector& regionStart, const Rcpp::IntegerVector& regionEnd, const Rcpp::CharacterVector& seqstring, const Rcpp::IntegerVector& seqStart, const Rcpp::IntegerVector& seqEnd, const Rcpp::LogicalVector& remove_nonunique, const Rcpp::IntegerVector& clip_until_nbg, const Rcpp::NumericVector& max_protect_frac, const Rcpp::NumericVector& max_bisC_meth, const Rcpp::IntegerVector& min_bisC_size, const Rcpp::IntegerVector& mapqMin, const Rcpp::IntegerVector& mapqMax, const Rcpp::CharacterVector& alignerUsed, const Rcpp::IntegerVector& min_frag_data_len, const Rcpp::NumericVector& min_frag_data_dens);
+RcppExport SEXP _fetchNOMe_fetch_data_matrix_from_bams_cpp(SEXP whichContextSEXP, SEXP infilesSEXP, SEXP regionChrSEXP, SEXP regionStartSEXP, SEXP regionEndSEXP, SEXP seqstringSEXP, SEXP seqStartSEXP, SEXP seqEndSEXP, SEXP remove_nonuniqueSEXP, SEXP clip_until_nbgSEXP, SEXP max_protect_fracSEXP, SEXP max_bisC_methSEXP, SEXP min_bisC_sizeSEXP, SEXP mapqMinSEXP, SEXP mapqMaxSEXP, SEXP alignerUsedSEXP, SEXP min_frag_data_lenSEXP, SEXP min_frag_data_densSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -62,18 +89,16 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_bisC_size(min_bisC_sizeSEXP);
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMin(mapqMinSEXP);
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMax(mapqMaxSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type absIsizeMin(absIsizeMinSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type absIsizeMax(absIsizeMaxSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_read_size(min_read_sizeSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type max_read_size(max_read_sizeSEXP);
     Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type alignerUsed(alignerUsedSEXP);
-    rcpp_result_gen = Rcpp::wrap(fetch_data_matrix_from_bams_cpp(whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed));
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_frag_data_len(min_frag_data_lenSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type min_frag_data_dens(min_frag_data_densSEXP);
+    rcpp_result_gen = Rcpp::wrap(fetch_data_matrix_from_bams_cpp(whichContext, infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens));
     return rcpp_result_gen;
 END_RCPP
 }
 // fetch_protect_stats_from_bams_cpp
-Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infiles, const Rcpp::CharacterVector& regionChr, const Rcpp::IntegerVector& regionStart, const Rcpp::IntegerVector& regionEnd, const Rcpp::CharacterVector& seqstring, const Rcpp::IntegerVector& seqStart, const Rcpp::IntegerVector& seqEnd, const Rcpp::LogicalVector& remove_nonunique, const Rcpp::IntegerVector& clip_until_nbg, const Rcpp::NumericVector& max_protect_frac, const Rcpp::NumericVector& max_bisC_meth, const Rcpp::IntegerVector& min_bisC_size, const Rcpp::IntegerVector& mapqMin, const Rcpp::IntegerVector& mapqMax, const Rcpp::IntegerVector& absIsizeMin, const Rcpp::IntegerVector& absIsizeMax, const Rcpp::IntegerVector& min_read_size, const Rcpp::IntegerVector& max_read_size, const Rcpp::CharacterVector& alignerUsed);
-RcppExport SEXP _fetchNOMe_fetch_protect_stats_from_bams_cpp(SEXP infilesSEXP, SEXP regionChrSEXP, SEXP regionStartSEXP, SEXP regionEndSEXP, SEXP seqstringSEXP, SEXP seqStartSEXP, SEXP seqEndSEXP, SEXP remove_nonuniqueSEXP, SEXP clip_until_nbgSEXP, SEXP max_protect_fracSEXP, SEXP max_bisC_methSEXP, SEXP min_bisC_sizeSEXP, SEXP mapqMinSEXP, SEXP mapqMaxSEXP, SEXP absIsizeMinSEXP, SEXP absIsizeMaxSEXP, SEXP min_read_sizeSEXP, SEXP max_read_sizeSEXP, SEXP alignerUsedSEXP) {
+Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infiles, const Rcpp::CharacterVector& regionChr, const Rcpp::IntegerVector& regionStart, const Rcpp::IntegerVector& regionEnd, const Rcpp::CharacterVector& seqstring, const Rcpp::IntegerVector& seqStart, const Rcpp::IntegerVector& seqEnd, const Rcpp::LogicalVector& remove_nonunique, const Rcpp::IntegerVector& clip_until_nbg, const Rcpp::NumericVector& max_protect_frac, const Rcpp::NumericVector& max_bisC_meth, const Rcpp::IntegerVector& min_bisC_size, const Rcpp::IntegerVector& mapqMin, const Rcpp::IntegerVector& mapqMax, const Rcpp::CharacterVector& alignerUsed, const Rcpp::IntegerVector& min_frag_data_len, const Rcpp::NumericVector& min_frag_data_dens);
+RcppExport SEXP _fetchNOMe_fetch_protect_stats_from_bams_cpp(SEXP infilesSEXP, SEXP regionChrSEXP, SEXP regionStartSEXP, SEXP regionEndSEXP, SEXP seqstringSEXP, SEXP seqStartSEXP, SEXP seqEndSEXP, SEXP remove_nonuniqueSEXP, SEXP clip_until_nbgSEXP, SEXP max_protect_fracSEXP, SEXP max_bisC_methSEXP, SEXP min_bisC_sizeSEXP, SEXP mapqMinSEXP, SEXP mapqMaxSEXP, SEXP alignerUsedSEXP, SEXP min_frag_data_lenSEXP, SEXP min_frag_data_densSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -91,20 +116,19 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_bisC_size(min_bisC_sizeSEXP);
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMin(mapqMinSEXP);
     Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type mapqMax(mapqMaxSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type absIsizeMin(absIsizeMinSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type absIsizeMax(absIsizeMaxSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_read_size(min_read_sizeSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type max_read_size(max_read_sizeSEXP);
     Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type alignerUsed(alignerUsedSEXP);
-    rcpp_result_gen = Rcpp::wrap(fetch_protect_stats_from_bams_cpp(infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, absIsizeMin, absIsizeMax, min_read_size, max_read_size, alignerUsed));
+    Rcpp::traits::input_parameter< const Rcpp::IntegerVector& >::type min_frag_data_len(min_frag_data_lenSEXP);
+    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type min_frag_data_dens(min_frag_data_densSEXP);
+    rcpp_result_gen = Rcpp::wrap(fetch_protect_stats_from_bams_cpp(infiles, regionChr, regionStart, regionEnd, seqstring, seqStart, seqEnd, remove_nonunique, clip_until_nbg, max_protect_frac, max_bisC_meth, min_bisC_size, mapqMin, mapqMax, alignerUsed, min_frag_data_len, min_frag_data_dens));
     return rcpp_result_gen;
 END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_fetchNOMe_fetch_cooc_ctable_from_bams_cpp", (DL_FUNC) &_fetchNOMe_fetch_cooc_ctable_from_bams_cpp, 20},
-    {"_fetchNOMe_fetch_data_matrix_from_bams_cpp", (DL_FUNC) &_fetchNOMe_fetch_data_matrix_from_bams_cpp, 20},
-    {"_fetchNOMe_fetch_protect_stats_from_bams_cpp", (DL_FUNC) &_fetchNOMe_fetch_protect_stats_from_bams_cpp, 19},
+    {"_fetchNOMe_fetch_molec_data_list_from_bams_cpp", (DL_FUNC) &_fetchNOMe_fetch_molec_data_list_from_bams_cpp, 19},
+    {"_fetchNOMe_fetch_cooc_ctable_from_bams_cpp", (DL_FUNC) &_fetchNOMe_fetch_cooc_ctable_from_bams_cpp, 18},
+    {"_fetchNOMe_fetch_data_matrix_from_bams_cpp", (DL_FUNC) &_fetchNOMe_fetch_data_matrix_from_bams_cpp, 18},
+    {"_fetchNOMe_fetch_protect_stats_from_bams_cpp", (DL_FUNC) &_fetchNOMe_fetch_protect_stats_from_bams_cpp, 17},
     {NULL, NULL, 0}
 };
 
diff --git a/src/fetch_data_from_bam.cpp b/src/fetch_data_from_bam.cpp
index 99d2b21..183bd94 100644
--- a/src/fetch_data_from_bam.cpp
+++ b/src/fetch_data_from_bam.cpp
@@ -7,193 +7,218 @@ bool fetch_data_from_bam(const string& bamfile,
                          const int& regionEnd,
                          const alignerType& aligner,
                          obj_pnts* pnts){
-	
-	
-	bam1_t *hit = bam_init1();
-	samfile_t *fin;
-	bam_index_t *idx;
-	
-	
-	fin = _bam_tryopen(bamfile.c_str(), "rb", NULL);
-	idx = bam_index_load(bamfile.c_str()); // load BAM index
-	if (idx == 0)
-		Rcpp::stop("BAM index for '" + bamfile + "' unavailable\n");
-	
-	// get target id
-	int tid = 0;
-	while(regionChr.compare(fin->header->target_name[tid]) != 0 && tid+1<fin->header->n_targets)
-		tid++;
-	
-	if(regionChr.compare(fin->header->target_name[tid]) != 0)
-		Rcpp::stop("could not find target '" + regionChr + "' in bam header of " + bamfile + ".\n");
-	
-	
-	// define a pointer to a callback retrival function
-	int (*collectRegionData)(const bam1_t *hit, void *data);
-	
-	// choose callback function for data parsing based on aligner used
-	switch(aligner) {
-	case QuasR:
-	  collectRegionData = &collectRegionData_QsBism;
-	  break;
-	case Bismark:
-	  collectRegionData = &collectRegionData_QsBism;
-	  break;
-	case BISCUIT:
-	  collectRegionData = &collectRegionData_Bisq;
-	  break;
-	default:
-	  Rcpp::stop("Only alignments generated by QuasR, Bismark or BISCUIT can be analyzed\n");
-	}
-	
-	// call corresponding retrieval function to store alignments in region
-	bam_fetch(fin->x.bam, idx, tid, regionStart, regionEnd, pnts, collectRegionData);
-	
-	// clean bam file objects
-	bam_index_destroy(idx);
-	samclose(fin);
-	
-	return(1);
+  
+  
+  bam1_t *hit = bam_init1();
+  samfile_t *fin;
+  bam_index_t *idx;
+  
+  
+  fin = _bam_tryopen(bamfile.c_str(), "rb", NULL);
+  idx = bam_index_load(bamfile.c_str()); // load BAM index
+  if (idx == 0)
+    Rcpp::stop("BAM index for '" + bamfile + "' unavailable\n");
+  
+  // get target id
+  int tid = 0;
+  while(regionChr.compare(fin->header->target_name[tid]) != 0 && tid+1<fin->header->n_targets)
+    tid++;
+  
+  if(regionChr.compare(fin->header->target_name[tid]) != 0)
+    Rcpp::stop("could not find target '" + regionChr + "' in bam header of " + bamfile + ".\n");
+  
+  
+  // define a pointer to a callback retrival function
+  int (*collectRegionData)(const bam1_t *hit, void *data);
+  
+  // choose callback function for data parsing based on aligner used
+  switch(aligner) {
+  case QuasR:
+    collectRegionData = &collectRegionData_QsBism;
+    break;
+  case Bismark:
+    collectRegionData = &collectRegionData_QsBism;
+    break;
+  case BISCUIT:
+    collectRegionData = &collectRegionData_Bisq;
+    break;
+  default:
+    Rcpp::stop("Only alignments generated by QuasR, Bismark or BISCUIT can be analyzed\n");
+  }
+  
+  // call corresponding retrieval function to store alignments in region
+  bam_fetch(fin->x.bam, idx, tid, regionStart, regionEnd, pnts, collectRegionData);
+  
+  // clean bam file objects
+  bam_index_destroy(idx);
+  samclose(fin);
+  
+  return(1);
 }
 
+
+string getFragConfig(const bam1_t *hit){
+  // get configuration of the fragment
+  string frag_config;
+  if(bam_is_first(hit)){
+    if(bam_is_rev(hit))
+      frag_config = "R1-";
+    else
+      frag_config = "R1+";
+    if(bam_is_proper_pair(hit)){
+      if(bam_is_mrev(hit))
+        frag_config += "R2-";
+      else
+        frag_config += "R2+";
+    }
+  } else if(bam_is_second(hit)){
+    if(bam_is_proper_pair(hit)){
+      if(bam_is_rev(hit))
+        frag_config = "R1-";
+      else
+        frag_config = "R1+";
+      
+      if(bam_is_rev(hit))
+        frag_config += "R2-";
+      else
+        frag_config += "R2+";
+    } else{
+      std::cout<<"WARNING!!! Unexpected fragment configuration!"<<endl;
+    }
+  } else{
+    std::cerr<<"WARNING!!! Unexpected fragment flag!"<<endl;
+  }
+  return(frag_config);
+}
+
+
+
 // bam_fetch callback function for methyl calling and data storing for BAMs produced by QuasR and Bismark
 static int collectRegionData_QsBism(const bam1_t *hit, void *data) { 
-	
-	static obj_pnts *pdata = NULL;
-	
-	pdata = (obj_pnts*) data;
-	
-	
-	// check if alignment is valid
-	if(!isValidAln(hit,pdata))
-		return(0);
-	// for testing only
-	//print_bs_alignment(hit,pdata);
-	
-	// parse alignment and create vectors for regData
-	string qname = string(bam1_qname(hit)); // frag name
-	uint32_t *cigar = bam_get_cigar(hit); // cigar array
-	uint8_t *hitseq = bam1_seq(hit); // query sequence
-	int ref_start = hit -> core.pos; // leftmost position in reference
-	int ref_end = bam_calend(&(hit->core),cigar); // rightmost position in reference
-	bool is_read_rev = bam_is_rev(hit); // is read on "+" strand
-	bool is_mate_rev = bam_is_mrev(hit);
-	bool is_second = bam_is_second(hit); // is it a second mate
-	
-	// choose strand to get contexts for
-	seekMismType msmType;
-	if(is_second){ // parsing second mate in PE data, strand determined by first mate
-		msmType = is_mate_rev ? GtoA : CtoT;
-	} else{ // parsing first mate or SE data, strand determined by read itself
-		msmType = is_read_rev ? GtoA : CtoT;
-	}
-	
-	// for testing only
-	//Rcpp::Rcout<<"qname="<<qname<<"\nis_second="<<is_second<<"\nis_read_rev="<<is_read_rev<<"\nis_mate_rev="<<is_mate_rev<<"\nmsmType="<<msmType<<endl;
-	// 
-	// // for testing only
-	// fragData frg;
-	
-	// go across contexts and add data
-	vector<fragMapType > context_vec = {GCH, WCG, bisC, otherC};
-	for(int i = 0; i < context_vec.size(); ++i){
-		// get positions for the context in reference
-		vector<int> cntx_rpos_vec = pdata->refseq_info->getPositionsBetween(ref_start,ref_end,context_vec[i], msmType == CtoT);
-		
-		vector<int > rpos_vec;
-		vector<bool > dat_vec;
-		vector<bool > isplusstrand_vec;
-		
-		for(int cind = 0; cind < cntx_rpos_vec.size(); ++cind){
-			
-			// get query position
-			int qpos = _fromRefToQueryPos(ref_start,
-                                 cntx_rpos_vec[cind],
-                                              hit->core.n_cigar,
-                                              cigar);
-			// get nucleotide in query
-			string qseq = nt16_table[bam1_seqi(hitseq, qpos)];
-			
-			// check if C->T mismatch if msmType == CtoT or GtoA otherwise. query on - in BAM are reverse complement
-			switch(msmType) {
-				case CtoT:
-				{
-					if(qseq == "C"){ // methylated or unprotected
-					// add position on reference
-					rpos_vec.push_back(cntx_rpos_vec[cind]);
-					// add data
-					dat_vec.push_back(context_vec[i] == GCH ? 0 : 1);
-					// add strand
-					isplusstrand_vec.push_back(!is_read_rev);
-					
-				} else if(qseq == "T"){ // unmethylated or protected
-					// add position on reference
-					rpos_vec.push_back(cntx_rpos_vec[cind]);
-					// add data
-					dat_vec.push_back(context_vec[i] == GCH ? 1 : 0);
-					// add strand
-					isplusstrand_vec.push_back(!is_read_rev);
-				}
-					break;
-				}
-				case GtoA:
-				{
-					if(qseq == "G"){ // methylated or unprotected
-					// add position on reference
-					rpos_vec.push_back(cntx_rpos_vec[cind]);
-					// add data
-					dat_vec.push_back(context_vec[i] == GCH ? 0 : 1);
-					// add strand
-					isplusstrand_vec.push_back(!is_read_rev);
-					
-				} else if(qseq == "A"){ // unmethylated or protected
-					// add position on reference
-					rpos_vec.push_back(cntx_rpos_vec[cind]);
-					// add data
-					dat_vec.push_back(context_vec[i] == GCH ? 1 : 0);
-					// add strand
-					isplusstrand_vec.push_back(!is_read_rev);
-				}
-					
-					break;
-				}
-			}
-			
-			
-		}
-		
-		
-			
-		
-		// add data for the current context into regionData
-		pdata->reg_data->addFragContextData(pdata->prefix,
-                                      pdata->prefix + qname,
-                                      ref_start,
-                                      ref_end,
-                                      rpos_vec,
-                                      dat_vec,
-                                      isplusstrand_vec,
-                                      is_second,
-                                      context_vec[i]);
-		
-		// for testing only
-// 		frg.addData(ref_start,
-//               ref_end,
-//               rpos_vec,
-//               dat_vec,
-//               isplusstrand_vec,
-//               is_second,
-//               context_vec[i]);
-		
-	}
-	
-	
-	
-	
-
-	
-	return(0);
-	
+  
+  static obj_pnts *pdata = NULL;
+  
+  pdata = (obj_pnts*) data;
+  
+  
+  // check if alignment is valid
+  if(!isValidAln(hit,pdata))
+    return(0);
+  // for testing only
+  //print_bs_alignment(hit,pdata);
+  
+  // parse alignment and create vectors for regData
+  string qname = string(bam1_qname(hit)); // frag name
+  uint32_t *cigar = bam_get_cigar(hit); // cigar array
+  uint8_t *hitseq = bam1_seq(hit); // query sequence
+  int ref_start = hit -> core.pos; // leftmost position in reference
+  int ref_end = bam_calend(&(hit->core),cigar); // rightmost position in reference
+  bool is_read_rev = bam_is_rev(hit); // is read on "-" strand
+  bool is_mate_rev = bam_is_mrev(hit);
+  bool is_second = bam_is_second(hit); // is it a second mate
+  
+  string frag_config = getFragConfig(hit);
+  
+  
+  
+  // choose strand to get contexts for
+  seekMismType msmType;
+  if(is_second){ // parsing second mate in PE data, strand determined by first mate
+    msmType = is_mate_rev ? GtoA : CtoT;
+  } else{ // parsing first mate or SE data, strand determined by read itself
+    msmType = is_read_rev ? GtoA : CtoT;
+  }
+  
+  // go across contexts and add data
+  //vector<fragMapType > context_vec = {GCH, WCG, bisC, otherC};
+  //for(int i = 0; i < context_vec.size(); ++i){
+  
+  for(int i = 0; i < pdata->context_vec->size(); ++i){
+    fragMapType currContext = pdata->context_vec->at(i);
+    
+    // get positions for the context in reference
+    vector<int> cntx_rpos_vec = pdata->refseq_info->getPositionsBetween(ref_start,ref_end,currContext, msmType == CtoT);
+    
+    vector<int > rpos_vec;
+    vector<bool > dat_vec;
+    vector<bool > isplusstrand_vec;
+    
+    for(int cind = 0; cind < cntx_rpos_vec.size(); ++cind){
+      
+      // get query position
+      int qpos = _fromRefToQueryPos(ref_start,
+                                    cntx_rpos_vec[cind],
+                                                 hit->core.n_cigar,
+                                                 cigar);
+      // get nucleotide in query
+      string qseq = nt16_table[bam1_seqi(hitseq, qpos)];
+      
+      // check if C->T mismatch if msmType == CtoT or GtoA otherwise. query on - in BAM are reverse complement
+      switch(msmType) {
+      case CtoT:
+      {
+        if(qseq == "C"){ // methylated or unprotected
+        // add position on reference
+        rpos_vec.push_back(cntx_rpos_vec[cind]);
+        // add data
+        //dat_vec.push_back(context_vec[i] == GCH ? 0 : 1);
+        dat_vec.push_back(currContext == GCH ? 0 : 1);
+        // add strand
+        isplusstrand_vec.push_back(!is_read_rev);
+        
+      } else if(qseq == "T"){ // unmethylated or protected
+        // add position on reference
+        rpos_vec.push_back(cntx_rpos_vec[cind]);
+        // add data
+        //dat_vec.push_back(context_vec[i] == GCH ? 1 : 0);
+        dat_vec.push_back(currContext == GCH ? 1 : 0);
+        // add strand
+        isplusstrand_vec.push_back(!is_read_rev);
+      }
+      break;
+      }
+      case GtoA:
+      {
+        if(qseq == "G"){ // methylated or unprotected
+        // add position on reference
+        rpos_vec.push_back(cntx_rpos_vec[cind]);
+        // add data
+        //dat_vec.push_back(context_vec[i] == GCH ? 0 : 1);
+        dat_vec.push_back(currContext == GCH ? 0 : 1);
+        // add strand
+        isplusstrand_vec.push_back(!is_read_rev);
+        
+      } else if(qseq == "A"){ // unmethylated or protected
+        // add position on reference
+        rpos_vec.push_back(cntx_rpos_vec[cind]);
+        // add data
+        //dat_vec.push_back(context_vec[i] == GCH ? 1 : 0);
+        dat_vec.push_back(currContext == GCH ? 1 : 0);
+        // add strand
+        isplusstrand_vec.push_back(!is_read_rev);
+      }
+      
+      break;
+      }
+      }
+      
+      
+    }
+    
+    // add data for the current context into regionData
+    pdata->reg_data->addFragContextData(pdata->prefix,
+                                        pdata->prefix + qname,
+                                        ref_start,
+                                        ref_end,
+                                        frag_config,
+                                        rpos_vec,
+                                        dat_vec,
+                                        isplusstrand_vec,
+                                        is_second,
+                                        currContext);
+    
+  }
+  return(0);
 }
 
 
@@ -222,6 +247,8 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
   bool is_mate_rev = bam_is_mrev(hit);
   bool is_second = bam_is_second(hit); // is it a second mate
   
+  string frag_config = getFragConfig(hit);
+  
   // extract YD tag
   uint8_t* ydTag = bam_aux_get(hit, "YD");
   char ydValue;
@@ -261,10 +288,13 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
   // fragData frg;
   
   // go across contexts and add data
-  vector<fragMapType > context_vec = {GCH, WCG, bisC, otherC};
-  for(int i = 0; i < context_vec.size(); ++i){
+  // vector<fragMapType > context_vec = {GCH, WCG, bisC, otherC};
+  // for(int i = 0; i < context_vec.size(); ++i){
+  for(int i = 0; i < pdata->context_vec->size(); ++i){
+    fragMapType currContext = pdata->context_vec->at(i);
+    
     // get positions for the context in reference
-    vector<int> cntx_rpos_vec = pdata->refseq_info->getPositionsBetween(ref_start,ref_end,context_vec[i], msmType == CtoT);
+    vector<int> cntx_rpos_vec = pdata->refseq_info->getPositionsBetween(ref_start,ref_end,currContext, msmType == CtoT);
     
     vector<int > rpos_vec;
     vector<bool > dat_vec;
@@ -288,7 +318,7 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
         // add position on reference
         rpos_vec.push_back(cntx_rpos_vec[cind]);
         // add data
-        dat_vec.push_back(context_vec[i] == GCH ? 0 : 1);
+        dat_vec.push_back(currContext == GCH ? 0 : 1);
         // add strand
         isplusstrand_vec.push_back(!is_read_rev);
         
@@ -296,7 +326,7 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
         // add position on reference
         rpos_vec.push_back(cntx_rpos_vec[cind]);
         // add data
-        dat_vec.push_back(context_vec[i] == GCH ? 1 : 0);
+        dat_vec.push_back(currContext == GCH ? 1 : 0);
         // add strand
         isplusstrand_vec.push_back(!is_read_rev);
       }
@@ -308,7 +338,7 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
         // add position on reference
         rpos_vec.push_back(cntx_rpos_vec[cind]);
         // add data
-        dat_vec.push_back(context_vec[i] == GCH ? 0 : 1);
+        dat_vec.push_back(currContext == GCH ? 0 : 1);
         // add strand
         isplusstrand_vec.push_back(!is_read_rev);
         
@@ -316,7 +346,7 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
         // add position on reference
         rpos_vec.push_back(cntx_rpos_vec[cind]);
         // add data
-        dat_vec.push_back(context_vec[i] == GCH ? 1 : 0);
+        dat_vec.push_back(currContext == GCH ? 1 : 0);
         // add strand
         isplusstrand_vec.push_back(!is_read_rev);
       }
@@ -330,34 +360,25 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
     
     
     
+    if(dat_vec.size()>0){
+      
+      // add data for the current context into regionData
+      pdata->reg_data->addFragContextData(pdata->prefix,
+                                          pdata->prefix + qname,
+                                          ref_start,
+                                          ref_end,
+                                          frag_config,
+                                          rpos_vec,
+                                          dat_vec,
+                                          isplusstrand_vec,
+                                          is_second,
+                                          currContext);
+    }
+    
     
-    // add data for the current context into regionData
-    pdata->reg_data->addFragContextData(pdata->prefix,
-                                        pdata->prefix + qname,
-                                        ref_start,
-                                        ref_end,
-                                        rpos_vec,
-                                        dat_vec,
-                                        isplusstrand_vec,
-                                        is_second,
-                                        context_vec[i]);
     
-    // for testing only
-    // 		frg.addData(ref_start,
-    //               ref_end,
-    //               rpos_vec,
-    //               dat_vec,
-    //               isplusstrand_vec,
-    //               is_second,
-    //               context_vec[i]);
     
   }
-  
-  
-  
-  
-  
-  
   return(0);
   
 }
@@ -368,25 +389,36 @@ static int collectRegionData_Bisq(const bam1_t *hit, void *data) {
 
 // checks validity of an alignment
 bool isValidAln(const bam1_t *hit,const obj_pnts* pdata){
-	
-	bool valaln = !bam_is_not_primary(hit) && 
-	  !bam_is_qc_fail(hit) && 
-	  !bam_is_pcr_dupl(hit) &&
-	  !bam_is_suppl(hit) &&
-	  ((int )(hit->core).qual >= pdata->mapqMin) &&
-	  ((int )(hit->core).qual <= pdata->mapqMax) &&
-	  ((int )(hit->core).l_qseq >= pdata->min_read_size) &&
-	  ((int )(hit->core).l_qseq <= pdata->max_read_size);
-  
-  // check insert length
-  if(bam_is_proper_pair(hit)){
-    if(pdata->absIsizeMin > 0){
-      valaln = valaln && llabs(((int )(hit->core).isize)) >= pdata->absIsizeMin;
-    }
-    if(pdata->absIsizeMax > 0){
-      valaln = valaln && llabs(((int )(hit->core).isize)) <= pdata->absIsizeMax;
-    }
-  }
+  
+  // bool valaln = !bam_is_not_primary(hit) && 
+  //   !bam_is_qc_fail(hit) && 
+  //   !bam_is_pcr_dupl(hit) &&
+  //   !bam_is_suppl(hit) &&
+  //   ((int )(hit->core).qual >= pdata->mapqMin) &&
+  //   ((int )(hit->core).qual <= pdata->mapqMax) &&
+  //   ((int )(hit->core).l_qseq >= pdata->min_read_size) &&
+  //   ((int )(hit->core).l_qseq <= pdata->max_read_size);
+  // 
+  // // check insert length
+  // if(bam_is_proper_pair(hit)){
+  //   if(pdata->absIsizeMin > 0){
+  //     valaln = valaln && llabs(((int )(hit->core).isize)) >= pdata->absIsizeMin;
+  //   }
+  //   if(pdata->absIsizeMax > 0){
+  //     valaln = valaln && llabs(((int )(hit->core).isize)) <= pdata->absIsizeMax;
+  //   }
+  // }
+  
+  
+  bool valaln = !bam_is_not_primary(hit) && 
+    !bam_is_qc_fail(hit) && 
+    !bam_is_pcr_dupl(hit) &&
+    !bam_is_suppl(hit) &&
+    ((int )(hit->core).qual >= pdata->mapqMin) &&
+    ((int )(hit->core).qual <= pdata->mapqMax);
+  
+  
+  
   
   return(valaln);
 }
@@ -394,162 +426,162 @@ bool isValidAln(const bam1_t *hit,const obj_pnts* pdata){
 
 // print alignment for debugging
 void print_bs_alignment(const bam1_t *hit,const obj_pnts* pdata){
-	
-	// print data for one alignment
-	Rcpp::Rcout<<"====== "<<bam1_qname(hit)<<" ======"<<endl;
-	Rcpp::Rcout<<"alignment is first: "<<bam_is_first(hit)<<"; alignment is reverse: "<<bam_is_rev(hit)<<endl;
-	
-	bool is_read_rev = bam_is_rev(hit); // is read on "+" strand
-	bool is_mate_rev = bam_is_mrev(hit);
-	bool is_second = bam_is_second(hit); // is it a second mate
-	
-	// get cigar array
-	uint32_t *cigar = bam_get_cigar(hit);
-	
-	// get qseq, rseq and xm tag and print out
+  
+  // print data for one alignment
+  Rcpp::Rcout<<"====== "<<bam1_qname(hit)<<" ======"<<endl;
+  Rcpp::Rcout<<"alignment is first: "<<bam_is_first(hit)<<"; alignment is reverse: "<<bam_is_rev(hit)<<endl;
+  
+  bool is_read_rev = bam_is_rev(hit); // is read on "+" strand
+  bool is_mate_rev = bam_is_mrev(hit);
+  bool is_second = bam_is_second(hit); // is it a second mate
+  
+  // get cigar array
+  uint32_t *cigar = bam_get_cigar(hit);
+  
+  // get qseq, rseq and xm tag and print out
   // old: int qlen = bam_cigar2qlen(&(hit -> core), cigar);
   int qlen = bam_cigar2qlen(hit->core.n_cigar, cigar);
-	uint8_t *hitseq = bam1_seq(hit);
-	
-	
-	// ## for Bismark meth and unmeth alphabet find positions in XM tag
-	// ## Bismark codes
-	// # z - C in CpG context - unmethylated
-	// # Z - C in CpG context - methylated
-	// # x - C in CHG context - unmethylated
-	// # X - C in CHG context - methylated
-	// # h - C in CHH context - unmethylated
-	// # H - C in CHH context - methylated
-	// # u - C in Unknown context (CN or CHN) - unmethylated
-	// # U - C in Unknown context (CN or CHN) - methylated
-	// # . - not a C or irrelevant position
-	
-	
-	// get XM tag if present
-	uint8_t *pxmtag = bam_aux_get(hit,"XM");
-	string xm_tag;
-	if(pxmtag != NULL){
-		int type = *pxmtag++;
-		if(type == 'Z')
-			xm_tag = string((char*)pxmtag);
-	}
-	
-	
-	// choose strand to get contexts for
-	seekMismType msmType;
-	if(is_second){ // parsing second mate in PE data, strand determined by first mate
-		msmType = is_mate_rev ? GtoA : CtoT;
-	} else{ // parsing first mate or SE data, strand determined by read itself
-		msmType = is_read_rev ? GtoA : CtoT;
-	}
-	
-	string queryseq, refseq, matchsymb,cntxstr;
-	for(int qpos = 0; qpos < qlen; ++qpos){
-		int rpos = _fromQueryToRefPos(hit -> core.pos,qpos,hit -> core.n_cigar,cigar);
-		string rseq;
-		string qseq = nt16_table[bam1_seqi(hitseq, qpos)];
-		
-		if(rpos != -1){
-			rseq = pdata->refseq_info->getRefSubseq(rpos,1);
-			matchsymb += "|";
-		} else {
-			rseq = "-";
-			matchsymb += " ";
-		}
-		
-		queryseq += qseq;
-		refseq += rseq;
-		
-		// get context
-		short int mcntx = pdata->refseq_info->getContextForRefPos(rpos);
-		
-		switch (msmType) {
-		case CtoT:
-		{
-			if(mcntx == 0 || (qseq != "C" && qseq != "T")){
-				cntxstr += ".";
-			} else{
-				if(rseq != "C"){
-					Rcpp::Rcout<<"print_bs_alignment: WARNING: Expect C in reference sequence but got "<<rseq<<endl;
-					cntxstr += "X";
-				} else if(mcntx == 1){
-					if(qseq == "T")
-						cntxstr += "P";
-					else if(qseq == "C")
-						cntxstr += "p";
-				} else if(mcntx == 2){
-					if(qseq == "T")
-						cntxstr += "c";
-					else if(qseq == "C")
-						cntxstr += "C";
-				} else if(mcntx == 3){
-					if(qseq == "T")
-						cntxstr += "b";
-					else if(qseq == "C")
-						cntxstr += "B";
-				} else if(mcntx == 4){
-					if(qseq == "T")
-						cntxstr += "o";
-					else if(qseq == "C")
-						cntxstr += "O";
-				}
-			
-			}
-			break;
-		}
-		case GtoA:
-		{
-			if(mcntx == 0 || (qseq != "G" && qseq != "A")){
-			cntxstr += ".";
-		} else{
-			if(rseq != "G"){
-				Rcpp::Rcout<<"print_bs_alignment: WARNING: Expect G in reference sequence but got "<<rseq<<endl;
-				cntxstr += "X";
-			} else if(mcntx == -1){
-				if(qseq == "A")
-					cntxstr += "P";
-				else if(qseq == "G")
-					cntxstr += "p";
-			} else if(mcntx == -2){
-				if(qseq == "A")
-					cntxstr += "c";
-				else if(qseq == "G")
-					cntxstr += "C";
-			} else if(mcntx == -3){
-				if(qseq == "A")
-					cntxstr += "b";
-				else if(qseq == "G")
-					cntxstr += "B";
-			} else if(mcntx == -4){
-				if(qseq == "A")
-					cntxstr += "o";
-				else if(qseq == "G")
-					cntxstr += "O";
-			}
-			
-		}
-			
-			break;
-		}
-		}
-		
-		
-		
-	}
-	
-	// print reference sequence
-	Rcpp::Rcout<<"reference:  "<<refseq<<endl;
-	// print match symbols
-	Rcpp::Rcout<<"            "<<matchsymb<<endl;
-	// print query seq
-	Rcpp::Rcout<<"    query:  "<<queryseq<<endl;
-	// print nome context
-	Rcpp::Rcout<<"  context:  "<<cntxstr<<endl;
-	
-	// print XM tag if present
-	if(xm_tag != "")
-		Rcpp::Rcout<<"   XM tag:  "<<xm_tag<<endl;
-	// print end of alignment 
-	Rcpp::Rcout<<"======================================================================================="<<endl;
+  uint8_t *hitseq = bam1_seq(hit);
+  
+  
+  // ## for Bismark meth and unmeth alphabet find positions in XM tag
+  // ## Bismark codes
+  // # z - C in CpG context - unmethylated
+  // # Z - C in CpG context - methylated
+  // # x - C in CHG context - unmethylated
+  // # X - C in CHG context - methylated
+  // # h - C in CHH context - unmethylated
+  // # H - C in CHH context - methylated
+  // # u - C in Unknown context (CN or CHN) - unmethylated
+  // # U - C in Unknown context (CN or CHN) - methylated
+  // # . - not a C or irrelevant position
+  
+  
+  // get XM tag if present
+  uint8_t *pxmtag = bam_aux_get(hit,"XM");
+  string xm_tag;
+  if(pxmtag != NULL){
+    int type = *pxmtag++;
+    if(type == 'Z')
+      xm_tag = string((char*)pxmtag);
+  }
+  
+  
+  // choose strand to get contexts for
+  seekMismType msmType;
+  if(is_second){ // parsing second mate in PE data, strand determined by first mate
+    msmType = is_mate_rev ? GtoA : CtoT;
+  } else{ // parsing first mate or SE data, strand determined by read itself
+    msmType = is_read_rev ? GtoA : CtoT;
+  }
+  
+  string queryseq, refseq, matchsymb,cntxstr;
+  for(int qpos = 0; qpos < qlen; ++qpos){
+    int rpos = _fromQueryToRefPos(hit -> core.pos,qpos,hit -> core.n_cigar,cigar);
+    string rseq;
+    string qseq = nt16_table[bam1_seqi(hitseq, qpos)];
+    
+    if(rpos != -1){
+      rseq = pdata->refseq_info->getRefSubseq(rpos,1);
+      matchsymb += "|";
+    } else {
+      rseq = "-";
+      matchsymb += " ";
+    }
+    
+    queryseq += qseq;
+    refseq += rseq;
+    
+    // get context
+    short int mcntx = pdata->refseq_info->getContextForRefPos(rpos);
+    
+    switch (msmType) {
+    case CtoT:
+    {
+      if(mcntx == 0 || (qseq != "C" && qseq != "T")){
+      cntxstr += ".";
+    } else{
+      if(rseq != "C"){
+        Rcpp::Rcout<<"print_bs_alignment: WARNING: Expect C in reference sequence but got "<<rseq<<endl;
+        cntxstr += "X";
+      } else if(mcntx == 1){
+        if(qseq == "T")
+          cntxstr += "P";
+        else if(qseq == "C")
+          cntxstr += "p";
+      } else if(mcntx == 2){
+        if(qseq == "T")
+          cntxstr += "c";
+        else if(qseq == "C")
+          cntxstr += "C";
+      } else if(mcntx == 3){
+        if(qseq == "T")
+          cntxstr += "b";
+        else if(qseq == "C")
+          cntxstr += "B";
+      } else if(mcntx == 4){
+        if(qseq == "T")
+          cntxstr += "o";
+        else if(qseq == "C")
+          cntxstr += "O";
+      }
+      
+    }
+    break;
+    }
+    case GtoA:
+    {
+      if(mcntx == 0 || (qseq != "G" && qseq != "A")){
+      cntxstr += ".";
+    } else{
+      if(rseq != "G"){
+        Rcpp::Rcout<<"print_bs_alignment: WARNING: Expect G in reference sequence but got "<<rseq<<endl;
+        cntxstr += "X";
+      } else if(mcntx == -1){
+        if(qseq == "A")
+          cntxstr += "P";
+        else if(qseq == "G")
+          cntxstr += "p";
+      } else if(mcntx == -2){
+        if(qseq == "A")
+          cntxstr += "c";
+        else if(qseq == "G")
+          cntxstr += "C";
+      } else if(mcntx == -3){
+        if(qseq == "A")
+          cntxstr += "b";
+        else if(qseq == "G")
+          cntxstr += "B";
+      } else if(mcntx == -4){
+        if(qseq == "A")
+          cntxstr += "o";
+        else if(qseq == "G")
+          cntxstr += "O";
+      }
+      
+    }
+    
+    break;
+    }
+    }
+    
+    
+    
+  }
+  
+  // print reference sequence
+  Rcpp::Rcout<<"reference:  "<<refseq<<endl;
+  // print match symbols
+  Rcpp::Rcout<<"            "<<matchsymb<<endl;
+  // print query seq
+  Rcpp::Rcout<<"    query:  "<<queryseq<<endl;
+  // print nome context
+  Rcpp::Rcout<<"  context:  "<<cntxstr<<endl;
+  
+  // print XM tag if present
+  if(xm_tag != "")
+    Rcpp::Rcout<<"   XM tag:  "<<xm_tag<<endl;
+  // print end of alignment 
+  Rcpp::Rcout<<"======================================================================================="<<endl;
 }
 
diff --git a/src/fetch_data_from_bam.h b/src/fetch_data_from_bam.h
index f648e55..280a177 100644
--- a/src/fetch_data_from_bam.h
+++ b/src/fetch_data_from_bam.h
@@ -31,11 +31,13 @@ typedef struct { // for use in callback function. contains pointers to required
 	regionData *reg_data; // point to an object which stores data in region
 	int mapqMin; // minimum mapping quality score
 	int mapqMax; // maximum mapping quality score
-	int absIsizeMin; // minimum absolute tlen for paired reads
-	int absIsizeMax; // maximum absolute tlen for paired reads
-	int min_read_size; // minimum read length
-	int max_read_size; // maximum read length
+	// int absIsizeMin; // minimum absolute tlen for paired reads
+	// int absIsizeMax; // maximum absolute tlen for paired reads
+	// int min_read_size; // minimum read length
+	// int max_read_size; // maximum read length
 	string prefix; // prefix for qnames to avoid problems with identical qnames in different bams
+	//fragMapType whichContext; // required context
+	std::vector<fragMapType > *context_vec; // pointer to a vector of contexts to retrieve data for
 } obj_pnts;
 
 
@@ -53,6 +55,8 @@ static int collectRegionData_QsBism(const bam1_t *hit, void *data);
 // bam_fetch callback function for bam_fetch for data retrieval from BAM files produced by BISCUIT
 static int collectRegionData_Bisq(const bam1_t *hit, void *data);
 
+string getFragConfig(const bam1_t *hit);
+
 
 
 // checks validity of an alignment
diff --git a/src/fragData-class.cpp b/src/fragData-class.cpp
index 6cbf2ea..70feee9 100644
--- a/src/fragData-class.cpp
+++ b/src/fragData-class.cpp
@@ -4,43 +4,111 @@
 fragData::fragData(){
   fragStart = -1;
   fragEnd = -1;
+  fragConfig = "";
   prefix = "";
 };
 
 // constructors/destructors
 fragData::fragData(const string& bampref,
-         const int& fragstart,
-         const int& fragend,
-         const vector<int >& gch_pos,
-         const vector<bool >& gch_protect,
-         const vector<bool >& gch_onplusstrand,
-         const vector<int >& wcg_pos,
-         const vector<bool >& wcg_protect,
-         const vector<bool >& wcg_onplusstrand,
-         const vector<int >& bisc_pos,
-         const vector<bool >& bisc_protect,
-         const vector<bool >& bisc_onplusstrand,
-         const vector<int >& otherc_pos,
-         const vector<bool >& otherc_protect,
-         const vector<bool >& otherc_onplusstrand
+                   const int& fragstart,
+                   const int& fragend,
+                   const string& fragconfig,
+                   const vector<int >& gch_pos,
+                   const vector<bool >& gch_protect,
+                   const vector<bool >& gch_onplusstrand,
+                   const vector<int >& wcg_pos,
+                   const vector<bool >& wcg_protect,
+                   const vector<bool >& wcg_onplusstrand,
+                   const vector<int >& bisc_pos,
+                   const vector<bool >& bisc_protect,
+                   const vector<bool >& bisc_onplusstrand,
+                   const vector<int >& otherc_pos,
+                   const vector<bool >& otherc_protect,
+                   const vector<bool >& otherc_onplusstrand
 ){
   
   // set coordinates
-  fragStart = fragstart;
-  fragEnd = fragend;
-  prefix = bampref;
+  
   // add GCH data
-  addData(bampref,fragstart, fragend, gch_pos, gch_protect, gch_onplusstrand, 0,GCH);
+  addData(bampref,fragstart, fragend, fragconfig, gch_pos, gch_protect, gch_onplusstrand, 0,GCH);
   // add WCG data
-  addData(bampref,fragstart, fragend, wcg_pos, wcg_protect, wcg_onplusstrand, 0,WCG);
+  addData(bampref,fragstart, fragend, fragconfig, wcg_pos, wcg_protect, wcg_onplusstrand, 0,WCG);
   // add bisC data
-  addData(bampref,fragstart, fragend, bisc_pos, bisc_protect, bisc_onplusstrand, 0,bisC);
+  addData(bampref,fragstart, fragend, fragconfig, bisc_pos, bisc_protect, bisc_onplusstrand, 0,bisC);
   // add otherC data
-  addData(bampref,fragstart, fragend, otherc_pos, otherc_protect, otherc_onplusstrand, 0,otherC);
+  addData(bampref,fragstart, fragend, fragconfig, otherc_pos, otherc_protect, otherc_onplusstrand, 0,otherC);
 };
 
 fragData::~fragData(){};
 
+
+// add data into the map 
+bool fragData::addData(const string& bampref,
+                       const int& fragstart,
+                       const int& fragend,
+                       const string& fragconfig,
+                       const vector<int >& rposvec,
+                       const vector<bool >& protectvec,
+                       const vector<bool >& isonplusstrandvec,
+                       const bool& isSecond,// data for same positions on the second read are ignored
+                       fragMapType whichMap // must be enumerator GCH (protect), WCG (endogenous), bisC or otherC
+){ 
+  if(rposvec.size() != protectvec.size()|| rposvec.size() != isonplusstrandvec.size()){
+    Rcpp::Rcout<<"fragProtectData::addData: sizes of rposvec, protectvec and isonplusstrandvec do not match.";
+    return(0);
+  }
+  
+  // change fragment coordinates if neccessary
+  if(fragStart < 0 || fragstart < fragStart)
+    fragStart = fragstart;
+  if(fragEnd < 0 || fragend > fragEnd)
+    fragEnd = fragend;
+  
+  if(fragConfig.size() == 0)
+    fragConfig = fragconfig;
+  
+  prefix = bampref;
+  
+  // select which map to add the data to
+  map<int , bool > *mdat; // pointer to methylation/protection map
+  map<int , bool > *sdat; // pointer to strand map
+  switch(whichMap) {
+  case GCH:
+    mdat = &gchProtect;
+    sdat = &gchIsOnPlusStrand;
+    break;
+  case WCG:
+    mdat = &wcgMeth;
+    sdat = &wcgIsOnPlusStrand;
+    break;
+  case bisC:
+    mdat = &bisCMeth;
+    sdat = &bisCIsOnPlusStrand;
+    break;
+  case otherC:
+    mdat = &otherCMeth;
+    sdat = &otherCIsOnPlusStrand;
+    break;
+  }
+  
+  for(int i = 0; i < rposvec.size(); ++i){
+    // check whether position already in the map
+    if(mdat->find(rposvec[i]) == mdat->end()){ // if not found then insert data
+      mdat->insert(make_pair(rposvec[i], protectvec[i]));
+      sdat->insert(make_pair(rposvec[i], isonplusstrandvec[i]));
+    }
+    else if(!isSecond){ // if found and data is not for second mate then overwrite.
+      (*mdat)[rposvec[i]] = protectvec[i];
+      (*sdat)[rposvec[i]] = isonplusstrandvec[i];
+    }
+  }
+  return(1);
+};
+
+
+
+
+
 // get vector of positions in a map
 vector<int > fragData::getPositions(fragMapType whichMap){
   // select which map to get the data from
@@ -72,7 +140,7 @@ vector<int > fragData::getPositions(fragMapType whichMap){
 
 // get data at position
 bool fragData::getDataAt(int rpos,
-               fragMapType whichMap){
+                         fragMapType whichMap){
   
   // select which map to get the data from
   map<int , bool > *mdat; // pointer to methylation/protection map
@@ -100,7 +168,7 @@ bool fragData::getDataAt(int rpos,
 
 // get isOnPlus
 bool fragData::isDataOnPlusStrand(int rpos,
-                        fragMapType whichMap){
+                                  fragMapType whichMap){
   
   // select which map to get the data from
   map<int , bool > *sdat; // pointer to strand map
@@ -127,63 +195,6 @@ bool fragData::isDataOnPlusStrand(int rpos,
 };
 
 
-// add data into the map 
-bool fragData::addData(const string& bampref,
-             const int& fragstart,
-             const int& fragend,
-             const vector<int >& rposvec,
-             const vector<bool >& protectvec,
-             const vector<bool >& isonplusstrandvec,
-             const bool& isSecond,// data for same positions on the second read are ignored
-             fragMapType whichMap // must be enumerator GCH (protect), WCG (endogenous), bisC or otherC
-){ 
-  if(rposvec.size() != protectvec.size()|| rposvec.size() != isonplusstrandvec.size()){
-    Rcpp::Rcout<<"fragProtectData::addData: sizes of rposvec, protectvec and isonplusstrandvec do not match.";
-    return(0);
-  }
-  
-  // change fragment coordinates if neccessary
-  if(fragStart < 0 || fragstart < fragStart)
-    fragStart = fragstart;
-  if(fragEnd < 0 || fragend > fragEnd)
-    fragEnd = fragend;
-  prefix = bampref;
-  
-  // select which map to add the data to
-  map<int , bool > *mdat; // pointer to methylation/protection map
-  map<int , bool > *sdat; // pointer to strand map
-  switch(whichMap) {
-  case GCH:
-    mdat = &gchProtect;
-    sdat = &gchIsOnPlusStrand;
-    break;
-  case WCG:
-    mdat = &wcgMeth;
-    sdat = &wcgIsOnPlusStrand;
-    break;
-  case bisC:
-    mdat = &bisCMeth;
-    sdat = &bisCIsOnPlusStrand;
-    break;
-  case otherC:
-    mdat = &otherCMeth;
-    sdat = &otherCIsOnPlusStrand;
-    break;
-  }
-  
-  for(int i = 0; i < rposvec.size(); ++i){
-    // check whether position already in the map
-    if(mdat->find(rposvec[i]) == mdat->end()){ // if not found then insert data
-      mdat->insert(make_pair(rposvec[i], protectvec[i]));
-      sdat->insert(make_pair(rposvec[i], isonplusstrandvec[i]));
-    }
-    else if(!isSecond){ // if found and data is not for second mate then overwrite.
-      (*mdat)[rposvec[i]] = protectvec[i];
-      (*sdat)[rposvec[i]] = isonplusstrandvec[i];
-    }
-  }
-  return(1);
-};
 
 // convert data to strings
 vector<string > fragData::getStringData() {
@@ -261,7 +272,7 @@ void fragData::addCountsToProtectStatsTable(protectStatsTable& pStatsTable){
 
 // 2. function to clip data. returns 1 if fragment was clipped or 0 otherwise
 int fragData::clipProtectData(const int& clip_until_nbg, // clips from both ends until this number of 0 is met
-                    const double& max_protect_frac // keeps fragment if percentage of 1s in a fragment is above this number
+                              const double& max_protect_frac // keeps fragment if percentage of 1s in a fragment is above this number
 ){
   if(gchProtect.size() == 0)
     return(0);
@@ -318,7 +329,7 @@ int fragData::clipProtectData(const int& clip_until_nbg, // clips from both ends
 };
 
 
-double fragData::get_bisC_meth(int min_size){
+double fragData::get_bisC_meth(const int& min_size){
   if(bisCMeth.size() < min_size)
     return(0);
   
@@ -349,3 +360,291 @@ string fragData::getStringEncodingForUniqueness(){
 };
 
 
+string fragData::getFragConfig(){
+  return(fragConfig);
+};
+
+std::vector<int > fragData::getFragDataStartEndNpoints(fragMapType whichMap){
+  int fragDataStart = -1, fragDataEnd = -1, n_points = -1;
+  
+  if(whichMap != allC){
+    // select which map to work on
+    map<int , bool > *mdat; // pointer to methylation/protection map
+    
+    switch(whichMap) {
+    case GCH:
+      mdat = &gchProtect;
+      break;
+    case WCG:
+      mdat = &wcgMeth;
+      break;
+    case bisC:
+      mdat = &bisCMeth;
+      break;
+    case otherC:
+      mdat = &otherCMeth;
+      break;
+    }
+    
+    if(!mdat->empty()){
+      fragDataStart = mdat->begin()->first;
+      fragDataEnd = mdat->rbegin()->first;
+      n_points = mdat->size();
+    }
+    
+  } else {
+    // if allC is requested, get the minimum/maximum of positions and total number of points across all maps
+    if(!gchProtect.empty()){
+      fragDataStart = gchProtect.begin()->first;
+      fragDataEnd = gchProtect.rbegin()->first;
+      n_points = gchProtect.size();
+    }
+    
+    if(!wcgMeth.empty()){
+      if(wcgMeth.begin()->first < fragDataStart)
+        fragDataStart = wcgMeth.begin()->first;
+      if(wcgMeth.rbegin()->first > fragDataEnd)
+        fragDataEnd = wcgMeth.rbegin()->first;
+      n_points = n_points + wcgMeth.size();
+    }
+    
+    if(!bisCMeth.empty()){
+      if(bisCMeth.begin()->first < fragDataStart)
+        fragDataStart = bisCMeth.begin()->first;
+      if(bisCMeth.rbegin()->first > fragDataEnd)
+        fragDataEnd = bisCMeth.rbegin()->first;
+      n_points = n_points + bisCMeth.size();
+    }
+    
+    if(!otherCMeth.empty()){
+      if(otherCMeth.begin()->first < fragDataStart)
+        fragDataStart = otherCMeth.begin()->first;
+      if(otherCMeth.rbegin()->first > fragDataEnd)
+        fragDataEnd = otherCMeth.rbegin()->first;
+      n_points = n_points + otherCMeth.size();
+    }
+    
+  }
+  
+  std::vector<int > vec_out={fragDataStart,fragDataEnd,n_points};
+  return(vec_out);
+};
+
+
+
+// is the required map empty?
+bool fragData::isEmpty(fragMapType whichMap){
+  if(whichMap != allC){
+    // select which map to work on
+    map<int , bool > *mdat; // pointer to methylation/protection map
+    switch(whichMap) {
+    case GCH:
+      mdat = &gchProtect;
+      break;
+    case WCG:
+      mdat = &wcgMeth;
+      break;
+    case bisC:
+      mdat = &bisCMeth;
+      break;
+    case otherC:
+      mdat = &otherCMeth;
+      break;
+    }
+    
+    return(mdat->empty());
+  } else {
+    return(gchProtect.empty() && wcgMeth.empty() && bisCMeth.empty() && otherCMeth.empty());
+  }
+};
+
+
+// get the data wrapped int Rcpp::IntegerVector;
+Rcpp::List fragData::getFragDataVector(fragMapType whichMap){
+  
+  vector<int > dataPosNpoints = getFragDataStartEndNpoints(whichMap);
+  
+  int fragDataStart = dataPosNpoints[0];
+  int fragDataEnd = dataPosNpoints[1];
+  
+  Rcpp::IntegerVector fragDataVec(fragDataEnd - fragDataStart + 1);
+  //fragDataVec.fill(fragDataVec.begin(),fragDataVec.end(),NA_INTEGER);
+  fragDataVec.fill(NA_INTEGER);
+  if(whichMap != allC){
+    // select which map to work on
+    map<int , bool > *mdat; // pointer to methylation/protection map
+    switch(whichMap) {
+    case GCH:
+      mdat = &gchProtect;
+      break;
+    case WCG:
+      mdat = &wcgMeth;
+      break;
+    case bisC:
+      mdat = &bisCMeth;
+      break;
+    case otherC:
+      mdat = &otherCMeth;
+      break;
+    }
+    
+    for(map<int, bool>::iterator it = mdat->begin(); it != mdat->end(); ++it){
+      fragDataVec(it->first - fragDataStart) = it->second;
+    }
+    
+    
+  } else {
+    
+    for(int rpos = fragDataStart; rpos <= fragDataEnd; ++rpos){
+      // hierarchy GCH > WCG > bisC>otherC
+      if(gchProtect.find(rpos) != gchProtect.end()){
+        fragDataVec(rpos - fragDataStart) = 1-gchProtect[rpos];
+      } else if(wcgMeth.find(rpos) != wcgMeth.end()){
+        fragDataVec(rpos - fragDataStart) = wcgMeth[rpos];
+      } else if(bisCMeth.find(rpos) != bisCMeth.end()){
+        fragDataVec(rpos - fragDataStart) = bisCMeth[rpos];
+      } else if(otherCMeth.find(rpos) != otherCMeth.end()){
+        fragDataVec(rpos - fragDataStart) = otherCMeth[rpos];
+      }
+    }
+  }
+  
+  Rcpp::List data_out = Rcpp::List::create(Rcpp::Named("fragDataStart") = fragDataStart,
+                                           Rcpp::Named("fragDataEnd") = fragDataEnd,
+                                           Rcpp::Named("nDataPoints") = dataPosNpoints[2],
+                                                                                      Rcpp::Named("startData") = fragDataVec[0],
+                                                                                                                            Rcpp::Named("endData") = fragDataVec[fragDataVec.size() - 1],
+                                                                                                                                                                Rcpp::Named("fragDataVec") = fragDataVec);
+  
+  return(data_out);
+  
+};
+
+
+// get the Rle compressed data wrapped int Rcpp::IntegerVector;
+Rcpp::List fragData::getFragDataRle(fragMapType whichMap){
+  
+  vector<int > dataPosNpoints = getFragDataStartEndNpoints(whichMap);
+  
+  int fragDataStart = dataPosNpoints[0];
+  int fragDataEnd = dataPosNpoints[1];
+  
+  // Initialize vectors for values and lengths
+  Rcpp::IntegerVector values;
+  Rcpp::IntegerVector lengths;
+  
+  if(whichMap != allC){
+    // select which map to work on
+    map<int , bool > *mdat; // pointer to methylation/protection map
+    switch(whichMap) {
+    case GCH:
+      mdat = &gchProtect;
+      break;
+    case WCG:
+      mdat = &wcgMeth;
+      break;
+    case bisC:
+      mdat = &bisCMeth;
+      break;
+    case otherC:
+      mdat = &otherCMeth;
+      break;
+    }
+    
+    // iterator to traverse the map
+    map<int, bool>::iterator it = mdat->begin();
+    int current_value = it->second;  // Initial value
+    int current_pos = it->first - 1; // initial position. the -1 is made to deal with the first iteration in order to set the correct run length
+    int run_length = 0;              // Initial run length
+    
+    // Iterate over the map to create RLE data
+    for (; it != mdat->end(); ++it) {
+      if(it->first == current_pos + 1){
+        if (it->second == current_value) { 
+          // continue the run if the value is the same and position changed by 1
+          run_length++;
+          current_pos = it->first;
+        } else{
+          // store the current run
+          values.push_back(current_value);
+          lengths.push_back(run_length);
+          
+          // reset for the new run
+          current_value = it->second;
+          run_length = 1;
+        }
+      } else{ // positions were interupted by a run of NAs
+        // store the current run
+        values.push_back(current_value);
+        lengths.push_back(run_length);
+        // store run of NAs
+        values.push_back(NA_INTEGER);
+        lengths.push_back(it->first - current_pos - 1);
+        // reset for the new run
+        current_value = it->second;
+        current_pos = it->first;
+        run_length = 1;
+      }
+    }
+    // store the last run
+    values.push_back(current_value);
+    lengths.push_back(run_length);
+    
+  } else {
+    
+    int prev_value = -1;
+    int run_length = 0;
+    for(int rpos = fragDataStart; rpos <= fragDataEnd; ++rpos){
+      int current_value = -1;
+      // hierarchy GCH > WCG > bisC>otherC
+      if(gchProtect.find(rpos) != gchProtect.end()){
+        current_value = 1-gchProtect[rpos];
+      } else if(wcgMeth.find(rpos) != wcgMeth.end()){
+        current_value = wcgMeth[rpos];
+      } else if(bisCMeth.find(rpos) != bisCMeth.end()){
+        current_value = bisCMeth[rpos];
+      } else if(otherCMeth.find(rpos) != otherCMeth.end()){
+        current_value = otherCMeth[rpos];
+      }
+      
+      if(current_value != prev_value){
+        // store the current run
+        if(run_length > 0){
+          if(prev_value == -1)
+            values.push_back(NA_INTEGER);
+          else
+            values.push_back(prev_value);
+          lengths.push_back(run_length);
+        }
+        
+        // reset for the new run
+        prev_value = current_value;
+        run_length = 1;
+      } else{
+        // continute the run
+        //prev_value = current_value;
+        run_length++;
+      }
+    }
+    // store the last run
+    values.push_back(prev_value);
+    lengths.push_back(run_length);
+    
+    
+  }
+  
+  
+  // Load the Rle constructor function from the IRanges package
+  Rcpp::Function Rle("Rle", Rcpp::Environment::namespace_env("IRanges"));
+  SEXP rle_object = Rle(Rcpp::_["values"] = values, Rcpp::_["lengths"] = lengths);
+  
+  Rcpp::List data_out = Rcpp::List::create(Rcpp::Named("fragDataStart") = fragDataStart,
+                                           Rcpp::Named("fragDataEnd") = fragDataEnd,
+                                           Rcpp::Named("nDataPoints") = dataPosNpoints[2],
+                                           Rcpp::Named("startData") = values[0],
+                                           Rcpp::Named("endData") = values[values.size() - 1],
+                                           Rcpp::Named("rle_data") = rle_object);
+  return(data_out);
+};
+
+
diff --git a/src/fragData-class.h b/src/fragData-class.h
index eb7e839..23d45e4 100644
--- a/src/fragData-class.h
+++ b/src/fragData-class.h
@@ -28,12 +28,15 @@ class fragData{
 	int fragStart;
 	int fragEnd;
 	
+	// configuration of a fragment, i.e. R1+R2- etc.
+	string fragConfig;
 	// file prefix where the fragment came from
 	string prefix;
 	
 	// protection at GCH sites
 	map<int, bool > gchProtect; // map: 1-based position -> protection {0 - unprotected, 1 - protected}
-	map<int, bool > gchIsOnPlusStrand; // 1 - "+" strand, 0 - "-" strand
+	map<int, bool > gchIsOnPlusStrand; // 1 - "+" strand, 0 - "-" strand.
+	// The reason that the strand is encoded as an vector is that fragData is a container for a fragment consisting of both reads R1 and R2
 	
 	// methylation of endogenous CpG at WCG sites
 	map<int , bool > wcgMeth;
@@ -55,6 +58,7 @@ class fragData{
 	fragData(const string& bampref,
           const int& fragstart,
           const int& fragend,
+          const string& fragconfig,
           const vector<int >& gch_pos,
           const vector<bool >& gch_protect,
           const vector<bool >& gch_onplusstrand,
@@ -84,10 +88,12 @@ class fragData{
                          fragMapType whichMap);
 	
 	
+	
 	// add data into the map 
 	bool addData(const string& bampref,
               const int& fragstart,
               const int& fragend,
+              const string& fragconfig,
               const vector<int >& rposvec,
               const vector<bool >& protectvec,
               const vector<bool >& isonplusstrandvec,
@@ -98,6 +104,27 @@ class fragData{
 	// convert data to strings
 	vector<string > getStringData();
 	
+	string getFragConfig();
+	
+	// // get most left position of the first point with data
+	// int getFragDataStart(fragMapType whichMap);
+	// 
+	// // get most right position of the last point with data
+	// int getFragDataEnd(fragMapType whichMap);
+	
+	// get positions where the data start, end and number of informative positions
+	std::vector<int > getFragDataStartEndNpoints(fragMapType whichMap);
+	
+	
+	
+	
+	// get the data, npoints and left and right datapoints;
+	Rcpp::List getFragDataVector(fragMapType whichMap);
+	
+	
+	// get the Rle compressed data, npoints and left and right datapoints;
+	Rcpp::List getFragDataRle(fragMapType whichMap);
+	
 	
 	// print
 	void print();
@@ -117,8 +144,9 @@ class fragData{
 	);
 	
 	
-	double get_bisC_meth(int min_size);
+	double get_bisC_meth(const int& min_size);
 	
+	bool isEmpty(fragMapType whichMap);
 	
 	// get string representation of fragData
 	string getStringEncodingForUniqueness();
diff --git a/src/functions_export_rcpp.cpp b/src/functions_export_rcpp.cpp
index 40cd79e..8725535 100644
--- a/src/functions_export_rcpp.cpp
+++ b/src/functions_export_rcpp.cpp
@@ -2,6 +2,149 @@
 
 
 
+Rcpp::List fetch_molec_data_list_from_bams_cpp(const Rcpp::CharacterVector& whichContext,
+                                               const Rcpp::CharacterVector& infiles,
+                                               const Rcpp::CharacterVector& regionChr,
+                                               const Rcpp::IntegerVector& regionStart,
+                                               const Rcpp::IntegerVector& regionEnd,
+                                               const Rcpp::CharacterVector& seqstring,
+                                               const Rcpp::IntegerVector& seqStart,
+                                               const Rcpp::IntegerVector& seqEnd,
+                                               const Rcpp::LogicalVector& remove_nonunique,
+                                               const Rcpp::IntegerVector& clip_until_nbg,
+                                               const Rcpp::NumericVector& max_protect_frac,
+                                               const Rcpp::NumericVector& max_bisC_meth,
+                                               const Rcpp::IntegerVector& min_bisC_size,
+                                               const Rcpp::IntegerVector& mapqMin,
+                                               const Rcpp::IntegerVector& mapqMax,
+                                               const Rcpp::CharacterVector& alignerUsed,
+                                               const Rcpp::IntegerVector& min_frag_data_len,
+                                               const Rcpp::NumericVector& min_frag_data_dens,
+                                               const Rcpp::LogicalVector& data_as_rle){
+  
+  // convert input parameters
+  string whichContext_ = Rcpp::as<string>(whichContext);
+  fragMapType whichMap;
+  if(whichContext_ == "GCH")
+    whichMap = GCH;
+  else if(whichContext_ == "WCG")
+    whichMap = WCG;
+  else if(whichContext_ == "bisC")
+    whichMap = bisC;
+  else if(whichContext_ == "otherC")
+    whichMap = otherC;
+  else if(whichContext_ == "allC")
+    whichMap = allC;
+  else
+    Rcpp::stop("whichContext must be GCH, WCG, bisC, otherC or allC");
+  
+  string alignerUsed_ = Rcpp::as<string>(alignerUsed);
+  alignerType aligner;
+  if(alignerUsed_ == "QuasR")
+    aligner = QuasR;
+  else if(alignerUsed_ == "Bismark")
+    aligner = Bismark;
+  else if(alignerUsed_ == "BISCUIT")
+    aligner = BISCUIT;
+  else
+    Rcpp::stop("Only alignments generated by QuasR, Bismark or BISCUIT can be analyzed\n");
+  
+  
+  vector<string > infiles_ = Rcpp::as<vector<string > >(infiles);
+  string regionChr_ = Rcpp::as<string >(regionChr);
+  int regionStart_ = Rcpp::as<int >(regionStart);
+  int regionEnd_ = Rcpp::as<int >(regionEnd);
+  string seqstring_ = Rcpp::as<string >(seqstring);
+  int seqStart_ = Rcpp::as<int >(seqStart);
+  int seqEnd_ = Rcpp::as<int >(seqEnd);
+  
+  bool remove_nonunique_ = Rcpp::as<bool >(remove_nonunique);
+  int clip_until_nbg_ = Rcpp::as<int >(clip_until_nbg);
+  double max_protect_frac_ = Rcpp::as<double >(max_protect_frac);
+  double max_bisC_meth_ = Rcpp::as<double >(max_bisC_meth);
+  int min_bisC_size_ = Rcpp::as<int >(min_bisC_size);
+  int mapqMin_ = Rcpp::as<int >(mapqMin);
+  int mapqMax_ = Rcpp::as<int >(mapqMax);
+  
+  int min_frag_data_len_ = Rcpp::as<int >(min_frag_data_len);
+  double min_frag_data_dens_ = Rcpp::as<double >(min_frag_data_dens);
+  
+  bool data_as_rle_ = Rcpp::as<bool >(data_as_rle);
+  
+  // initialize container for reference sequence
+  refSeqInfo refseqInfo(seqstring_, seqStart_,seqEnd_);
+  
+  // initialize data structure for keeping object pointers
+  obj_pnts pnts;
+  // store pointer to reference sequence container
+  pnts.refseq_info = &refseqInfo;
+  // store min and amp MAPQ
+  pnts.mapqMin = mapqMin_;
+  pnts.mapqMax = mapqMax_;
+  
+  // store which contexts to parse
+  vector<fragMapType > cont_vec;
+  if(whichMap == bisC){
+    cont_vec = {bisC};
+  } else if(whichMap == allC){
+    cont_vec = {GCH, WCG, bisC, otherC};
+  } else {
+    if(max_bisC_meth_ < 1.0) // if filtering bisulfite failed reads needed we add bisC context
+      cont_vec = {whichMap, bisC};
+    else
+      cont_vec = {whichMap};
+  }
+  pnts.context_vec = &cont_vec;
+  // create container for fragments and counters
+  regionData regData(regionChr_,regionStart_,regionEnd_);
+  pnts.reg_data = &regData;
+  
+  int n_fetched = 0, n_nonUnique = 0, n_bisfailed = 0, n_analysed = 0, n_clipped = 0;
+  
+  // fetch data from all bam files
+  for(int i = 0; i < infiles_.size(); ++i){
+    pnts.prefix = "f" + to_string(i + 1)+":";
+    // fetch data from bam file
+    fetch_data_from_bam(infiles_[i],
+                        regionChr_,
+                        regionStart_,
+                        regionEnd_,
+                        aligner,
+                        &pnts);
+  }
+  
+  n_fetched = regData.size();
+
+  // post-processing of the data, i.e. filtering by uniqueness, bisulfite efficiency, clipping and removal of fragments with insufficient data
+  // function below returns the following stats
+  // {n_nonUnique,n_bisfailed,n_clipped,n_insuffFailed,n_analysed}
+  vector<int > filt_stats = regData.postProcessFragData(remove_nonunique_, 
+                                                        clip_until_nbg_, 
+                                                        max_protect_frac_, 
+                                                        max_bisC_meth_,
+                                                        min_bisC_size_,
+                                                        min_frag_data_len_,
+                                                        min_frag_data_dens_,
+                                                        whichMap);
+  
+  
+  // create List with fetched reads/fragments for R with the following information
+  // start (genomic position of the first data point. note, not the alignment start!)
+  // end last position of the data points
+  // R1strand strand where the R1 read aligned
+  // qname - name of the fragment
+  // GCH_fragData_list list which contains vectors with 0 (accessible), 1(protected) and NA, non-informative
+  Rcpp::List data_out;
+  if(data_as_rle_)
+    data_out = regData.getFragDataRle(whichMap);
+  else
+    data_out = regData.getFragDataList(whichMap);
+  return(data_out);
+}
+
+
+
+
 Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles,
                                            const Rcpp::CharacterVector& regionChr,
                                            const Rcpp::IntegerVector& regionStart,
@@ -17,11 +160,9 @@ Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles,
                                            const Rcpp::IntegerVector& min_bisC_size,
                                            const Rcpp::IntegerVector& mapqMin,
                                            const Rcpp::IntegerVector& mapqMax,
-                                           const Rcpp::IntegerVector& absIsizeMin,
-                                           const Rcpp::IntegerVector& absIsizeMax,
-                                           const Rcpp::IntegerVector& min_read_size,
-                                           const Rcpp::IntegerVector& max_read_size,
-                                           const Rcpp::CharacterVector& alignerUsed){
+                                           const Rcpp::CharacterVector& alignerUsed,
+                                           const Rcpp::IntegerVector& min_frag_data_len,
+                                           const Rcpp::NumericVector& min_frag_data_dens){
   
   string alignerUsed_ = Rcpp::as<string>(alignerUsed);
   alignerType aligner;
@@ -51,11 +192,9 @@ Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles,
   int min_bisC_size_ = Rcpp::as<int >(min_bisC_size);
   int mapqMin_ = Rcpp::as<int >(mapqMin);
   int mapqMax_ = Rcpp::as<int >(mapqMax);
-  int absIsizeMin_ = Rcpp::as<int >(absIsizeMin);
-  int absIsizeMax_ = Rcpp::as<int >(absIsizeMax);
-  int min_read_size_ = Rcpp::as<int >(min_read_size);
-  int max_read_size_ = Rcpp::as<int >(max_read_size);
   
+  int min_frag_data_len_ = Rcpp::as<int >(min_frag_data_len);
+  double min_frag_data_dens_ = Rcpp::as<double >(min_frag_data_dens);
   
   // initialize container for reference sequence
   refSeqInfo refseqInfo(seqstring_, seqStart_,seqEnd_);
@@ -67,18 +206,18 @@ Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles,
   // store min and max MAPQ
   pnts.mapqMin = mapqMin_;
   pnts.mapqMax = mapqMax_;
-  // store min and max tlen
-  pnts.absIsizeMin = absIsizeMin_;
-  pnts.absIsizeMax = absIsizeMax_;
-  // store min and max read lengths
-  pnts.min_read_size = min_read_size_;
-  pnts.max_read_size = max_read_size_;
+  // store which contexts to parse
+  vector<fragMapType > cont_vec;
+  if(max_bisC_meth_ < 1.0) // if filtering bisulfite failed reads needed we add bisC context
+    cont_vec = {GCH, bisC};
+  else
+    cont_vec = {GCH};
+  pnts.context_vec = &cont_vec;
   
   // initialize container for cooccurrence counts and counters
   coocCntTable cTable(max_spac_);
   int n_fetched = 0, n_nonUnique = 0, n_bisfailed = 0, n_analysed = 0, n_clipped = 0;
   
-  
   // for each bam file fetch data and add counts to cooccurrence table 
   for(int i = 0; i < infiles_.size(); ++i){
     // initialize container for fragments in region
@@ -94,26 +233,22 @@ Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles,
                         &pnts);
     n_fetched += regData.size();
     
-    // get qnames on non-unique fragments
-    set<string > nonUnfrags;
-    if(remove_nonunique_)
-      nonUnfrags = regData.getNonUniqueQnames();
-    n_nonUnique += nonUnfrags.size();
     
-    // get qnames with failed bisulfite conversion
-    set<string > failedBisConvfrags;
-    if(max_bisC_meth_ < 1.0)
-      failedBisConvfrags = regData.getFailedBisConvQnames(max_bisC_meth_,min_bisC_size_);
-    n_bisfailed += failedBisConvfrags.size();
-    
-    // erase fragments which nonunique and failed bisConv
-    std::set<string > remove_qnames(nonUnfrags);
-    remove_qnames.insert(failedBisConvfrags.begin(), failedBisConvfrags.end());
-    n_analysed += regData.eraseQnames(remove_qnames);
-    
-    // clip fragments
-    if(clip_until_nbg_ > 0)
-      n_clipped += regData.clipProtectData(clip_until_nbg_,max_protect_frac_);
+    // post-processing of the data, i.e. filtering by uniqueness, bisulfite efficiency, clipping and removal of fragments with insufficient data
+    // function below returns the following stats
+    // {n_nonUnique,n_bisfailed,n_clipped,n_insuffFailed,n_analysed}
+    vector<int > filt_stats = regData.postProcessFragData(remove_nonunique_,
+                                                          clip_until_nbg_, 
+                                                          max_protect_frac_, 
+                                                          max_bisC_meth_,
+                                                          min_bisC_size_,
+                                                          min_frag_data_len_,
+                                                          min_frag_data_dens_,
+                                                          GCH);
+    n_nonUnique += filt_stats[0];
+    n_bisfailed += filt_stats[1];
+    n_clipped += filt_stats[2];
+    n_analysed += filt_stats[4];
     
     // add counts to table
     regData.addCountsToCoocTable(cTable);
@@ -161,11 +296,9 @@ Rcpp::List fetch_data_matrix_from_bams_cpp(const Rcpp::CharacterVector& whichCon
                                            const Rcpp::IntegerVector& min_bisC_size,
                                            const Rcpp::IntegerVector& mapqMin,
                                            const Rcpp::IntegerVector& mapqMax,
-                                           const Rcpp::IntegerVector& absIsizeMin,
-                                           const Rcpp::IntegerVector& absIsizeMax,
-                                           const Rcpp::IntegerVector& min_read_size,
-                                           const Rcpp::IntegerVector& max_read_size,
-                                           const Rcpp::CharacterVector& alignerUsed){
+                                           const Rcpp::CharacterVector& alignerUsed,
+                                           const Rcpp::IntegerVector& min_frag_data_len,
+                                           const Rcpp::NumericVector& min_frag_data_dens){
   
   // convert input parameters
   string whichContext_ = Rcpp::as<string>(whichContext);
@@ -210,10 +343,9 @@ Rcpp::List fetch_data_matrix_from_bams_cpp(const Rcpp::CharacterVector& whichCon
   int min_bisC_size_ = Rcpp::as<int >(min_bisC_size);
   int mapqMin_ = Rcpp::as<int >(mapqMin);
   int mapqMax_ = Rcpp::as<int >(mapqMax);
-  int absIsizeMin_ = Rcpp::as<int >(absIsizeMin);
-  int absIsizeMax_ = Rcpp::as<int >(absIsizeMax);
-  int min_read_size_ = Rcpp::as<int >(min_read_size);
-  int max_read_size_ = Rcpp::as<int >(max_read_size);
+  
+  int min_frag_data_len_ = Rcpp::as<int >(min_frag_data_len);
+  double min_frag_data_dens_ = Rcpp::as<double >(min_frag_data_dens);
   // initialize container for reference sequence
   refSeqInfo refseqInfo(seqstring_, seqStart_,seqEnd_);
   
@@ -224,12 +356,21 @@ Rcpp::List fetch_data_matrix_from_bams_cpp(const Rcpp::CharacterVector& whichCon
   // store min and amp MAPQ
   pnts.mapqMin = mapqMin_;
   pnts.mapqMax = mapqMax_;
-  // store min and max tlen
-  pnts.absIsizeMin = absIsizeMin_;
-  pnts.absIsizeMax = absIsizeMax_;
-  // store min and max read lengths
-  pnts.min_read_size = min_read_size_;
-  pnts.max_read_size = max_read_size_;
+  
+  // store which contexts to parse
+  vector<fragMapType > cont_vec;
+  if(whichMap == bisC){
+    cont_vec = {bisC};
+  } else if(whichMap == allC){
+    cont_vec = {GCH, WCG, bisC, otherC};
+  } else {
+    if(max_bisC_meth_ < 1.0) // if filtering bisulfite failed reads needed we add bisC context
+      cont_vec = {whichMap, bisC};
+    else
+      cont_vec = {whichMap};
+  }
+  pnts.context_vec = &cont_vec;
+  
   // create container for fragments and counters
   regionData regData(regionChr_,regionStart_,regionEnd_);
   pnts.reg_data = &regData;
@@ -249,27 +390,21 @@ Rcpp::List fetch_data_matrix_from_bams_cpp(const Rcpp::CharacterVector& whichCon
   
   n_fetched = regData.size();
   
-  // get qnames on non-unique fragments
-  set<string > nonUnfrags;
-  if(remove_nonunique_)
-    nonUnfrags = regData.getNonUniqueQnames();
-  n_nonUnique = nonUnfrags.size();
-  
-  // get qnames with failed bisulfite conversion
-  set<string > failedBisConvfrags;
-  if(max_bisC_meth_ < 1.0)
-    failedBisConvfrags = regData.getFailedBisConvQnames(max_bisC_meth_,min_bisC_size_);
-  n_bisfailed = failedBisConvfrags.size();
-  
-  // erase fragments which nonunique and failed bisConv
-  std::set<string > remove_qnames(nonUnfrags);
-  remove_qnames.insert(failedBisConvfrags.begin(), failedBisConvfrags.end());
-  n_analysed = regData.eraseQnames(remove_qnames);
-  
-  // clip fragments
-  if(clip_until_nbg_ > 0)
-    n_clipped = regData.clipProtectData(clip_until_nbg_,max_protect_frac_);
-  
+  // post-processing of the data, i.e. filtering by uniqueness, bisulfite efficiency, clipping and removal of fragments with insufficient data
+  // function below returns the following stats
+  // {n_nonUnique,n_bisfailed,n_clipped,n_insuffFailed,n_analysed}
+  vector<int > filt_stats = regData.postProcessFragData(remove_nonunique_, 
+                                                        clip_until_nbg_, 
+                                                        max_protect_frac_, 
+                                                        max_bisC_meth_,
+                                                        min_bisC_size_,
+                                                        min_frag_data_len_,
+                                                        min_frag_data_dens_,
+                                                        whichMap);
+  n_nonUnique += filt_stats[0];
+  n_bisfailed += filt_stats[1];
+  n_clipped += filt_stats[2];
+  n_analysed += filt_stats[4];
   
   
   // create List for R
@@ -300,11 +435,9 @@ Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infile
                                              const Rcpp::IntegerVector& min_bisC_size,
                                              const Rcpp::IntegerVector& mapqMin,
                                              const Rcpp::IntegerVector& mapqMax,
-                                             const Rcpp::IntegerVector& absIsizeMin,
-                                             const Rcpp::IntegerVector& absIsizeMax,
-                                             const Rcpp::IntegerVector& min_read_size,
-                                             const Rcpp::IntegerVector& max_read_size,
-                                             const Rcpp::CharacterVector& alignerUsed){
+                                             const Rcpp::CharacterVector& alignerUsed,
+                                             const Rcpp::IntegerVector& min_frag_data_len,
+                                             const Rcpp::NumericVector& min_frag_data_dens){
   
   
   string alignerUsed_ = Rcpp::as<string>(alignerUsed);
@@ -336,10 +469,8 @@ Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infile
   int min_bisC_size_ = Rcpp::as<int >(min_bisC_size);
   int mapqMin_ = Rcpp::as<int >(mapqMin);
   int mapqMax_ = Rcpp::as<int >(mapqMax);
-  int absIsizeMin_ = Rcpp::as<int >(absIsizeMin);
-  int absIsizeMax_ = Rcpp::as<int >(absIsizeMax);
-  int min_read_size_ = Rcpp::as<int >(min_read_size);
-  int max_read_size_ = Rcpp::as<int >(max_read_size);
+  int min_frag_data_len_ = Rcpp::as<int >(min_frag_data_len);
+  double min_frag_data_dens_ = Rcpp::as<double >(min_frag_data_dens);
   
   // initialize container for reference sequence
   refSeqInfo refseqInfo(seqstring_, seqStart_,seqEnd_);
@@ -351,18 +482,20 @@ Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infile
   // store min and max MAPQ
   pnts.mapqMin = mapqMin_;
   pnts.mapqMax = mapqMax_;
-  // store min and max tlen
-  pnts.absIsizeMin = absIsizeMin_;
-  pnts.absIsizeMax = absIsizeMax_;
-  // store min and max read lengths
-  pnts.min_read_size = min_read_size_;
-  pnts.max_read_size = max_read_size_;
+  
+  // store which contexts to parse
+  vector<fragMapType > cont_vec;
+  if(max_bisC_meth_ < 1.0) // if filtering bisulfite failed reads needed we add bisC context
+    cont_vec = {GCH, bisC};
+  else
+    cont_vec = {GCH};
+  pnts.context_vec = &cont_vec;
+  
+  
   // initialize container for collecting protect statistics and counters
   protectStatsTable protStats;
   int n_fetched = 0, n_nonUnique = 0, n_bisfailed = 0, n_analysed = 0, n_clipped = 0;
-  
-  
-  // 	// for each bam file fetch data and add counts to protect stats table 
+  // for each bam file fetch data and add counts to protect stats table 
   for(int i = 0; i < infiles_.size(); ++i){
     // initialize container for fragments in region
     regionData regData(regionChr_,regionStart_,regionEnd_);
@@ -377,26 +510,21 @@ Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infile
                         &pnts);
     n_fetched += regData.size();
     
-    // get qnames on non-unique fragments
-    set<string > nonUnfrags;
-    if(remove_nonunique_)
-      nonUnfrags = regData.getNonUniqueQnames();
-    n_nonUnique += nonUnfrags.size();
-    
-    // get qnames with failed bisulfite conversion
-    set<string > failedBisConvfrags;
-    if(max_bisC_meth_ < 1.0)
-      failedBisConvfrags = regData.getFailedBisConvQnames(max_bisC_meth_,min_bisC_size_);
-    n_bisfailed += failedBisConvfrags.size();
-    
-    // erase fragments which nonunique and failed bisConv
-    std::set<string > remove_qnames(nonUnfrags);
-    remove_qnames.insert(failedBisConvfrags.begin(), failedBisConvfrags.end());
-    n_analysed += regData.eraseQnames(remove_qnames);
-    
-    // clip fragments
-    if(clip_until_nbg_ > 0)
-      n_clipped += regData.clipProtectData(clip_until_nbg_,max_protect_frac_);
+    // post-processing of the data, i.e. filtering by uniqueness, bisulfite efficiency, clipping and removal of fragments with insufficient data
+    // function below returns the following stats
+    // {n_nonUnique,n_bisfailed,n_clipped,n_insuffFailed,n_analysed}
+    vector<int > filt_stats = regData.postProcessFragData(remove_nonunique_, 
+                                                          clip_until_nbg_, 
+                                                          max_protect_frac_, 
+                                                          max_bisC_meth_,
+                                                          min_bisC_size_,
+                                                          min_frag_data_len_,
+                                                          min_frag_data_dens_,
+                                                          GCH);
+    n_nonUnique += filt_stats[0];
+    n_bisfailed += filt_stats[1];
+    n_clipped += filt_stats[2];
+    n_analysed += filt_stats[4];
     
     // add counts to protect stat table
     regData.addCountsToProtectStatsTable(protStats);
diff --git a/src/functions_export_rcpp.h b/src/functions_export_rcpp.h
index ff86799..0a197a9 100644
--- a/src/functions_export_rcpp.h
+++ b/src/functions_export_rcpp.h
@@ -25,7 +25,26 @@ using namespace std;
 
 bool _VERBOSE_ = 0;
 
-
+// [[Rcpp::export]]
+Rcpp::List fetch_molec_data_list_from_bams_cpp(const Rcpp::CharacterVector& whichContext,
+                                               const Rcpp::CharacterVector& infiles,
+                                               const Rcpp::CharacterVector& regionChr,
+                                               const Rcpp::IntegerVector& regionStart,
+                                               const Rcpp::IntegerVector& regionEnd,
+                                               const Rcpp::CharacterVector& seqstring,
+                                               const Rcpp::IntegerVector& seqStart,
+                                               const Rcpp::IntegerVector& seqEnd,
+                                               const Rcpp::LogicalVector& remove_nonunique,
+                                               const Rcpp::IntegerVector& clip_until_nbg,
+                                               const Rcpp::NumericVector& max_protect_frac,
+                                               const Rcpp::NumericVector& max_bisC_meth,
+                                               const Rcpp::IntegerVector& min_bisC_size,
+                                               const Rcpp::IntegerVector& mapqMin,
+                                               const Rcpp::IntegerVector& mapqMax,
+                                               const Rcpp::CharacterVector& alignerUsed,
+                                               const Rcpp::IntegerVector& min_frag_data_len,
+                                               const Rcpp::NumericVector& min_frag_data_dens,
+                                               const Rcpp::LogicalVector& data_as_rle);
 
 // [[Rcpp::export]]
 Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles,
@@ -43,11 +62,9 @@ Rcpp::List fetch_cooc_ctable_from_bams_cpp(const Rcpp::CharacterVector& infiles,
                                            const Rcpp::IntegerVector& min_bisC_size,
                                            const Rcpp::IntegerVector& mapqMin,
                                            const Rcpp::IntegerVector& mapqMax,
-                                           const Rcpp::IntegerVector& absIsizeMin,
-                                           const Rcpp::IntegerVector& absIsizeMax,
-                                           const Rcpp::IntegerVector& min_read_size,
-                                           const Rcpp::IntegerVector& max_read_size,
-                                           const Rcpp::CharacterVector& alignerUsed);
+                                           const Rcpp::CharacterVector& alignerUsed,
+                                           const Rcpp::IntegerVector& min_frag_data_len,
+                                           const Rcpp::NumericVector& min_frag_data_dens);
 
 
 // [[Rcpp::export]]
@@ -66,11 +83,9 @@ Rcpp::List fetch_data_matrix_from_bams_cpp(const Rcpp::CharacterVector& whichCon
                                            const Rcpp::IntegerVector& min_bisC_size,
                                            const Rcpp::IntegerVector& mapqMin,
                                            const Rcpp::IntegerVector& mapqMax,
-                                           const Rcpp::IntegerVector& absIsizeMin,
-                                           const Rcpp::IntegerVector& absIsizeMax,
-                                           const Rcpp::IntegerVector& min_read_size,
-                                           const Rcpp::IntegerVector& max_read_size,
-                                           const Rcpp::CharacterVector& alignerUsed);
+                                           const Rcpp::CharacterVector& alignerUsed,
+                                           const Rcpp::IntegerVector& min_frag_data_len,
+                                           const Rcpp::NumericVector& min_frag_data_dens);
 
 // [[Rcpp::export]]
 Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infiles,
@@ -87,10 +102,8 @@ Rcpp::List fetch_protect_stats_from_bams_cpp(const Rcpp::CharacterVector& infile
                                              const Rcpp::IntegerVector& min_bisC_size,
                                              const Rcpp::IntegerVector& mapqMin,
                                              const Rcpp::IntegerVector& mapqMax,
-                                             const Rcpp::IntegerVector& absIsizeMin,
-                                             const Rcpp::IntegerVector& absIsizeMax,
-                                             const Rcpp::IntegerVector& min_read_size,
-                                             const Rcpp::IntegerVector& max_read_size,
-                                             const Rcpp::CharacterVector& alignerUsed);
+                                             const Rcpp::CharacterVector& alignerUsed,
+                                             const Rcpp::IntegerVector& min_frag_data_len,
+                                             const Rcpp::NumericVector& min_frag_data_dens);
 
 #endif
\ No newline at end of file
diff --git a/src/regionData-class.cpp b/src/regionData-class.cpp
index a282bae..6e97ba3 100644
--- a/src/regionData-class.cpp
+++ b/src/regionData-class.cpp
@@ -14,6 +14,7 @@ bool regionData::addFragContextData(const string& bampref,
                         const string& qname,
                         const int& fragstart,
                         const int& fragend,
+                        const string& fragconfig,
                         const vector<int >& rposvec,
                         const vector<bool >& protectvec,
                         const vector<bool >& isonplusstrandvec,
@@ -29,6 +30,7 @@ bool regionData::addFragContextData(const string& bampref,
     frg.addData(bampref,
                 fragstart,
                 fragend,
+                fragconfig,
                 rposvec,
                 protectvec,
                 isonplusstrandvec,
@@ -41,6 +43,7 @@ bool regionData::addFragContextData(const string& bampref,
     fDataMap[qname].addData(bampref,
                             fragstart,
                             fragend,
+                            fragconfig,
                             rposvec,
                             protectvec,
                             isonplusstrandvec,
@@ -113,7 +116,7 @@ set<string > regionData::getNonUniqueQnames(){
 };
 
 // get set of qnames with failed bisulfite conversion
-set<string > regionData::getFailedBisConvQnames(double max_bisC_meth,int min_bisC_size){
+set<string > regionData::getFailedBisConvQnames(const double& max_bisC_meth,const int& min_bisC_size){
   set<string > failed_qnames;
   
   for(unordered_map<string, fragData >::iterator it = fDataMap.begin(); it != fDataMap.end();++it){
@@ -132,6 +135,99 @@ int regionData::eraseQnames(const set<string >& qnames){
   return(fDataMap.size());
 };
 
+
+int regionData::eraseEmptyFrags(fragMapType whichMap){
+  for(unordered_map<string, fragData >::iterator it = fDataMap.begin(); it != fDataMap.end();){
+    if((it->second).isEmpty(whichMap)){
+      it = fDataMap.erase(it);
+    } else{
+      ++it;
+    }
+  }
+  return(fDataMap.size());
+};
+
+set<string > regionData::getInsuffDataQnames(const int& min_frag_data_len,
+                                             const double& min_frag_data_dens,
+                                             const fragMapType& whichMap){
+  set<string > failed_qnames;
+  
+  for(unordered_map<string, fragData >::iterator it = fDataMap.begin(); it != fDataMap.end();++it){
+    std::vector<int > dataPosNpoints = (it->second).getFragDataStartEndNpoints(whichMap);
+    
+    int fragDataStart = dataPosNpoints[0];
+    int fragDataEnd = dataPosNpoints[1];
+    int npoints = dataPosNpoints[2];
+    int fragDataLen = fragDataEnd - fragDataStart + 1;
+    
+    if(fragDataLen == 0 || npoints == 0){
+      failed_qnames.insert(it->first);
+      continue;
+    }
+    
+    double fragDataDens = static_cast<double>(npoints) / fragDataLen;
+    
+    if(fragDataLen < min_frag_data_len || fragDataDens < min_frag_data_dens)
+      failed_qnames.insert(it->first);
+  }
+  return(failed_qnames);
+};
+
+
+vector<int > regionData::postProcessFragData(const bool& remove_nonunique, // remove non-unique?
+                                             const int& clip_until_nbg, // clipping partial footprints at the edges
+                                             const double& max_protect_frac, //
+                                             const double& max_bisC_meth,
+                                             const int& min_bisC_size,
+                                             const int& min_frag_data_len,
+                                             const double& min_frag_data_dens,
+                                             const fragMapType& whichMap){
+  ///// 1. filtering non-unique fragments /////
+  // get qnames on non-unique fragments
+  set<string > nonUnfrags;
+  if(remove_nonunique)
+    nonUnfrags = getNonUniqueQnames();
+  int n_nonUnique = nonUnfrags.size();
+  
+  
+  ///// 2. filtering fragments with failed bisulfite conversion, i.e. too high bisC methylation /////
+  // get qnames with failed bisulfite conversion
+  set<string > failedBisConvfrags;
+  if(max_bisC_meth < 1.0)
+    failedBisConvfrags = getFailedBisConvQnames(max_bisC_meth,min_bisC_size);
+  int n_bisfailed = failedBisConvfrags.size();
+  
+  // erase fragments which are nonunique or failed bisConv
+  set<string > remove_qnames(nonUnfrags);
+  remove_qnames.insert(failedBisConvfrags.begin(), failedBisConvfrags.end());
+  eraseQnames(remove_qnames);
+  // empty the set with qnames
+  remove_qnames.erase(remove_qnames.begin(),remove_qnames.end());
+  
+  ///// 3. clip fragments /////
+  int n_clipped = 0;
+  if(clip_until_nbg > 0){
+    n_clipped = clipProtectData(clip_until_nbg,max_protect_frac);
+    // // remove fragments which were erased by clipping
+    // eraseEmptyFrags(fragMapType whichMap);
+  }
+  
+  //// 4. filtering fragments by data length, i.e. genomic distance from most left and most right informative (containing data) positions
+  set<string > insuffDatafrags = getInsuffDataQnames(min_frag_data_len,
+                                                     min_frag_data_dens,
+                                                     whichMap);
+  int n_insuffFailed = insuffDatafrags.size();
+  
+  // erase fragments which contain insufficient data
+  int n_analysed = eraseQnames(insuffDatafrags);
+  
+  std::vector<int > vec_out={n_nonUnique,n_bisfailed,n_clipped,n_insuffFailed,n_analysed};
+  return(vec_out);
+};
+
+
+
+
 int regionData::size(){
   return(fDataMap.size());
 };
@@ -186,3 +282,115 @@ Rcpp::IntegerMatrix regionData::getDataMatrix(fragMapType whichMap){
   
   return(matr);
 };
+
+
+Rcpp::List regionData::getFragDataList(fragMapType whichMap){
+  // start (genomic position of the first data point. note, not the alignment start!)
+  // end last position of the data points
+  // fragConfigs configurations of fragments
+  // qname - name of the fragment
+  // fragData_list list which contains vectors with 0 (accessible), 1(protected) and NA, non-informative
+  
+  
+  Rcpp::IntegerVector fragStarts; // start positions are 1-based on reference
+  Rcpp::IntegerVector fragEnds; // start positions are 1-based on reference
+  
+  Rcpp::IntegerVector nDataPoints; // number of informative points
+  Rcpp::IntegerVector startDataPoints; // data points at start positions
+  Rcpp::IntegerVector endDataPoints; // data points at end positions
+  
+  Rcpp::CharacterVector fragConfigs; // configurations of configs
+  
+  Rcpp::CharacterVector fragQnames; // fragment names
+  
+  Rcpp::List fragData_list;
+  
+  
+  for(unordered_map<string, fragData >::iterator it=fDataMap.begin(); it != fDataMap.end(); ++it){
+    // get number of informative points in a fragment
+    Rcpp::List fragdat = (it->second).getFragDataVector(whichMap);
+    int npoints = Rcpp::as<int >(fragdat["nDataPoints"]);
+    
+    int fragDataStart = Rcpp::as<int >(fragdat["fragDataStart"]);
+    int fragDataEnd = Rcpp::as<int >(fragdat["fragDataEnd"]);
+    
+    int fragDataLen = fragDataEnd - fragDataStart + 1;
+    
+    fragQnames.push_back(it->first);
+    fragStarts.push_back(fragDataStart + 1); // +1 is to convert to 1-based positions
+    fragEnds.push_back(fragDataEnd + 1);
+    fragConfigs.push_back((it->second).getFragConfig());
+    nDataPoints.push_back(fragdat["nDataPoints"]);
+    startDataPoints.push_back(fragdat["startData"]);
+    endDataPoints.push_back(fragdat["endData"]);
+    fragData_list.push_back(fragdat["fragDataVec"]);
+  }
+  
+  Rcpp::List data_out = Rcpp::List::create(Rcpp::Named("start") = fragStarts,
+                                           Rcpp::Named("end") = fragEnds,
+                                           Rcpp::Named("fragConfigs") = fragConfigs,
+                                           Rcpp::Named("qnames") = fragQnames,
+                                           Rcpp::Named("nDataPoints") = nDataPoints,
+                                           Rcpp::Named("startDataPoint") = startDataPoints,
+                                           Rcpp::Named("endDataPoint") = endDataPoints,
+                                           Rcpp::Named("fragData_list") = fragData_list);
+  return(data_out);
+};
+
+
+Rcpp::List regionData::getFragDataRle(fragMapType whichMap){
+  // start (genomic position of the first data point. note, not the alignment start!)
+  // end last position of the data points
+  // fragConfigs configurations of fragments
+  // qname - name of the fragment
+  // fragData_list list which contains Rle compressed vectors with 0 (accessible), 1(protected) and NA, non-informative
+  
+  
+  Rcpp::IntegerVector fragStarts; // start positions are 1-based on reference
+  Rcpp::IntegerVector fragEnds; // start positions are 1-based on reference
+  
+  Rcpp::IntegerVector nDataPoints; // number of informative points
+  Rcpp::IntegerVector startDataPoints; // data points at start positions
+  Rcpp::IntegerVector endDataPoints; // data points at end positions
+  
+  Rcpp::CharacterVector fragConfigs; // configurations of configs
+  
+  Rcpp::CharacterVector fragQnames; // fragment names
+  
+  Rcpp::List fragData_list;
+  
+  
+  
+  for(unordered_map<string, fragData >::iterator it=fDataMap.begin(); it != fDataMap.end(); ++it){
+    
+    Rcpp::List fragdat = (it->second).getFragDataRle(whichMap);
+    int npoints = Rcpp::as<int >(fragdat["nDataPoints"]);
+    
+    int fragDataStart = Rcpp::as<int >(fragdat["fragDataStart"]);
+    int fragDataEnd = Rcpp::as<int >(fragdat["fragDataEnd"]);
+    
+    int fragDataLen = fragDataEnd - fragDataStart + 1;
+    
+    fragQnames.push_back(it->first);
+    fragStarts.push_back(fragDataStart + 1); // +1 is to convert to 1-based positions
+    fragEnds.push_back(fragDataEnd + 1);
+    fragConfigs.push_back((it->second).getFragConfig());
+    nDataPoints.push_back(fragdat["nDataPoints"]);
+    startDataPoints.push_back(fragdat["startData"]);
+    endDataPoints.push_back(fragdat["endData"]);
+    
+    fragData_list.push_back(fragdat["rle_data"]);
+  }
+  
+  Rcpp::List data_out = Rcpp::List::create(Rcpp::Named("start") = fragStarts,
+                                           Rcpp::Named("end") = fragEnds,
+                                           Rcpp::Named("fragConfigs") = fragConfigs,
+                                           Rcpp::Named("qnames") = fragQnames,
+                                           Rcpp::Named("nDataPoints") = nDataPoints,
+                                           Rcpp::Named("startDataPoint") = startDataPoints,
+                                           Rcpp::Named("endDataPoint") = endDataPoints,
+                                           Rcpp::Named("fragData_list") = fragData_list);
+  return(data_out);
+};
+
+
diff --git a/src/regionData-class.h b/src/regionData-class.h
index 4d1e944..b576cc3 100644
--- a/src/regionData-class.h
+++ b/src/regionData-class.h
@@ -20,19 +20,23 @@ class regionData{
 	int regStart;
 	int regEnd;
 	
-	unordered_map<string, fragData > fDataMap; // map: qname -> fragment
+	unordered_map<string, fragData > fDataMap; // map: qname (string) -> fragment (class fragData)
 	
 public:
+  
+  /////  constructors/destructors ///// 
 	regionData();
 	regionData(const string& regchr,
             const int& regstart,
             const int& regend);
 	~regionData();
 	
+	/////  functions for adding new data ///// 
 	bool addFragContextData(const string& bampref,
                          const string& qname,
                          const int& fragstart,
                          const int& fragend,
+                         const string& fragconfig,
                          const vector<int >& rposvec,
                          const vector<bool >& protectvec,
                          const vector<bool >& isonplusstrandvec,
@@ -42,13 +46,14 @@ class regionData{
 	
 	void print();
 	
-	
+	/////  functions for collecting statistics ///// 
 	void addCountsToCoocTable(coocCntTable& cTable);
 	
 	
 	void addCountsToProtectStatsTable(protectStatsTable& pStatsTable);
 	
 	
+	///// functions for post-processing data /////
 	// clip fragments to get rid of partial footprints
 	int clipProtectData(const int& clip_until_nbg, // clips from both ends until this number of 0 is met
                      const double& max_protect_frac); // keeps fragment if percentage of 1s in a fragment is above this number)
@@ -59,17 +64,44 @@ class regionData{
 	set<string > getNonUniqueQnames();
 	
 	// get set of qnames with failed bisulfite conversion
-	set<string > getFailedBisConvQnames(double max_bisC_meth,int min_bisC_size);
+	set<string > getFailedBisConvQnames(const double& max_bisC_meth,
+                                     const int& min_bisC_size);
 	
 	// remove fragments from the map. returns new size of the map
 	int eraseQnames(const set<string >& qnames);
 	
+	// remove fragments which contain NO data. used after clipping
+	int eraseEmptyFrags(fragMapType whichMap);
+	
+	// get set of qnames whose data length or data density is too short
+	set<string > getInsuffDataQnames(const int& min_frag_data_len,
+                                  const double& min_frag_data_dens,
+                                  const fragMapType& whichMap);
+	
+	
+	vector<int > postProcessFragData(const bool& remove_nonunique,
+                                  const int& clip_until_nbg,
+                                  const double& max_protect_frac,
+                                  const double& max_bisC_meth,
+                                  const int& min_bisC_size,
+                                  const int& min_frag_data_len,
+                                  const double& min_frag_data_dens,
+                                  const fragMapType& whichMap);
+	
+	
+	
 	int size();
 	
 	
 	// convert data in region into a Matrix and return
 	Rcpp::IntegerMatrix getDataMatrix(fragMapType whichMap);
 	
+	// get List with fetched reads/fragments for R 
+	Rcpp::List getFragDataList(fragMapType whichMap);
+	
+	// get List with fetched reads/fragments for R with Rle compressed data vectors
+	Rcpp::List getFragDataRle(fragMapType whichMap);
+	
 };
 
 
diff --git a/tests/testthat/Bismark_expected_methylation_data_read_list.rds b/tests/testthat/Bismark_expected_methylation_data_read_list.rds
new file mode 100644
index 0000000..d68744e
Binary files /dev/null and b/tests/testthat/Bismark_expected_methylation_data_read_list.rds differ
diff --git a/tests/testthat/QuasR_expected_methylation_reads_list.rds b/tests/testthat/QuasR_expected_methylation_reads_list.rds
new file mode 100644
index 0000000..de4fb43
Binary files /dev/null and b/tests/testthat/QuasR_expected_methylation_reads_list.rds differ
diff --git a/tests/testthat/test-get_ctable_from_bams.R b/tests/testthat/test-get_ctable_from_bams.R
index e7053ce..9c2936a 100644
--- a/tests/testthat/test-get_ctable_from_bams.R
+++ b/tests/testthat/test-get_ctable_from_bams.R
@@ -1,6 +1,6 @@
 test_that("get_ctable_from_bams works for QuasR bam file", {
   #library(GenomicRanges)
-  
+  #browser()
   
   quasR_coocc_matrix <- get_ctable_from_bams(bamfiles = "QuasR_test.bam",
                                              samplenames = "quasr_pe",
@@ -9,6 +9,8 @@ test_that("get_ctable_from_bams works for QuasR bam file", {
                                                                               IRanges::IRanges(start = 200,end = 500)),
                                              genome = "random_genome_700bp.fa",
                                              alignerUsed = "QuasR",
+                                             min_frag_data_len = 0L,
+                                             min_frag_data_dens = 0.0,
                                              max_spacing = 300,
                                              remove_nonunique = F,
                                              clip_until_nbg = 0L,
@@ -25,16 +27,18 @@ test_that("get_ctable_from_bams works for Bismark bam file", {
   #library(GenomicRanges)
   
   bismark_coocc_matrix <- get_ctable_from_bams(bamfiles = "Bismark_test.bam",
-                                                            samplenames = "bismark_pe",
-                                                            regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
-                                                                                             strand = "+",
-                                                                                             IRanges::IRanges(start = 200,end = 500)),
-                                                            genome = "random_genome_700bp.fa",
-                                                            alignerUsed = "Bismark",
-                                                            max_spacing = 300,
-                                                            remove_nonunique = F,
-                                                            clip_until_nbg = 0L,
-                                                            max_bisC_meth = 1)
+                                               samplenames = "bismark_pe",
+                                               regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
+                                                                                strand = "+",
+                                                                                IRanges::IRanges(start = 200,end = 500)),
+                                               genome = "random_genome_700bp.fa",
+                                               alignerUsed = "Bismark",
+                                               min_frag_data_len = 0L,
+                                               min_frag_data_dens = 0.0,
+                                               max_spacing = 300,
+                                               remove_nonunique = F,
+                                               clip_until_nbg = 0L,
+                                               max_bisC_meth = 1)
   
   expect_coocc_mat <- readRDS("QuasR_expected_coocc_mat.rds") 
   
diff --git a/tests/testthat/test-get_data_matrix_from_bams.R b/tests/testthat/test-get_data_matrix_from_bams.R
index 68956d4..3711099 100644
--- a/tests/testthat/test-get_data_matrix_from_bams.R
+++ b/tests/testthat/test-get_data_matrix_from_bams.R
@@ -1,6 +1,6 @@
 test_that("get_data_matrix_from_bams works for QuasR bam file", {
   #library(GenomicRanges)
-  
+  #browser()
 	quasR_methmat <- get_data_matrix_from_bams(bamfiles = "QuasR_test.bam",
 																						 samplenames = "quasr_pe",
 																						 regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
@@ -24,7 +24,7 @@ test_that("get_data_matrix_from_bams works for QuasR bam file", {
 	expect_true(all(quasR_methmat$allC_DataMatrix[[1]] == quasr_expected_methmat))
 	
 	
-	#### check if filtering by tlen works ####
+	# #### check if filtering by data vector length works ####
 	quasR_methmat <- get_data_matrix_from_bams(bamfiles = "QuasR_test.bam",
 	                                           samplenames = "quasr_pe",
 	                                           regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
@@ -35,9 +35,9 @@ test_that("get_data_matrix_from_bams works for QuasR bam file", {
 	                                           remove_nonunique = F,
 	                                           clip_until_nbg = 0L,
 	                                           max_bisC_meth = 1,
-	                                           absIsizeMin = 500)
+	                                           min_frag_data_len = 500L)
 	
-	## test whether 0 reads are fetched
+	# ## test whether 0 reads are fetched
 	expect_true(nrow(quasR_methmat$allC_DataMatrix[[1]]) == 0)
 	quasR_methmat <- get_data_matrix_from_bams(bamfiles = "QuasR_test.bam",
 	                                           samplenames = "quasr_pe",
@@ -49,43 +49,19 @@ test_that("get_data_matrix_from_bams works for QuasR bam file", {
 	                                           remove_nonunique = F,
 	                                           clip_until_nbg = 0L,
 	                                           max_bisC_meth = 1,
-	                                           absIsizeMax = 50)
+	                                           min_frag_data_dens  = 1.0)
 	expect_true(nrow(quasR_methmat$allC_DataMatrix[[1]]) == 0)
 	
-	#### check if filtering by read length works
-	quasR_methmat <- get_data_matrix_from_bams(bamfiles = "QuasR_test.bam",
-	                                           samplenames = "quasr_pe",
-	                                           regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
-	                                                                            strand = "+",
-	                                                                            IRanges::IRanges(start = 200,end = 500)),
-	                                           genome = "random_genome_700bp.fa",
-	                                           whichContext ="allC",
-	                                           remove_nonunique = F,
-	                                           clip_until_nbg = 0L,
-	                                           max_bisC_meth = 1,
-	                                           min_read_size = 400)
-	expect_true(nrow(quasR_methmat$allC_DataMatrix[[1]]) == 0)
-	quasR_methmat <- get_data_matrix_from_bams(bamfiles = "QuasR_test.bam",
-	                                           samplenames = "quasr_pe",
-	                                           regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
-	                                                                            strand = "+",
-	                                                                            IRanges::IRanges(start = 200,end = 500)),
-	                                           genome = "random_genome_700bp.fa",
-	                                           whichContext ="allC",
-	                                           remove_nonunique = F,
-	                                           clip_until_nbg = 0L,
-	                                           max_bisC_meth = 1,
-	                                           max_read_size = 50)
-	expect_true(nrow(quasR_methmat$allC_DataMatrix[[1]]) == 0)
+	
 })
 
 
 test_that("get_data_matrix_from_bams works for Bismark bam file with indels and soft clipping", {
-  library(GenomicRanges)
-	## coordinates of the test fragment
-	frag_coord_OT_gr <- GRanges(seqnames = "random_genome_700bp",
-															strand = "+",
-															IRanges(start = 200,end = 500))
+#   library(GenomicRanges)
+# 	## coordinates of the test fragment
+# 	frag_coord_OT_gr <- GRanges(seqnames = "random_genome_700bp",
+# 															strand = "+",
+# 															IRanges(start = 200,end = 500))
 	
 	bismark_methmat <- get_data_matrix_from_bams(bamfiles = "Bismark_test.bam",
 																						 samplenames = "bismark_pe",
@@ -110,7 +86,7 @@ test_that("get_data_matrix_from_bams works for Bismark bam file with indels and
 	## test that identical
 	expect_true(all(bismark_methmat$allC_DataMatrix[[1]] == bismark_expected_methmat))
 	
-	#### test if filtering by tlen works ####
+	# #### test if filtering by data vector length works ####
 	bismark_methmat <- get_data_matrix_from_bams(bamfiles = "Bismark_test.bam",
 	                                             samplenames = "bismark_pe",
 	                                             regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
@@ -120,7 +96,7 @@ test_that("get_data_matrix_from_bams works for Bismark bam file with indels and
 	                                             whichContext ="allC",
 	                                             remove_nonunique = F,
 	                                             clip_until_nbg = 0L,
-	                                             absIsizeMin = 500)
+	                                             min_frag_data_len = 500L)
 	expect_true(nrow(bismark_methmat$allC_DataMatrix[[1]]) == 0)
 	bismark_methmat <- get_data_matrix_from_bams(bamfiles = "Bismark_test.bam",
 	                                             samplenames = "bismark_pe",
@@ -131,32 +107,10 @@ test_that("get_data_matrix_from_bams works for Bismark bam file with indels and
 	                                             whichContext ="allC",
 	                                             remove_nonunique = F,
 	                                             clip_until_nbg = 0L,
-	                                             absIsizeMax = 50)
+	                                             min_frag_data_dens = 1.0
+	                                             )
 	expect_true(nrow(bismark_methmat$allC_DataMatrix[[1]]) == 0)
 	
-	#### test if filtering by read length works ####
-	bismark_methmat <- get_data_matrix_from_bams(bamfiles = "Bismark_test.bam",
-	                                             samplenames = "bismark_pe",
-	                                             regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
-	                                                                              strand = "+",
-	                                                                              IRanges::IRanges(start = 200,end = 500)),
-	                                             genome = "random_genome_700bp.fa",
-	                                             whichContext ="allC",
-	                                             remove_nonunique = F,
-	                                             clip_until_nbg = 0L,
-	                                             min_read_size = 500)
-	expect_true(nrow(bismark_methmat$allC_DataMatrix[[1]]) == 0)
-	bismark_methmat <- get_data_matrix_from_bams(bamfiles = "Bismark_test.bam",
-	                                             samplenames = "bismark_pe",
-	                                             regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
-	                                                                              strand = "+",
-	                                                                              IRanges::IRanges(start = 200,end = 500)),
-	                                             genome = "random_genome_700bp.fa",
-	                                             whichContext ="allC",
-	                                             remove_nonunique = F,
-	                                             clip_until_nbg = 0L,
-	                                             max_read_size = 50)
-	expect_true(nrow(bismark_methmat$allC_DataMatrix[[1]]) == 0)
 	
 	
 })
\ No newline at end of file
diff --git a/tests/testthat/test-get_molecule_data_list_from_bams.R b/tests/testthat/test-get_molecule_data_list_from_bams.R
new file mode 100644
index 0000000..1644b25
--- /dev/null
+++ b/tests/testthat/test-get_molecule_data_list_from_bams.R
@@ -0,0 +1,123 @@
+test_that("get_molecule_data_list_from_bams works for QuasR bam file", {
+  quasR_methlist <- get_molecule_data_list_from_bams(bamfiles = "QuasR_test.bam",
+                                                     samplenames = "quasr_pe",
+                                                     regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
+                                                                                      strand = "+",
+                                                                                      IRanges::IRanges(start = 200,end = 500)),
+                                                     genome = "random_genome_700bp.fa",
+                                                     whichContext ="allC",
+                                                     remove_nonunique = F,
+                                                     clip_until_nbg = 0L,
+                                                     max_bisC_meth = 1,
+                                                     min_frag_data_len = 50L,
+                                                     min_frag_data_dens = 0.05)
+  
+  roword <- paste0("f1:test_case_",c("OT","CTOT","OB","CTOB"))
+  quasR_methlist <- quasR_methlist[match(roword,quasR_methlist$qname),]
+  
+  rdat <- lapply(quasR_methlist$allC_fragData_list,
+                 function(x){
+                   x[is.na(x)] <- 2
+                   return(x)
+                 })
+  
+  
+  ## load expected data
+  dat_reads <- readRDS("QuasR_expected_methylation_reads_list.rds")
+  ## test if all data points equal expected values
+  expect_true(all(do.call(c,lapply(1:length(rdat),function(i){
+    rdat[[i]] == dat_reads[[i]]
+  }))))
+  ## test if all starts equal expected values
+  expect_true(all(quasR_methlist$start == c(207,207,205,205)))
+  expect_true(all(quasR_methlist$end == c(498,498,497,497)))
+  
+  
+  ## test if returned Rle encoded data is as expected
+  data_vec <- get_molecule_data_list_from_bams(bamfiles = "QuasR_test.bam",
+                                               samplenames = "quasr_pe",
+                                               regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
+                                                                                strand = "+",
+                                                                                IRanges::IRanges(start = 200,end = 500)),
+                                               genome = "random_genome_700bp.fa",
+                                               whichContext ="GCH",
+                                               remove_nonunique = F,
+                                               clip_until_nbg = 0L,
+                                               max_bisC_meth = 1,
+                                               min_frag_data_len = 50L,
+                                               min_frag_data_dens = 0.05,
+                                               data_as_Rle = FALSE)
+  data_rle <- get_molecule_data_list_from_bams(bamfiles = "QuasR_test.bam",
+                                                     samplenames = "quasr_pe",
+                                                     regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
+                                                                                      strand = "+",
+                                                                                      IRanges::IRanges(start = 200,end = 500)),
+                                                     genome = "random_genome_700bp.fa",
+                                                     whichContext ="GCH",
+                                                     remove_nonunique = F,
+                                                     clip_until_nbg = 0L,
+                                                     max_bisC_meth = 1,
+                                                     min_frag_data_len = 50L,
+                                                     min_frag_data_dens = 0.05,
+                                                     data_as_Rle = TRUE)
+  
+  data_vec <- data_vec[match(data_rle$qname,data_vec$qname),]
+  
+  
+  
+  rdat_vec <- lapply(data_vec$GCH_fragData_list,
+                     function(x){
+                       x[is.na(x)] <- 2
+                       return(x)
+                     })
+  rdat_rle <- lapply(data_rle$GCH_fragData_list,
+                     function(x){
+                       x <- as.vector(x)
+                       x[is.na(x)] <- 2
+                       return(x)
+                     })
+  expect_true(all(do.call(c,lapply(1:length(rdat_vec),function(i){
+    rdat_vec[[i]] == rdat_rle[[i]]
+  }))))
+  
+})
+
+
+test_that("get_molecule_data_list_from_bams works for Bismark bam file with indels and soft clipping", {
+  
+  bismark_methlist <- get_molecule_data_list_from_bams(bamfiles = "Bismark_test.bam",
+                                                       samplenames = "bismark_pe",
+                                                       regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
+                                                                                        strand = "+",
+                                                                                        IRanges::IRanges(start = 200,end = 500)),
+                                                       genome = "random_genome_700bp.fa",
+                                                       whichContext ="allC",
+                                                       remove_nonunique = F,
+                                                       clip_until_nbg = 0L,
+                                                       max_bisC_meth = 1)
+  ## substitue NA to 2
+  roword <- paste0("f1:test_case_",c("OT","OT_I_D","CTOT","CTOT_I_D","OB","OB_I_D", "CTOB","CTOB_I_D"))
+  bismark_methlist <- bismark_methlist[match(roword,bismark_methlist$qname),]
+  rdat <- lapply(bismark_methlist$allC_fragData_list,
+                 function(x){
+                   x[is.na(x)] <- 2
+                   return(x)
+                 })
+  
+  ## load expected data
+  dat_reads <- readRDS("Bismark_expected_methylation_data_read_list.rds")
+
+  ## test if all data points equal expected values
+  expect_true(all(do.call(c,lapply(1:length(rdat),function(i){
+    rdat[[i]] == dat_reads[[i]]
+  }))))
+  
+  ## test if all starts and ends equal expected values
+  expect_true(all(bismark_methlist$start == c(207,207,207,207,205,205,205,205)))
+  expect_true(all(bismark_methlist$end == c(498,498,498,498,497,497,497,497)))
+  
+  
+  
+})
+
+
diff --git a/tests/testthat/test-get_protect_stats_from_bams.R b/tests/testthat/test-get_protect_stats_from_bams.R
index 63250b1..f9ba4ae 100644
--- a/tests/testthat/test-get_protect_stats_from_bams.R
+++ b/tests/testthat/test-get_protect_stats_from_bams.R
@@ -1,5 +1,6 @@
 test_that("get_protect_stats_from_bams works for QuasR bam file", {
   #library(GenomicRanges)
+  
   quasr_protect_stats <- get_protect_stats_from_bams(bamfiles = "QuasR_test.bam",
                                                      samplenames = "quasr_pe",
                                                      regions = GenomicRanges::GRanges(seqnames = "random_genome_700bp",
@@ -7,6 +8,8 @@ test_that("get_protect_stats_from_bams works for QuasR bam file", {
                                                                                       IRanges::IRanges(start = 200,end = 500)),
                                                      genome = "random_genome_700bp.fa",
                                                      alignerUsed = "QuasR",
+                                                     min_frag_data_len = 0L,
+                                                     min_frag_data_dens = 0.0,
                                                      remove_nonunique = F,
                                                      clip_until_nbg = 0L,
                                                      max_bisC_meth = 1)
@@ -29,6 +32,8 @@ test_that("get_protect_stats_from_bams works for Bismark bam file", {
                                                                                         IRanges::IRanges(start = 200,end = 500)),
                                                        genome = "random_genome_700bp.fa",
                                                        alignerUsed = "Bismark",
+                                                       min_frag_data_len = 0L,
+                                                       min_frag_data_dens = 0.0,
                                                        remove_nonunique = F,
                                                        clip_until_nbg = 0L,
                                                        max_bisC_meth = 1)
diff --git a/tests/testthat/test_esc_SampleFile.rds b/tests/testthat/test_esc_SampleFile.rds
new file mode 100644
index 0000000..2ac70f8
Binary files /dev/null and b/tests/testthat/test_esc_SampleFile.rds differ
diff --git a/tests/testthat/test_regions_gr.rds b/tests/testthat/test_regions_gr.rds
new file mode 100644
index 0000000..fb5af8b
Binary files /dev/null and b/tests/testthat/test_regions_gr.rds differ