diff --git a/DESCRIPTION b/DESCRIPTION index b3d6fcf4..75eaac66 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -60,4 +60,6 @@ Config/testthat/edition: 3 Language: en-US LinkingTo: Rcpp -URL: https://ncborcherding.github.io/scRepertoire/ +URL: https://www.borch.dev/uploads/screpertoire/ +BugReports: https://github.com/ncborcherding/scRepertoire/issues + diff --git a/NAMESPACE b/NAMESPACE index b211c862..3c139e00 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(percentAA) export(percentGenes) export(percentKmer) export(percentVJ) +export(positionalEntropy) export(subsetClones) export(vizGenes) import(dplyr) diff --git a/R/clonalAbundance.R b/R/clonalAbundance.R index bf59ddb7..a51011c8 100644 --- a/R/clonalAbundance.R +++ b/R/clonalAbundance.R @@ -86,6 +86,7 @@ clonalAbundance <- function(input.data, Con.df<- rbind.data.frame(Con.df, data1) } Con.df <- data.frame(Con.df) + Con.df$values <- factor(Con.df$values, levels=names(input.data)) col <- length(unique(Con.df$values)) fill <- "Samples" if (scale == TRUE) { diff --git a/R/clonalCluster.R b/R/clonalCluster.R index 61aeb401..61b6cc4e 100644 --- a/R/clonalCluster.R +++ b/R/clonalCluster.R @@ -28,6 +28,7 @@ #' The higher the number the more similarity of sequence will be #' used for clustering. #' @param group.by The column header used for to group contigs. +#' If (\strong{NULL}), clusters will be calculated across samples. #' @param exportGraph Return an igraph object of connected #' sequences (\strong{TRUE}) or the amended input with a #' new cluster-based variable (\strong{FALSE}). @@ -98,14 +99,13 @@ clonalCluster <- function(input.data, group_by(bound[,ref2]) %>% dplyr::summarize(sample_count = n(), unique_samples = paste0(unique(group.by), collapse = ",")) - dictionary <- list(bound) } else { bound <- bind_rows(dat) graph.variables <- bind_rows(dat) %>% group_by(bound[,ref2]) %>% dplyr::summarize(sample_count = n()) - dictionary <- dat } + dictionary <- dat #Generating Connected Component output.list <- lapply(dictionary, function(x) { cluster <- .lvCompare(x, diff --git a/R/clonalCompare.R b/R/clonalCompare.R index 9b7108d1..605ec37a 100644 --- a/R/clonalCompare.R +++ b/R/clonalCompare.R @@ -75,9 +75,6 @@ clonalCompare <- function(input.data, #Loop through the list to get a proportional summary for (i in seq_along(input.data)) { - if (chain != "both") { - input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall) - } tbl <- as.data.frame(table(input.data[[i]][,cloneCall])) tbl[,2] <- tbl[,2]/sum(tbl[,2]) colnames(tbl) <- c("clones", "Proportion") diff --git a/R/clonalDiversity.R b/R/clonalDiversity.R index 2ff7bd4a..939ea7fe 100644 --- a/R/clonalDiversity.R +++ b/R/clonalDiversity.R @@ -87,6 +87,7 @@ clonalDiversity <- function(input.data, if(return.boots) { exportTable <- TRUE } + sco <- is_seurat_object(input.data) | is_se_object(input.data) input.data <- .data.wrangle(input.data, group.by, .theCall(input.data, cloneCall, check.df = FALSE), @@ -95,11 +96,8 @@ clonalDiversity <- function(input.data, mat <- NULL sample <- c() - if (!is.null(group.by)) { - input.data <- bind_rows(input.data, .id = "element.names") - input.data$group.element <- input.data[,group.by] - #group.element.uniq <- unique(input.data$group.element) - input.data <- split(input.data, f = input.data[,"group.element"]) + if(!is.null(group.by) & !sco) { + input.data <- .groupList(input.data, group.by) } min <- .short.check(input.data, cloneCall) for (i in seq_along(input.data)) { diff --git a/R/clonalQuant.R b/R/clonalQuant.R index fb1e9022..058e1a5f 100644 --- a/R/clonalQuant.R +++ b/R/clonalQuant.R @@ -94,10 +94,11 @@ clonalQuant <- function(input.data, if(!is.null(group.by)) { col <- length(unique(mat[,group.by])) } + mat[,x] = factor(mat[,x], levels = names(input.data)) #Plotting plot <- ggplot(data = mat, - aes(x=mat[,x], y=mat[,y], fill=as.factor(mat[,x]))) + + aes(x=mat[,x], y=mat[,y], fill=mat[,x])) + stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge", diff --git a/R/clonalRarefaction.R b/R/clonalRarefaction.R index 7a6bc78e..dcae9675 100644 --- a/R/clonalRarefaction.R +++ b/R/clonalRarefaction.R @@ -6,7 +6,8 @@ #' estimates for rarefaction and extrapolation. The function relies on the #' \code{\link[iNEXT]{iNEXT}} R package. Please read and cite the #' \href{https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613}{manuscript} -#' if using this function. +#' if using this function. The input into the iNEXT calculation is abundance, +#' incidence-based calculations are not supported. #' #' @examples #' #Making combined contig data @@ -68,7 +69,7 @@ clonalRarefaction <- function(input.data, mat <- iNEXT(mat.list, q=hill.numbers, datatype="abundance",nboot = n.boots) plot <- suppressMessages(ggiNEXT(mat, type=plot.type) + scale_shape_manual(values = rep(16,col)) + - scale_fill_manual(values = rep("white", col)) + + scale_fill_manual(values = c(.colorizer(palette,col))) + scale_color_manual(values = c(.colorizer(palette,col))) + theme_classic()) if (exportTable == TRUE) { diff --git a/R/combineExpression.R b/R/combineExpression.R index 0656f67e..4d268147 100644 --- a/R/combineExpression.R +++ b/R/combineExpression.R @@ -91,9 +91,14 @@ combineExpression <- function(input.data, clonalFrequency = n()) colnames(data2)[1] <- cloneCall data <- merge(data, data2, by = cloneCall, all = TRUE) - data <- data[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", "clonalProportion", - "clonalFrequency")] + if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) { + data <- data[,c("barcode", "CTgene", "CTnt", + "CTaa", "CTstrict", cloneCall, + "clonalProportion", "clonalFrequency")] + } else { + data <- data[,c("barcode", "CTgene", "CTnt", + "CTaa", "CTstrict", + "clonalProportion", "clonalFrequency")] } Con.df <- rbind.data.frame(Con.df, data) } } else if (group.by != "none" || !is.null(group.by)) { @@ -108,9 +113,14 @@ combineExpression <- function(input.data, colnames(data2)[c(1,2)] <- c(cloneCall, group.by) data <- merge(data, data2, by = c(cloneCall, group.by), all = TRUE) - Con.df <- data[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", "clonalProportion", - "clonalFrequency")] + if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) { + Con.df <- data[,c("barcode", "CTgene", "CTnt", + "CTaa", "CTstrict", cloneCall, + "clonalProportion", "clonalFrequency")] + } else { + Con.df <- data[,c("barcode", "CTgene", "CTnt", + "CTaa", "CTstrict", + "clonalProportion", "clonalFrequency")] } } #Detect if largest cloneSize category is too small for experiment and amend #this prevents a ton of NA values in the data @@ -140,9 +150,16 @@ combineExpression <- function(input.data, } #Formating the meta data to add - PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", - "CTaa", "CTstrict", "clonalProportion", - "clonalFrequency", "cloneSize")]) + if ( cloneCall %!in% c("CTgene", "CTnt", + "CTaa", "CTstrict") ) { + PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", + "CTaa", "CTstrict", cloneCall, + "clonalProportion", "clonalFrequency", "cloneSize")]) + } else { + PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", + "CTaa", "CTstrict", "clonalProportion", + "clonalFrequency", "cloneSize")]) + } dup <- PreMeta$barcode[which(duplicated(PreMeta$barcode))] PreMeta <- PreMeta[PreMeta$barcode %!in% dup,] barcodes <- PreMeta$barcode diff --git a/R/positionalEntropy.R b/R/positionalEntropy.R new file mode 100644 index 00000000..1cd18a58 --- /dev/null +++ b/R/positionalEntropy.R @@ -0,0 +1,112 @@ +#' Examining the diversity of amino acids by position +#' +#' This function the diversity amino acids along the residues +#' of the CDR3 amino acid sequence. Please see +#' \code{\link{clonalDiversity}} for more information on +#' the underlying methods for diversity/entropy calculations. +#' Positions without variance will have a value reported as 0 +#' for the purposes of comparison. +#' +#' @examples +#' #Making combined contig data +#' combined <- combineTCR(contig_list, +#' samples = c("P17B", "P17L", "P18B", "P18L", +#' "P19B","P19L", "P20B", "P20L")) +#' positionalEntropy(combined, +#' chain = "TRB", +#' aa.length = 20) + +#' @param input.data The product of \code{\link{combineTCR}}, +#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}. +#' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL". +#' @param group.by The variable to use for grouping. +#' @param aa.length The maximum length of the CDR3 amino acid sequence. +#' @param method The method to calculate the entropy/diversity - +#' "shannon", "inv.simpson", "norm.entropy". +#' @param n.boots number of bootstraps to down sample in order to +#' get mean diversity. +#' @param exportTable Returns the data frame used for forming the graph. +#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}. +#' @import ggplot2 +#' @importFrom stringr str_split +#' @export +#' @concept Summarize_Repertoire +#' @return ggplot of line graph of diversity by position +positionalEntropy <- function(input.data, + chain = "TRB", + group.by = NULL, + aa.length = 20, + method = "shannon", + n.boots = 20, + exportTable = FALSE, + palette = "inferno") { + + if(method %!in% c("shannon", "inv.simpson", "norm.entropy")) { + stop("Please select a compatible method.") + } + sco <- is_seurat_object(input.data) | is_se_object(input.data) + input.data <- .data.wrangle(input.data, + group.by, + .theCall(input.data, "CTaa", check.df = FALSE), + chain) + cloneCall <- .theCall(input.data, "CTaa") + + if(!is.null(group.by) & !sco) { + input.data <- .groupList(input.data, group.by) + } + + #Selecting Diversit Function + diversityFunc <- switch(method, + "norm.entropy" = .shannon, + "inv.simpson" = .invsimpson, + "shannon" = .normentropy, + stop("Invalid method provided")) + + min <- .short.check(input.data, cloneCall) + + lapply(input.data, function(x) { + lapply(seq_len(n.boots), function(y) { + strings <- x[,cloneCall] + strings <- do.call(c,str_split(strings, ";")) + strings <- strings[strings != "NA"] + strings <- na.omit(strings) + strings <- strings[nchar(strings) < aa.length] + strings <- strings[sample(seq_len(length(strings)), min)] + strings <- .padded_strings(strings, aa.length) + strings <- do.call(rbind, strings) + aa.output <- apply(strings, 2, function(z) { + summary <- as.data.frame(table(z, useNA = "always")) + }) + res <- suppressWarnings(Reduce(function(...) merge(..., all = TRUE, by="z"), aa.output)) + colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length))) + res[seq_len(20),][is.na(res[seq_len(20),])] <- 0 + diversity <- sapply(res[,2:ncol(res)], diversityFunc) + diversity[is.nan(diversity)] <- 0 + diversity + }) -> diversity.calculations + diversity.calculations <- do.call(rbind, diversity.calculations) + diversity.means <- colMeans(diversity.calculations) + diversity.means + }) -> positional.diversity + + mat <- do.call(rbind, positional.diversity) + mat_melt <- suppressMessages(melt(mat)) + + plot <- ggplot(mat_melt, aes(x=Var2, y = value, group= Var1, color = Var1)) + + geom_line(stat = "identity") + + geom_point() + + scale_color_manual(name = "Groups", + values = rev(.colorizer(palette,nrow(mat)))) + + xlab("Amino Acid Residues") + + ylab("Relative Diversity") + + theme_classic() + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + if (exportTable == TRUE) { + return(mat_melt) + } + return(plot) +} + + + + diff --git a/README.md b/README.md index a9c741d1..72efe219 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![BioC status](http://www.bioconductor.org/shields/build/release/bioc/scRepertoire.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/scRepertoire) [![R-CMD-check](https://github.com/ncborcherding/scRepertoire/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ncborcherding/scRepertoire/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/ncborcherding/scRepertoire/branch/master/graph/badge.svg)](https://app.codecov.io/gh/ncborcherding/scRepertoire?branch=master) -[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://www.borch.dev/uploads/vignette/vignette) +[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://www.borch.dev/uploads/screpertoire/) ## A toolkit for single-cell immune profiling @@ -28,10 +28,12 @@ scRepertoire has a comprehensive [website](https://www.borch.dev/uploads/screper devtools::install_github("ncborcherding/scRepertoire") ``` -### Most up-to-date version +### Installing from Bioconductor +The current version of scRepertoire is also available in the development version of Bioconductor. Important to note, the version is listed as 1.99.0 on [Bioconductor](https://bioconductor.org/packages/3.19/bioc/html/scRepertoire.html) per their version guidelines. ```R -devtools::install_github("ncborcherding/scRepertoire@dev") +BiocManager::install(version='devel') +BiocManager::install("scRepertoire") ``` ### Legacy Version 1 diff --git a/_pkgdown.yml b/_pkgdown.yml index 18dff81e..89a2a379 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,7 +44,7 @@ navbar: href: articles/Attaching_SC.html - text: Visualizations for Single-Cell Objects href: articles/SC_Visualizations.html - - text: Clonal Bias + - text: Quantifying Clonal Bias href: articles/Clonal_Bias.html - text: '-------' - text: Combining Deep Learning and TCRs with Trex @@ -88,6 +88,7 @@ reference: desc: Functions to summarize clonal sequences across the repertoire. - contents: - percentAA + - positionalEntropy - percentGenes - percentKmer - percentVJ diff --git a/index.md b/index.md index a037c07a..92f9cb0a 100644 --- a/index.md +++ b/index.md @@ -19,9 +19,12 @@ scRepertoire is compatible and integrated with the R packages [Trex](https://git devtools::install_github("ncborcherding/scRepertoire") ``` -#### Most up-to-date version +### Installing from Bioconductor +The current version of scRepertoire is also available in the development version of Bioconductor. Important to note, the version is listed as 1.99.0 on [Bioconductor](https://bioconductor.org/packages/3.19/bioc/html/scRepertoire.html) per their version guidelines. + ``` -devtools::install_github("ncborcherding/scRepertoire@dev") +BiocManager::install(version='devel') +BiocManager::install("scRepertoire") ``` #### Legacy Version 1 diff --git a/inst/pkgdown.yml b/inst/pkgdown.yml index 4bb894dd..f9da6cbb 100644 --- a/inst/pkgdown.yml +++ b/inst/pkgdown.yml @@ -17,7 +17,7 @@ articles: Repertoire_Summary: Repertoire_Summary.html SC_Visualizations: SC_Visualizations.html Trex: Trex.html -last_built: 2024-01-10T16:45Z +last_built: 2024-01-22T10:54Z urls: reference: https://www.borch.dev/uploads/scRepertoire/reference article: https://www.borch.dev/uploads/scRepertoire/articles diff --git a/man/clonalCluster.Rd b/man/clonalCluster.Rd index 83289df1..2c5d167d 100644 --- a/man/clonalCluster.Rd +++ b/man/clonalCluster.Rd @@ -30,7 +30,8 @@ e.g. "both", "TRA", "TRG", "IGH", "IGL".} The higher the number the more similarity of sequence will be used for clustering.} -\item{group.by}{The column header used for to group contigs.} +\item{group.by}{The column header used for to group contigs. +If (\strong{NULL}), clusters will be calculated across samples.} \item{exportGraph}{Return an igraph object of connected sequences (\strong{TRUE}) or the amended input with a diff --git a/man/clonalRarefaction.Rd b/man/clonalRarefaction.Rd index c1cb76dd..0b1b49ee 100644 --- a/man/clonalRarefaction.Rd +++ b/man/clonalRarefaction.Rd @@ -53,7 +53,8 @@ diversity (\strong{q = 2}, the inverse of Simpson concentration) to compute dive estimates for rarefaction and extrapolation. The function relies on the \code{\link[iNEXT]{iNEXT}} R package. Please read and cite the \href{https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613}{manuscript} -if using this function. +if using this function. The input into the iNEXT calculation is abundance, +incidence-based calculations are not supported. } \examples{ #Making combined contig data diff --git a/man/positionalEntropy.Rd b/man/positionalEntropy.Rd new file mode 100644 index 00000000..601ac8b3 --- /dev/null +++ b/man/positionalEntropy.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/positionalEntropy.R +\name{positionalEntropy} +\alias{positionalEntropy} +\title{Examining the diversity of amino acids by position} +\usage{ +positionalEntropy( + input.data, + chain = "TRB", + group.by = NULL, + aa.length = 20, + method = "shannon", + n.boots = 20, + exportTable = FALSE, + palette = "inferno" +) +} +\arguments{ +\item{input.data}{The product of \code{\link{combineTCR}}, +\code{\link{combineBCR}}, or \code{\link{combineExpression}}.} + +\item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL".} + +\item{group.by}{The variable to use for grouping.} + +\item{aa.length}{The maximum length of the CDR3 amino acid sequence.} + +\item{method}{The method to calculate the entropy/diversity - +"shannon", "inv.simpson", "norm.entropy".} + +\item{n.boots}{number of bootstraps to down sample in order to +get mean diversity.} + +\item{exportTable}{Returns the data frame used for forming the graph.} + +\item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}.} +} +\value{ +ggplot of line graph of diversity by position +} +\description{ +This function the diversity amino acids along the residues +of the CDR3 amino acid sequence. Please see +\code{\link{clonalDiversity}} for more information on +the underlying methods for diversity/entropy calculations. +Positions without variance will have a value reported as 0 +for the purposes of comparison. +} +\examples{ +#Making combined contig data +combined <- combineTCR(contig_list, + samples = c("P17B", "P17L", "P18B", "P18L", + "P19B","P19L", "P20B", "P20L")) +positionalEntropy(combined, + chain = "TRB", + aa.length = 20) +} +\concept{Summarize_Repertoire} diff --git a/man/scRepertoire-package.Rd b/man/scRepertoire-package.Rd index 1b8caeec..dd9ccefc 100644 --- a/man/scRepertoire-package.Rd +++ b/man/scRepertoire-package.Rd @@ -11,7 +11,8 @@ scRepertoire is a toolkit for processing and analyzing single-cell T-cell recept \seealso{ Useful links: \itemize{ - \item \url{https://ncborcherding.github.io/scRepertoire/} + \item \url{https://www.borch.dev/uploads/screpertoire/} + \item Report bugs at \url{https://github.com/ncborcherding/scRepertoire/issues} } } diff --git a/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg b/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg new file mode 100644 index 00000000..af181bdd --- /dev/null +++ b/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg @@ -0,0 +1,344 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +migr + + + + + + + + + + +tran + + + + + + + + + + +expa + + + + + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 + +0.0 +0.1 +0.2 +0.3 + + + + + +0.0 +0.1 +0.2 +0.3 + + + + + +0.0 +0.1 +0.2 +0.3 + + + + +Index Score +StartracDiversity_plot + + diff --git a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg b/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg new file mode 100644 index 00000000..003cf42d --- /dev/null +++ b/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg @@ -0,0 +1,681 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +P19 + + + + + + + + + + +P20 + + + + + + + + + + +P17 + + + + + + + + + + +P18 + + + + + + + +-4 +0 +4 +8 + + + + + +-4 +0 +4 +8 + +-5 +0 +5 + + + + +-5 +0 +5 + + + +Dimension 1 +Dimension 2 +clonalOverlay_plot + + diff --git a/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg b/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg new file mode 100644 index 00000000..33091eda --- /dev/null +++ b/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +500 +1000 +1500 + + + + + + + + + +0.0 +0.2 +0.4 +0.6 +Sample coverage +Species diversity +clonalclonalRarefaction_h2_p3_plot + + diff --git a/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg b/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg new file mode 100644 index 00000000..cb2025f6 --- /dev/null +++ b/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +P19B +P20B +P20L +P17L +P19L +P17B +P18B +P18L + + + + + + + + + + + + + + + + + + + +-0.1 +0.0 +0.1 +0.2 +0.3 +0.4 +0.5 +clonalSizeDistribution_plot + + diff --git a/tests/testthat/_snaps/positionalEntropy/positionalentropy-tra-plot.svg b/tests/testthat/_snaps/positionalEntropy/positionalentropy-tra-plot.svg new file mode 100644 index 00000000..604773bc --- /dev/null +++ b/tests/testthat/_snaps/positionalEntropy/positionalentropy-tra-plot.svg @@ -0,0 +1,280 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 + + + + + + + + + + + + + + + + + + + + + + + + + +pos.1 +pos.2 +pos.3 +pos.4 +pos.5 +pos.6 +pos.7 +pos.8 +pos.9 +pos.10 +pos.11 +pos.12 +pos.13 +pos.14 +pos.15 +pos.16 +pos.17 +pos.18 +pos.19 +pos.20 +Amino Acid Residues +Relative Diversity + +Groups + + + + + + + + + + + + + + + + +P17B +P17L +P18B +P18L +P19B +P19L +P20B +P20L +positionalEntropy_TRA_plot + + diff --git a/tests/testthat/_snaps/positionalEntropy/positionalentropy-trb-plot.svg b/tests/testthat/_snaps/positionalEntropy/positionalentropy-trb-plot.svg new file mode 100644 index 00000000..5fdf6428 --- /dev/null +++ b/tests/testthat/_snaps/positionalEntropy/positionalentropy-trb-plot.svg @@ -0,0 +1,280 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 + + + + + + + + + + + + + + + + + + + + + + + + + +pos.1 +pos.2 +pos.3 +pos.4 +pos.5 +pos.6 +pos.7 +pos.8 +pos.9 +pos.10 +pos.11 +pos.12 +pos.13 +pos.14 +pos.15 +pos.16 +pos.17 +pos.18 +pos.19 +pos.20 +Amino Acid Residues +Relative Diversity + +Groups + + + + + + + + + + + + + + + + +P17B +P17L +P18B +P18L +P19B +P19L +P20B +P20L +positionalEntropy_TRB_plot + + diff --git a/tests/testthat/test-positionalEntropy.R b/tests/testthat/test-positionalEntropy.R new file mode 100644 index 00000000..35977721 --- /dev/null +++ b/tests/testthat/test-positionalEntropy.R @@ -0,0 +1,19 @@ +# test script for positionalEntropy.R - testcases are NOT comprehensive! + +test_that("positionalEntropy works", { + + set.seed(42) + expect_doppelganger( + "positionalEntropy_TRB_plot", + positionalEntropy(getCombined(), + chain = "TRB", + aa.length = 20) + ) + + expect_doppelganger( + "positionalEntropy_TRA_plot", + positionalEntropy(getCombined(), + chain = "TRA", + aa.length = 20) + ) +}) \ No newline at end of file diff --git a/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds b/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds index 277aaaad..f5d04a7f 100644 Binary files a/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds and b/tests/testthat/testdata/clustering/clonalCluster_2sample_data.rds differ diff --git a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds index 421372e8..f8dc1631 100644 Binary files a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds and b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_data.rds differ diff --git a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds index 5b777c90..528720e9 100644 Binary files a/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds and b/tests/testthat/testdata/clustering/clonalCluster_TRBaa_metadata.rds differ diff --git a/vignettes/articles/Clonal_Cluster.Rmd b/vignettes/articles/Clonal_Cluster.Rmd index 356cc3ed..319577a8 100644 --- a/vignettes/articles/Clonal_Cluster.Rmd +++ b/vignettes/articles/Clonal_Cluster.Rmd @@ -78,7 +78,7 @@ head(sub_combined[,c(1,2,13)]) ### Clustering with a single-cell object -If performed over the number of samples, such as the list elements, **group.by** can used to calculate only the clusters on the setting of patient sample (**group.by** = "Patient") or tissue type (**group.by** = "Type"). This will add the selected group to the beginning of the cluster designation. We can also call ```clonalCluster()``` directly on a Single-Cell Object. +If performed over the number of samples, such as the list elements, **group.by** can used to calculate only the clusters on the setting of patient sample (**group.by** = "Patient") or tissue type (**group.by** = "Type"). This will add the selected group to the beginning of the cluster designation. We can also call ```clonalCluster()``` directly on a Single-Cell Object. If **group.by** = NULL (**default**), the clusters will be calculated across all samples. ```{r tidy = FALSE} #Adding patient information diff --git a/vignettes/articles/Clonal_Diversity.Rmd b/vignettes/articles/Clonal_Diversity.Rmd index a5a03549..1f53a1b4 100644 --- a/vignettes/articles/Clonal_Diversity.Rmd +++ b/vignettes/articles/Clonal_Diversity.Rmd @@ -105,13 +105,13 @@ clonalDiversity(combined.TCR, ## clonaRarefaction -We can also use Hill numbers to estimate the rarefaction, or estimating species richness, using the the abundance of clones across groupings. Underlying the rarefaction calculation is the use of observed receptor of abundance to compute diversity. +We can also use Hill numbers to estimate the rarefaction, or estimating species richness, using the the abundance of clones across groupings. Underlying the rarefaction calculation is the use of observed receptor of **abundance** to compute diversity. **hill.numbers** + 0 - species-richness -+ 1 - Shannon -+ 2 - Simpson ++ 1 - Shannon Diversity ++ 2 - Simpson Diversity **plot.type** @@ -119,9 +119,9 @@ We can also use Hill numbers to estimate the rarefaction, or estimating species + 2 - sample completeness curve + 3 - coverage-based rarefaction/extrapolation curve -This relies on the [iNEXT](https://cran.r-project.org/web/packages/iNEXT/index.html) with the accompanying [manuscript](http://chao.stat.nthu.edu.tw/wordpress/paper/120_pdf_appendix.pdf). Like the other wrapping functions in scRepertoire, please cite the original work. +This function relies on the [iNEXT](https://cran.r-project.org/web/packages/iNEXT/index.html) with the accompanying [manuscript](http://chao.stat.nthu.edu.tw/wordpress/paper/120_pdf_appendix.pdf). Like the other wrapping functions in scRepertoire, please cite the original work. The sample completeness curve (**plot.type** = 2), may not show full sample coverage due to the size/diversity of the input data. -### Rarefaction using Shannon Diversity (q = 0) +### Rarefaction using Species Richness (q = 0) ```{r, message=FALSE, tidy = FALSE} clonalRarefaction(combined.TCR, diff --git a/vignettes/articles/Repertoire_Summary.Rmd b/vignettes/articles/Repertoire_Summary.Rmd index 2698745e..f8c51fdd 100644 --- a/vignettes/articles/Repertoire_Summary.Rmd +++ b/vignettes/articles/Repertoire_Summary.Rmd @@ -107,7 +107,6 @@ vizGenes(combined.TCR[c(1,2)], scale = FALSE) ``` - ## percentAA Quantify the proportion of amino acids along the cdr3 sequence with `percentAA()`. By default, the function will pad the sequences with NAs up to the maximum of **aa.length**. Sequences longer than **aa.length** will be removed before visualization **(default aa.length = 20)**. @@ -118,6 +117,23 @@ percentAA(combined.TCR, aa.length = 20) ``` +## positionalEntropy + +We can also quantify the level of entropy/diversity across amino acid residues along the cdr3 sequence. `positionalEntropy()` combines the quantification by residue of ``percentAA()`` with the diversity calls in ```clonalDiversity()```. Importantly, because diversity can be affected by size of sample, similar to ```clonalDiversity()```, ```positionalEntropy()``` will also downsample/bootstrap the calculation. Positions without variance will have a value reported as 0 for the purposes of comparison. + +**method** + +* "shannon" - Shannon Diversity +* "inv.simpson" - Inverse Simpson Diversity +* "norm.entropy" - Normalized Entropy + +```{r} +positionalEntropy(combined.TCR, + chain = "TRB", + aa.length = 20) +``` + + ## percentGenes Quantify the proportion of V or J gene usage with `percentGenes()`. Like `percentAA()`, we select the chain of interest and then indicate the gene of interest with the **gene** parameter. Two major limitations of `percentGenes()` are, 1) the function quantifies only V or J genes, and 2) the quantification of the genes are limited to all the V or J genes seen across the samples, not all possible V or J genes. diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index 74d29835..92541694 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -542,7 +542,6 @@ vizGenes(combined.TCR[c(1,2)], scale = FALSE) ``` - ## percentAA Quantify the proportion of amino acids along the cdr3 sequence with `percentAA()`. By default, the function will pad the sequences with NAs up to the maximum of **aa.length**. Sequences longer than **aa.length** will be removed before visualization **(default aa.length = 20)**. @@ -553,6 +552,22 @@ percentAA(combined.TCR, aa.length = 20) ``` +## positionalEntropy + +We can also quantify the level of entropy/diversity across amino acid residues along the cdr3 sequence. `positionalEntropy()` combines the quantification by residue of ``percentAA()`` with the diversity calls in ```clonalDiversity()```. Importantly, because diversity can be affected by size of sample, similar to ```clonalDiversity()```, ```positionalEntropy()``` will also downsample/bootstrap the calculation. Positions without variance will have a value reported as 0 for the purposes of comparison. + +**method** + +* "shannon" - Shannon Diversity +* "inv.simpson" - Inverse Simpson Diversity +* "norm.entropy" - Normalized Entropy + +```{r} +positionalEntropy(combined.TCR, + chain = "TRB", + aa.length = 20) +``` + ## percentGenes Quantify the proportion of V or J gene usage with `percentGenes()`. Like `percentAA()`, we select the chain of interest and then indicate the gene of interest with the **gene** parameter. Two major limitations of `percentGenes()` are, 1) the function quantifies only V or J genes, and 2) the quantification of the genes are limited to all the V or J genes seen across the samples, not all possible V or J genes. @@ -696,9 +711,9 @@ We can also use Hill numbers to estimate the rarefaction, or estimating species + 2 - sample completeness curve + 3 - coverage-based rarefaction/extrapolation curve -This relies on the [iNEXT](https://cran.r-project.org/web/packages/iNEXT/index.html) with the accompanying [manuscript](http://chao.stat.nthu.edu.tw/wordpress/paper/120_pdf_appendix.pdf). Like the other wrapping functions in scRepertoire, please cite the original work. +This function relies on the [iNEXT](https://cran.r-project.org/web/packages/iNEXT/index.html) with the accompanying [manuscript](http://chao.stat.nthu.edu.tw/wordpress/paper/120_pdf_appendix.pdf). Like the other wrapping functions in scRepertoire, please cite the original work. The sample completeness curve (**plot.type** = 2), may not show full sample coverage due to the size/diversity of the input data. -### Rarefaction using Shannon Diversity (q = 0) +### Rarefaction using Species Richness (q = 0) ```{r, message=FALSE, tidy = FALSE} clonalRarefaction(combined.TCR,