Merge pull request #314 from ncborcherding/master

updating dev to start work
BorchLab · Feb 13, 2024 · e55da4c · e55da4c
2 parents da5a22e + ff08088
commit e55da4c
Show file tree

Hide file tree

Showing 32 changed files with 2,029 additions and 43 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -60,4 +60,6 @@ Config/testthat/edition: 3
 Language: en-US
 LinkingTo: 
     Rcpp
-URL: https://ncborcherding.github.io/scRepertoire/
+URL: https://www.borch.dev/uploads/screpertoire/
+BugReports: https://github.com/ncborcherding/scRepertoire/issues
+
diff --git a/NAMESPACE b/NAMESPACE
@@ -31,6 +31,7 @@ export(percentAA)
 export(percentGenes)
 export(percentKmer)
 export(percentVJ)
+export(positionalEntropy)
 export(subsetClones)
 export(vizGenes)
 import(dplyr)

diff --git a/R/clonalAbundance.R b/R/clonalAbundance.R
@@ -86,6 +86,7 @@ clonalAbundance <- function(input.data,
       Con.df<- rbind.data.frame(Con.df, data1) 
     }
     Con.df <- data.frame(Con.df)
+    Con.df$values <- factor(Con.df$values, levels=names(input.data))
     col <- length(unique(Con.df$values))
     fill <- "Samples"
     if (scale == TRUE) { 

diff --git a/R/clonalCluster.R b/R/clonalCluster.R
@@ -28,6 +28,7 @@
 #' The higher the number the more similarity of sequence will be 
 #' used for clustering.
 #' @param group.by The column header used for to group contigs.
+#' If (\strong{NULL}), clusters will be calculated across samples.
 #' @param exportGraph Return an igraph object of connected 
 #' sequences (\strong{TRUE}) or the amended input with a
 #' new cluster-based variable (\strong{FALSE}).
@@ -98,14 +99,13 @@ clonalCluster <- function(input.data,
                           group_by(bound[,ref2]) %>%
                           dplyr::summarize(sample_count = n(),
                                     unique_samples = paste0(unique(group.by), collapse = ","))
-    dictionary <- list(bound)
   } else {
     bound <- bind_rows(dat)
     graph.variables <- bind_rows(dat) %>%
                           group_by(bound[,ref2]) %>%
                           dplyr::summarize(sample_count = n())
-    dictionary <- dat
   }
+  dictionary <- dat
   #Generating Connected Component
   output.list <- lapply(dictionary, function(x) {
     cluster <- .lvCompare(x, 

diff --git a/R/clonalCompare.R b/R/clonalCompare.R
@@ -75,9 +75,6 @@ clonalCompare <- function(input.data,
 
   #Loop through the list to get a proportional summary
   for (i in seq_along(input.data)) {
-    if (chain != "both") {
-      input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall)
-    }
     tbl <- as.data.frame(table(input.data[[i]][,cloneCall]))
     tbl[,2] <- tbl[,2]/sum(tbl[,2])
     colnames(tbl) <- c("clones", "Proportion")

diff --git a/R/clonalDiversity.R b/R/clonalDiversity.R
@@ -87,6 +87,7 @@ clonalDiversity <- function(input.data,
   if(return.boots) {
     exportTable <- TRUE
   }
+  sco <- is_seurat_object(input.data) | is_se_object(input.data)
   input.data <- .data.wrangle(input.data, 
                               group.by, 
                               .theCall(input.data, cloneCall, check.df = FALSE), 
@@ -95,11 +96,8 @@ clonalDiversity <- function(input.data,
 
   mat <- NULL
   sample <- c()
-  if (!is.null(group.by)) {
-    input.data <- bind_rows(input.data, .id = "element.names")
-    input.data$group.element <- input.data[,group.by]
-    #group.element.uniq <- unique(input.data$group.element)
-    input.data <- split(input.data, f = input.data[,"group.element"])
+  if(!is.null(group.by) & !sco) {
+    input.data <- .groupList(input.data, group.by)
   }
   min <- .short.check(input.data, cloneCall)
   for (i in seq_along(input.data)) {

diff --git a/R/clonalQuant.R b/R/clonalQuant.R
@@ -94,10 +94,11 @@ clonalQuant <- function(input.data,
   if(!is.null(group.by)) {
     col <- length(unique(mat[,group.by]))
   }
+  mat[,x] = factor(mat[,x], levels = names(input.data))
 
   #Plotting
   plot <- ggplot(data = mat, 
-                 aes(x=mat[,x], y=mat[,y], fill=as.factor(mat[,x]))) +
+                 aes(x=mat[,x], y=mat[,y], fill=mat[,x])) +
             stat_summary(geom = "errorbar", 
                          fun.data = mean_se, 
                          position = "dodge", 

diff --git a/R/clonalRarefaction.R b/R/clonalRarefaction.R
@@ -6,7 +6,8 @@
 #' estimates for rarefaction and extrapolation. The function relies on the
 #' \code{\link[iNEXT]{iNEXT}} R package. Please read and cite the 
 #' \href{https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613}{manuscript} 
-#' if using this function.
+#' if using this function. The input into the iNEXT calculation is abundance; 
+#' incidence-based calculations are not supported.
 #' 
 #' @examples
 #' #Making combined contig data
@@ -68,7 +69,7 @@ clonalRarefaction <- function(input.data,
   mat <- iNEXT(mat.list, q=hill.numbers, datatype="abundance",nboot = n.boots) 
   plot <- suppressMessages(ggiNEXT(mat, type=plot.type) + 
             scale_shape_manual(values = rep(16,col)) + 
-            scale_fill_manual(values = rep("white", col)) + 
+            scale_fill_manual(values = c(.colorizer(palette,col))) + 
             scale_color_manual(values = c(.colorizer(palette,col)))  + 
             theme_classic())
   if (exportTable == TRUE) { 

diff --git a/R/combineExpression.R b/R/combineExpression.R
@@ -91,9 +91,14 @@ combineExpression <- function(input.data,
                                   clonalFrequency = n())
             colnames(data2)[1] <- cloneCall
             data <- merge(data, data2, by = cloneCall, all = TRUE)
-            data <- data[,c("barcode", "CTgene", "CTnt", 
-                             "CTaa", "CTstrict", "clonalProportion", 
-                             "clonalFrequency")]
+            if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) {
+              data <- data[,c("barcode", "CTgene", "CTnt",
+                              "CTaa", "CTstrict", cloneCall,
+                              "clonalProportion", "clonalFrequency")]
+            } else {
+              data <- data[,c("barcode", "CTgene", "CTnt", 
+                              "CTaa", "CTstrict",
+                              "clonalProportion", "clonalFrequency")] }
             Con.df <- rbind.data.frame(Con.df, data)
         }
     } else if (group.by != "none" || !is.null(group.by)) {
@@ -108,9 +113,14 @@ combineExpression <- function(input.data,
 
         colnames(data2)[c(1,2)] <- c(cloneCall, group.by)
         data <- merge(data, data2, by = c(cloneCall, group.by), all = TRUE)
-        Con.df <- data[,c("barcode", "CTgene", "CTnt", 
-                          "CTaa", "CTstrict", "clonalProportion", 
-                          "clonalFrequency")]
+        if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) {
+              Con.df <- data[,c("barcode", "CTgene", "CTnt",
+                              "CTaa", "CTstrict", cloneCall,
+                              "clonalProportion", "clonalFrequency")]
+            } else {
+              Con.df <- data[,c("barcode", "CTgene", "CTnt", 
+                              "CTaa", "CTstrict",
+                              "clonalProportion", "clonalFrequency")] }
         }
     #Detect if largest cloneSize category is too small for experiment and amend
     #this prevents a ton of NA values in the data
@@ -140,9 +150,16 @@ combineExpression <- function(input.data,
       }
 
     #Formating the meta data to add
-    PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", 
-                "CTaa", "CTstrict", "clonalProportion", 
-                "clonalFrequency", "cloneSize")])
+    if ( cloneCall %!in% c("CTgene", "CTnt", 
+                         "CTaa", "CTstrict") ) {
+      PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", 
+                                  "CTaa", "CTstrict", cloneCall, 
+                                  "clonalProportion", "clonalFrequency", "cloneSize")])
+    } else {
+      PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt", 
+                                "CTaa", "CTstrict", "clonalProportion", 
+                                "clonalFrequency", "cloneSize")])
+    }
     dup <- PreMeta$barcode[which(duplicated(PreMeta$barcode))]
     PreMeta <- PreMeta[PreMeta$barcode %!in% dup,]
     barcodes <- PreMeta$barcode

diff --git a/R/positionalEntropy.R b/R/positionalEntropy.R
@@ -0,0 +1,112 @@
+#' Examining the diversity of amino acids by position
+#'
+#' This function calculates the diversity of amino acids along the 
+#' residues of the CDR3 amino acid sequence. Please see 
+#' \code{\link{clonalDiversity}} for more information on 
+#' the underlying methods for diversity/entropy calculations. 
+#' Positions without variance will have a value reported as 0 
+#' for the purposes of comparison.
+#'
+#' @examples
+#' #Making combined contig data
+#' combined <- combineTCR(contig_list, 
+#'                         samples = c("P17B", "P17L", "P18B", "P18L", 
+#'                                     "P19B","P19L", "P20B", "P20L"))
+#' positionalEntropy(combined, 
+#'                   chain = "TRB", 
+#'                   aa.length = 20)
+#'
+#' @param input.data The product of \code{\link{combineTCR}}, 
+#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}.
+#' @param chain "TRA", "TRB", "TRG", "TRD", "IGH", "IGL".
+#' @param group.by The variable to use for grouping.
+#' @param aa.length The maximum length of the CDR3 amino acid sequence. 
+#' @param method The method to calculate the entropy/diversity - 
+#' "shannon", "inv.simpson", "norm.entropy".
+#' @param n.boots number of bootstraps to down sample in order to 
+#' get mean diversity.
+#' @param exportTable Returns the data frame used for forming the graph.
+#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}.
+#' @import ggplot2
+#' @importFrom stringr str_split
+#' @export
+#' @concept Summarize_Repertoire
+#' @return ggplot of line graph of diversity by position, or the 
+#' underlying data frame if \code{exportTable = TRUE}.
+positionalEntropy <- function(input.data, 
+                              chain = "TRB", 
+                              group.by = NULL, 
+                              aa.length = 20,
+                              method = "shannon",
+                              n.boots = 20,
+                              exportTable = FALSE, 
+                              palette = "inferno")  {
+
+  if(method %!in% c("shannon", "inv.simpson", "norm.entropy")) {
+    stop("Please select a compatible method.")
+  }
+  #Record the input type before .data.wrangle() replaces input.data
+  sco <- is_seurat_object(input.data) || is_se_object(input.data)
+  input.data <- .data.wrangle(input.data, 
+                              group.by, 
+                              .theCall(input.data, "CTaa", check.df = FALSE), 
+                              chain)
+  cloneCall <- .theCall(input.data, "CTaa")
+
+  #Regroup the list only for non-single-cell-object input
+  if(!is.null(group.by) && !sco) {
+    input.data <- .groupList(input.data, group.by)
+  }
+
+  #Selecting Diversity Function - each method maps to its namesake helper
+  diversityFunc <- switch(method,
+                          "norm.entropy" = .normentropy,
+                          "inv.simpson" = .invsimpson,
+                          "shannon" = .shannon,
+                          stop("Invalid method provided"))
+
+  #Number of sequences drawn per bootstrap (avoids masking base::min)
+  n.sample <- .short.check(input.data, cloneCall)
+
+  positional.diversity <- lapply(input.data, function(x) {
+    diversity.calculations <- lapply(seq_len(n.boots), function(y) {
+      #Split multi-chain entries and drop missing or over-length sequences
+      strings <- x[,cloneCall]
+      strings <- do.call(c,str_split(strings, ";"))
+      strings <- strings[strings != "NA"]
+      strings <- na.omit(strings)
+      strings <- strings[nchar(strings) < aa.length]
+      #NOTE(review): n.sample is computed before the filtering above, so 
+      #sample() errors if fewer than n.sample sequences remain - confirm 
+      #whether this should be capped at length(strings).
+      strings <- strings[sample(seq_along(strings), n.sample)]
+      strings <- .padded_strings(strings, aa.length)
+      strings <- do.call(rbind, strings)
+      #Tabulate residue counts for every position (column)
+      aa.output <- apply(strings, 2, function(z) {
+        as.data.frame(table(z, useNA = "always"))
+      })
+      res <- suppressWarnings(Reduce(function(...) merge(..., all = TRUE, by="z"), aa.output))
+      colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length)))
+      #Residues absent at a position count as 0 within the first 20 rows
+      res[seq_len(20),][is.na(res[seq_len(20),])] <- 0
+      diversity <- sapply(res[,2:ncol(res)], diversityFunc)
+      #Positions without variance are reported as 0
+      diversity[is.nan(diversity)] <- 0
+      diversity
+    })
+    #Average the bootstrap replicates per position
+    diversity.calculations <- do.call(rbind, diversity.calculations)
+    colMeans(diversity.calculations)
+  })
+
+  mat <- do.call(rbind, positional.diversity)
+  mat_melt <- suppressMessages(melt(mat))
+
+  plot <- ggplot(mat_melt, aes(x=Var2, y = value, group= Var1, color = Var1)) +
+    geom_line(stat = "identity") +
+    geom_point() + 
+    scale_color_manual(name = "Groups", 
+                      values = rev(.colorizer(palette,nrow(mat)))) +
+    xlab("Amino Acid Residues") +
+    ylab("Relative Diversity") +
+    theme_classic() + 
+    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
+  if (exportTable == TRUE) { 
+    return(mat_melt) 
+  }
+  return(plot)
+}
+
+
+
+
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [![BioC status](http://www.bioconductor.org/shields/build/release/bioc/scRepertoire.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/scRepertoire)
 [![R-CMD-check](https://github.com/ncborcherding/scRepertoire/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ncborcherding/scRepertoire/actions/workflows/R-CMD-check.yaml)
 [![Codecov test coverage](https://codecov.io/gh/ncborcherding/scRepertoire/branch/master/graph/badge.svg)](https://app.codecov.io/gh/ncborcherding/scRepertoire?branch=master)
-[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://www.borch.dev/uploads/vignette/vignette)
+[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://www.borch.dev/uploads/screpertoire/)
 <!-- badges: end -->
 
 ## A toolkit for single-cell immune profiling
@@ -28,10 +28,12 @@ scRepertoire has a comprehensive [website](https://www.borch.dev/uploads/screper
 devtools::install_github("ncborcherding/scRepertoire")
 ```
 
-### Most up-to-date version
+### Installing from Bioconductor
+The current version of scRepertoire is also available in the development version of Bioconductor. Note that the version is listed as 1.99.0 on [Bioconductor](https://bioconductor.org/packages/3.19/bioc/html/scRepertoire.html), per their versioning guidelines.
 
 ```R
-devtools::install_github("ncborcherding/scRepertoire@dev")
+BiocManager::install(version='devel')
+BiocManager::install("scRepertoire")
 ```
 
 ### Legacy Version 1

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -44,7 +44,7 @@ navbar:
       href: articles/Attaching_SC.html
     - text: Visualizations for Single-Cell Objects
       href: articles/SC_Visualizations.html
-    - text: Clonal Bias
+    - text: Quantifying Clonal Bias
       href: articles/Clonal_Bias.html
     - text: '-------'
     - text: Combining Deep Learning and TCRs with Trex
@@ -88,6 +88,7 @@ reference:
   desc: Functions to summarize clonal sequences across the repertoire.
 - contents:
   - percentAA
+  - positionalEntropy
   - percentGenes
   - percentKmer
   - percentVJ

diff --git a/index.md b/index.md
@@ -19,9 +19,12 @@ scRepertoire is compatible and integrated with the R packages [Trex](https://git
 devtools::install_github("ncborcherding/scRepertoire")
 ```
 
-#### Most up-to-date version
+### Installing from Bioconductor
+The current version of scRepertoire is also available in the development version of Bioconductor. Note that the version is listed as 1.99.0 on [Bioconductor](https://bioconductor.org/packages/3.19/bioc/html/scRepertoire.html), per their versioning guidelines.
+
 ```
-devtools::install_github("ncborcherding/scRepertoire@dev")
+BiocManager::install(version='devel')
+BiocManager::install("scRepertoire")
 ```
 
 #### Legacy Version 1

diff --git a/inst/pkgdown.yml b/inst/pkgdown.yml
@@ -17,7 +17,7 @@ articles:
   Repertoire_Summary: Repertoire_Summary.html
   SC_Visualizations: SC_Visualizations.html
   Trex: Trex.html
-last_built: 2024-01-10T16:45Z
+last_built: 2024-01-22T10:54Z
 urls:
   reference: https://www.borch.dev/uploads/scRepertoire/reference
   article: https://www.borch.dev/uploads/scRepertoire/articles

diff --git a/man/clonalCluster.Rd b/man/clonalCluster.Rd
diff --git a/man/clonalRarefaction.Rd b/man/clonalRarefaction.Rd