Skip to content

Commit

Permalink
Merge pull request #314 from ncborcherding/master
Browse files Browse the repository at this point in the history
updating dev to start work
  • Loading branch information
ncborcherding authored Feb 13, 2024
2 parents da5a22e + ff08088 commit e55da4c
Show file tree
Hide file tree
Showing 32 changed files with 2,029 additions and 43 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,6 @@ Config/testthat/edition: 3
Language: en-US
LinkingTo:
Rcpp
URL: https://ncborcherding.github.io/scRepertoire/
URL: https://www.borch.dev/uploads/screpertoire/
BugReports: https://github.com/ncborcherding/scRepertoire/issues

1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export(percentAA)
export(percentGenes)
export(percentKmer)
export(percentVJ)
export(positionalEntropy)
export(subsetClones)
export(vizGenes)
import(dplyr)
Expand Down
1 change: 1 addition & 0 deletions R/clonalAbundance.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ clonalAbundance <- function(input.data,
Con.df<- rbind.data.frame(Con.df, data1)
}
Con.df <- data.frame(Con.df)
Con.df$values <- factor(Con.df$values, levels=names(input.data))
col <- length(unique(Con.df$values))
fill <- "Samples"
if (scale == TRUE) {
Expand Down
4 changes: 2 additions & 2 deletions R/clonalCluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#' The higher the number the more similarity of sequence will be
#' used for clustering.
#' @param group.by The column header used to group contigs.
#' If (\strong{NULL}), clusters will be calculated across samples.
#' @param exportGraph Return an igraph object of connected
#' sequences (\strong{TRUE}) or the amended input with a
#' new cluster-based variable (\strong{FALSE}).
Expand Down Expand Up @@ -98,14 +99,13 @@ clonalCluster <- function(input.data,
group_by(bound[,ref2]) %>%
dplyr::summarize(sample_count = n(),
unique_samples = paste0(unique(group.by), collapse = ","))
dictionary <- list(bound)
} else {
bound <- bind_rows(dat)
graph.variables <- bind_rows(dat) %>%
group_by(bound[,ref2]) %>%
dplyr::summarize(sample_count = n())
dictionary <- dat
}
dictionary <- dat
#Generating Connected Component
output.list <- lapply(dictionary, function(x) {
cluster <- .lvCompare(x,
Expand Down
3 changes: 0 additions & 3 deletions R/clonalCompare.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,6 @@ clonalCompare <- function(input.data,

#Loop through the list to get a proportional summary
for (i in seq_along(input.data)) {
if (chain != "both") {
input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall)
}
tbl <- as.data.frame(table(input.data[[i]][,cloneCall]))
tbl[,2] <- tbl[,2]/sum(tbl[,2])
colnames(tbl) <- c("clones", "Proportion")
Expand Down
8 changes: 3 additions & 5 deletions R/clonalDiversity.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ clonalDiversity <- function(input.data,
if(return.boots) {
exportTable <- TRUE
}
sco <- is_seurat_object(input.data) | is_se_object(input.data)
input.data <- .data.wrangle(input.data,
group.by,
.theCall(input.data, cloneCall, check.df = FALSE),
Expand All @@ -95,11 +96,8 @@ clonalDiversity <- function(input.data,

mat <- NULL
sample <- c()
if (!is.null(group.by)) {
input.data <- bind_rows(input.data, .id = "element.names")
input.data$group.element <- input.data[,group.by]
#group.element.uniq <- unique(input.data$group.element)
input.data <- split(input.data, f = input.data[,"group.element"])
if(!is.null(group.by) & !sco) {
input.data <- .groupList(input.data, group.by)
}
min <- .short.check(input.data, cloneCall)
for (i in seq_along(input.data)) {
Expand Down
3 changes: 2 additions & 1 deletion R/clonalQuant.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,11 @@ clonalQuant <- function(input.data,
if(!is.null(group.by)) {
col <- length(unique(mat[,group.by]))
}
mat[,x] = factor(mat[,x], levels = names(input.data))

#Plotting
plot <- ggplot(data = mat,
aes(x=mat[,x], y=mat[,y], fill=as.factor(mat[,x]))) +
aes(x=mat[,x], y=mat[,y], fill=mat[,x])) +
stat_summary(geom = "errorbar",
fun.data = mean_se,
position = "dodge",
Expand Down
5 changes: 3 additions & 2 deletions R/clonalRarefaction.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#' estimates for rarefaction and extrapolation. The function relies on the
#' \code{\link[iNEXT]{iNEXT}} R package. Please read and cite the
#' \href{https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12613}{manuscript}
#' if using this function.
#' if using this function. The input into the iNEXT calculation is abundance;
#' incidence-based calculations are not supported.
#'
#' @examples
#' #Making combined contig data
Expand Down Expand Up @@ -68,7 +69,7 @@ clonalRarefaction <- function(input.data,
mat <- iNEXT(mat.list, q=hill.numbers, datatype="abundance",nboot = n.boots)
plot <- suppressMessages(ggiNEXT(mat, type=plot.type) +
scale_shape_manual(values = rep(16,col)) +
scale_fill_manual(values = rep("white", col)) +
scale_fill_manual(values = c(.colorizer(palette,col))) +
scale_color_manual(values = c(.colorizer(palette,col))) +
theme_classic())
if (exportTable == TRUE) {
Expand Down
35 changes: 26 additions & 9 deletions R/combineExpression.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,14 @@ combineExpression <- function(input.data,
clonalFrequency = n())
colnames(data2)[1] <- cloneCall
data <- merge(data, data2, by = cloneCall, all = TRUE)
data <- data[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict", "clonalProportion",
"clonalFrequency")]
if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) {
data <- data[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict", cloneCall,
"clonalProportion", "clonalFrequency")]
} else {
data <- data[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict",
"clonalProportion", "clonalFrequency")] }
Con.df <- rbind.data.frame(Con.df, data)
}
} else if (group.by != "none" || !is.null(group.by)) {
Expand All @@ -108,9 +113,14 @@ combineExpression <- function(input.data,

colnames(data2)[c(1,2)] <- c(cloneCall, group.by)
data <- merge(data, data2, by = c(cloneCall, group.by), all = TRUE)
Con.df <- data[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict", "clonalProportion",
"clonalFrequency")]
if ( cloneCall %!in% c("CTgene", "CTnt", "CTaa", "CTstrict") ) {
Con.df <- data[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict", cloneCall,
"clonalProportion", "clonalFrequency")]
} else {
Con.df <- data[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict",
"clonalProportion", "clonalFrequency")] }
}
#Detect if largest cloneSize category is too small for experiment and amend
#this prevents a ton of NA values in the data
Expand Down Expand Up @@ -140,9 +150,16 @@ combineExpression <- function(input.data,
}

#Formating the meta data to add
PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict", "clonalProportion",
"clonalFrequency", "cloneSize")])
if ( cloneCall %!in% c("CTgene", "CTnt",
"CTaa", "CTstrict") ) {
PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict", cloneCall,
"clonalProportion", "clonalFrequency", "cloneSize")])
} else {
PreMeta <- unique(Con.df[,c("barcode", "CTgene", "CTnt",
"CTaa", "CTstrict", "clonalProportion",
"clonalFrequency", "cloneSize")])
}
dup <- PreMeta$barcode[which(duplicated(PreMeta$barcode))]
PreMeta <- PreMeta[PreMeta$barcode %!in% dup,]
barcodes <- PreMeta$barcode
Expand Down
112 changes: 112 additions & 0 deletions R/positionalEntropy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#' Examining the diversity of amino acids by position
#'
#' This function calculates the diversity of amino acids along the residues
#' of the CDR3 amino acid sequence. Please see
#' \code{\link{clonalDiversity}} for more information on
#' the underlying methods for diversity/entropy calculations.
#' Positions without variance will have a value reported as 0
#' for the purposes of comparison.
#'
#' @examples
#' #Making combined contig data
#' combined <- combineTCR(contig_list,
#'                         samples = c("P17B", "P17L", "P18B", "P18L",
#'                                     "P19B","P19L", "P20B", "P20L"))
#' positionalEntropy(combined,
#'                   chain = "TRB",
#'                   aa.length = 20)
#'
#' @param input.data The product of \code{\link{combineTCR}},
#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}.
#' @param chain "TRA", "TRB", "TRG", "TRD", "IGH", "IGL".
#' @param group.by The variable to use for grouping.
#' @param aa.length The maximum length of the CDR3 amino acid sequence.
#' Sequences with \code{aa.length} or more residues are excluded and the
#' remaining sequences are padded to \code{aa.length} positions.
#' @param method The method to calculate the entropy/diversity -
#' "shannon", "inv.simpson", "norm.entropy".
#' @param n.boots number of bootstraps to down sample in order to
#' get mean diversity.
#' @param exportTable Returns the data frame used for forming the graph.
#' @param palette Colors to use in visualization - input any
#' \link[grDevices]{hcl.pals}.
#' @import ggplot2
#' @importFrom stringr str_split
#' @export
#' @concept Summarize_Repertoire
#' @return ggplot of line graph of diversity by position, or the
#' long-format data frame behind it if \code{exportTable = TRUE}.
positionalEntropy <- function(input.data,
                              chain = "TRB",
                              group.by = NULL,
                              aa.length = 20,
                              method = "shannon",
                              n.boots = 20,
                              exportTable = FALSE,
                              palette = "inferno") {

  if (method %!in% c("shannon", "inv.simpson", "norm.entropy")) {
    stop("Please select a compatible method.")
  }
  # Must be recorded before .data.wrangle() replaces input.data
  sco <- is_seurat_object(input.data) | is_se_object(input.data)
  input.data <- .data.wrangle(input.data,
                              group.by,
                              .theCall(input.data, "CTaa", check.df = FALSE),
                              chain)
  cloneCall <- .theCall(input.data, "CTaa")

  # Regrouping is only applied to list input, not single-cell objects
  if (!is.null(group.by) && !sco) {
    input.data <- .groupList(input.data, group.by)
  }

  # Selecting Diversity Function: each method name maps to the helper
  # of the same name
  diversityFunc <- switch(method,
                          "norm.entropy" = .normentropy,
                          "inv.simpson" = .invsimpson,
                          "shannon" = .shannon,
                          stop("Invalid method provided"))

  # Common down-sampling depth used for every bootstrap draw
  min.size <- .short.check(input.data, cloneCall)

  positional.diversity <- lapply(input.data, function(x) {
    diversity.calculations <- lapply(seq_len(n.boots), function(y) {
      # Split multi-chain entries into individual sequences and drop missing
      strings <- x[, cloneCall]
      strings <- do.call(c, str_split(strings, ";"))
      strings <- strings[strings != "NA"]
      strings <- na.omit(strings)
      strings <- strings[nchar(strings) < aa.length]
      # NOTE(review): sample() errors if fewer than min.size sequences
      # survive the filters above - confirm .short.check() accounts for this
      strings <- strings[sample(seq_along(strings), min.size)]
      strings <- .padded_strings(strings, aa.length)
      strings <- do.call(rbind, strings)

      # One residue frequency table per position (matrix column)
      aa.output <- apply(strings, 2, function(z) {
        as.data.frame(table(z, useNA = "always"))
      })
      # Merge the per-position tables into one residue x position table
      res <- suppressWarnings(
        Reduce(function(...) merge(..., all = TRUE, by = "z"), aa.output)
      )
      colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length)))
      # Residues absent at a position count as 0 in the first 20 rows
      # (presumably the 20 amino acids - TODO confirm)
      res[seq_len(20), ][is.na(res[seq_len(20), ])] <- 0
      diversity <- sapply(res[, 2:ncol(res)], diversityFunc)
      # Positions without variance are reported as 0
      diversity[is.nan(diversity)] <- 0
      diversity
    })
    # Average the bootstrap replicates position by position
    diversity.calculations <- do.call(rbind, diversity.calculations)
    colMeans(diversity.calculations)
  })

  # Rows are groups, columns are positions; melt to long format
  mat <- do.call(rbind, positional.diversity)
  mat_melt <- suppressMessages(melt(mat))

  if (exportTable) {
    return(mat_melt)
  }

  ggplot(mat_melt, aes(x = Var2, y = value, group = Var1, color = Var1)) +
    geom_line(stat = "identity") +
    geom_point() +
    scale_color_manual(name = "Groups",
                       values = rev(.colorizer(palette, nrow(mat)))) +
    xlab("Amino Acid Residues") +
    ylab("Relative Diversity") +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}




8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![BioC status](http://www.bioconductor.org/shields/build/release/bioc/scRepertoire.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/scRepertoire)
[![R-CMD-check](https://github.com/ncborcherding/scRepertoire/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ncborcherding/scRepertoire/actions/workflows/R-CMD-check.yaml)
[![Codecov test coverage](https://codecov.io/gh/ncborcherding/scRepertoire/branch/master/graph/badge.svg)](https://app.codecov.io/gh/ncborcherding/scRepertoire?branch=master)
[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://www.borch.dev/uploads/vignette/vignette)
[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://www.borch.dev/uploads/screpertoire/)
<!-- badges: end -->

## A toolkit for single-cell immune profiling
Expand All @@ -28,10 +28,12 @@ scRepertoire has a comprehensive [website](https://www.borch.dev/uploads/screper
devtools::install_github("ncborcherding/scRepertoire")
```

### Most up-to-date version
### Installing from Bioconductor
The current version of scRepertoire is also available in the development version of Bioconductor. Important to note, the version is listed as 1.99.0 on [Bioconductor](https://bioconductor.org/packages/3.19/bioc/html/scRepertoire.html) per their version guidelines.

```R
devtools::install_github("ncborcherding/scRepertoire@dev")
BiocManager::install(version='devel')
BiocManager::install("scRepertoire")
```

### Legacy Version 1
Expand Down
3 changes: 2 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ navbar:
href: articles/Attaching_SC.html
- text: Visualizations for Single-Cell Objects
href: articles/SC_Visualizations.html
- text: Clonal Bias
- text: Quantifying Clonal Bias
href: articles/Clonal_Bias.html
- text: '-------'
- text: Combining Deep Learning and TCRs with Trex
Expand Down Expand Up @@ -88,6 +88,7 @@ reference:
desc: Functions to summarize clonal sequences across the repertoire.
- contents:
- percentAA
- positionalEntropy
- percentGenes
- percentKmer
- percentVJ
Expand Down
7 changes: 5 additions & 2 deletions index.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ scRepertoire is compatible and integrated with the R packages [Trex](https://git
devtools::install_github("ncborcherding/scRepertoire")
```

#### Most up-to-date version
### Installing from Bioconductor
The current version of scRepertoire is also available in the development version of Bioconductor. Important to note, the version is listed as 1.99.0 on [Bioconductor](https://bioconductor.org/packages/3.19/bioc/html/scRepertoire.html) per their version guidelines.

```
devtools::install_github("ncborcherding/scRepertoire@dev")
BiocManager::install(version='devel')
BiocManager::install("scRepertoire")
```

#### Legacy Version 1
Expand Down
2 changes: 1 addition & 1 deletion inst/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ articles:
Repertoire_Summary: Repertoire_Summary.html
SC_Visualizations: SC_Visualizations.html
Trex: Trex.html
last_built: 2024-01-10T16:45Z
last_built: 2024-01-22T10:54Z
urls:
reference: https://www.borch.dev/uploads/scRepertoire/reference
article: https://www.borch.dev/uploads/scRepertoire/articles
Expand Down
3 changes: 2 additions & 1 deletion man/clonalCluster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/clonalRarefaction.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e55da4c

Please sign in to comment.