Skip to content

Commit 7202e9c

Browse files
David DyjackDavid Dyjack
David Dyjack
authored and
David Dyjack
committed
added hpb and bsf functions
1 parent b45f461 commit 7202e9c

File tree

8 files changed

+150
-1
lines changed

8 files changed

+150
-1
lines changed

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(hpb)
34
export(hpe)
5+
importFrom(stats,dist)
46
importFrom(stats,median)
57
importFrom(stats,quantile)

R/.bsf.R.swp

12 KB
Binary file not shown.

R/.hpb.R.swp

12 KB
Binary file not shown.

R/bsf.R

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#' Bootstrap function (bsf) for H+ Estimate
2+
#'
3+
#' Estimates H+ for a random subset of the supplied data
4+
#' to be replicated within the main hpb() function
5+
#' Samples the observations (rows) of d and
6+
#' the entries of l to generate a single bootstrap estimate.
7+
#'
8+
#' @param d (data, nxm)
9+
#' @param l (identities, nx1)
10+
#' @param r (numeric, 1x1)
11+
#' @param q (list, pre-computed by hpb)
12+
#' @return One iteration of H+ bootstrap estimate
13+
#' @importFrom stats dist
14+
#' @keywords internal
15+
16+
bsf <- function(d,l,s,q){
17+
#is this a general solution to sampling from classes in a balanced way?
18+
#q <- table(l)/length(l)
19+
#p <- as.vector(sapply(names(q), function(x) sample(which(l==x),round(s/length(q)))))
20+
p <- as.vector(sapply(q, function(x) sample(x,round(s/length(q)))))
21+
lab <- l[p]
22+
dis <- as.matrix(dist(d[p,]))
23+
dis <- dis[upper.tri(dis)]
24+
ind <- sapply(lab, function(x) x==lab)
25+
ind <- ind[upper.tri(ind)]
26+
iw <- which(ind)
27+
ib <- which(!ind)
28+
dw <- dis[iw]
29+
db <- dis[ib]
30+
sp <- sum(sapply(dw, function(x) sum(x>db)))
31+
hp <- sp / (as.numeric(length(dw))*as.numeric(length(db)))
32+
return(hp)
33+
34+
}

R/hpb.R

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#' @title H+ discordance estimation with bootstrapping
2+
#'
3+
#' @description Estimates the H+ discordance metric
4+
#' given data Dat and a label vector L.
5+
#' Bootstrapping is performed over r iterations with s points sampled per iterate.
6+
#'
7+
#' @param D numeric matrix or data frame with observations in rows (nxm)
8+
#' @param L numeric vector containing a set of length n
9+
#' @param r numeric number of bootstrap iterations
10+
#' @param s numeric pre-bootstrap sample size
11+
#'
12+
#' @return list, h is the estimated H+ value.
13+
#' @return gamma1 and gamma2 are plausible ranges for what % of A (or Dw)
14+
#' @return are strictly greater than B (or Db)
15+
#' @export
16+
#'
17+
#' @importFrom stats median quantile
18+
#'
19+
#' @examples
20+
#' a <- sapply(1:500, function(i) rnorm(n=50, mean=0))
21+
#' b <- sapply(1:500, function(i) rnorm(n=50, mean=0))
22+
#' x <- t(cbind(a,b))
23+
#' l <- c(rep(0,500), rep(1,500))
24+
#' h <- hpb(D=x, L=l, r=30,s=50)
25+
#'
26+
hpb <- function(D, L, r=30, s) {
27+
dlflg <- missing(D) | missing(L)
28+
if(dlflg){
29+
stop("please provide both D and L")
30+
}
31+
tyflg <- !any(class(D) %in% c('matrix','data.frame')) & is.numeric(D)
32+
if(tyflg){
33+
stop("please ensure D is a numeric matrix or data.frame")
34+
}
35+
dmflg <- ! nrow(D) == length(L)
36+
if(dmflg){
37+
stop("Dimension mismatch for D and L")
38+
}
39+
srflg <- ! class(r) %in% c('numeric','integer') & class(s) %in% c('numeric','integer')
40+
if(srflg){
41+
stop("please ensure r and s are numeric or integer")
42+
}
43+
snflg <- s >= nrow(D)
44+
if(snflg){
45+
stop("s is too large, try ~1-5% of observations")
46+
}
47+
lbtab <- table(L)/length(L) #pre-compute this table / list
48+
lblis <- lapply(names(lbtab), function(x) which(L==x))
49+
hpb <- mean(replicate(r,bsf(d=D,l=L,s=s,q=lblis),T))
50+
return(hpb)
51+
}

R/hpe.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ hpe <- function(A, B, D, L, p = 101, alg = "brute_force",alphas=F) {
3939
if (abflg & dlflg) {
4040
stop("please provide either (A and B) or (D and L)")
4141
} else if (!abflg) {
42-
nmflg <- (!is.numeric(A) & !is.numeric(A))
42+
nmflg <- (!is.numeric(A) & !is.numeric(B))
4343
if (nmflg) {
4444
stop("please ensure A B are numeric")
4545
} else {

man/bsf.Rd

+25
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/hpb.Rd

+37
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)