Skip to content

Commit 2396cc2

Browse files
updating readme; adding doc to hpe
1 parent 94ef26e commit 2396cc2

14 files changed

+535
-237
lines changed

.Rbuildignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
^LICENSE\.md$

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
inst/doc

DESCRIPTION

+16-6
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
11
Package: fasthplus
2-
Title: What the Package Does (One Line, Title Case)
2+
Title: Fast Discordance Estimation Using Percentiles
33
Version: 0.0.0.9000
4-
Authors@R:
4+
Authors@R: c(
55
person(given = "Nathan",
66
family = "Dyjack",
77
role = c("aut", "cre"),
88
email = "ntdyjack@gmail.com",
9-
comment = c(ORCID = "0000-0001-6944-6252"))
10-
Description: Fast diconcordance estimation using percentiles.
11-
License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
12-
license
9+
comment = c(ORCID = "0000-0001-6944-6252")),
10+
person(given = c("Stephanie", "C."),
11+
family = "Hicks",
12+
role = c("aut", "ctb"),
13+
email = "shicks19@jhu.edu",
14+
comment = c(ORCID = "0000-0002-7858-0231")))
15+
Description: This package contains a generalized discordance measure called H+, which extends the G+ measure proposed by Rohlf in 1975.
16+
License: CC BY 4.0
1317
Encoding: UTF-8
1418
LazyData: true
1519
Roxygen: list(markdown = TRUE)
1620
RoxygenNote: 7.1.1
21+
BugReports: https://github.com/ntdyjack/fasthplus/
22+
URL: https://github.com/ntdyjack/fasthplus/
23+
Suggests:
24+
knitr,
25+
rmarkdown
26+
VignetteBuilder: knitr

DESCRIPTION~

-16
This file was deleted.

LICENSE.md

+395
Large diffs are not rendered by default.

R/alg1.R~

-14
This file was deleted.

R/alg2.R~

-25
This file was deleted.

R/hpe.R

+41-35
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,87 @@
1-
#' H+ Estimate
1+
#' @title H+ discordance metric
22
#'
3-
#' Estimates H+ for two sets (vectors) A and B, or
4-
#' a dissimilarity matrix D and a label vector L.
3+
#' @description Estimates the H+ discordance metric
4+
#' for either (1) two sets (vectors) A and B, or
5+
#' (2) a dissimilarity matrix D and a label vector L.
56
#' Approximation is calculated using p+1 percentiles,
67
#' with an accuracy bound of 1/p.
78
#'
8-
#' @param A (numeric vector)
9-
#' @param B (numeric vector)
10-
#' @param D (dist, nxn)
11-
#' @param L (numeric or character vector, nx1)
12-
#' @param p (integer)
13-
#' @param alg (integer), Algorithm 1 or 2
14-
#' @return Estimated H+ (numeric)
9+
#' @param A numeric vector containing a set of length n
10+
#' @param B numeric vector containing a set of length n
11+
#' @param D `dist` object holding the pairwise dissimilarities between n observations
12+
#' @param L numeric or character vector of length n
13+
#' @param p integer representing the number of percentiles
14+
#' @param alg integer (1 or 2) representing the choice of algorithm used to estimate H+ (Algorithm 1 or 2)
15+
#'
16+
#' @return The estimated value of H+ (numeric).
1517
#' @export
18+
#'
1619
#' @examples
1720
#' a <- rnorm(n=500,mean=0)
18-
#' b <- rnorm(n=500,mean=1
19-
#' h <- hpe(a=a,b=b,p=101,alg=1)
21+
#' b <- rnorm(n=500,mean=1)
22+
#' h <- hpe(A=a,B=b,p=101,alg=1)
2023
#'
2124
#' a <- sapply(1:500, function(i) rnorm(n=50,mean=0))
2225
#' b <- sapply(1:500, function(i) rnorm(n=50,mean=0))
2326
#' x <- cbind(a,b)
2427
#' d <- dist(t(x))
2528
#' l <- c(rep(0,500),rep(1,500))
2629
#' h <- hpe(D=d,L=l,p=101,alg=1)
27-
hpe <- function(A,B,D,L,p=101,alg=1){
28-
abflg<- missing(A) & missing(B)
30+
#'
31+
hpe <- function(A, B, D, L, p = 101, alg = 1) {
32+
abflg <- missing(A) & missing(B)
2933
dlflg <- missing(D) & missing(L)
30-
if( abflg & dlflg){
34+
if (abflg & dlflg) {
3135
stop("please provide either A B or D L")
32-
} else if (!abflg){
36+
} else if (!abflg) {
3337
nmflg <- (!is.numeric(A) & !is.numeric(A))
34-
if(nmflg){
38+
if (nmflg) {
3539
stop("please ensure A B are numeric")
3640
} else {
3741
print("Estimating H+ using A B formulation...")
3842
}
39-
} else if(!dlflg){
40-
# tyflg <- !(class(D)=='dist' | (is.matrix(D) & isSymmetric(D) & is.numeric(D)) )
41-
tyflg <- !( class(D)=='dist' )
42-
if(tyflg){
43+
} else if (!dlflg) {
44+
# tyflg <- !(class(D)=='dist' | (is.matrix(D) & isSymmetric(D) & is.numeric(D)) )
45+
tyflg <- !(class(D) == 'dist')
46+
if (tyflg) {
4347
stop("please ensure D is a dist objcect")
4448
}
4549
tyflg <- !(is.numeric(L) | is.character(L))
46-
if(tyflg){
50+
if (tyflg) {
4751
stop("please ensure L is numeric or character vector")
4852
}
4953
D <- as.matrix(D)
50-
dmflg <- !( ncol(D) == nrow(D) & ncol(D) == length(L))
51-
if(dmflg){
54+
dmflg <- !(ncol(D) == nrow(D) & ncol(D) == length(L))
55+
if (dmflg) {
5256
stop("Dimension mismatch for D and L")
5357
} else {
5458
print("Estimating H+ using D L formulation...")
5559
}
56-
ind <- sapply(L, function(x) sapply(L, function(y) x==y))
60+
ind <- sapply(L, function(x)
61+
sapply(L, function(y)
62+
x == y))
5763
ind <- ind[upper.tri(ind)]
5864
iw <- which(ind)
5965
ib <- which(!ind)
6066
D <- D[upper.tri(D)]
6167
A <- D[iw]
6268
B <- D[ib]
6369
}
64-
65-
66-
ps <- seq(0,1,length.out=p)
70+
71+
72+
ps <- seq(0, 1, length.out = p)
6773
qA <- quantile(A, probs = ps)
6874
qB <- quantile(B, probs = ps)
69-
75+
7076
#call hp alg1 or 2
71-
if(alg==1){
72-
he <- alg1(qA,qB,p)
73-
}else if(alg==2){
74-
he <- alg2(qA,qB,p)
75-
}else{
77+
if (alg == 1) {
78+
he <- alg1(qA, qB, p)
79+
} else if (alg == 2) {
80+
he <- alg2(qA, qB, p)
81+
} else{
7682
stop("please specify a vaid algorithm (alg=1 or 2)")
7783
}
78-
84+
7985
#return estimate
8086
return(he)
8187
}

R/hpe.R~

-81
This file was deleted.

README.md

+46-14
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,66 @@
11
# fasthplus
22

3-
fasthplus provides fast approximations for the disconcordance of two sets.
4-
We present h+, a slight improvement modification of g+, a generalized disconcordance measure proposed by [Rohlf,1975] (https://www.annualreviews.org/doi/abs/10.1146/annurev.es.05.110174.000533).
5-
We present discorcodance in the sense of label fitness (clustering) for a generalized dissimilarity matrix or more generally for two arbitrary sets.
3+
fasthplus provides fast approximations for the discordance of two sets.
4+
We present H+, a slightly improved modification of G+, a generalized discordance measure proposed by [Rohlf, 1975](https://www.annualreviews.org/doi/abs/10.1146/annurev.es.05.110174.000533).
5+
We present discordance in the sense of label fitness (clustering) for a generalized dissimilarity matrix or more generally for two arbitrary sets.
66

77

8-
# installation
8+
### Installing fasthplus
99

10-
At present, our package is available only via github installtion using the devtools package.
10+
At present, our package is available only via GitHub installation using the `devtools` package.
1111

12-
```R {cmd=false}
12+
```
1313
library(devtools)
1414
install_github(repo="ntdyjack/fasthplus", ref = "main")
15+
```
16+
17+
After installation, the package can be loaded into R
18+
19+
```
1520
library(fasthplus)
1621
```
1722

18-
# functions
23+
### Using fasthplus
24+
25+
The main function in the **fasthplus** package is `hpe()`.
26+
The `hpe()` function accepts either (1) two sets (`A` and `B`) or
27+
(2) a distance matrix (`D`) and set of labels (`L`).
28+
29+
To provide a fast way to calculate H+, we provide an algorithm that approximates H+ using the number of percentiles (`p`), which is also an argument in the `hpe()` function.
30+
31+
To run the `hpe()` function with two sets (`A` and `B`), approximating H+ with `p` + 1 percentiles:
32+
33+
```
34+
a <- rnorm(n=500,mean=0)
35+
b <- rnorm(n=500,mean=1)
36+
h <- hpe(A=a,B=b,p=101,alg=1)
37+
```
1938

20-
##estimate h+ for a dissimilarity matrix and set of labels
21-
```R {cmd=false}
39+
To run the `hpe()` function with a dissimilarity matrix (`D`) and set of labels (`L`):
40+
41+
```
42+
# Two sets
2243
a <- sapply(1:100, function(i) rnorm(n=50,mean=0.0,sd=1))
2344
b <- sapply(1:100, function(i) rnorm(n=50,mean=0.0,sd=1))
2445
c <- cbind(a,b)
46+
47+
# Create a set of labels
2548
l <- c(rep(0,100),rep(1,100))
49+
50+
# Calculate a distance matrix
2651
d <- dist(t(c))
27-
h <- hp_estm(dis=d,lab=l,p=0.005)
52+
h <- hpe(D=d,L=l,p=0.005)
2853
```
2954

30-
##estimate h+ for two sets
31-
```R {cmd=false}
32-
h <- hp_estm(a=a,b=b)
33-
```
3455

56+
### Issues and bug reports
57+
58+
Please use https://github.com/ntdyjack/fasthplus/issues to submit issues, bug reports, and comments.
59+
60+
### Contributors
61+
62+
* [Nathan Dyjack](https://github.com/ntdyjack)
63+
* [Stephanie Hicks](https://github.com/stephaniehicks)
64+
65+
66+

0 commit comments

Comments
 (0)