-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrank_turbulence_divergence.js
108 lines (89 loc) · 5.06 KB
/
rank_turbulence_divergence.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import { sum } from "d3-array";
import { which } from "./utils_helpers.js";
// This function calculates the divergence between two arrays of inverse ranks
// based on the specified alpha value. If alpha is equal to infinity, it returns
// an array with the maximum of each element in inv_r1 and inv_r2. If alpha is
// equal to 0, it returns an array with the log of the ratio of the maximum and
// minimum of 1/inv_r1 and 1/inv_r2 for each element. Otherwise, it returns an
// array with the absolute value of (alpha + 1)/alpha * (inv_r1^alpha - inv_r2^alpha)^(1/(alpha + 1))
// for each element.
function divElems(inv_r1, inv_r2, alpha) {
if (alpha === Infinity) {
return inv_r1.map((d,i) => inv_r1[i] == inv_r2[i] ? 0 : Math.max(inv_r1[i], inv_r2[i]))
} else if (alpha == 0) {
const x_max = inv_r1.map((d,i) => Math.max(1 / inv_r1[i], 1 / inv_r2[i]))
const x_min = inv_r1.map((d,i) => Math.min(1 / inv_r1[i], 1 / inv_r2[i]))
return inv_r1.map((d,i) => Math.log10(x_max[i] / x_min[i]))
} else {
return inv_r1.map((d,i) => (alpha+1) / alpha * Math.abs(inv_r1[i]**alpha - inv_r2[i]**alpha)**(1. / (alpha+1)))
}
}
// This function calculates the normalization factor for the divergence between
// two arrays of inverse ranks. It first extracts the counts from the mixedelements
// parameter and finds the indices where the counts are greater than 0. It then
// calculates the disjoint set for each array based on the number of non-zero
// counts. If alpha is equal to infinity, it returns the sum of the elements in
// inv_r1 and inv_r2 for the indices with non-zero counts. If alpha is equal to
// 0, it returns the sum of the absolute value of the log of the ratio of each
// element in inv_r1 and the disjoint set for inv_r2, and the absolute value of
// the log of the ratio of each element in inv_r2 and the disjoint set for inv_r1.
// Otherwise, it returns the sum of (alpha + 1)/alpha * (inv_r1^alpha - disjoint set^alpha)^(1/(alpha + 1))
// for each element in inv_r1, and the same for inv_r2.
function norm_divElems(mixedelements, inv_r1, inv_r2, alpha) {
const c1 = mixedelements[0]['counts']
const c2 = mixedelements[1]['counts']
const indices1 = which(c1.map(d => d > 0))
const indices2 = which(c2.map(d => d > 0))
const N1 = indices1.length
const N2 = indices2.length
// This function calculates the disjoint set for a given array of inverse ranks
// based on the number of non-zero counts in the array and the number of non-zero
// counts in the other array. It returns 1/(number of non-zero counts in other array +
// number of non-zero counts in this array/2)
function calc_disjoint(N1, N2) {
return( 1 / (N2 + N1/2) )
}
const inv_r1_disjoint = calc_disjoint(N1, N2)
const inv_r2_disjoint = calc_disjoint(N2, N1)
if (alpha === Infinity) {
return sum(indices1.map((i) => inv_r1[i])) + sum(indices2.map((i) => inv_r2[i]))
} else if (alpha === 0) {
const term1 = sum(
indices1.map((i) => Math.abs(Math.log(inv_r1[i] / inv_r2_disjoint)))
)
const term2 = sum(
indices2.map((i) => Math.abs(Math.log(inv_r2[i] / inv_r1_disjoint)))
)
return term1 + term2
} else {
const term1 = (alpha+1)/alpha * sum(
indices1.map((i) => inv_r1[i]).map( d => (Math.abs(d**alpha) - inv_r2_disjoint**alpha)**(1./(alpha+1) ))
)
const term2 = (alpha+1)/alpha * sum(
indices2.map((i) => inv_r2[i]).map(d => Math.abs(inv_r1_disjoint**alpha - d**alpha)**(1. / (alpha+1)))
)
return term1 + term2
}
}
// This function calculates the rank turbulence divergence for two arrays of mixed
// elements, using the specified alpha value. It first calculates the arrays of
// inverse ranks for each mixed element array and then calculates the divergence
// and normalization factors using the divElems and norm_divElems functions. It
// returns the divergence divided by the normalization.
export default function rank_turbulence_divergence(mixedelements, alpha) {
const inv_r1 = mixedelements[0]['ranks'].map(d => Math.pow(d, -1))
const inv_r2 = mixedelements[1]['ranks'].map(d => Math.pow(d, -1))
const divergence_elements = divElems(inv_r1, inv_r2, alpha)
const normalization = norm_divElems(mixedelements, inv_r1, inv_r2, alpha)
return { // the divergence used to wordshift dat to sort name in wordshift plot
// is equal to the formula in the upperleft of the diamond plot. However
// the formula is a proportionality and miss the normalization constant
// shown here.
// Example: for alpha = infinity, for the rank 1 names on both systems, the formula as written is equal to max(1/1, 1/former_rank) = 1/1 =1
// this value of 1 is then divided by the normalization constant.
// this constant of proportionality is the reason for the difference between the 1/1 that the written formula gives you
// and the decimal value that wordshift_dat states and which is actuallly used to sort the types.
'divergence_elements': divergence_elements.map(d => d / normalization),
'normalization': normalization
}
}