-
-
Notifications
You must be signed in to change notification settings - Fork 784
/
Copy pathsearch.js
188 lines (156 loc) · 5.11 KB
/
search.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import computeScore from './computeScore'
import convertMaskToIndices from './convertMaskToIndices'
import Config from '../../core/config'
import { MAX_BITS } from './constants'
import * as ErrorMsg from '../../core/errorMessages'
export default function search(
text,
pattern,
patternAlphabet,
{
location = Config.location,
distance = Config.distance,
threshold = Config.threshold,
findAllMatches = Config.findAllMatches,
minMatchCharLength = Config.minMatchCharLength,
includeMatches = Config.includeMatches,
ignoreLocation = Config.ignoreLocation
} = {}
) {
if (pattern.length > MAX_BITS) {
throw new Error(ErrorMsg.PATTERN_LENGTH_TOO_LARGE(MAX_BITS))
}
const patternLen = pattern.length
// Set starting location at beginning text and initialize the alphabet.
const textLen = text.length
// Handle the case when location > text.length
const expectedLocation = Math.max(0, Math.min(location, textLen))
// Highest score beyond which we give up.
let currentThreshold = threshold
// Is there a nearby exact match? (speedup)
let bestLocation = expectedLocation
// Performance: only computer matches when the minMatchCharLength > 1
// OR if `includeMatches` is true.
const computeMatches = minMatchCharLength > 1 || includeMatches
// A mask of the matches, used for building the indices
const matchMask = computeMatches ? Array(textLen) : []
let index
// Get all exact matches, here for speed up
while ((index = text.indexOf(pattern, bestLocation)) > -1) {
let score = computeScore(pattern, {
currentLocation: index,
expectedLocation,
distance,
ignoreLocation
})
currentThreshold = Math.min(score, currentThreshold)
bestLocation = index + patternLen
if (computeMatches) {
let i = 0
while (i < patternLen) {
matchMask[index + i] = 1
i += 1
}
}
}
// Reset the best location
bestLocation = -1
let lastBitArr = []
let finalScore = 1
let binMax = patternLen + textLen
const mask = 1 << (patternLen - 1)
for (let i = 0; i < patternLen; i += 1) {
// Scan for the best match; each iteration allows for one more error.
// Run a binary search to determine how far from the match location we can stray
// at this error level.
let binMin = 0
let binMid = binMax
while (binMin < binMid) {
const score = computeScore(pattern, {
errors: i,
currentLocation: expectedLocation + binMid,
expectedLocation,
distance,
ignoreLocation
})
if (score <= currentThreshold) {
binMin = binMid
} else {
binMax = binMid
}
binMid = Math.floor((binMax - binMin) / 2 + binMin)
}
// Use the result from this iteration as the maximum for the next.
binMax = binMid
let start = Math.max(1, expectedLocation - binMid + 1)
let finish = findAllMatches
? textLen
: Math.min(expectedLocation + binMid, textLen) + patternLen
// Initialize the bit array
let bitArr = Array(finish + 2)
bitArr[finish + 1] = (1 << i) - 1
for (let j = finish; j >= start; j -= 1) {
let currentLocation = j - 1
let charMatch = patternAlphabet[text.charAt(currentLocation)]
if (computeMatches) {
// Speed up: quick bool to int conversion (i.e, `charMatch ? 1 : 0`)
matchMask[currentLocation] = +!!charMatch
}
// First pass: exact match
bitArr[j] = ((bitArr[j + 1] << 1) | 1) & charMatch
// Subsequent passes: fuzzy match
if (i) {
bitArr[j] |=
((lastBitArr[j + 1] | lastBitArr[j]) << 1) | 1 | lastBitArr[j + 1]
}
if (bitArr[j] & mask) {
finalScore = computeScore(pattern, {
errors: i,
currentLocation,
expectedLocation,
distance,
ignoreLocation
})
// This match will almost certainly be better than any existing match.
// But check anyway.
if (finalScore <= currentThreshold) {
// Indeed it is
currentThreshold = finalScore
bestLocation = currentLocation
// Already passed `loc`, downhill from here on in.
if (bestLocation <= expectedLocation) {
break
}
// When passing `bestLocation`, don't exceed our current distance from `expectedLocation`.
start = Math.max(1, 2 * expectedLocation - bestLocation)
}
}
}
// No hope for a (better) match at greater error levels.
const score = computeScore(pattern, {
errors: i + 1,
currentLocation: expectedLocation,
expectedLocation,
distance,
ignoreLocation
})
if (score > currentThreshold) {
break
}
lastBitArr = bitArr
}
const result = {
isMatch: bestLocation >= 0,
// Count exact matches (those with a score of 0) to be "almost" exact
score: Math.max(0.001, finalScore)
}
if (computeMatches) {
const indices = convertMaskToIndices(matchMask, minMatchCharLength)
if (!indices.length) {
result.isMatch = false
} else if (includeMatches) {
result.indices = indices
}
}
return result
}