Skip to content

Commit 5a6908b

Browse files
committed
Revert batched hashing of String/ByteArray
In 03b129b, the implementation of String.hash and ByteArray.hash was changed to hash bytes in chunks of 2/4/8 bytes at a time. While this speeds up hashing, it also degrades the quality of the resulting hash, which in turn can significantly increase the number of hash collisions. Refer to #836 (comment) for more details. This reverts commit 03b129b.
1 parent 0769c9f commit 5a6908b

File tree

2 files changed

+6
-30
lines changed

2 files changed

+6
-30
lines changed

std/src/std/bytes.inko

+5-30
Original file line numberDiff line numberDiff line change
@@ -38,38 +38,13 @@ fn pub inline check_slice_range(start: Int, end: Int, size: Int) {
3838

3939
fn hash_bytes[T: Bytes, H: mut + Hasher](bytes: ref T, hasher: mut H) {
4040
let mut len = bytes.size
41-
let mut cur = bytes.pointer
41+
let mut start = bytes.pointer
42+
let mut i = 0
4243

43-
len.hash(hasher)
44+
hasher.write(len)
4445

45-
while len > 8 {
46-
let val = (cur as Pointer[UInt64]).0 as Int
47-
48-
val.hash(hasher)
49-
cur = ptr.add(cur, 8)
50-
len -= 8
51-
}
52-
53-
while len > 4 {
54-
let val = (cur as Pointer[UInt32]).0 as Int
55-
56-
val.hash(hasher)
57-
cur = ptr.add(cur, 4)
58-
len -= 4
59-
}
60-
61-
while len > 2 {
62-
let val = (cur as Pointer[UInt16]).0 as Int
63-
64-
val.hash(hasher)
65-
cur = ptr.add(cur, 2)
66-
len -= 2
67-
}
68-
69-
while len > 0 {
70-
(cur.0 as Int).hash(hasher)
71-
cur = ptr.add(cur, 1)
72-
len -= 1
46+
while i < len {
47+
hasher.write(ptr.add(start, i := i.wrapping_add(1)).0 as Int)
7348
}
7449
}
7550

std/test/std/test_string.inko

+1
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ fn pub tests(t: mut Tests) {
339339
}
340340

341341
t.not_equal(hash('hello'), hash('world'))
342+
t.not_equal(hash(('ab', 'c')), hash(('a', 'bc')))
342343
})
343344

344345
t.test('String.+', fn (t) {

0 commit comments

Comments
 (0)