Skip to content

Commit 910875f

Browse files
committed
Buffered parsing and hashing of the key in a single pass, and modify the hash function slightly
1 parent 56cf545 commit 910875f

4 files changed

+32
-61
lines changed

hash-table/PACKED_WEIGHTED_HASH_TABLE.sml

+1-7
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,7 @@ sig
1414
val size: table -> int
1515
val capacity: table -> int
1616

17-
val insertCombineWeights: table -> K.t * W.t -> unit
18-
val insertCombineWeightsLimitProbes: {probes: int}
19-
-> table
20-
-> K.t * W.t
21-
-> unit
22-
23-
val forceInsertUnique: table -> K.t * W.t -> unit
17+
val insertCombineWeightsKnownHash: table -> K.t * W.t -> {hash: int} -> unit
2418

2519
(* not safe for concurrency with insertions *)
2620
val lookup: table -> K.t -> W.t option

hash-table/PackedWeightedHashTable.sml

+4-45
Original file line number | Diff line number | Diff line change
@@ -70,12 +70,11 @@ struct
7070
MLton.eq (old, Concurrency.casArray (arr, i) (old, new))
7171

7272

73-
fun insertCombineWeightsLimitProbes {probes = tolerance}
74-
(input as T {keys, packedWeights}) (x, v) =
73+
fun insertCombineWeightsKnownHash (input as T {keys, packedWeights}) (x, v)
74+
{hash = hashval} =
7575
let
7676
val n = Array.length keys
77-
(* val _ = print
78-
("insertCombineWeightsLimitProbes capacity=" ^ Int.toString n ^ "\n") *)
77+
val tolerance = n
7978

8079
val j = shard ()
8180

@@ -91,8 +90,6 @@ struct
9190
loop 0 probes
9291
else
9392
let
94-
(* val _ = print
95-
("insertCombineWeightsLimitProbes.loop " ^ Int.toString i ^ "\n") *)
9693
val k = Array.sub (keys, i)
9794
in
9895
if K.equal (k, K.empty) then
@@ -103,50 +100,12 @@ struct
103100
loop (i + 1) (probes + 1)
104101
end
105102

106-
val start = (K.hash x) mod (Array.length keys)
103+
val start = hashval mod (Array.length keys)
107104
in
108105
loop start 0
109106
end
110107

111108

112-
fun insertCombineWeights table (x, v) =
113-
insertCombineWeightsLimitProbes {probes = capacity table} table (x, v)
114-
115-
116-
fun forceInsertUnique (T {keys, packedWeights}) (x, v) =
117-
let
118-
val n = Array.length keys
119-
val start = (K.hash x) mod n
120-
121-
val j = shard ()
122-
123-
fun claimSlotAt i = bcas (keys, i, K.empty, x)
124-
125-
fun putValueAt i =
126-
W.unpack_into (v, locationOfPack n packedWeights i j)
127-
128-
fun loop i =
129-
if i >= n then
130-
loop 0
131-
else
132-
let
133-
val k = Array.sub (keys, i)
134-
in
135-
if K.equal (k, K.empty) then
136-
if claimSlotAt i then putValueAt i else loop i
137-
else if K.equal (k, x) then
138-
raise DuplicateKey
139-
else
140-
loopNext (i + 1)
141-
end
142-
143-
and loopNext i =
144-
if i = start then raise Full else loop i
145-
in
146-
loop start
147-
end
148-
149-
150109
fun lookup (T {keys, packedWeights, ...}) x =
151110
let
152111
val n = Array.length keys

main.sml

+23-5
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,11 @@ struct
182182
* Define the hash table type. We identify entries by their starting index.
183183
*)
184184

185-
structure Key: KEY =
185+
structure Key:
186+
sig
187+
include KEY
188+
val parseAndHashBuffered: buffer -> int -> buffer * int * int
189+
end =
186190
struct
187191
type t = index (* int *)
188192

@@ -220,7 +224,7 @@ struct
220224
if cursor >= definitely_stop orelse x = semicolon_id then
221225
acc
222226
else
223-
loop (LargeWord.orb (LargeWord.<< (acc, 0w8), Word8.toLarge x))
227+
loop (LargeWord.xorb (LargeWord.<< (acc, 0w3), Word8.toLarge x))
224228
(cursor + 1)
225229
end
226230

@@ -229,6 +233,21 @@ struct
229233
in
230234
Word64.toIntX result
231235
end
236+
237+
238+
fun parseAndHashBuffered buffer start =
239+
let
240+
val (b, p, h) = bufferLoop buffer
241+
{ start = start
242+
, continue = fn byte => byte <> semicolon_id
243+
, z = 0w0
244+
, func = fn (acc, byte) =>
245+
LargeWord.xorb (LargeWord.<< (acc, 0w3), Word8.toLarge byte)
246+
}
247+
in
248+
(b, p, Word64.toIntX h)
249+
end
250+
232251
end
233252

234253

@@ -332,14 +351,13 @@ struct
332351
else
333352
let
334353
val start = cursor
335-
val (buffer, cursor) = findNextBuffered buffer semicolon_id cursor
336-
val cursor = valOf cursor
354+
val (buffer, cursor, h) = Key.parseAndHashBuffered buffer cursor
337355
val cursor = cursor + 1 (* get past the ";" *)
338356
val (buffer, cursor, m) = parseMeasurement buffer cursor
339357
val cursor = cursor + 1 (* get past the newline character *)
340358
val weight = {min = m, max = m, tot = m, count = 1}
341359
in
342-
T.insertCombineWeights table (start, weight);
360+
T.insertCombineWeightsKnownHash table (start, weight) {hash = h};
343361
loop buffer cursor stop
344362
end
345363

Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
$ hyperfine --warmup 1 './main @mpl procs 144 set-affinity -- /usr3/data/1brc/measurements.txt --unsafe-no-bounds-checks'
22
Benchmark 1: ./main @mpl procs 144 set-affinity -- /usr3/data/1brc/measurements.txt --unsafe-no-bounds-checks
3-
Time (mean ± σ): 2.408 s ± 0.011 s [User: 334.217 s, System: 2.892 s]
4-
Range (min … max): 2.390 s … 2.422 s 10 runs
3+
Time (mean ± σ): 2.342 s ± 0.020 s [User: 324.751 s, System: 2.546 s]
4+
Range (min … max): 2.317 s … 2.380 s 10 runs
55

66

77
$ hyperfine --warmup 1 './main @mpl procs 144 set-affinity -- /usr3/data/1brc/measurements.txt'
88
Benchmark 1: ./main @mpl procs 144 set-affinity -- /usr3/data/1brc/measurements.txt
9-
Time (mean ± σ): 2.655 s ± 0.057 s [User: 370.180 s, System: 2.765 s]
10-
Range (min … max): 2.599 s … 2.772 s 10 runs
9+
Time (mean ± σ): 2.443 s ± 0.018 s [User: 339.081 s, System: 2.687 s]
10+
Range (min … max): 2.417 s … 2.475 s 10 runs

0 commit comments

Comments
 (0)