Skip to content

Commit 0111789

Browse files
committed
WIP: new hashing
1 parent 9abea93 commit 0111789

File tree

3 files changed

+83
-70
lines changed

3 files changed

+83
-70
lines changed

src/core/n-hash.c

+55-64
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,18 @@
4444
FORCE_INLINE REBCNT getblock32(const REBCNT* p, int i) {
4545
return p[i];
4646
}
47+
/*-----------------------------------------------------------------------------
48+
// Block mix - mix the key block, combine with hash block, mix the hash block,
49+
// repeat. */
50+
51+
FORCE_INLINE void bmix(REBCNT* h1, REBCNT* k1)
52+
{
53+
*k1 *= MURMUR_HASH_3_X86_32_C1;
54+
*k1 = ROTL32(*k1, 15);
55+
*k1 *= MURMUR_HASH_3_X86_32_C2;
56+
*h1 ^= *k1;
57+
*h1 = ROTL32(*h1, 13); *h1 = *h1 * 5 + 0xe6546b64;
58+
}
4759
//-----------------------------------------------------------------------------
4860
// Finalization mix - force all bits of a hash block to avalanche
4961
FORCE_INLINE REBCNT fmix32(REBCNT h) {
@@ -69,14 +81,7 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
6981
for (int i = -nblocks; i; i++)
7082
{
7183
REBCNT k1 = getblock32(blocks, i);
72-
73-
k1 *= MURMUR_HASH_3_X86_32_C1;
74-
k1 = ROTL32(k1, 15);
75-
k1 *= MURMUR_HASH_3_X86_32_C2;
76-
77-
h1 ^= k1;
78-
h1 = ROTL32(h1, 13);
79-
h1 = h1 * 5 + 0xe6546b64;
84+
bmix(&h1, &k1);
8085
}
8186

8287
//----------
@@ -90,7 +95,7 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
9095
case 2: k1 ^= tail[1] << 8;
9196
case 1: k1 ^= tail[0];
9297
k1 *= MURMUR_HASH_3_X86_32_C1;
93-
k1 = ROTL32(k1, 15);
98+
k1 = ROTL32(k1, 16);
9499
k1 *= MURMUR_HASH_3_X86_32_C2;
95100
h1 ^= k1;
96101
};
@@ -101,8 +106,6 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
101106
h1 = fmix32(h1);
102107

103108
return h1;
104-
105-
//*(REBCNT*)out = h1;
106109
}
107110

108111

@@ -120,9 +123,10 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
120123
return R_RET;
121124
}
122125

126+
FORCE_INLINE
123127
/***********************************************************************
124128
**
125-
*/ FORCE_INLINE REBCNT Hash_Probe(REBCNT hash, REBCNT idx, REBCNT len)
129+
*/ REBCNT Hash_Probe(REBCNT hash, REBCNT idx, REBCNT len)
126130
/*
127131
** Calculates the index based on a quadratic probing approach.
128132
**
@@ -147,7 +151,7 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
147151
if (ANY_WORD(val))
148152
return VAL_WORD_CANON(val); //plain value seems to be faster than: fmix32(VAL_WORD_CANON(val));
149153
if (ANY_STR(val))
150-
return CRC_String(VAL_BIN_DATA(val), Val_Byte_Len(val)) ^ VAL_TYPE(val);
154+
return Hash_String_Value(val) ^ VAL_TYPE(val);
151155
if (ANY_BLOCK(val))
152156
return Hash_Block_Value(val);
153157

@@ -203,26 +207,15 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
203207
***********************************************************************/
204208
{
205209
REBCNT k1, h1 = 0;
206-
REBCNT n;
207210
REBVAL *data = VAL_BLK_DATA(block);
208211
REBLEN len = VAL_LEN(block);
209212

210213
k1 = VAL_TYPE(block);
211-
k1 *= MURMUR_HASH_3_X86_32_C1;
212-
k1 = ROTL32(k1, 15);
213-
k1 *= MURMUR_HASH_3_X86_32_C2;
214-
h1 ^= k1;
215-
h1 = ROTL32(h1, 13);
216-
h1 = h1 * 5 + 0xe6546b64;
214+
bmix(&h1, &k1);
217215

218216
while (NZ(VAL_TYPE(data))) {
219217
k1 = Hash_Value(data++);
220-
k1 *= MURMUR_HASH_3_X86_32_C1;
221-
k1 = ROTL32(k1, 15);
222-
k1 *= MURMUR_HASH_3_X86_32_C2;
223-
h1 ^= k1;
224-
h1 = ROTL32(h1, 13);
225-
h1 = h1 * 5 + 0xe6546b64;
218+
bmix(&h1, &k1);
226219
}
227220
h1 ^= len;
228221
h1 = fmix32(h1);
@@ -232,49 +225,53 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
232225

233226
/***********************************************************************
234227
**
235-
*/ FORCE_INLINE REBCNT Hash_Binary(const REBYTE* bin, REBCNT len)
228+
*/ REBCNT Hash_String_Value(REBVAL* val)
236229
/*
237-
** Return a case sensitive hash value for the binary.
230+
** Return a case insensitive hash value for the string value.
238231
**
239232
***********************************************************************/
240233
{
241-
return MurmurHash3_x86_32(bin, len, 0);
242-
//return XXH64(bin, len, 0);
243-
234+
REBYTE* bin;
235+
REBUNI* uni;
236+
REBLEN len;
237+
REBCNT n, ch;
238+
REBCNT hash = 0;
239+
240+
if (BYTE_SIZE(VAL_SERIES(val))) {
241+
bin = VAL_BIN_DATA(val);
242+
len = VAL_TAIL(val) - VAL_INDEX(val);
243+
for (n = 0; n < len; n++) {
244+
ch = (REBCNT)LO_CASE(*bin++);
245+
bmix(&hash, &ch);
246+
}
247+
}
248+
else {
249+
uni = VAL_UNI_DATA(val);
250+
len = Val_Series_Len(val);
251+
for (n = 0; n < len; n++) {
252+
ch = (REBCNT)(*uni++);
253+
if (ch < UNICODE_CASES)
254+
ch = (REBCNT)LO_CASE(ch);
255+
bmix(&hash, &ch);
256+
}
257+
}
258+
hash ^= len;
259+
hash = fmix32(hash);
260+
return hash;
244261
}
245262

263+
FORCE_INLINE
246264
/***********************************************************************
247265
**
248-
*/ REBCNT Hash_String(REBVAL* val)
266+
*/ REBCNT Hash_Binary(const REBYTE* bin, REBCNT len)
249267
/*
250-
** Return a case insensitive hash value for the string. The
251-
** string does not have to be zero terminated and UTF8 is ok.
268+
** Return a case sensitive hash value for the binary.
252269
**
253270
***********************************************************************/
254271
{
255-
REBSER* ser = VAL_SERIES(val);
256-
REBCNT pos = VAL_INDEX(val);
257-
REBCNT tail = VAL_TAIL(val);
258-
REBLEN len = VAL_TAIL(val) - pos;
259-
REBUNI ch;
260-
REBCNT hash=0;
261-
262-
for (; pos < tail; pos++) {
263-
ch = GET_ANY_CHAR(ser, pos);
264-
if (ch < UNICODE_CASES)
265-
ch = LO_CASE(ch);
266-
267-
ch *= MURMUR_HASH_3_X86_32_C1;
268-
ch = ROTL32(ch, 15);
269-
ch *= MURMUR_HASH_3_X86_32_C2;
270-
271-
hash ^= ch;
272-
hash = ROTL32(hash, 13);
273-
hash = hash * 5 + 0xe6546b64;
274-
}
275-
hash ^= len;
276-
hash = fmix32(hash);
277-
return hash;
272+
return MurmurHash3_x86_32(bin, len, 0);
273+
//return XXH64(bin, len, 0);
274+
278275
}
279276

280277

@@ -298,13 +295,7 @@ REBCNT MurmurHash3_x86_32(const void* key, int len, REBCNT seed)
298295
if (!ch) Trap0(RE_INVALID_CHARS);
299296
}
300297
if (ch < UNICODE_CASES) ch = LO_CASE(ch);
301-
ch *= MURMUR_HASH_3_X86_32_C1;
302-
ch = ROTL32(ch, 15);
303-
ch *= MURMUR_HASH_3_X86_32_C2;
304-
305-
hash ^= ch;
306-
hash = ROTL32(hash, 13);
307-
hash = hash * 5 + 0xe6546b64;
298+
bmix(&hash, &ch);
308299
}
309300
hash ^= len;
310301
hash = fmix32(hash);

src/core/s-crc.c

+25-6
Original file line number | Diff line number | Diff line change
@@ -150,25 +150,44 @@ static REBCNT *CRC32_Table = 0;
150150
Free_Mem(CRC32_Table, 0);
151151
}
152152

153+
#ifdef unused
/***********************************************************************
**
X*/ REBINT CRC_String(REBVAL *val)
/*
**      Return a case insensitive hash value for the string. The
**      string does not have to be zero terminated and UTF8 is ok.
**
**      Both byte-sized and wide (REBUNI) series are accepted. The
**      hash is seeded with the length plus the lower-cased first
**      character, then each character is folded in through
**      CRC24_Table.
**
**      This definition is compiled only when `unused` is defined.
**
***********************************************************************/
{
    REBYTE n;
    REBINT hash;
    REBYTE* bin;
    REBUNI* uni;
    REBLEN len;
    REBCNT ch;

    if (BYTE_SIZE(VAL_SERIES(val))) {
        bin = VAL_BIN_DATA(val);
        len = Val_Byte_Len(val);
        hash = (REBINT)len + (REBINT)((REBYTE)LO_CASE(*bin));
        for (; len > 0; len--) {
            n = (REBYTE)((hash >> CRCSHIFTS) ^ (REBYTE)LO_CASE(*bin++));
            hash = MASK_CRC(hash << 8) ^ (REBINT)CRC24_Table[n];
        }
    }
    else {
        uni = VAL_UNI_DATA(val);
        len = Val_Series_Len(val);
        // Case-fold only code points below UNICODE_CASES.
        // NOTE(review): assumes LO_CASE is not valid for larger
        // code points - confirm against its definition.
        ch = (REBCNT)(*uni);
        if (ch < UNICODE_CASES)
            ch = (REBCNT)LO_CASE(ch);
        hash = (REBINT)len + (REBINT)((REBYTE)ch);
        for (; len > 0; len--) {
            ch = (REBCNT)(*uni++);
            if (ch < UNICODE_CASES)
                ch = (REBCNT)LO_CASE(ch);
            // Only the low byte of the folded character enters the CRC.
            n = (REBYTE)((hash >> CRCSHIFTS) ^ (REBYTE)ch);
            hash = MASK_CRC(hash << 8) ^ (REBINT)CRC24_Table[n];
        }
    }

    return hash;
}
#endif
172191

173192
/***********************************************************************
174193
**

src/tests/units/series-test.r3

+3
Original file line number | Diff line number | Diff line change
@@ -2521,6 +2521,9 @@ Rebol [
25212521
;; unique block with block values...
25222522
--assert [1 [1] [2]] = unique reduce [1 [1] [2] [1] next [1 1]]
25232523
--assert [1 [1] [2]] = unique reduce [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [1] [2] [1] next [1 1]]
2524+
;@@ https://github.com/Oldes/Rebol-issues/issues/2591
2525+
--assert [1 "a"] = unique reduce [1 1 "a" remove "ša"]
2526+
--assert [1 "a"] = unique reduce [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 "a" remove "ša"]
25242527
--test-- "unique/skip on block"
25252528
--assert [1 2 3 4] = unique/skip b: [1 2 1 2 3 4] 2
25262529
--assert b = [1 2 1 2 3 4]

0 commit comments

Comments
 (0)