Skip to content

Commit da4718b

Browse files
committed
FEAT: not using hashing for small enough maps (reduces memory usage)
related to: 7091c94
1 parent dad1e5f commit da4718b

File tree

4 files changed

+111
-25
lines changed

4 files changed

+111
-25
lines changed

src/core/n-sets.c

+2-5
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,8 @@ enum {
9090
if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);
9191
retser = BUF_EMIT; // use preallocated shared block
9292
Resize_Series(retser, i);
93-
hret = Make_Hash_Array(i); // allocated
94-
95-
// Optimization note: !!
96-
// This code could be optimized for small blocks by not hashing them
97-
// and extending Find_Key to do a FIND on the value itself w/o the hash.
93+
// don't hash small blocks...
94+
hret = (i <= MIN_DICT) ? NULL : Make_Hash_Array(i); // allocated
9895

9996
do {
10097
// Check what is in first series/map but not in second series/map:

src/core/t-block.c

+49
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,55 @@ static void No_Nones_Or_Logic(REBVAL *arg) {
198198
}
199199
}
200200

201+
/***********************************************************************
**
*/	REBCNT Find_Block_Key(REBSER* series, REBVAL* key, REBCNT skip, REBOOL cased)
/*
**		Try to find the key value in the block using a plain linear
**		scan (no hash table is consulted).
**
**		series - block to search; records are visited from its head
**		         up to series->tail.
**		key    - value to look for.
**		skip   - distance between two consecutive keys (record width).
**		         A skip of zero is treated as one, because a zero step
**		         would never advance and the scan would not terminate.
**		cased  - when FALSE, words are also matched by their canonical
**		         symbol; it is also forwarded to the string/value
**		         comparison helpers (presumably selecting case
**		         sensitivity there - confirm against those helpers).
**
**		Matching rules:
**		  * any-word key: matches any any-word value (exact word type
**		    is not compared) with the same symbol, or, when not cased,
**		    the same canonical symbol.
**		  * any-string/binary key: value must have the same datatype
**		    and Compare_String_Vals() must return zero.
**		  * anything else: value must have the same datatype and
**		    Cmp_Value() must return zero.
**
**		RETURNS: the index to the KEY or NOT_FOUND if there is none.
**
***********************************************************************/
{
	REBCNT n;
	REBVAL* val = BLK_HEAD(series);

	// Guard against a zero step (see header comment).
	if (skip == 0) skip = 1;

	if (ANY_WORD(key)) {
		for (n = 0; n < series->tail; n += skip, val += skip) {
			if (
				ANY_WORD(val) && (
					VAL_WORD_SYM(key) == VAL_BIND_SYM(val) ||
					(!cased && VAL_WORD_CANON(key) == VAL_BIND_CANON(val))
				)
			) {
				return n;
			}
		}
	}
	else if (ANY_BINSTR(key)) {
		// The third argument is only set for non-binary keys in a
		// non-cased search; binaries never get it.
		for (n = 0; n < series->tail; n += skip, val += skip) {
			if (
				VAL_TYPE(val) == VAL_TYPE(key)
				&& 0 == Compare_String_Vals(key, val, (REBOOL)(!IS_BINARY(key) && !cased))
			) {
				return n;
			}
		}
	}
	else {
		for (n = 0; n < series->tail; n += skip, val += skip) {
			if (VAL_TYPE(val) == VAL_TYPE(key) && 0 == Cmp_Value(key, val, cased)) {
				return n;
			}
		}
	}
	return NOT_FOUND;
}
249+
201250

202251
/***********************************************************************
203252
**

src/core/t-map.c

+59-20
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@
9393
REBSER *blk = Make_Block(size*2);
9494
REBSER *ser = 0;
9595

96-
ser = Make_Hash_Array(size);
96+
// Use hashing only when there are more than MIN_DICT keys.
97+
if (size > MIN_DICT) ser = Make_Hash_Array(size);
9798

9899
blk->series = ser;
99100

@@ -124,6 +125,19 @@
124125
REBCNT n;
125126
REBVAL *val;
126127

128+
if (!hser) {
129+
// If there are no hashes for the keys, use plain linear search...
130+
hash = Find_Block_Key(series, key, wide, cased);
131+
if (hash == NOT_FOUND) {
132+
if (mode > 1) {
133+
// Append new value to the target series:
134+
Append_Series(series, (REBYTE*)key, wide);
135+
}
136+
return -1;
137+
}
138+
return hash;
139+
}
140+
127141
// Compute hash for value:
128142
len = hser->tail;
129143
hash = Hash_Value(key, len);
@@ -214,18 +228,43 @@
214228
** and val is SET, create the entry and store the key and
215229
** val.
216230
**
217-
** RETURNS: the index to the VALUE or zero if there is none.
231+
** RETURNS: the index to the VALUE or NOT_FOUND if there is none.
218232
**
219233
***********************************************************************/
220234
{
221235
REBSER *hser = series->series; // can be null
222-
REBCNT *hashes;
236+
REBCNT *hashes = NULL;
223237
REBCNT hash;
224238
REBCNT n;
225239
REBVAL *set;
226240

227-
if (IS_NONE(key) || hser == NULL) return 0;
241+
if (IS_NONE(key)) return NOT_FOUND;
242+
243+
// We may not be large enough yet for the hash table to
244+
// be worthwhile, so just do a linear search:
245+
if (!hser) {
246+
if (series->tail <= MIN_DICT * 2) {
247+
hash = Find_Block_Key(series, key, 2, cased);
248+
if (hash != NOT_FOUND) {
249+
hash++; // position of the value
250+
// Key already exists so update the value, if needed...
251+
if (val) {
252+
set = BLK_SKIP(series, hash);
253+
*set = *val;
254+
}
255+
// Return
256+
return hash;
257+
}
258+
if (!val) return NOT_FOUND;
259+
hash /= 2;
260+
goto new_entry;
261+
}
228262

263+
// Add hash table:
264+
//Print("hash added %d", series->tail);
265+
series->series = hser = Make_Hash_Array(series->tail);
266+
Rehash_Hash(series);
267+
}
229268
// Get hash table, expand it if needed:
230269
if (series->tail > hser->tail/2) {
231270
Expand_Hash(hser); // modifies size value
@@ -237,16 +276,17 @@
237276
n = hashes[hash];
238277

239278
// Just a GET of value:
240-
if (!val) return n;
279+
if (!val) return ((n-1)*2)+1;
241280

242281
// Must set the value:
243282
if (n) { // re-set it:
244-
set = BLK_SKIP(series, ((n-1)*2)); // find the key
245-
VAL_CLR_OPT(set++, OPTS_HIDE); // clear HIDE flag in case it was removed key; change to value position
246-
*set = *val; // set the value
247-
return n;
283+
n = (n-1)*2; // index of the key
284+
set = BLK_SKIP(series, n); // find the key
285+
VAL_CLR_OPT(set++, OPTS_HIDE); // clear HIDE flag in case it was removed key; change to value position
286+
*set = *val; // set the value
287+
return n+1; // index of the value
248288
}
249-
289+
new_entry:
250290
// Create new entry:
251291
#ifndef DO_NOT_NORMALIZE_MAP_KEYS
252292
// append key
@@ -275,8 +315,8 @@
275315
#endif
276316
// append value
277317
Append_Val(series, val); // no Copy_Series_Value(val) on strings
278-
279-
return (hashes[hash] = series->tail/2);
318+
if (hashes) hashes[hash] = series->tail / 2; // Hash index is not a real index position of the value!
319+
return series->tail; // Index of the new value.
280320
}
281321

282322

@@ -320,9 +360,9 @@
320360

321361
n = Find_Entry(VAL_SERIES(data), pvs->select, val, FALSE);
322362

323-
if (!n) return PE_NONE;
363+
if (n == NOT_FOUND) return PE_NONE;
324364

325-
pvs->value = VAL_BLK_SKIP(data, ((n-1)*2)+1);
365+
pvs->value = VAL_BLK_SKIP(data, n);
326366
return PE_OK;
327367
}
328368

@@ -507,8 +547,8 @@
507547
case A_SELECT:
508548
case A_FIND:
509549
n = Find_Entry(series, arg, 0, Find_Refines(ds, AM_SELECT_CASE) ? AM_FIND_CASE : 0);
510-
if (!n) return R_NONE;
511-
*D_RET = *VAL_BLK_SKIP(val, ((n-1)*2)+((action == A_FIND)?0:1));
550+
if (n == NOT_FOUND) return R_NONE;
551+
*D_RET = *VAL_BLK_SKIP(val, n - ((action == A_FIND)?1:0)); // `find` returns the key
512552
break;
513553

514554
case A_INSERT:
@@ -534,10 +574,9 @@
534574
case A_REMOVE:
535575
//O: throw an error if /part is used?
536576
n = Find_Entry(series, D_ARG(ARG_REMOVE_KEY_ARG), 0, TRUE);
537-
if (n) {
538-
n = (n-1)*2;
539-
VAL_SET_OPT(VAL_BLK_SKIP(val, n), OPTS_HIDE);
540-
VAL_SET(VAL_BLK_SKIP(val, n+1), REB_NONE); // set value to none (so the old one may be GCed)
577+
if (n != NOT_FOUND) {
578+
VAL_SET_OPT(VAL_BLK_SKIP(val, n-1), OPTS_HIDE); // hide the key
579+
VAL_SET(VAL_BLK_SKIP(val, n), REB_NONE); // set value to none (so the old one may be GCed)
541580
}
542581
return R_ARG1;
543582

src/include/reb-config.h

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ These are now obsolete (as of A107) and should be removed:
7777
#include REBOL_OPTIONS_FILE
7878
#endif
7979

80+
#define MIN_DICT 16 // Maximum number of keys in a map without hashing
8081

8182
//* Common *************************************************************
8283

0 commit comments

Comments
 (0)