From ab8aa49b8d2383adfa9ef86ab4bd4a1a0d43c7fa Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 17 Aug 2021 11:31:15 -0400 Subject: [PATCH 01/16] Fix Benchmark Corruption Display --- programs/benchzstd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/benchzstd.c b/programs/benchzstd.c index 32ce605930e..d77959aac94 100644 --- a/programs/benchzstd.c +++ b/programs/benchzstd.c @@ -523,7 +523,7 @@ BMK_benchMemAdvancedNoAlloc( DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); DISPLAY(" \n"); DISPLAY("decode: "); - for (n=lowest; n>0; n++) + for (n=lowest; n>0; n--) DISPLAY("%02X ", resultBuffer[u-n]); DISPLAY(" :%02X: ", resultBuffer[u]); for (n=1; n<3; n++) From 80bc12b33a039d5bef59d7ca4ce8809feabbfbaa Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 17 Aug 2021 12:42:39 -0400 Subject: [PATCH 02/16] Initial Pipelined Implementation for ZSTD_fast --- lib/compress/zstd_fast.c | 259 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 254 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index b6a1a7e696b..fa214e63d67 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -183,6 +183,255 @@ ZSTD_compressBlock_fast_generic( } +/** + * If you squint hard enough (and ignore repcodes), the search operation at any + * given position is broken into 4 stages: + * + * 1. Hash (map position to hash value via input read) + * 2. Lookup (map hash val to index via hashtable read) + * 3. Load (map index to value at that position via input read) + * 4. Compare + * + * Each of these steps involves a memory read at an address which is computed + * from the previous step. This means these steps must be sequenced and their + * latencies are cumulative. 
+ * + * Rather than do 1->2->3->4 sequentially for a single position before moving + * onto the next, this implementation interleaves these operations across the + * next few positions: + * + * Pos | Time --> + * ----+------------------- + * N | ...4 + * N+1 | ... 3 4 + * N+2 | ... 2 3 4 + * N+3 | 1 2 3 + * N+4 | 1 2 + * N+5 | 1 + * + * This is very much analogous to the pipelining of execution in a CPU. And just + * like a CPU, we have to dump the pipeline when we find a match (i.e., take a + * branch). + * + * When this happens, we throw away our current state, and do the following prep + * to re-enter the loop: + * + * Pos | Time --> + * ----+------------------- + * N | 1 2 3 + * N+1 | 1 2 + * N+2 | 1 + * + * This is also the work we do at the beginning to enter the loop initially. + */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic_pipelined( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + + const BYTE* anchor = istart; + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* ip2; + const BYTE* ip3; + U32 current0; + + U32 rep_offset1 = rep[0]; + U32 rep_offset2 = rep[1]; + U32 offsetSaved = 0; + + size_t hash0; /* hash for ip0 */ + size_t hash1; /* hash for ip1 */ + size_t hash2; /* hash for ip2 */ + size_t hash3; /* hash 
for ip3 */ + U32 idx0; /* match idx for ip0 */ + U32 idx1; /* match idx for ip1 */ + U32 idx2; /* match idx for ip2 */ + U32 mval; /* src value at match idx */ + U32 rval; /* src value at ip2 - repcode */ + + U32 offcode; + const BYTE* match0; + size_t mLength; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic_pipelined"); + ip0 += (ip0 == prefixStart); + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; + if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; + } + + /* start each op */ +_start: /* Requires: ip0 */ + + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ + ip1 = ip0 + stepSize; + ip2 = ip1 + stepSize; + ip3 = ip2 + stepSize; + + if (ip3 >= ilimit) { + goto _cleanup; + } + + hash0 = ZSTD_hashPtr(ip0, hlog, mls); + hash1 = ZSTD_hashPtr(ip1, hlog, mls); + hash2 = ZSTD_hashPtr(ip2, hlog, mls); + + idx0 = hashTable[hash0]; + idx1 = hashTable[hash1]; + + if (idx0 >= prefixStartIndex) { + mval = MEM_read32(base + idx0); + } else { + mval = MEM_read32(ip0) ^ 1; + } + + rval = MEM_read32(ip2 - rep_offset1); + + do { + current0 = ip0 - base; + + // DEBUGLOG(5, "Searching ip0 = %u", (U32)(ip0 - istart)); + + /* write back hash table entry */ + hashTable[hash0] = current0; + + /* check repcode at ip[2] */ + if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) { + ip0 = ip2; + match0 = ip0 - rep_offset1; + mLength = ip0[-1] == match0[-1]; + ip0 -= mLength; + match0 -= mLength; + offcode = 0; + mLength += 4; + goto _match; + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! 
*/ + goto _offset; + } + + /* load next rval */ + rval = MEM_read32(ip3 - rep_offset1); + + /* load match for ip[1] */ + if (idx1 >= prefixStartIndex) { + mval = MEM_read32(base + idx1); + } else { + mval = MEM_read32(ip1) ^ 1; /* guaranteed to not match. */ + } + + /* lookup ip[2] */ + idx2 = hashTable[hash2]; + + /* hash ip[3] */ + hash3 = ZSTD_hashPtr(ip3, hlog, mls); + + /* advance to next positions */ + { + size_t const step = ((size_t)(ip2 - anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 1); + + idx0 = idx1; + idx1 = idx2; + + hash0 = hash1; + hash1 = hash2; + hash2 = hash3; + + ip0 = ip1; + ip1 = ip2; + ip2 = ip3; + ip3 = ip3 + step; + } + } while (ip3 < ilimit); + +_cleanup: + + /* Find matches at end of block. */ + + /* TODO */ + + /* save reps for next block */ + rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; + rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); + +_offset: /* Requires: ip0, idx0 */ + + /* Compute the offset code. */ + match0 = base + idx0; + rep_offset2 = rep_offset1; + rep_offset1 = (U32)(ip0-match0); + offcode = rep_offset1 + ZSTD_REP_MOVE; + mLength = 4; + + /* Count the backwards match length. */ + while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { + ip0--; + match0--; + mLength++; + } + +_match: /* Requires: ip0, match0, offcode */ + + /* Count the forward length. */ + mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend); + + ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength - MINMATCH); + + ip0 += mLength; + anchor = ip0; + + /* write next hash table entry */ + if (ip1 < ip0) { + hashTable[hash1] = ip1 - base; + } + + /* Fill table and check for immediate repcode. 
*/ + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4; + { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + + goto _start; +} + + size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -193,13 +442,13 @@ size_t ZSTD_compressBlock_fast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 4); case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 5); case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 6); case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 7); } } @@ -402,7 +651,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( /* switch to "regular" variant if extDict is 
invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, mls); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ From bc768bccc036fe0d60ff63fe240dfca7474636d9 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 18 Aug 2021 12:47:14 -0400 Subject: [PATCH 03/16] Track Step Size Statefully, Rather than Recalculating Every Time --- lib/compress/zstd_fast.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index fa214e63d67..ff2faab33fd 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -268,6 +268,10 @@ ZSTD_compressBlock_fast_generic_pipelined( const BYTE* match0; size_t mLength; + size_t step; + const BYTE* nextStep; + const size_t kStepIncr = (1 << (kSearchStrength - 1)); + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic_pipelined"); ip0 += (ip0 == prefixStart); { U32 const curr = (U32)(ip0 - base); @@ -280,6 +284,9 @@ ZSTD_compressBlock_fast_generic_pipelined( /* start each op */ _start: /* Requires: ip0 */ + step = stepSize; + nextStep = ip0 + kStepIncr; + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ ip1 = ip0 + stepSize; ip2 = ip1 + stepSize; @@ -348,8 +355,10 @@ ZSTD_compressBlock_fast_generic_pipelined( /* advance to next positions */ { - size_t const step = ((size_t)(ip2 - anchor) >> (kSearchStrength - 1)) + stepSize; - assert(step >= 1); + if (ip2 >= nextStep) { + step++; + nextStep += kStepIncr; + } idx0 = idx1; idx1 = idx2; From 387840af79a86660c8f83bcd6cc4de584de9f3a6 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Wed, 18 Aug 2021 12:47:48 -0400 Subject: [PATCH 04/16] Re-Order Operations for Slightly Better Performance --- lib/compress/zstd_fast.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index ff2faab33fd..a1a9ef445e9 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -331,27 +331,27 @@ ZSTD_compressBlock_fast_generic_pipelined( goto _match; } + /* load match for ip[1] */ + if (idx0 >= prefixStartIndex) { + mval = MEM_read32(base + idx0); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + /* check match at ip[0] */ if (MEM_read32(ip0) == mval) { /* found a match! */ goto _offset; } - /* load next rval */ - rval = MEM_read32(ip3 - rep_offset1); - - /* load match for ip[1] */ - if (idx1 >= prefixStartIndex) { - mval = MEM_read32(base + idx1); - } else { - mval = MEM_read32(ip1) ^ 1; /* guaranteed to not match. */ - } + /* hash ip[3] */ + hash3 = ZSTD_hashPtr(ip3, hlog, mls); /* lookup ip[2] */ idx2 = hashTable[hash2]; - /* hash ip[3] */ - hash3 = ZSTD_hashPtr(ip3, hlog, mls); + /* load next rval */ + rval = MEM_read32(ip3 - rep_offset1); /* advance to next positions */ { From b092dd75b7e0ed48dc94fe58391aac7b805cb178 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Wed, 18 Aug 2021 13:44:27 -0400 Subject: [PATCH 05/16] Shrink Pipeline from 4 Positions to 3 --- lib/compress/zstd_fast.c | 42 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index a1a9ef445e9..411ffc11869 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -247,7 +247,6 @@ ZSTD_compressBlock_fast_generic_pipelined( const BYTE* ip0 = istart; const BYTE* ip1; const BYTE* ip2; - const BYTE* ip3; U32 current0; U32 rep_offset1 = rep[0]; @@ -257,12 +256,9 @@ ZSTD_compressBlock_fast_generic_pipelined( size_t hash0; /* hash for ip0 */ size_t hash1; /* hash for ip1 */ size_t hash2; /* hash for ip2 */ - size_t hash3; /* hash for ip3 */ U32 idx0; /* match idx for ip0 */ U32 idx1; /* match idx for ip1 */ - U32 idx2; /* match idx for ip2 */ U32 mval; /* src value at match idx */ - U32 rval; /* src value at ip2 - repcode */ U32 offcode; const BYTE* match0; @@ -290,32 +286,20 @@ ZSTD_compressBlock_fast_generic_pipelined( /* calculate positions, ip0 - anchor == 0, so we skip step calc */ ip1 = ip0 + stepSize; ip2 = ip1 + stepSize; - ip3 = ip2 + stepSize; - if (ip3 >= ilimit) { + if (ip2 >= ilimit) { goto _cleanup; } hash0 = ZSTD_hashPtr(ip0, hlog, mls); hash1 = ZSTD_hashPtr(ip1, hlog, mls); - hash2 = ZSTD_hashPtr(ip2, hlog, mls); idx0 = hashTable[hash0]; - idx1 = hashTable[hash1]; - - if (idx0 >= prefixStartIndex) { - mval = MEM_read32(base + idx0); - } else { - mval = MEM_read32(ip0) ^ 1; - } - - rval = MEM_read32(ip2 - rep_offset1); do { + const U32 rval = MEM_read32(ip2 - rep_offset1); current0 = ip0 - base; - // DEBUGLOG(5, "Searching ip0 = %u", (U32)(ip0 - istart)); - /* write back hash table entry */ hashTable[hash0] = current0; @@ -331,7 +315,7 @@ ZSTD_compressBlock_fast_generic_pipelined( goto _match; } - /* load match for ip[1] */ + /* load match for ip[0] */ if (idx0 >= prefixStartIndex) { mval = MEM_read32(base + idx0); } 
else { @@ -344,35 +328,29 @@ ZSTD_compressBlock_fast_generic_pipelined( goto _offset; } - /* hash ip[3] */ - hash3 = ZSTD_hashPtr(ip3, hlog, mls); - - /* lookup ip[2] */ - idx2 = hashTable[hash2]; + /* hash ip[2] */ + hash2 = ZSTD_hashPtr(ip2, hlog, mls); - /* load next rval */ - rval = MEM_read32(ip3 - rep_offset1); + /* lookup ip[1] */ + idx1 = hashTable[hash1]; /* advance to next positions */ { - if (ip2 >= nextStep) { + if (ip1 >= nextStep) { step++; nextStep += kStepIncr; } idx0 = idx1; - idx1 = idx2; hash0 = hash1; hash1 = hash2; - hash2 = hash3; ip0 = ip1; ip1 = ip2; - ip2 = ip3; - ip3 = ip3 + step; + ip2 = ip2 + step; } - } while (ip3 < ilimit); + } while (ip2 < ilimit); _cleanup: From 35932ab2f1e129dce0d19bfa82787dc4dc262eed Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 18 Aug 2021 15:21:43 -0400 Subject: [PATCH 06/16] Prefetch Input in Incompressible Sections (+0.25% Speed) --- lib/compress/zstd_fast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 411ffc11869..f7df666811e 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -337,6 +337,7 @@ ZSTD_compressBlock_fast_generic_pipelined( /* advance to next positions */ { if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); step++; nextStep += kStepIncr; } From 7c24c3e6ce8faa5a6d23ef9d11c71a0a106b3bb3 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 18 Aug 2021 16:02:01 -0400 Subject: [PATCH 07/16] Give Up on Searching End of Block Amusingly, it seems to be a non-trivial performance hit to add in final searches or even hash table insertions during cleanup. So let's not. It seems to not make any meaningful difference in compression ratio. 
--- lib/compress/zstd_fast.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index f7df666811e..50bceef74b4 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -354,10 +354,9 @@ ZSTD_compressBlock_fast_generic_pipelined( } while (ip2 < ilimit); _cleanup: - - /* Find matches at end of block. */ - - /* TODO */ + /* Note that there are probably still a couple positions we could search. + * However, it seems to be a meaningful performance hit to try to search + * them. So let's not. */ /* save reps for next block */ rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; From 8706bc115a28757a7a684c4a6bcf435ad9e5eb03 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 18 Aug 2021 16:11:22 -0400 Subject: [PATCH 08/16] Nit: Dedup idx0 and idx1 --- lib/compress/zstd_fast.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 50bceef74b4..f731b957d12 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -256,8 +256,7 @@ ZSTD_compressBlock_fast_generic_pipelined( size_t hash0; /* hash for ip0 */ size_t hash1; /* hash for ip1 */ size_t hash2; /* hash for ip2 */ - U32 idx0; /* match idx for ip0 */ - U32 idx1; /* match idx for ip1 */ + U32 idx; /* match idx for ip0 */ U32 mval; /* src value at match idx */ U32 offcode; @@ -294,7 +293,7 @@ ZSTD_compressBlock_fast_generic_pipelined( hash0 = ZSTD_hashPtr(ip0, hlog, mls); hash1 = ZSTD_hashPtr(ip1, hlog, mls); - idx0 = hashTable[hash0]; + idx = hashTable[hash0]; do { const U32 rval = MEM_read32(ip2 - rep_offset1); @@ -316,8 +315,8 @@ ZSTD_compressBlock_fast_generic_pipelined( } /* load match for ip[0] */ - if (idx0 >= prefixStartIndex) { - mval = MEM_read32(base + idx0); + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); } else { mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. 
*/ } @@ -332,7 +331,7 @@ ZSTD_compressBlock_fast_generic_pipelined( hash2 = ZSTD_hashPtr(ip2, hlog, mls); /* lookup ip[1] */ - idx1 = hashTable[hash1]; + idx = hashTable[hash1]; /* advance to next positions */ { @@ -342,8 +341,6 @@ ZSTD_compressBlock_fast_generic_pipelined( nextStep += kStepIncr; } - idx0 = idx1; - hash0 = hash1; hash1 = hash2; @@ -365,10 +362,10 @@ ZSTD_compressBlock_fast_generic_pipelined( /* Return the last literals size */ return (size_t)(iend - anchor); -_offset: /* Requires: ip0, idx0 */ +_offset: /* Requires: ip0, idx */ /* Compute the offset code. */ - match0 = base + idx0; + match0 = base + idx; rep_offset2 = rep_offset1; rep_offset1 = (U32)(ip0-match0); offcode = rep_offset1 + ZSTD_REP_MOVE; From 991d660ea9fc0e0453a6fa580831352478b56104 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 18 Aug 2021 16:15:53 -0400 Subject: [PATCH 09/16] Nit: Only Store 2 Hash Variables --- lib/compress/zstd_fast.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index f731b957d12..c43c8a6af1e 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -255,7 +255,6 @@ ZSTD_compressBlock_fast_generic_pipelined( size_t hash0; /* hash for ip0 */ size_t hash1; /* hash for ip1 */ - size_t hash2; /* hash for ip2 */ U32 idx; /* match idx for ip0 */ U32 mval; /* src value at match idx */ @@ -327,11 +326,13 @@ ZSTD_compressBlock_fast_generic_pipelined( goto _offset; } + hash0 = hash1; + /* hash ip[2] */ - hash2 = ZSTD_hashPtr(ip2, hlog, mls); + hash1 = ZSTD_hashPtr(ip2, hlog, mls); /* lookup ip[1] */ - idx = hashTable[hash1]; + idx = hashTable[hash0]; /* advance to next positions */ { @@ -341,9 +342,6 @@ ZSTD_compressBlock_fast_generic_pipelined( nextStep += kStepIncr; } - hash0 = hash1; - hash1 = hash2; - ip0 = ip1; ip1 = ip2; ip2 = ip2 + step; From 57a100f6dcb46fff20eacdfc9fc000b0f226b76f Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 19 Aug 2021 13:58:09 -0400 Subject: [PATCH 10/16] Add `ip1 + 128` Prefetch; Tiny Cleanup --- lib/compress/zstd_fast.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index c43c8a6af1e..ebbef491926 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -295,7 +295,9 @@ ZSTD_compressBlock_fast_generic_pipelined( idx = hashTable[hash0]; do { + /* load repcode match for ip[2]*/ const U32 rval = MEM_read32(ip2 - rep_offset1); + current0 = ip0 - base; /* write back hash table entry */ @@ -334,18 +336,18 @@ ZSTD_compressBlock_fast_generic_pipelined( /* lookup ip[1] */ idx = hashTable[hash0]; - /* advance to next positions */ - { - if (ip1 >= nextStep) { - PREFETCH_L1(ip1 + 64); - step++; - nextStep += kStepIncr; - } - - ip0 = ip1; - ip1 = ip2; - ip2 = ip2 + step; + /* calculate step */ + if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + step++; + nextStep += kStepIncr; } + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 += step; } while (ip2 < ilimit); _cleanup: From 24fcccd05c6a3609715b9d9d1020129105c55116 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 20 Aug 2021 15:56:14 -0400 Subject: [PATCH 11/16] Unroll Loop Core; Reduce Frequency of Repcode Check & Step Calc (+>1% Speed) Unrolling the loop to handle 2 positions in each iteration allows us to reduce the frequency of some operations that don't need to happen at every position. One such operation is the step calculation, which is a very rough heuristic anyways. It's fine if we do this a position later. The other operation is the repcode check. But since the repcode check already tries expanding back one position, we're really not missing much of importance by only trying it every other position. This commit also slightly reorders some operations. 
--- lib/compress/zstd_fast.c | 47 +++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index ebbef491926..9b40558e197 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -247,6 +247,7 @@ ZSTD_compressBlock_fast_generic_pipelined( const BYTE* ip0 = istart; const BYTE* ip1; const BYTE* ip2; + const BYTE* ip3; U32 current0; U32 rep_offset1 = rep[0]; @@ -284,8 +285,9 @@ ZSTD_compressBlock_fast_generic_pipelined( /* calculate positions, ip0 - anchor == 0, so we skip step calc */ ip1 = ip0 + stepSize; ip2 = ip1 + stepSize; + ip3 = ip2 + stepSize; - if (ip2 >= ilimit) { + if (ip3 >= ilimit) { goto _cleanup; } @@ -298,9 +300,8 @@ ZSTD_compressBlock_fast_generic_pipelined( /* load repcode match for ip[2]*/ const U32 rval = MEM_read32(ip2 - rep_offset1); - current0 = ip0 - base; - /* write back hash table entry */ + current0 = ip0 - base; hashTable[hash0] = current0; /* check repcode at ip[2] */ @@ -328,16 +329,45 @@ ZSTD_compressBlock_fast_generic_pipelined( goto _offset; } - hash0 = hash1; + /* lookup ip[1] */ + idx = hashTable[hash1]; /* hash ip[2] */ + hash0 = hash1; hash1 = ZSTD_hashPtr(ip2, hlog, mls); + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip3; + ip3 += step; + + /* write back hash table entry */ + current0 = ip0 - base; + hashTable[hash0] = current0; + + /* load match for ip[0] */ + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! 
*/ + goto _offset; + } + /* lookup ip[1] */ - idx = hashTable[hash0]; + idx = hashTable[hash1]; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); /* calculate step */ - if (ip1 >= nextStep) { + if (ip2 >= nextStep) { PREFETCH_L1(ip1 + 64); PREFETCH_L1(ip1 + 128); step++; @@ -347,8 +377,9 @@ ZSTD_compressBlock_fast_generic_pipelined( /* advance to next positions */ ip0 = ip1; ip1 = ip2; - ip2 += step; - } while (ip2 < ilimit); + ip2 = ip3; + ip3 += step; + } while (ip3 < ilimit); _cleanup: /* Note that there are probably still a couple positions we could search. From 64054dec442a99e4c065be1319202e18bd4b8d8a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 20 Aug 2021 17:06:41 -0400 Subject: [PATCH 12/16] Tweak Step --- lib/compress/zstd_fast.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 9b40558e197..d60a0bbe5ae 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -340,7 +340,6 @@ ZSTD_compressBlock_fast_generic_pipelined( ip0 = ip1; ip1 = ip2; ip2 = ip3; - ip3 += step; /* write back hash table entry */ current0 = ip0 - base; @@ -377,8 +376,8 @@ ZSTD_compressBlock_fast_generic_pipelined( /* advance to next positions */ ip0 = ip1; ip1 = ip2; - ip2 = ip3; - ip3 += step; + ip2 = ip2 + step; + ip3 = ip2 + step; } while (ip3 < ilimit); _cleanup: From 15e67bfa7e7ec1384e42001ef1eeb5af9a896f02 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 26 Aug 2021 13:29:58 -0400 Subject: [PATCH 13/16] Deduplicate Implementations This removes the old `ZSTD_compressBlock_fast_generic()` and renames the new `ZSTD_compressBlock_fast_generic_pipelined()` to replace it. This is functionally a no-op. 
--- lib/compress/zstd_fast.c | 177 +++++---------------------------------- 1 file changed, 21 insertions(+), 156 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index d60a0bbe5ae..33a392dfa01 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -43,146 +43,6 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, } -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_fast_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ - const BYTE* ip0 = istart; - const BYTE* ip1; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; - - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); - ip0 += (ip0 == prefixStart); - ip1 = ip0 + 1; - { U32 const curr = (U32)(ip0 - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); - U32 const maxRep = curr - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ -#ifdef __INTEL_COMPILER - /* From intel 'The vector pragma indicates that the loop should be - * vectorized if it is legal to do so'. 
Can be used together with - * #pragma ivdep (but have opted to exclude that because intel - * warns against using it).*/ - #pragma vector always -#endif - while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ - size_t mLength; - BYTE const* ip2 = ip0 + 2; - size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); - U32 const val0 = MEM_read32(ip0); - size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); - U32 const val1 = MEM_read32(ip1); - U32 const current0 = (U32)(ip0-base); - U32 const current1 = (U32)(ip1-base); - U32 const matchIndex0 = hashTable[h0]; - U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2 - offset_1; - const BYTE* match0 = base + matchIndex0; - const BYTE* match1 = base + matchIndex1; - U32 offcode; - -#if defined(__aarch64__) - PREFETCH_L1(ip0+256); -#endif - - hashTable[h0] = current0; /* update hash table */ - hashTable[h1] = current1; /* update hash table */ - - assert(ip0 + 1 == ip1); - - if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = (ip2[-1] == repMatch[-1]) ? 
1 : 0; - ip0 = ip2 - mLength; - match0 = repMatch - mLength; - mLength += 4; - offcode = 0; - goto _match; - } - if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { - /* found a regular match */ - goto _offset; - } - if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { - /* found a regular match after one literal */ - ip0 = ip1; - match0 = match1; - goto _offset; - } - { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; - assert(step >= 2); - ip0 += step; - ip1 += step; - continue; - } -_offset: /* Requires: ip0, match0 */ - /* Compute the offset code */ - offset_2 = offset_1; - offset_1 = (U32)(ip0-match0); - offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 4; - /* Count the backwards match length */ - while (((ip0>anchor) & (match0>prefixStart)) - && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ - -_match: /* Requires: ip0, match0, offcode */ - /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); - ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); - /* match found */ - ip0 += mLength; - anchor = ip0; - - if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+current0+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - - if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ - while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - anchor 
= ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } } } - ip1 = ip0 + 1; - } - - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - /** * If you squint hard enough (and ignore repcodes), the search operation at any * given position is broken into 4 stages: @@ -200,14 +60,20 @@ ZSTD_compressBlock_fast_generic( * onto the next, this implementation interleaves these operations across the * next few positions: * + * R = Repcode Read & Compare + * H = Hash + * T = Table Lookup + * M = Match Read & Compare + * * Pos | Time --> * ----+------------------- - * N | ...4 - * N+1 | ... 3 4 - * N+2 | ... 2 3 4 - * N+3 | 1 2 3 - * N+4 | 1 2 - * N+5 | 1 + * N | ... M + * N+1 | ... TM + * N+2 | R H T M + * N+3 | H TM + * N+4 | R H T M + * N+5 | H ... + * N+6 | R ... * * This is very much analogous to the pipelining of execution in a CPU. And just * like a CPU, we have to dump the pipeline when we find a match (i.e., take a @@ -218,14 +84,13 @@ ZSTD_compressBlock_fast_generic( * * Pos | Time --> * ----+------------------- - * N | 1 2 3 - * N+1 | 1 2 - * N+2 | 1 + * N | H T + * N+1 | H * * This is also the work we do at the beginning to enter the loop initially. 
*/ FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_fast_generic_pipelined( +ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const mls) @@ -267,7 +132,7 @@ ZSTD_compressBlock_fast_generic_pipelined( const BYTE* nextStep; const size_t kStepIncr = (1 << (kSearchStrength - 1)); - DEBUGLOG(5, "ZSTD_compressBlock_fast_generic_pipelined"); + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); { U32 const curr = (U32)(ip0 - base); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); @@ -456,13 +321,13 @@ size_t ZSTD_compressBlock_fast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 4); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); case 5 : - return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 5); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); case 6 : - return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 6); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); case 7 : - return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, 7); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); } } @@ -665,7 +530,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic_pipelined(ms, seqStore, rep, src, srcSize, mls); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ From 98d3df326b8dfddd11786e45e7ba8406ffc08942 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 26 Aug 2021 13:46:32 -0400 Subject: [PATCH 14/16] Change Target Size in Fuzzer It's a bit strange, because this is hitting the dictionary special case where the dictionary is contiguous with the input and still runs in the single-segment path. We should probably change that to hit the `extDict` path instead? --- tests/fuzzer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 960050f9674..fff963176fd 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1966,7 +1966,7 @@ static int basicUnitTests(U32 const seed, double compressibility) 3742, 3670, 3670, 3660, 3660, 3660, 3660, 3660, 3660, 3660, 3660, 3660, 3660 }; - size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940, + size_t const target_wdict_cSize[22+1] = { 2830, 2896, 2890, 2820, 2940, 2950, 2950, 2925, 2900, 2891, 2910, 2910, 2910, 2770, 2760, 2750, 2750, 2750, 2750, 2750, From d6fd7761c963db8b88c14210f9ca1f972fe7fd71 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 30 Aug 2021 12:27:49 -0400 Subject: [PATCH 15/16] Fix VS Build: Explicitly Cast to Narrow Ints --- lib/compress/zstd_fast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 33a392dfa01..2555e072044 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -166,7 +166,7 @@ ZSTD_compressBlock_fast_generic( const U32 rval = MEM_read32(ip2 - rep_offset1); /* write back hash table entry */ - current0 = ip0 - base; + current0 = (U32)(ip0 - base); hashTable[hash0] = current0; /* check repcode at ip[2] */ @@ -207,7 +207,7 @@ ZSTD_compressBlock_fast_generic( ip2 = ip3; /* write back hash table entry */ - current0 = ip0 - base; + current0 = (U32)(ip0 - base); hashTable[hash0] = current0; /* load match for ip[0] */ @@ -285,7 +285,7 @@ ZSTD_compressBlock_fast_generic( /* write next hash table entry */ if (ip1 < ip0) { - hashTable[hash1] = ip1 - base; + hashTable[hash1] = (U32)(ip1 - base); } /* Fill table and check for immediate repcode. */ From b0977e4ed2e58a2db2eaf6be6721393fe964daa9 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Wed, 1 Sep 2021 14:45:00 -0400 Subject: [PATCH 16/16] Update results.csv --- tests/regression/results.csv | 360 +++++++++++++++++------------------ 1 file changed, 180 insertions(+), 180 deletions(-) diff --git a/tests/regression/results.csv b/tests/regression/results.csv index bcecafae7ed..d2a1ea997f9 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -1,9 +1,9 @@ Data, Config, Method, Total compressed size -silesia.tar, level -5, compress simple, 6738593 -silesia.tar, level -3, compress simple, 6446372 -silesia.tar, level -1, compress simple, 6186042 +silesia.tar, level -5, compress simple, 7359401 +silesia.tar, level -3, compress simple, 6901672 +silesia.tar, level -1, compress simple, 6182241 silesia.tar, level 0, compress simple, 4861423 -silesia.tar, level 1, compress simple, 5334885 +silesia.tar, level 1, compress simple, 5331946 silesia.tar, level 3, compress simple, 4861423 silesia.tar, level 4, compress simple, 4799632 silesia.tar, level 5, compress simple, 4650202 @@ -15,12 +15,12 @@ silesia.tar, level 16, compress silesia.tar, level 19, compress simple, 4264388 silesia.tar, uncompressed literals, compress simple, 4861423 silesia.tar, uncompressed literals optimal, compress simple, 4264388 -silesia.tar, huffman literals, compress simple, 6186042 -github.tar, level -5, compress simple, 46856 -github.tar, level -3, compress simple, 43754 -github.tar, level -1, compress simple, 42490 +silesia.tar, huffman literals, compress simple, 6182241 +github.tar, level -5, compress simple, 66914 +github.tar, level -3, compress simple, 52127 +github.tar, level -1, compress simple, 42560 github.tar, level 0, compress simple, 38441 -github.tar, level 1, compress simple, 39265 +github.tar, level 1, compress simple, 39200 github.tar, level 3, compress simple, 38441 github.tar, level 4, compress simple, 38467 github.tar, level 5, compress simple, 38376 @@ -32,12 +32,12 @@ github.tar, level 16, compress github.tar, level 19, 
compress simple, 32837 github.tar, uncompressed literals, compress simple, 38441 github.tar, uncompressed literals optimal, compress simple, 32837 -github.tar, huffman literals, compress simple, 42490 -silesia, level -5, compress cctx, 6737607 -silesia, level -3, compress cctx, 6444677 -silesia, level -1, compress cctx, 6178460 +github.tar, huffman literals, compress simple, 42560 +silesia, level -5, compress cctx, 7354675 +silesia, level -3, compress cctx, 6902374 +silesia, level -1, compress cctx, 6177565 silesia, level 0, compress cctx, 4849551 -silesia, level 1, compress cctx, 5313202 +silesia, level 1, compress cctx, 5309097 silesia, level 3, compress cctx, 4849551 silesia, level 4, compress cctx, 4786969 silesia, level 5, compress cctx, 4638960 @@ -56,17 +56,17 @@ silesia, small chain log, compress silesia, explicit params, compress cctx, 4794480 silesia, uncompressed literals, compress cctx, 4849551 silesia, uncompressed literals optimal, compress cctx, 4283236 -silesia, huffman literals, compress cctx, 6178460 +silesia, huffman literals, compress cctx, 6177565 silesia, multithreaded with advanced params, compress cctx, 4849551 -github, level -5, compress cctx, 205285 +github, level -5, compress cctx, 232315 github, level -5 with dict, compress cctx, 47294 -github, level -3, compress cctx, 190643 +github, level -3, compress cctx, 220760 github, level -3 with dict, compress cctx, 48047 -github, level -1, compress cctx, 175568 +github, level -1, compress cctx, 175468 github, level -1 with dict, compress cctx, 43527 github, level 0, compress cctx, 136335 github, level 0 with dict, compress cctx, 41534 -github, level 1, compress cctx, 142465 +github, level 1, compress cctx, 142365 github, level 1 with dict, compress cctx, 42157 github, level 3, compress cctx, 136335 github, level 3 with dict, compress cctx, 41534 @@ -95,13 +95,13 @@ github, small chain log, compress github, explicit params, compress cctx, 140932 github, uncompressed literals, compress cctx, 
136335 github, uncompressed literals optimal, compress cctx, 134064 -github, huffman literals, compress cctx, 175568 +github, huffman literals, compress cctx, 175468 github, multithreaded with advanced params, compress cctx, 141102 -silesia, level -5, zstdcli, 6737655 -silesia, level -3, zstdcli, 6444725 -silesia, level -1, zstdcli, 6178508 +silesia, level -5, zstdcli, 7354723 +silesia, level -3, zstdcli, 6902422 +silesia, level -1, zstdcli, 6177613 silesia, level 0, zstdcli, 4849599 -silesia, level 1, zstdcli, 5313250 +silesia, level 1, zstdcli, 5309145 silesia, level 3, zstdcli, 4849599 silesia, level 4, zstdcli, 4787017 silesia, level 5, zstdcli, 4639008 @@ -120,13 +120,13 @@ silesia, small chain log, zstdcli, silesia, explicit params, zstdcli, 4795856 silesia, uncompressed literals, zstdcli, 5128030 silesia, uncompressed literals optimal, zstdcli, 4317944 -silesia, huffman literals, zstdcli, 5326317 +silesia, huffman literals, zstdcli, 5326394 silesia, multithreaded with advanced params, zstdcli, 5128030 -silesia.tar, level -5, zstdcli, 6738934 -silesia.tar, level -3, zstdcli, 6448419 -silesia.tar, level -1, zstdcli, 6186912 +silesia.tar, level -5, zstdcli, 7363866 +silesia.tar, level -3, zstdcli, 6902158 +silesia.tar, level -1, zstdcli, 6182939 silesia.tar, level 0, zstdcli, 4861511 -silesia.tar, level 1, zstdcli, 5336318 +silesia.tar, level 1, zstdcli, 5333184 silesia.tar, level 3, zstdcli, 4861511 silesia.tar, level 4, zstdcli, 4800529 silesia.tar, level 5, zstdcli, 4651159 @@ -146,17 +146,17 @@ silesia.tar, small chain log, zstdcli, silesia.tar, explicit params, zstdcli, 4821277 silesia.tar, uncompressed literals, zstdcli, 5129559 silesia.tar, uncompressed literals optimal, zstdcli, 4307404 -silesia.tar, huffman literals, zstdcli, 5347610 +silesia.tar, huffman literals, zstdcli, 5344915 silesia.tar, multithreaded with advanced params, zstdcli, 5129559 -github, level -5, zstdcli, 207285 +github, level -5, zstdcli, 234315 github, level -5 with dict, zstdcli, 
48718 -github, level -3, zstdcli, 192643 +github, level -3, zstdcli, 222760 github, level -3 with dict, zstdcli, 47395 -github, level -1, zstdcli, 177568 +github, level -1, zstdcli, 177468 github, level -1 with dict, zstdcli, 45170 github, level 0, zstdcli, 138335 github, level 0 with dict, zstdcli, 43148 -github, level 1, zstdcli, 144465 +github, level 1, zstdcli, 144365 github, level 1 with dict, zstdcli, 43682 github, level 3, zstdcli, 138335 github, level 3 with dict, zstdcli, 43148 @@ -185,18 +185,18 @@ github, small chain log, zstdcli, github, explicit params, zstdcli, 136197 github, uncompressed literals, zstdcli, 167915 github, uncompressed literals optimal, zstdcli, 159227 -github, huffman literals, zstdcli, 144465 +github, huffman literals, zstdcli, 144365 github, multithreaded with advanced params, zstdcli, 167915 -github.tar, level -5, zstdcli, 46860 -github.tar, level -5 with dict, zstdcli, 44575 -github.tar, level -3, zstdcli, 43758 -github.tar, level -3 with dict, zstdcli, 41451 -github.tar, level -1, zstdcli, 42494 -github.tar, level -1 with dict, zstdcli, 41135 +github.tar, level -5, zstdcli, 66918 +github.tar, level -5 with dict, zstdcli, 51529 +github.tar, level -3, zstdcli, 52131 +github.tar, level -3 with dict, zstdcli, 44246 +github.tar, level -1, zstdcli, 42564 +github.tar, level -1 with dict, zstdcli, 41140 github.tar, level 0, zstdcli, 38445 github.tar, level 0 with dict, zstdcli, 37999 -github.tar, level 1, zstdcli, 39269 -github.tar, level 1 with dict, zstdcli, 38284 +github.tar, level 1, zstdcli, 39204 +github.tar, level 1 with dict, zstdcli, 38288 github.tar, level 3, zstdcli, 38445 github.tar, level 3 with dict, zstdcli, 37999 github.tar, level 4, zstdcli, 38471 @@ -226,13 +226,13 @@ github.tar, small chain log, zstdcli, github.tar, explicit params, zstdcli, 41227 github.tar, uncompressed literals, zstdcli, 41126 github.tar, uncompressed literals optimal, zstdcli, 35392 -github.tar, huffman literals, zstdcli, 38781 +github.tar, huffman 
literals, zstdcli, 38857 github.tar, multithreaded with advanced params, zstdcli, 41126 -silesia, level -5, advanced one pass, 6737607 -silesia, level -3, advanced one pass, 6444677 -silesia, level -1, advanced one pass, 6178460 +silesia, level -5, advanced one pass, 7354675 +silesia, level -3, advanced one pass, 6902374 +silesia, level -1, advanced one pass, 6177565 silesia, level 0, advanced one pass, 4849551 -silesia, level 1, advanced one pass, 5313202 +silesia, level 1, advanced one pass, 5309097 silesia, level 3, advanced one pass, 4849551 silesia, level 4, advanced one pass, 4786969 silesia, level 5 row 1, advanced one pass, 4640753 @@ -260,13 +260,13 @@ silesia, small chain log, advanced silesia, explicit params, advanced one pass, 4795856 silesia, uncompressed literals, advanced one pass, 5127982 silesia, uncompressed literals optimal, advanced one pass, 4317896 -silesia, huffman literals, advanced one pass, 5326269 +silesia, huffman literals, advanced one pass, 5326346 silesia, multithreaded with advanced params, advanced one pass, 5127982 -silesia.tar, level -5, advanced one pass, 6738593 -silesia.tar, level -3, advanced one pass, 6446372 -silesia.tar, level -1, advanced one pass, 6186042 +silesia.tar, level -5, advanced one pass, 7359401 +silesia.tar, level -3, advanced one pass, 6901672 +silesia.tar, level -1, advanced one pass, 6182241 silesia.tar, level 0, advanced one pass, 4861423 -silesia.tar, level 1, advanced one pass, 5334885 +silesia.tar, level 1, advanced one pass, 5331946 silesia.tar, level 3, advanced one pass, 4861423 silesia.tar, level 4, advanced one pass, 4799632 silesia.tar, level 5 row 1, advanced one pass, 4652862 @@ -294,13 +294,13 @@ silesia.tar, small chain log, advanced silesia.tar, explicit params, advanced one pass, 4807383 silesia.tar, uncompressed literals, advanced one pass, 5129458 silesia.tar, uncompressed literals optimal, advanced one pass, 4307400 -silesia.tar, huffman literals, advanced one pass, 5347335 +silesia.tar, 
huffman literals, advanced one pass, 5344545 silesia.tar, multithreaded with advanced params, advanced one pass, 5129555 -github, level -5, advanced one pass, 205285 +github, level -5, advanced one pass, 232315 github, level -5 with dict, advanced one pass, 46718 -github, level -3, advanced one pass, 190643 +github, level -3, advanced one pass, 220760 github, level -3 with dict, advanced one pass, 45395 -github, level -1, advanced one pass, 175568 +github, level -1, advanced one pass, 175468 github, level -1 with dict, advanced one pass, 43170 github, level 0, advanced one pass, 136335 github, level 0 with dict, advanced one pass, 41148 @@ -308,7 +308,7 @@ github, level 0 with dict dms, advanced github, level 0 with dict dds, advanced one pass, 41148 github, level 0 with dict copy, advanced one pass, 41124 github, level 0 with dict load, advanced one pass, 42252 -github, level 1, advanced one pass, 142465 +github, level 1, advanced one pass, 142365 github, level 1 with dict, advanced one pass, 41682 github, level 1 with dict dms, advanced one pass, 41682 github, level 1 with dict dds, advanced one pass, 41682 @@ -419,26 +419,26 @@ github, small chain log, advanced github, explicit params, advanced one pass, 137727 github, uncompressed literals, advanced one pass, 165915 github, uncompressed literals optimal, advanced one pass, 157227 -github, huffman literals, advanced one pass, 142465 +github, huffman literals, advanced one pass, 142365 github, multithreaded with advanced params, advanced one pass, 165915 -github.tar, level -5, advanced one pass, 46856 -github.tar, level -5 with dict, advanced one pass, 44571 -github.tar, level -3, advanced one pass, 43754 -github.tar, level -3 with dict, advanced one pass, 41447 -github.tar, level -1, advanced one pass, 42490 -github.tar, level -1 with dict, advanced one pass, 41131 +github.tar, level -5, advanced one pass, 66914 +github.tar, level -5 with dict, advanced one pass, 51525 +github.tar, level -3, advanced one pass, 
52127 +github.tar, level -3 with dict, advanced one pass, 44242 +github.tar, level -1, advanced one pass, 42560 +github.tar, level -1 with dict, advanced one pass, 41136 github.tar, level 0, advanced one pass, 38441 github.tar, level 0 with dict, advanced one pass, 37995 github.tar, level 0 with dict dms, advanced one pass, 38003 github.tar, level 0 with dict dds, advanced one pass, 38003 github.tar, level 0 with dict copy, advanced one pass, 37995 github.tar, level 0 with dict load, advanced one pass, 37956 -github.tar, level 1, advanced one pass, 39265 -github.tar, level 1 with dict, advanced one pass, 38280 -github.tar, level 1 with dict dms, advanced one pass, 38290 -github.tar, level 1 with dict dds, advanced one pass, 38290 -github.tar, level 1 with dict copy, advanced one pass, 38280 -github.tar, level 1 with dict load, advanced one pass, 38729 +github.tar, level 1, advanced one pass, 39200 +github.tar, level 1 with dict, advanced one pass, 38284 +github.tar, level 1 with dict dms, advanced one pass, 38294 +github.tar, level 1 with dict dds, advanced one pass, 38294 +github.tar, level 1 with dict copy, advanced one pass, 38284 +github.tar, level 1 with dict load, advanced one pass, 38724 github.tar, level 3, advanced one pass, 38441 github.tar, level 3 with dict, advanced one pass, 37995 github.tar, level 3 with dict dms, advanced one pass, 38003 @@ -544,13 +544,13 @@ github.tar, small chain log, advanced github.tar, explicit params, advanced one pass, 41227 github.tar, uncompressed literals, advanced one pass, 41122 github.tar, uncompressed literals optimal, advanced one pass, 35388 -github.tar, huffman literals, advanced one pass, 38777 +github.tar, huffman literals, advanced one pass, 38853 github.tar, multithreaded with advanced params, advanced one pass, 41122 -silesia, level -5, advanced one pass small out, 6737607 -silesia, level -3, advanced one pass small out, 6444677 -silesia, level -1, advanced one pass small out, 6178460 +silesia, level -5, 
advanced one pass small out, 7354675 +silesia, level -3, advanced one pass small out, 6902374 +silesia, level -1, advanced one pass small out, 6177565 silesia, level 0, advanced one pass small out, 4849551 -silesia, level 1, advanced one pass small out, 5313202 +silesia, level 1, advanced one pass small out, 5309097 silesia, level 3, advanced one pass small out, 4849551 silesia, level 4, advanced one pass small out, 4786969 silesia, level 5 row 1, advanced one pass small out, 4640753 @@ -578,13 +578,13 @@ silesia, small chain log, advanced silesia, explicit params, advanced one pass small out, 4795856 silesia, uncompressed literals, advanced one pass small out, 5127982 silesia, uncompressed literals optimal, advanced one pass small out, 4317896 -silesia, huffman literals, advanced one pass small out, 5326269 +silesia, huffman literals, advanced one pass small out, 5326346 silesia, multithreaded with advanced params, advanced one pass small out, 5127982 -silesia.tar, level -5, advanced one pass small out, 6738593 -silesia.tar, level -3, advanced one pass small out, 6446372 -silesia.tar, level -1, advanced one pass small out, 6186042 +silesia.tar, level -5, advanced one pass small out, 7359401 +silesia.tar, level -3, advanced one pass small out, 6901672 +silesia.tar, level -1, advanced one pass small out, 6182241 silesia.tar, level 0, advanced one pass small out, 4861423 -silesia.tar, level 1, advanced one pass small out, 5334885 +silesia.tar, level 1, advanced one pass small out, 5331946 silesia.tar, level 3, advanced one pass small out, 4861423 silesia.tar, level 4, advanced one pass small out, 4799632 silesia.tar, level 5 row 1, advanced one pass small out, 4652862 @@ -612,13 +612,13 @@ silesia.tar, small chain log, advanced silesia.tar, explicit params, advanced one pass small out, 4807383 silesia.tar, uncompressed literals, advanced one pass small out, 5129458 silesia.tar, uncompressed literals optimal, advanced one pass small out, 4307400 -silesia.tar, huffman 
literals, advanced one pass small out, 5347335 +silesia.tar, huffman literals, advanced one pass small out, 5344545 silesia.tar, multithreaded with advanced params, advanced one pass small out, 5129555 -github, level -5, advanced one pass small out, 205285 +github, level -5, advanced one pass small out, 232315 github, level -5 with dict, advanced one pass small out, 46718 -github, level -3, advanced one pass small out, 190643 +github, level -3, advanced one pass small out, 220760 github, level -3 with dict, advanced one pass small out, 45395 -github, level -1, advanced one pass small out, 175568 +github, level -1, advanced one pass small out, 175468 github, level -1 with dict, advanced one pass small out, 43170 github, level 0, advanced one pass small out, 136335 github, level 0 with dict, advanced one pass small out, 41148 @@ -626,7 +626,7 @@ github, level 0 with dict dms, advanced github, level 0 with dict dds, advanced one pass small out, 41148 github, level 0 with dict copy, advanced one pass small out, 41124 github, level 0 with dict load, advanced one pass small out, 42252 -github, level 1, advanced one pass small out, 142465 +github, level 1, advanced one pass small out, 142365 github, level 1 with dict, advanced one pass small out, 41682 github, level 1 with dict dms, advanced one pass small out, 41682 github, level 1 with dict dds, advanced one pass small out, 41682 @@ -737,26 +737,26 @@ github, small chain log, advanced github, explicit params, advanced one pass small out, 137727 github, uncompressed literals, advanced one pass small out, 165915 github, uncompressed literals optimal, advanced one pass small out, 157227 -github, huffman literals, advanced one pass small out, 142465 +github, huffman literals, advanced one pass small out, 142365 github, multithreaded with advanced params, advanced one pass small out, 165915 -github.tar, level -5, advanced one pass small out, 46856 -github.tar, level -5 with dict, advanced one pass small out, 44571 
-github.tar, level -3, advanced one pass small out, 43754 -github.tar, level -3 with dict, advanced one pass small out, 41447 -github.tar, level -1, advanced one pass small out, 42490 -github.tar, level -1 with dict, advanced one pass small out, 41131 +github.tar, level -5, advanced one pass small out, 66914 +github.tar, level -5 with dict, advanced one pass small out, 51525 +github.tar, level -3, advanced one pass small out, 52127 +github.tar, level -3 with dict, advanced one pass small out, 44242 +github.tar, level -1, advanced one pass small out, 42560 +github.tar, level -1 with dict, advanced one pass small out, 41136 github.tar, level 0, advanced one pass small out, 38441 github.tar, level 0 with dict, advanced one pass small out, 37995 github.tar, level 0 with dict dms, advanced one pass small out, 38003 github.tar, level 0 with dict dds, advanced one pass small out, 38003 github.tar, level 0 with dict copy, advanced one pass small out, 37995 github.tar, level 0 with dict load, advanced one pass small out, 37956 -github.tar, level 1, advanced one pass small out, 39265 -github.tar, level 1 with dict, advanced one pass small out, 38280 -github.tar, level 1 with dict dms, advanced one pass small out, 38290 -github.tar, level 1 with dict dds, advanced one pass small out, 38290 -github.tar, level 1 with dict copy, advanced one pass small out, 38280 -github.tar, level 1 with dict load, advanced one pass small out, 38729 +github.tar, level 1, advanced one pass small out, 39200 +github.tar, level 1 with dict, advanced one pass small out, 38284 +github.tar, level 1 with dict dms, advanced one pass small out, 38294 +github.tar, level 1 with dict dds, advanced one pass small out, 38294 +github.tar, level 1 with dict copy, advanced one pass small out, 38284 +github.tar, level 1 with dict load, advanced one pass small out, 38724 github.tar, level 3, advanced one pass small out, 38441 github.tar, level 3 with dict, advanced one pass small out, 37995 github.tar, level 3 
with dict dms, advanced one pass small out, 38003 @@ -862,13 +862,13 @@ github.tar, small chain log, advanced github.tar, explicit params, advanced one pass small out, 41227 github.tar, uncompressed literals, advanced one pass small out, 41122 github.tar, uncompressed literals optimal, advanced one pass small out, 35388 -github.tar, huffman literals, advanced one pass small out, 38777 +github.tar, huffman literals, advanced one pass small out, 38853 github.tar, multithreaded with advanced params, advanced one pass small out, 41122 -silesia, level -5, advanced streaming, 6882505 -silesia, level -3, advanced streaming, 6568376 -silesia, level -1, advanced streaming, 6183403 +silesia, level -5, advanced streaming, 7292053 +silesia, level -3, advanced streaming, 6867875 +silesia, level -1, advanced streaming, 6183923 silesia, level 0, advanced streaming, 4849551 -silesia, level 1, advanced streaming, 5314161 +silesia, level 1, advanced streaming, 5312694 silesia, level 3, advanced streaming, 4849551 silesia, level 4, advanced streaming, 4786969 silesia, level 5 row 1, advanced streaming, 4640753 @@ -896,13 +896,13 @@ silesia, small chain log, advanced silesia, explicit params, advanced streaming, 4795884 silesia, uncompressed literals, advanced streaming, 5127982 silesia, uncompressed literals optimal, advanced streaming, 4317896 -silesia, huffman literals, advanced streaming, 5331171 +silesia, huffman literals, advanced streaming, 5332234 silesia, multithreaded with advanced params, advanced streaming, 5127982 -silesia.tar, level -5, advanced streaming, 6982759 -silesia.tar, level -3, advanced streaming, 6641283 -silesia.tar, level -1, advanced streaming, 6190795 +silesia.tar, level -5, advanced streaming, 7260007 +silesia.tar, level -3, advanced streaming, 6845151 +silesia.tar, level -1, advanced streaming, 6187938 silesia.tar, level 0, advanced streaming, 4861425 -silesia.tar, level 1, advanced streaming, 5336941 +silesia.tar, level 1, advanced streaming, 5334890 
silesia.tar, level 3, advanced streaming, 4861425 silesia.tar, level 4, advanced streaming, 4799632 silesia.tar, level 5 row 1, advanced streaming, 4652866 @@ -930,13 +930,13 @@ silesia.tar, small chain log, advanced silesia.tar, explicit params, advanced streaming, 4807403 silesia.tar, uncompressed literals, advanced streaming, 5129461 silesia.tar, uncompressed literals optimal, advanced streaming, 4307400 -silesia.tar, huffman literals, advanced streaming, 5352360 +silesia.tar, huffman literals, advanced streaming, 5350519 silesia.tar, multithreaded with advanced params, advanced streaming, 5129555 -github, level -5, advanced streaming, 205285 +github, level -5, advanced streaming, 232315 github, level -5 with dict, advanced streaming, 46718 -github, level -3, advanced streaming, 190643 +github, level -3, advanced streaming, 220760 github, level -3 with dict, advanced streaming, 45395 -github, level -1, advanced streaming, 175568 +github, level -1, advanced streaming, 175468 github, level -1 with dict, advanced streaming, 43170 github, level 0, advanced streaming, 136335 github, level 0 with dict, advanced streaming, 41148 @@ -944,7 +944,7 @@ github, level 0 with dict dms, advanced github, level 0 with dict dds, advanced streaming, 41148 github, level 0 with dict copy, advanced streaming, 41124 github, level 0 with dict load, advanced streaming, 42252 -github, level 1, advanced streaming, 142465 +github, level 1, advanced streaming, 142365 github, level 1 with dict, advanced streaming, 41682 github, level 1 with dict dms, advanced streaming, 41682 github, level 1 with dict dds, advanced streaming, 41682 @@ -1055,26 +1055,26 @@ github, small chain log, advanced github, explicit params, advanced streaming, 137727 github, uncompressed literals, advanced streaming, 165915 github, uncompressed literals optimal, advanced streaming, 157227 -github, huffman literals, advanced streaming, 142465 +github, huffman literals, advanced streaming, 142365 github, multithreaded 
with advanced params, advanced streaming, 165915 -github.tar, level -5, advanced streaming, 46747 -github.tar, level -5 with dict, advanced streaming, 44440 -github.tar, level -3, advanced streaming, 43537 -github.tar, level -3 with dict, advanced streaming, 41112 -github.tar, level -1, advanced streaming, 42465 -github.tar, level -1 with dict, advanced streaming, 41196 +github.tar, level -5, advanced streaming, 64132 +github.tar, level -5 with dict, advanced streaming, 48642 +github.tar, level -3, advanced streaming, 50964 +github.tar, level -3 with dict, advanced streaming, 42750 +github.tar, level -1, advanced streaming, 42536 +github.tar, level -1 with dict, advanced streaming, 41198 github.tar, level 0, advanced streaming, 38441 github.tar, level 0 with dict, advanced streaming, 37995 github.tar, level 0 with dict dms, advanced streaming, 38003 github.tar, level 0 with dict dds, advanced streaming, 38003 github.tar, level 0 with dict copy, advanced streaming, 37995 github.tar, level 0 with dict load, advanced streaming, 37956 -github.tar, level 1, advanced streaming, 39342 -github.tar, level 1 with dict, advanced streaming, 38293 -github.tar, level 1 with dict dms, advanced streaming, 38303 -github.tar, level 1 with dict dds, advanced streaming, 38303 -github.tar, level 1 with dict copy, advanced streaming, 38293 -github.tar, level 1 with dict load, advanced streaming, 38766 +github.tar, level 1, advanced streaming, 39270 +github.tar, level 1 with dict, advanced streaming, 38316 +github.tar, level 1 with dict dms, advanced streaming, 38326 +github.tar, level 1 with dict dds, advanced streaming, 38326 +github.tar, level 1 with dict copy, advanced streaming, 38316 +github.tar, level 1 with dict load, advanced streaming, 38761 github.tar, level 3, advanced streaming, 38441 github.tar, level 3 with dict, advanced streaming, 37995 github.tar, level 3 with dict dms, advanced streaming, 38003 @@ -1180,13 +1180,13 @@ github.tar, small chain log, advanced github.tar, 
explicit params, advanced streaming, 41227 github.tar, uncompressed literals, advanced streaming, 41122 github.tar, uncompressed literals optimal, advanced streaming, 35388 -github.tar, huffman literals, advanced streaming, 38800 +github.tar, huffman literals, advanced streaming, 38874 github.tar, multithreaded with advanced params, advanced streaming, 41122 -silesia, level -5, old streaming, 6882505 -silesia, level -3, old streaming, 6568376 -silesia, level -1, old streaming, 6183403 +silesia, level -5, old streaming, 7292053 +silesia, level -3, old streaming, 6867875 +silesia, level -1, old streaming, 6183923 silesia, level 0, old streaming, 4849551 -silesia, level 1, old streaming, 5314161 +silesia, level 1, old streaming, 5312694 silesia, level 3, old streaming, 4849551 silesia, level 4, old streaming, 4786969 silesia, level 5, old streaming, 4638960 @@ -1199,12 +1199,12 @@ silesia, level 19, old stre silesia, no source size, old streaming, 4849515 silesia, uncompressed literals, old streaming, 4849551 silesia, uncompressed literals optimal, old streaming, 4283236 -silesia, huffman literals, old streaming, 6183403 -silesia.tar, level -5, old streaming, 6982759 -silesia.tar, level -3, old streaming, 6641283 -silesia.tar, level -1, old streaming, 6190795 +silesia, huffman literals, old streaming, 6183923 +silesia.tar, level -5, old streaming, 7260007 +silesia.tar, level -3, old streaming, 6845151 +silesia.tar, level -1, old streaming, 6187938 silesia.tar, level 0, old streaming, 4861425 -silesia.tar, level 1, old streaming, 5336941 +silesia.tar, level 1, old streaming, 5334890 silesia.tar, level 3, old streaming, 4861425 silesia.tar, level 4, old streaming, 4799632 silesia.tar, level 5, old streaming, 4650207 @@ -1217,16 +1217,16 @@ silesia.tar, level 19, old stre silesia.tar, no source size, old streaming, 4861421 silesia.tar, uncompressed literals, old streaming, 4861425 silesia.tar, uncompressed literals optimal, old streaming, 4264388 -silesia.tar, huffman 
literals, old streaming, 6190795 -github, level -5, old streaming, 205285 +silesia.tar, huffman literals, old streaming, 6187938 +github, level -5, old streaming, 232315 github, level -5 with dict, old streaming, 46718 -github, level -3, old streaming, 190643 +github, level -3, old streaming, 220760 github, level -3 with dict, old streaming, 45395 -github, level -1, old streaming, 175568 +github, level -1, old streaming, 175468 github, level -1 with dict, old streaming, 43170 github, level 0, old streaming, 136335 github, level 0 with dict, old streaming, 41148 -github, level 1, old streaming, 142465 +github, level 1, old streaming, 142365 github, level 1 with dict, old streaming, 41682 github, level 3, old streaming, 136335 github, level 3 with dict, old streaming, 41148 @@ -1250,17 +1250,17 @@ github, no source size, old stre github, no source size with dict, old streaming, 40654 github, uncompressed literals, old streaming, 136335 github, uncompressed literals optimal, old streaming, 134064 -github, huffman literals, old streaming, 175568 -github.tar, level -5, old streaming, 46747 -github.tar, level -5 with dict, old streaming, 44440 -github.tar, level -3, old streaming, 43537 -github.tar, level -3 with dict, old streaming, 41112 -github.tar, level -1, old streaming, 42465 -github.tar, level -1 with dict, old streaming, 41196 +github, huffman literals, old streaming, 175468 +github.tar, level -5, old streaming, 64132 +github.tar, level -5 with dict, old streaming, 48642 +github.tar, level -3, old streaming, 50964 +github.tar, level -3 with dict, old streaming, 42750 +github.tar, level -1, old streaming, 42536 +github.tar, level -1 with dict, old streaming, 41198 github.tar, level 0, old streaming, 38441 github.tar, level 0 with dict, old streaming, 37995 -github.tar, level 1, old streaming, 39342 -github.tar, level 1 with dict, old streaming, 38293 +github.tar, level 1, old streaming, 39270 +github.tar, level 1 with dict, old streaming, 38316 github.tar, level 
3, old streaming, 38441 github.tar, level 3 with dict, old streaming, 37995 github.tar, level 4, old streaming, 38467 @@ -1283,12 +1283,12 @@ github.tar, no source size, old stre github.tar, no source size with dict, old streaming, 38000 github.tar, uncompressed literals, old streaming, 38441 github.tar, uncompressed literals optimal, old streaming, 32837 -github.tar, huffman literals, old streaming, 42465 -silesia, level -5, old streaming advanced, 6882505 -silesia, level -3, old streaming advanced, 6568376 -silesia, level -1, old streaming advanced, 6183403 +github.tar, huffman literals, old streaming, 42536 +silesia, level -5, old streaming advanced, 7292053 +silesia, level -3, old streaming advanced, 6867875 +silesia, level -1, old streaming advanced, 6183923 silesia, level 0, old streaming advanced, 4849551 -silesia, level 1, old streaming advanced, 5314161 +silesia, level 1, old streaming advanced, 5312694 silesia, level 3, old streaming advanced, 4849551 silesia, level 4, old streaming advanced, 4786969 silesia, level 5, old streaming advanced, 4638960 @@ -1308,13 +1308,13 @@ silesia, small chain log, old stre silesia, explicit params, old streaming advanced, 4795884 silesia, uncompressed literals, old streaming advanced, 4849551 silesia, uncompressed literals optimal, old streaming advanced, 4283236 -silesia, huffman literals, old streaming advanced, 6183403 +silesia, huffman literals, old streaming advanced, 6183923 silesia, multithreaded with advanced params, old streaming advanced, 4849551 -silesia.tar, level -5, old streaming advanced, 6982759 -silesia.tar, level -3, old streaming advanced, 6641283 -silesia.tar, level -1, old streaming advanced, 6190795 +silesia.tar, level -5, old streaming advanced, 7260007 +silesia.tar, level -3, old streaming advanced, 6845151 +silesia.tar, level -1, old streaming advanced, 6187938 silesia.tar, level 0, old streaming advanced, 4861425 -silesia.tar, level 1, old streaming advanced, 5336941 +silesia.tar, level 1, old 
streaming advanced, 5334890 silesia.tar, level 3, old streaming advanced, 4861425 silesia.tar, level 4, old streaming advanced, 4799632 silesia.tar, level 5, old streaming advanced, 4650207 @@ -1334,17 +1334,17 @@ silesia.tar, small chain log, old stre silesia.tar, explicit params, old streaming advanced, 4807403 silesia.tar, uncompressed literals, old streaming advanced, 4861425 silesia.tar, uncompressed literals optimal, old streaming advanced, 4264388 -silesia.tar, huffman literals, old streaming advanced, 6190795 +silesia.tar, huffman literals, old streaming advanced, 6187938 silesia.tar, multithreaded with advanced params, old streaming advanced, 4861425 -github, level -5, old streaming advanced, 216734 +github, level -5, old streaming advanced, 241214 github, level -5 with dict, old streaming advanced, 49562 -github, level -3, old streaming advanced, 192160 +github, level -3, old streaming advanced, 222937 github, level -3 with dict, old streaming advanced, 44956 -github, level -1, old streaming advanced, 181108 +github, level -1, old streaming advanced, 181107 github, level -1 with dict, old streaming advanced, 42383 github, level 0, old streaming advanced, 141104 github, level 0 with dict, old streaming advanced, 41113 -github, level 1, old streaming advanced, 143692 +github, level 1, old streaming advanced, 143693 github, level 1 with dict, old streaming advanced, 42430 github, level 3, old streaming advanced, 141104 github, level 3 with dict, old streaming advanced, 41113 @@ -1375,18 +1375,18 @@ github, small chain log, old stre github, explicit params, old streaming advanced, 140937 github, uncompressed literals, old streaming advanced, 141104 github, uncompressed literals optimal, old streaming advanced, 134064 -github, huffman literals, old streaming advanced, 181108 +github, huffman literals, old streaming advanced, 181107 github, multithreaded with advanced params, old streaming advanced, 141104 -github.tar, level -5, old streaming advanced, 46747 
-github.tar, level -5 with dict, old streaming advanced, 44824 -github.tar, level -3, old streaming advanced, 43537 -github.tar, level -3 with dict, old streaming advanced, 41800 -github.tar, level -1, old streaming advanced, 42465 -github.tar, level -1 with dict, old streaming advanced, 41471 +github.tar, level -5, old streaming advanced, 64132 +github.tar, level -5 with dict, old streaming advanced, 48982 +github.tar, level -3, old streaming advanced, 50964 +github.tar, level -3 with dict, old streaming advanced, 43357 +github.tar, level -1, old streaming advanced, 42536 +github.tar, level -1 with dict, old streaming advanced, 41494 github.tar, level 0, old streaming advanced, 38441 github.tar, level 0 with dict, old streaming advanced, 38013 -github.tar, level 1, old streaming advanced, 39342 -github.tar, level 1 with dict, old streaming advanced, 38940 +github.tar, level 1, old streaming advanced, 39270 +github.tar, level 1 with dict, old streaming advanced, 38934 github.tar, level 3, old streaming advanced, 38441 github.tar, level 3 with dict, old streaming advanced, 38013 github.tar, level 4, old streaming advanced, 38467 @@ -1416,7 +1416,7 @@ github.tar, small chain log, old stre github.tar, explicit params, old streaming advanced, 41227 github.tar, uncompressed literals, old streaming advanced, 38441 github.tar, uncompressed literals optimal, old streaming advanced, 32837 -github.tar, huffman literals, old streaming advanced, 42465 +github.tar, huffman literals, old streaming advanced, 42536 github.tar, multithreaded with advanced params, old streaming advanced, 38441 github, level -5 with dict, old streaming cdict, 46718 github, level -3 with dict, old streaming cdict, 45395 @@ -1433,11 +1433,11 @@ github, level 13 with dict, old stre github, level 16 with dict, old streaming cdict, 37577 github, level 19 with dict, old streaming cdict, 37576 github, no source size with dict, old streaming cdict, 40654 -github.tar, level -5 with dict, old streaming cdict, 
45018 -github.tar, level -3 with dict, old streaming cdict, 41886 -github.tar, level -1 with dict, old streaming cdict, 41636 +github.tar, level -5 with dict, old streaming cdict, 49146 +github.tar, level -3 with dict, old streaming cdict, 43468 +github.tar, level -1 with dict, old streaming cdict, 41662 github.tar, level 0 with dict, old streaming cdict, 37956 -github.tar, level 1 with dict, old streaming cdict, 38766 +github.tar, level 1 with dict, old streaming cdict, 38761 github.tar, level 3 with dict, old streaming cdict, 37956 github.tar, level 4 with dict, old streaming cdict, 37927 github.tar, level 5 with dict, old streaming cdict, 37600