Skip to content

Commit

Permalink
Merge pull request #3895 from facebook/fix_3793
Browse files Browse the repository at this point in the history
Improve compression of Arrays of Integers (High compression mode)
  • Loading branch information
Cyan4973 authored Feb 6, 2024
2 parents 5a08f58 + b88c593 commit 06b5b37
Show file tree
Hide file tree
Showing 7 changed files with 330 additions and 246 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,9 @@ asan-%: clean
msan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason

msan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -C $(TESTDIR) HAVE_LZMA=0 $*
msan-%:
$(MAKE) clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror $(MOREFLAGS)" FUZZER_FLAGS="--no-big-tests $(FUZZER_FLAGS)" $(MAKE) -j -C $(TESTDIR) HAVE_LZMA=0 $*

asan32: clean
$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address $(MOREFLAGS)"
Expand Down
8 changes: 4 additions & 4 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1661,8 +1661,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0;
Expand Down Expand Up @@ -2045,8 +2045,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
}

ms->cParams = *cParams;
Expand Down
15 changes: 8 additions & 7 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,23 +159,24 @@ typedef struct {
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

typedef struct {
int price;
U32 off;
U32 mlen;
U32 litlen;
U32 rep[ZSTD_REP_NUM];
int price; /* price from beginning of segment to this position */
U32 off; /* offset of previous match */
U32 mlen; /* length of previous match */
U32 litlen; /* nb of literals since previous match */
U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+2)
typedef struct {
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
unsigned* litFreq; /* table of literals statistics, of size 256 */
unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */

U32 litSum; /* nb of literals */
U32 litLengthSum; /* nb of litLength codes */
Expand Down
Loading

0 comments on commit 06b5b37

Please sign in to comment.