Skip to content

Commit

Permalink
Merge pull request #241 from Cyan4973/dev
Browse files Browse the repository at this point in the history
v0.7.3
  • Loading branch information
Cyan4973 authored Jul 8, 2016
2 parents 2b61f74 + 722e14b commit 75ed1a8
Show file tree
Hide file tree
Showing 22 changed files with 1,253 additions and 470 deletions.
14 changes: 13 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
v0.7.3
New : compression format specification
New : `--` separator, stating that all following arguments are file names. Suggested by Chip Turner.
New : `ZSTD_getDecompressedSize()`
New : OpenBSD target, by Juan Francisco Cantero Hurtado
New : `examples` directory
fixed : dictBuilder using HC levels, reported by Bartosz Taudul
fixed : legacy support from ZSTD_decompress_usingDDict(), reported by Felix Handte
fixed : multi-blocks decoding with intermediate uncompressed blocks, reported by Greg Slazinski
modified : removed "mem.h" and "error_public.h" dependencies from "zstd.h" (experimental section)
modified : legacy functions no longer need magic number

v0.7.2
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski.
fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.

Expand Down
7 changes: 7 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Zstandard library : usage examples
==================================

- [Dictionary decompression](dictionary_decompression.c)
Decompress multiple files using the same dictionary.
Compatible with Legacy modes.
Introduces usage of : `ZSTD_createDDict()` and `ZSTD_decompress_usingDDict()`
112 changes: 112 additions & 0 deletions examples/dictionary_decompression.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include <stdlib.h> // exit
#include <stdio.h> // printf
#include <string.h> // strerror
#include <errno.h> // errno
#include <sys/stat.h> // stat
#include <zstd.h>


static off_t fsizeX(const char *filename)
{
struct stat st;
if (stat(filename, &st) == 0) return st.st_size;
/* error */
printf("stat: %s : %s \n", filename, strerror(errno));
exit(1);
}

static FILE* fopenX(const char *filename, const char *instruction)
{
FILE* const inFile = fopen(filename, instruction);
if (inFile) return inFile;
/* error */
printf("fopen: %s : %s \n", filename, strerror(errno));
exit(2);
}

static void* mallocX(size_t size)
{
void* const buff = malloc(size);
if (buff) return buff;
/* error */
printf("malloc: %s \n", strerror(errno));
exit(3);
}

static void* loadFileX(const char* fileName, size_t* size)
{
off_t const buffSize = fsizeX(fileName);
FILE* const inFile = fopenX(fileName, "rb");
void* const buffer = mallocX(buffSize);
size_t const readSize = fread(buffer, 1, buffSize, inFile);
if (readSize != (size_t)buffSize) {
printf("fread: %s : %s \n", fileName, strerror(errno));
exit(4);
}
fclose(inFile);
*size = buffSize;
return buffer;
}


static const ZSTD_DDict* createDict(const char* dictFileName)
{
size_t dictSize;
void* const dictBuffer = loadFileX(dictFileName, &dictSize);
const ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictSize);
free(dictBuffer);
return ddict;
}


/* prototype declared here, as it currently is part of experimental section */
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);

static void decompress(const char* fname, const ZSTD_DDict* ddict)
{
size_t cSize;
void* const cBuff = loadFileX(fname, &cSize);
unsigned long long const rSize = ZSTD_getDecompressedSize(cBuff, cSize);
if (rSize==0) {
printf("%s : original size unknown \n", fname);
exit(5);
}
void* const rBuff = mallocX(rSize);

ZSTD_DCtx* const dctx = ZSTD_createDCtx();
size_t const dSize = ZSTD_decompress_usingDDict(dctx, rBuff, rSize, cBuff, cSize, ddict);

if (dSize != rSize) {
printf("error decoding %s : %s \n", fname, ZSTD_getErrorName(dSize));
exit(7);
}

/* success */
printf("%25s : %6u -> %7u \n", fname, (unsigned)cSize, (unsigned)rSize);

ZSTD_freeDCtx(dctx);
free(rBuff);
free(cBuff);
}


int main(int argc, const char** argv)
{
const char* const exeName = argv[0];

if (argc<3) {
printf("wrong arguments\n");
printf("usage:\n");
printf("%s [FILES] dictionary\n", exeName);
return 1;
}

/* load dictionary only once */
const char* const dictName = argv[argc-1];
const ZSTD_DDict* const dictPtr = createDict(dictName);

int u;
for (u=1; u<argc-1; u++) decompress(argv[u], dictPtr);

printf("All %u files decoded. \n", argc-2);
}
2 changes: 2 additions & 0 deletions lib/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# make install artefact
libzstd.pc
15 changes: 10 additions & 5 deletions lib/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,19 @@ It is used by `zstd` command line utility, and [7zip plugin](http://mcmilk.de/pr
- compress/zbuff_compress.c
- decompress/zbuff_decompress.c


#### Dictionary builder

To create dictionaries from training sets :
In order to create dictionaries from some training sets,
it's needed to include all files from [dictBuilder directory](dictBuilder/)


#### Legacy support

Zstandard can decode previous formats, starting from v0.1.
Support for these format is provided in [folder legacy](legacy/).
It's also required to compile the library with `ZSTD_LEGACY_SUPPORT = 1`.

- dictBuilder/divsufsort.c
- dictBuilder/divsufsort.h
- dictBuilder/zdict.c
- dictBuilder/zdict.h

#### Miscellaneous

Expand Down
6 changes: 5 additions & 1 deletion lib/common/error_public.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ typedef enum {
ZSTD_error_maxCode
} ZSTD_ErrorCode;

/* note : compare with size_t function results using ZSTD_getError() */
/*! ZSTD_getErrorCode() :
convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
which can be used to compare directly with enum list published into "error_public.h" */
ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
const char* ZSTD_getErrorString(ZSTD_ErrorCode code);


#if defined (__cplusplus)
Expand Down
2 changes: 1 addition & 1 deletion lib/common/zbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);
/*--- Advanced Streaming function ---*/
ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
const void* dict, size_t dictSize,
ZSTD_parameters params, U64 pledgedSrcSize);
ZSTD_parameters params, unsigned long long pledgedSrcSize);

#endif /* ZBUFF_STATIC_LINKING_ONLY */

Expand Down
75 changes: 39 additions & 36 deletions lib/common/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ extern "C" {
***************************************/
#define ZSTD_VERSION_MAJOR 0
#define ZSTD_VERSION_MINOR 7
#define ZSTD_VERSION_RELEASE 2
#define ZSTD_VERSION_RELEASE 3

#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
#define ZSTD_QUOTE(str) #str
Expand Down Expand Up @@ -197,14 +197,13 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
* Use them only in association with static linking.
* ==================================================================================== */

/*--- Dependency ---*/
#include "mem.h" /* U32 */

/*--- Constants ---*/
#define ZSTD_MAGICNUMBER 0xFD2FB527 /* v0.7 */
#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U

#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? 25 : 27))
#define ZSTD_WINDOWLOG_MAX_32 25
#define ZSTD_WINDOWLOG_MAX_64 27
#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 18
#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
#define ZSTD_CHAINLOG_MIN 4
Expand All @@ -229,19 +228,19 @@ static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable f
typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; /*< from faster to stronger */

typedef struct {
U32 windowLog; /*< largest match distance : larger == more compression, more memory needed during decompression */
U32 chainLog; /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
U32 hashLog; /*< dispatch table : larger == faster, more memory */
U32 searchLog; /*< nb of searches : larger == more compression, slower */
U32 searchLength; /*< match length searched : larger == faster decompression, sometimes less compression */
U32 targetLength; /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
unsigned windowLog; /*< largest match distance : larger == more compression, more memory needed during decompression */
unsigned chainLog; /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
unsigned hashLog; /*< dispatch table : larger == faster, more memory */
unsigned searchLog; /*< nb of searches : larger == more compression, slower */
unsigned searchLength; /*< match length searched : larger == faster decompression, sometimes less compression */
unsigned targetLength; /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
ZSTD_strategy strategy;
} ZSTD_compressionParameters;

typedef struct {
U32 contentSizeFlag; /*< 1: content size will be in frame header (if known). */
U32 checksumFlag; /*< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
U32 noDictIDFlag; /*< 1: no dict ID will be saved into frame header (if dictionary compression) */
unsigned contentSizeFlag; /*< 1: content size will be in frame header (if known). */
unsigned checksumFlag; /*< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
unsigned noDictIDFlag; /*< 1: no dict ID will be saved into frame header (if dictionary compression) */
} ZSTD_frameParameters;

typedef struct {
Expand Down Expand Up @@ -272,12 +271,12 @@ ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
/*! ZSTD_getParams() :
* same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
* All fields of `ZSTD_frameParameters` are set to default (0) */
ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize, size_t dictSize);
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);

/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
* `srcSize` value is optional, select 0 if not known */
ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize);
ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);

/*! ZSTD_checkCParams() :
* Ensure param values remain within authorized range */
Expand All @@ -286,7 +285,7 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
/*! ZSTD_adjustCParams() :
* optimize params for a given `srcSize` and `dictSize`.
* both values are optional, select `0` if unknown. */
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, U64 srcSize, size_t dictSize);
ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);

/*! ZSTD_compress_advanced() :
* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */
Expand All @@ -299,6 +298,15 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,

/*--- Advanced Decompression functions ---*/

/** ZSTD_getDecompressedSize() :
* compatible with legacy mode
* @return : decompressed size if known, 0 otherwise
note : 0 can mean any of the following :
- decompressed size is not provided within frame header
- frame header unknown / not supported
- frame header not completely provided (`srcSize` too small) */
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);

/*! ZSTD_createDCtx_advanced() :
* Create a ZSTD decompression context using external alloc and free functions */
ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
Expand All @@ -309,7 +317,7 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
******************************************************************/
ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize);
ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx);

ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
Expand Down Expand Up @@ -345,10 +353,10 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
*/

typedef struct {
U64 frameContentSize;
U32 windowSize;
U32 dictID;
U32 checksumFlag;
unsigned long long frameContentSize;
unsigned windowSize;
unsigned dictID;
unsigned checksumFlag;
} ZSTD_frameParams;

ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */
Expand Down Expand Up @@ -408,12 +416,14 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
* Block functions
****************************************/
/*! Block functions produce and decode raw zstd blocks, without frame metadata.
Frame metadata cost is typically ~18 bytes, which is non-negligible on very small blocks.
User will have to take in charge required information to regenerate data, such as compressed and content sizes.
A few rules to respect :
- Uncompressed block size must be <= ZSTD_BLOCKSIZE_MAX (128 KB)
+ If you need to compress more, it's recommended to use ZSTD_compress() instead, since frame metadata costs become negligible.
- Compressing or decompressing requires a context structure
+ If you need to compress more, cut data into multiple blocks
+ Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
- Compressing and decompressing require a context structure
+ Use ZSTD_createCCtx() and ZSTD_createDCtx()
- It is necessary to init context before starting
+ compression : ZSTD_compressBegin()
Expand All @@ -423,23 +433,16 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
In which case, nothing is produced into `dst`.
+ User must test for such outcome and deal directly with uncompressed data
+ ZSTD_decompressBlock() doesn't accept uncompressed data as input !!
+ ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+ In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
Use ZSTD_insertBlock() in such a case.
Insert block once it's copied into its final position.
*/

#define ZSTD_BLOCKSIZE_MAX (128 * 1024) /* define, for static allocation */
ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);


/*-*************************************
* Error management
***************************************/
#include "error_public.h"
/*! ZSTD_getErrorCode() :
convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
which can be used to compare directly with enum list published into "error_public.h" */
ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
ZSTDLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert block into `dctx` history. Useful to track uncompressed blocks */


#endif /* ZSTD_STATIC_LINKING_ONLY */
Expand Down
2 changes: 1 addition & 1 deletion lib/compress/zbuff_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)

size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
const void* dict, size_t dictSize,
ZSTD_parameters params, U64 pledgedSrcSize)
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
/* allocate buffers */
{ size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
Expand Down
Loading

0 comments on commit 75ed1a8

Please sign in to comment.