Skip to content

Commit

Permalink
Huffman ASM
Browse files Browse the repository at this point in the history
  • Loading branch information
terrelln committed Sep 20, 2021
1 parent 51b123d commit a5f2c45
Show file tree
Hide file tree
Showing 15 changed files with 1,349 additions and 196 deletions.
1 change: 1 addition & 0 deletions build/meson/lib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
join_paths(zstd_rootdir, 'lib/compress/zstd_opt.c'),
join_paths(zstd_rootdir, 'lib/compress/zstd_ldm.c'),
join_paths(zstd_rootdir, 'lib/decompress/huf_decompress.c'),
join_paths(zstd_rootdir, 'lib/decompress/huf_decompress_amd64.S'),
join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress.c'),
join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress_block.c'),
join_paths(zstd_rootdir, 'lib/decompress/zstd_ddict.c'),
Expand Down
2 changes: 2 additions & 0 deletions build/single_file_libs/zstd-in.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#define ZSTD_MULTITHREAD
#endif
#define ZSTD_TRACE 0
/* TODO: Can't amalgamate ASM function */
#define HUF_DISABLE_ASM 1

/* Include zstd_deps.h first with all the options we need enabled. */
#define ZSTD_DEPS_NEED_MALLOC
Expand Down
2 changes: 2 additions & 0 deletions build/single_file_libs/zstddeclib-in.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
#define ZSTD_LEGACY_SUPPORT 0
#define ZSTD_STRIP_ERROR_STRINGS
#define ZSTD_TRACE 0
/* TODO: Can't amalgamate ASM function */
#define HUF_DISABLE_ASM 1

/* Include zstd_deps.h first with all the options we need enabled. */
#define ZSTD_DEPS_NEED_MALLOC
Expand Down
1 change: 1 addition & 0 deletions contrib/linux-kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ libzstd:
-DXXH_STATIC_LINKING_ONLY \
-DMEM_FORCE_MEMORY_ACCESS=0 \
-D__GNUC__ \
-D__linux__=1 \
-DSTATIC_BMI2=0 \
-DZSTD_ADDRESS_SANITIZER=0 \
-DZSTD_MEMORY_SANITIZER=0 \
Expand Down
5 changes: 5 additions & 0 deletions contrib/linux-kernel/decompress_sources.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
#include "common/error_private.c"
#include "common/fse_decompress.c"
#include "common/zstd_common.c"
/*
* Disable the ASM Huffman implementation because we need to
* include all the sources.
*/
#define HUF_DISABLE_ASM 1
#include "decompress/huf_decompress.c"
#include "decompress/zstd_ddict.c"
#include "decompress/zstd_decompress.c"
Expand Down
1 change: 1 addition & 0 deletions contrib/linux-kernel/test/macro-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ test_not_present "ZSTD_DLL_IMPORT"
test_not_present "__ICCARM__"
test_not_present "_MSC_VER"
test_not_present "_WIN32"
test_not_present "__linux__"
4 changes: 2 additions & 2 deletions lib/common/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& (defined(__x86_64__) || defined(_M_X64)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
Expand Down Expand Up @@ -212,7 +212,7 @@
# elif defined(ZSTD_ARCH_ARM_NEON)
# include <arm_neon.h>
# endif
#endif
#endif

/* compat. with non-clang compilers */
#ifndef __has_builtin
Expand Down
79 changes: 79 additions & 0 deletions lib/common/error_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ extern "C" {
* Dependencies
******************************************/
#include "../zstd_errors.h" /* enum list */
#include "compiler.h"
#include "debug.h"
#include "zstd_deps.h" /* size_t */


Expand Down Expand Up @@ -73,6 +75,83 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
return ERR_getErrorString(ERR_getErrorCode(code));
}

/**
* Ignore: this is an internal helper.
*
* This is a helper function to help force C99-correctness during compilation.
* Under strict compilation modes, variadic macro arguments can't be empty.
* However, variadic function arguments can be. Using a function therefore lets
* us statically check that at least one (string) argument was passed,
* independent of the compilation flags.
*/
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}

/**
* Ignore: this is an internal helper.
*
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}

#define ERR_QUOTE(str) #str

/**
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information.
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}

/**
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);

/**
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);

#if defined (__cplusplus)
}
#endif
Expand Down
7 changes: 5 additions & 2 deletions lib/common/huf.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,


/* *** Constants *** */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
#define HUF_SYMBOLVALUE_MAX 255

#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
# error "HUF_TABLELOG_MAX is too large !"
#endif
Expand Down Expand Up @@ -353,6 +353,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif

#endif /* HUF_STATIC_LINKING_ONLY */

Expand Down
120 changes: 45 additions & 75 deletions lib/common/zstd_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,81 +58,6 @@ extern "C" {
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))

/**
* Ignore: this is an internal helper.
*
* This is a helper function to help force C99-correctness during compilation.
* Under strict compilation modes, variadic macro arguments can't be empty.
* However, variadic function arguments can be. Using a function therefore lets
* us statically check that at least one (string) argument was passed,
* independent of the compilation flags.
*/
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
(void)format;
}

/**
* Ignore: this is an internal helper.
*
* We want to force this function invocation to be syntactically correct, but
* we don't want to force runtime evaluation of its arguments.
*/
#define _FORCE_HAS_FORMAT_STRING(...) \
if (0) { \
_force_has_format_string(__VA_ARGS__); \
}

/**
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information.
* In order to do that (particularly, printing the conditional that failed),
* this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
}

/**
* Unconditionally return the specified error.
*
* In debug modes, prints additional information.
*/
#define RETURN_ERROR(err, ...) \
do { \
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
} while(0);

/**
* If the provided expression evaluates to an error code, returns that error code.
*
* In debug modes, prints additional information.
*/
#define FORWARD_IF_ERROR(err, ...) \
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
} \
} while(0);


/*-*************************************
* Common constants
Expand Down Expand Up @@ -453,6 +378,51 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
}
}

/**
* Computes CTZ on a U64.
* This will be slow on 32-bit mode, and on unsupported compilers.
* If you need this function to be fast (because it is hot) expand
* support.
*/
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
{
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2
return _tzcnt_u64(val);
# else
unsigned long r = 0;
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
# endif
# elif defined(__GNUC__) && (__GNUC__ >= 4)
return __builtin_ctzll((U64)val);
# else
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 56,
5, 17, 26, 54, 15, 41, 29, 43,
10, 31, 38, 35, 21, 45, 49, 57,
63, 6, 12, 18, 24, 27, 33, 55,
16, 53, 40, 42, 30, 37, 44, 48,
62, 11, 23, 32, 52, 39, 36, 47,
61, 22, 51, 46, 60, 50, 59, 58 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if defined(_MSC_VER)
unsigned long r=0;
return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
}


/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
Expand Down
1 change: 1 addition & 0 deletions lib/compress/huf_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
{
size_t const nbBits = HUF_getNbBits(elt);
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
(void)dirtyBits;
/* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
/* We didn't overwrite any bits in the bit container. */
Expand Down
Loading

0 comments on commit a5f2c45

Please sign in to comment.