Skip to content

Commit 869708a

Browse files
committed
FEAT: optional support for Brotli de/compression
1 parent a7bbae0 commit 869708a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+38636
-0
lines changed

NOTICE

+3
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ Credits for Non-REBOL orginated C files and modules
4646
Copyright (c) 2016-2020 David Bryant
4747
BSD - https://github.com/dbry/lzw-ab
4848

49+
* Brotli (optional):
50+
Copyright (c) 2013 Google Inc. All Rights Reserved.
51+
MIT - https://github.com/google/brotli
4952

5053
* CRUSH (optional):
5154
Copyright (C) 2013, Ilya Muravyov

make/rebol3.nest

+38
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,43 @@ include-crush-compression: [
748748
core-files: %core/u-crush.c
749749
]
750750

751+
include-brotli-compression: [
752+
config: INCLUDE_BROTLI
753+
include: %src/include/brotli/
754+
core-files: [
755+
%core/brotli/common/constants.c
756+
%core/brotli/common/context.c
757+
%core/brotli/common/transform.c
758+
%core/brotli/common/dictionary.c
759+
%core/brotli/common/shared_dictionary.c
760+
%core/brotli/dec/state.c
761+
%core/brotli/dec/huffman.c
762+
%core/brotli/dec/decode.c
763+
%core/brotli/dec/bit_reader.c
764+
%core/brotli/enc/backward_references.c
765+
%core/brotli/enc/backward_references_hq.c
766+
%core/brotli/enc/bit_cost.c
767+
%core/brotli/enc/block_splitter.c
768+
%core/brotli/enc/brotli_bit_stream.c
769+
%core/brotli/enc/cluster.c
770+
%core/brotli/enc/command.c
771+
%core/brotli/enc/compound_dictionary.c
772+
%core/brotli/enc/compress_fragment.c
773+
%core/brotli/enc/compress_fragment_two_pass.c
774+
%core/brotli/enc/dictionary_hash.c
775+
%core/brotli/enc/encode.c
776+
%core/brotli/enc/encoder_dict.c
777+
%core/brotli/enc/entropy_encode.c
778+
%core/brotli/enc/fast_log.c
779+
%core/brotli/enc/histogram.c
780+
%core/brotli/enc/literal_cost.c
781+
%core/brotli/enc/memory.c
782+
%core/brotli/enc/metablock.c
783+
%core/brotli/enc/static_dict.c
784+
%core/brotli/enc/utf8_util.c
785+
]
786+
]
787+
751788
include-png-filter-native: [
752789
config: INCLUDE_PNG_FILTER
753790
core-files: %core/u-png-filter.c
@@ -885,6 +922,7 @@ include-rebol-bulk: [
885922
:include-image-natives
886923
:include-lzma-compression
887924
:include-lzw-compression
925+
:include-brotli-compression
888926
:include-crush-compression
889927
:include-base36-encoding
890928
:include-base85-encoding

src/core/brotli/common/constants.c

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/* Copyright 2013 Google Inc. All Rights Reserved.
2+
3+
Distributed under MIT license.
4+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5+
*/
6+
7+
#include "constants.h"
8+
9+
/* Prefix code range table with one {offset, nbits} entry per symbol
   (BROTLI_NUM_BLOCK_LEN_SYMBOLS entries in total). Each entry covers the
   values [offset, offset + 2^nbits). The entries are contiguous: every offset
   equals the previous entry's offset plus 2^(previous nbits), starting at 1. */
const BrotliPrefixCodeRange
10+
_kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
11+
{1, 2}, {5, 2}, {9, 2}, {13, 2}, {17, 3}, {25, 3},
12+
{33, 3}, {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4},
13+
{113, 5}, {145, 5}, {177, 5}, {209, 5}, {241, 6}, {305, 6},
14+
{369, 7}, {497, 8}, {753, 9}, {1265, 10}, {2289, 11}, {4337, 12},
15+
{8433, 13}, {16625, 24}};

src/core/brotli/common/constants.h

+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/* Copyright 2016 Google Inc. All Rights Reserved.
2+
3+
Distributed under MIT license.
4+
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5+
*/
6+
7+
/**
8+
* @file
9+
* Common constants used in decoder and encoder API.
10+
*/
11+
12+
#ifndef BROTLI_COMMON_CONSTANTS_H_
13+
#define BROTLI_COMMON_CONSTANTS_H_
14+
15+
#include <brotli/port.h>
16+
#include <brotli/types.h>
17+
18+
#include "platform.h"
19+
20+
/* Specification: 7.3. Encoding of the context map */
21+
#define BROTLI_CONTEXT_MAP_MAX_RLE 16
22+
23+
/* Specification: 2. Compressed representation overview */
24+
#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256
25+
26+
/* Specification: 3.3. Alphabet sizes: insert-and-copy length */
27+
#define BROTLI_NUM_LITERAL_SYMBOLS 256
28+
#define BROTLI_NUM_COMMAND_SYMBOLS 704
29+
#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26
30+
#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \
31+
BROTLI_CONTEXT_MAP_MAX_RLE)
32+
#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2)
33+
34+
/* Specification: 3.5. Complex prefix codes */
35+
#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16
36+
#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17
37+
#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1)
38+
/* "code length of 8 is repeated" */
39+
#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
40+
41+
/* "Large Window Brotli" */
42+
43+
/**
44+
* The theoretical maximum number of distance bits specified for large window
45+
* brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
46+
* encoders and decoders only support up to 30 max distance bits, the value is
47+
* set to 62 because it affects the large window brotli file format.
48+
* Specifically, it affects the encoding of simple huffman tree for distances,
49+
* see Specification RFC 7932 chapter 3.4.
50+
*/
51+
#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
52+
#define BROTLI_LARGE_MIN_WBITS 10
53+
/**
54+
* The maximum supported large brotli window bits by the encoder and decoder.
55+
* Large window brotli allows up to 62 bits, however the current encoder and
56+
* decoder, designed for 32-bit integers, only support up to 30 bits maximum.
57+
*/
58+
#define BROTLI_LARGE_MAX_WBITS 30
59+
60+
/* Specification: 4. Encoding of distances */
61+
#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
62+
/**
63+
* Maximal number of "postfix" bits.
64+
*
65+
* Number of "postfix" bits is stored as 2 bits in meta-block header.
66+
*/
67+
#define BROTLI_MAX_NPOSTFIX 3
68+
#define BROTLI_MAX_NDIRECT 120
69+
#define BROTLI_MAX_DISTANCE_BITS 24U
70+
#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \
71+
BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \
72+
((MAXNBITS) << ((NPOSTFIX) + 1)))
73+
/* Upper bound on the distance alphabet size, obtained with the maximal
   NPOSTFIX, the maximal NDIRECT and the large-window distance bit count:
   16 + 120 + (62 << (3 + 1)) == 1128.
   The arguments must follow the (NPOSTFIX, NDIRECT, MAXNBITS) parameter order
   of BROTLI_DISTANCE_ALPHABET_SIZE; passing NDIRECT first would turn the
   expression into a shift by 121 bits, which is undefined behaviour. */
#define BROTLI_NUM_DISTANCE_SYMBOLS \
    BROTLI_DISTANCE_ALPHABET_SIZE(  \
        BROTLI_MAX_NPOSTFIX, BROTLI_MAX_NDIRECT, BROTLI_LARGE_MAX_DISTANCE_BITS)
77+
78+
/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
79+
brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
80+
NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
81+
#define BROTLI_MAX_DISTANCE 0x3FFFFFC
82+
83+
/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
84+
allows safe distance calculation without overflows, given the distance
85+
alphabet size is limited to corresponding size
86+
(see kLargeWindowDistanceCodeLimits). */
87+
#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
88+
89+
90+
/* Specification: 5. Encoding of Literal Insertion Lengths and Copy Lengths */
91+
#define BROTLI_NUM_INS_COPY_CODES 24
92+
93+
/* 7.1. Context modes and context ID lookup for literals */
94+
/* "context IDs for literals are in the range of 0..63" */
95+
#define BROTLI_LITERAL_CONTEXT_BITS 6
96+
97+
/* 7.2. Context ID for distances */
98+
#define BROTLI_DISTANCE_CONTEXT_BITS 2
99+
100+
/* 9.1. Format of the Stream Header */
101+
/* Number of slack bytes for window size. Don't confuse
102+
with BROTLI_NUM_DISTANCE_SHORT_CODES. */
103+
#define BROTLI_WINDOW_GAP 16
104+
#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
105+
106+
/* Pair of limits produced by BrotliCalculateDistanceCodeLimit. */
typedef struct BrotliDistanceCodeLimit {
107+
/* Size of the distance alphabet that stays within the requested limit. */
uint32_t max_alphabet_size;
108+
/* Maximal distance representable by an alphabet of that size. */
uint32_t max_distance;
109+
} BrotliDistanceCodeLimit;
110+
111+
/* This function calculates maximal size of distance alphabet, such that the
112+
distances greater than the given values can not be represented.
113+
114+
These limits are designed to support fast and safe 32-bit decoders.
115+
"32-bit" means that signed integer values up to ((1 << 31) - 1) could be
116+
safely expressed.
117+
118+
Brotli distance alphabet symbols do not represent consecutive distance
119+
ranges. Each distance alphabet symbol (excluding direct distances and short
120+
codes) represents an interleaved (for NPOSTFIX > 0) range of distances.
121+
A "group" of consecutive (1 << NPOSTFIX) symbols represent non-interleaved
122+
range. Two consecutive groups require the same amount of "extra bits".
123+
124+
It is important that distance alphabet represents complete "groups".
125+
To avoid complex logic on encoder side about interleaved ranges
126+
it was decided to restrict both sides to complete distance code "groups".
127+
*/
128+
/* Computes the largest distance alphabet (and the largest distance it can
   express) made only of complete distance code "groups" that do not reach
   past max_distance.

   max_distance: highest distance that is still allowed.
   npostfix:     number of postfix bits.
   ndirect:      number of directly encoded distances.

   Returns the alphabet size and the maximal representable distance. */
BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
    uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
  BrotliDistanceCodeLimit limit;
  uint32_t postfix_mask;
  uint32_t scaled;
  uint32_t probe;
  uint32_t nbits = 0;
  uint32_t upper_half;
  uint32_t last_group;
  uint32_t extra_mask;
  uint32_t region_start;

  /* Not every file that includes this header calls the function; referencing
     it here keeps the compiler from warning about an unused definition. */
  BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);

  /* The limit lies inside the directly encoded region. Not expected to
     happen; handled only for completeness. */
  if (max_distance <= ndirect) {
    limit.max_distance = max_distance;
    limit.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
    return limit;
  }

  /* Postfix value carried by the last distance code of a group. */
  postfix_mask = (1u << npostfix) - 1;

  /* Start from the first prohibited distance (max_distance + 1), remove the
     directly encoded region, then drop the postfix bits and add the
     "head-start" of 4. */
  scaled = (((max_distance + 1) - ndirect - 1) >> npostfix) + 4;

  /* Hand-rolled floor(log2(scaled / 2)) to avoid extra dependencies: count
     the significant bits, then give one back because it is covered by the
     "half" subrange selector. */
  for (probe = scaled / 2; probe != 0; probe >>= 1) {
    ++nbits;
  }
  --nbits;

  /* Which half of the range the prohibited distance falls into. */
  upper_half = (scaled >> nbits) & 1;

  /* Group that contains the prohibited distance. */
  last_group = ((nbits - 1) << 1) | upper_half;
  if (last_group == 0) {
    /* No complete group fits; kept for correctness, does not occur for
       limit > 128. */
    limit.max_distance = ndirect;
    limit.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
    return limit;
  }

  /* Step back to the last group that is still fully permitted. */
  --last_group;

  /* Re-derive the bit count for the permitted group. */
  nbits = (last_group >> 1) + 1;

  /* The last distance of the subrange has every extra bit set. */
  extra_mask = (1u << nbits) - 1;

  /* Region start (nbits >= 1 here), advanced into the upper subregion when
     the group index is odd. */
  region_start = ((1u << (nbits + 1)) - 4) + ((last_group & 1) << nbits);

  limit.max_alphabet_size = ((last_group << npostfix) | postfix_mask) +
      ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
  limit.max_distance =
      ((region_start + extra_mask) << npostfix) + postfix_mask + ndirect + 1;
  return limit;
}
189+
190+
/* Represents the range of values belonging to a prefix code:
191+
[offset, offset + 2^nbits) */
192+
typedef struct {
193+
/* First value of the range (inclusive lower bound). */
uint16_t offset;
194+
/* Bit count of the range: it spans 2^nbits consecutive values. */
uint8_t nbits;
195+
} BrotliPrefixCodeRange;
196+
197+
/* "Soft-private", it is exported, but not "advertised" as API. */
198+
BROTLI_COMMON_API extern const BrotliPrefixCodeRange
199+
_kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS];
200+
201+
#endif /* BROTLI_COMMON_CONSTANTS_H_ */

0 commit comments

Comments
 (0)