Skip to content

Commit 2cd3c8f

Browse files
committed
CHANGE: rewrote compress and decompress so they are now compatible with the recent Red language. They now also correctly handle output buffer expansion. The uncompressed length is no longer stored at the tail of the compressed data!
resolves: #31
1 parent 1802649 commit 2cd3c8f

13 files changed

+213
-177
lines changed

src/core/n-strings.c

+56-47
Original file line numberDiff line numberDiff line change
@@ -254,44 +254,53 @@ static struct digest {
254254
*/ REBNATIVE(compress)
255255
/*
256256
// compress: native [
257-
// {Compresses data. Default is deflate with Adler32 checksum and uncompressed size in last 4 bytes.}
257+
// {Compresses data.}
258258
// data [binary! string!] {If string, it will be UTF8 encoded}
259+
// method [word!] "zlib deflate gzip lzma"
259260
// /part length {Length of source data}
260-
// /zlib {Use ZLIB (Adler32 checksum) without uncompressed length appended}
261-
// /gzip {Use ZLIB with GZIP envelope (using CRC32 checksum)}
262-
// /lzma {Use LZMA compression}
263261
// /level lvl [integer!] {Compression level 0-9}
264262
// ]
265263
***********************************************************************/
266264
{
267-
REBVAL *data = D_ARG(1);
268-
//REBOOL ref_part = D_REF(2);
269-
REBVAL *length = D_ARG(3);
270-
REBOOL ref_zlib = D_REF(4);
271-
REBOOL ref_gzip = D_REF(5);
272-
REBOOL ref_lzma = D_REF(6);
273-
REBOOL ref_level= D_REF(7);
274-
REBVAL *level = D_ARG(8);
265+
REBVAL *data = D_ARG(1);
266+
REBINT method = VAL_WORD_CANON(D_ARG(2));
267+
// REBOOL ref_part = D_REF(3);
268+
REBVAL *length = D_ARG(4);
269+
REBOOL ref_level = D_REF(5);
270+
REBVAL *level = D_ARG(6);
275271

276272
REBSER *ser;
277273
REBCNT index;
278274
REBCNT len;
279-
280-
if ((ref_zlib && (ref_gzip || ref_lzma)) || (ref_gzip && ref_lzma)) Trap0(RE_BAD_REFINES);
275+
REBINT windowBits = MAX_WBITS;
281276

282277
len = Partial1(data, length);
283278
ser = Prep_Bin_Str(data, &index, &len); // result may be a SHARED BUFFER!
284279

285-
if(ref_lzma) {
280+
switch (method) {
281+
case SYM_ZLIB:
282+
zlib_compress:
283+
Set_Binary(D_RET, CompressZlib(ser, index, (REBINT)len, ref_level ? VAL_INT32(level) : -1, windowBits));
284+
break;
285+
286+
case SYM_DEFLATE:
287+
windowBits = -windowBits;
288+
goto zlib_compress;
289+
290+
case SYM_GZIP:
291+
windowBits |= 16;
292+
goto zlib_compress;
293+
294+
case SYM_LZMA:
286295
#ifdef INCLUDE_LZMA
287296
Set_Binary(D_RET, CompressLzma(ser, index, (REBINT)len, ref_level ? VAL_INT32(level) : -1));
288297
#else
289298
Trap0(RE_FEATURE_NA);
290299
#endif
291-
} else {
292-
int windowBits = MAX_WBITS;
293-
if (ref_gzip) windowBits |= 16;
294-
Set_Binary(D_RET, CompressZlib(ser, index, (REBINT)len, ref_level ? VAL_INT32(level) : -1, windowBits));
300+
break;
301+
302+
default:
303+
Trap1(RE_INVALID_ARG, D_ARG(2));
295304
}
296305

297306
return R_RET;
@@ -303,56 +312,56 @@ static struct digest {
303312
*/ REBNATIVE(decompress)
304313
/*
305314
// decompress: native [
306-
// {Decompresses data. Result is binary.}
315+
// {Decompresses data.}
307316
// data [binary!] {Source data to decompress}
317+
// method [word!] "zlib deflate gzip lzma"
308318
// /part "Limits source data to a given length or position"
309319
// length [number! series!] {Length of compressed data (must match end marker)}
310-
// /zlib {Data are in ZLIB format with Adler32 checksum}
311-
// /gzip {Data are in ZLIB format with CRC32 checksum}
312-
// /lzma {Data are in LZMA format}
313-
// /deflate {Data are raw DEFLATE data}
314320
// /size
315-
// bytes [integer!] {Number of decompressed bytes. If not used, size is detected from last 4 source data bytes.}
321+
// bytes [integer!] {Number of uncompressed bytes.}
316322
]
317323
***********************************************************************/
318324
{
319-
REBVAL *data = D_ARG(1);
320-
//REBOOL ref_part = D_REF(2);
321-
REBVAL *length = D_ARG(3);
322-
REBOOL ref_zlib = D_REF(4);
323-
REBOOL ref_gzip = D_REF(5);
324-
REBOOL ref_lzma = D_REF(6);
325-
REBOOL ref_defl = D_REF(7);
326-
REBOOL ref_size = D_REF(8);
327-
REBVAL *size = D_ARG(9);
325+
REBVAL *data = D_ARG(1);
326+
REBINT method = VAL_WORD_CANON(D_ARG(2));
327+
// REBOOL ref_part = D_REF(3);
328+
REBVAL *length = D_ARG(4);
329+
REBOOL ref_size = D_REF(5);
330+
REBVAL *size = D_ARG(6);
328331

329332
REBCNT limit = 0;
330333
REBCNT len;
331334
REBINT windowBits = MAX_WBITS;
332335

333-
// test if only one compression type refinement is used
334-
if (
335-
(ref_zlib && (ref_gzip || ref_lzma || ref_defl)) ||
336-
(ref_gzip && (ref_zlib || ref_lzma || ref_defl)) ||
337-
(ref_lzma && (ref_zlib || ref_gzip || ref_defl))
338-
) Trap0(RE_BAD_REFINES);
339-
340336
len = Partial1(data, length);
341337

342338
if (ref_size) limit = (REBCNT)Int32s(size, 1); // /limit size
343339

344-
if (ref_lzma) {
340+
switch (method) {
341+
case SYM_ZLIB:
342+
zlib_decompress:
343+
Set_Binary(D_RET, DecompressZlib(VAL_SERIES(data), VAL_INDEX(data), (REBINT)len, limit, windowBits));
344+
break;
345+
346+
case SYM_DEFLATE:
347+
windowBits = -windowBits;
348+
goto zlib_decompress;
349+
350+
case SYM_GZIP:
351+
windowBits |= 16;
352+
goto zlib_decompress;
353+
354+
case SYM_LZMA:
345355
#ifdef INCLUDE_LZMA
346356
Set_Binary(D_RET, DecompressLzma(VAL_SERIES(data), VAL_INDEX(data), (REBINT)len, limit));
347357
#else
348358
Trap0(RE_FEATURE_NA);
349359
#endif
350-
} else {
351-
if (ref_defl) windowBits = -windowBits;
352-
else if (ref_gzip) windowBits |= 16;
353-
Set_Binary(D_RET, DecompressZlib(VAL_SERIES(data), VAL_INDEX(data), (REBINT)len, limit, windowBits));
360+
break;
361+
362+
default:
363+
Trap1(RE_INVALID_ARG, D_ARG(2));
354364
}
355-
356365

357366
return R_RET;
358367
}

src/core/u-compress.c

+36-35
Original file line numberDiff line numberDiff line change
@@ -131,26 +131,29 @@ void Trap_ZStream_Error(z_stream *stream, int err, REBOOL while_compression)
131131
stream.next_in = cast(const z_Bytef*, BIN_HEAD(input) + index);
132132

133133
output = Make_Binary(size);
134-
stream.avail_out = size;
134+
stream.avail_out = SERIES_AVAIL(output);
135135
stream.next_out = BIN_HEAD(output);
136136

137-
err = deflate(&stream, Z_FINISH);
138-
//printf("deflate err: %i stream.total_out: %i .avail_out: %i\n", err, stream.total_out, stream.avail_out);
139-
140-
if (err != Z_STREAM_END)
141-
Trap_ZStream_Error(&stream, err, TRUE);
137+
for (;;) {
138+
err = deflate(&stream, Z_FINISH);
139+
if (err == Z_STREAM_END)
140+
break; // Finished or we have enough data.
141+
//printf("deflate err: %i stream.total_out: %i .avail_out: %i\n", err, stream.total_out, stream.avail_out);
142+
if (err != Z_OK)
143+
Trap_ZStream_Error(&stream, err, FALSE);
144+
if (stream.avail_out == 0) {
145+
// expand output buffer...
146+
SERIES_TAIL(output) = stream.total_out;
147+
Expand_Series(output, AT_TAIL, in_len);
148+
stream.next_out = BIN_SKIP(output, stream.total_out);
149+
stream.avail_out = SERIES_REST(output) - stream.total_out;
150+
}
151+
}
142152

143153
SET_STR_END(output, stream.total_out);
144154
SERIES_TAIL(output) = stream.total_out;
145-
146-
if((windowBits & 16) != 16) { // Not GZIP
147-
// Tag the size to the end. Only when not using GZIP envelope.
148-
REBYTE out_size[sizeof(REBCNT)];
149-
REBCNT_To_Bytes(out_size, (REBCNT)in_len);
150-
Append_Series(output, (REBYTE*)out_size, sizeof(REBCNT));
151-
}
152155

153-
if (SERIES_AVAIL(output) > 1024) // Is there wasted space?
156+
if (SERIES_AVAIL(output) > 4096) // Is there wasted space?
154157
output = Copy_Series(output); // Trim it down if too big. !!! Revisit this based on mem alloc alg.
155158

156159
deflateEnd(&stream);
@@ -171,19 +174,7 @@ void Trap_ZStream_Error(z_stream *stream, int err, REBOOL while_compression)
171174
REBINT err;
172175

173176
if (len < 0 || (index + len > BIN_LEN(input))) len = BIN_LEN(input) - index;
174-
if (limit > 0) {
175-
size = limit;
176-
} else if (windowBits < 0) {
177-
// limit was not specified, but data are supposed to be raw DEFLATE data
178-
// max theoretical DEFLATE ratio is 1032:1, but that is quite unrealistic
179-
// it will be more around 3:1 or 4:1, so 10:1 could be enough for automatic setup.
180-
size = 10 * (REBCNT)len; //@@ fix me, if you don't agree with above claim
181-
} else {
182-
// Get the uncompressed size from last 4 source data bytes.
183-
if (len < 4) Trap0(RE_PAST_END); // !!! better msg needed
184-
size = cast(REBU64, Bytes_To_REBCNT(BIN_SKIP(input, index + len) - sizeof(REBCNT)));
185-
if (size > (uLongf)len * 14) Trap_Num(RE_SIZE_LIMIT, size); // check for a realistic limit
186-
}
177+
size = (limit > 0) ? limit : (uLongf)len * 3;
187178

188179
output = Make_Binary(size);
189180

@@ -194,30 +185,40 @@ void Trap_ZStream_Error(z_stream *stream, int err, REBOOL while_compression)
194185
stream.total_out = 0;
195186

196187
stream.avail_in = len;
197-
stream.next_in = cast(const Bytef*, BIN_HEAD(input) + index);
188+
stream.next_in = cast(const Bytef*, BIN_SKIP(input, index));
198189

199190
err = inflateInit2(&stream, windowBits);
200191
if (err != Z_OK) Trap_ZStream_Error(&stream, err, FALSE);
201192

202-
stream.avail_out = size;
193+
stream.avail_out = SERIES_AVAIL(output);
203194
stream.next_out = BIN_HEAD(output);
204195

205196
for(;;) {
206197
err = inflate(&stream, Z_NO_FLUSH);
207-
if (err == Z_STREAM_END || stream.total_out == size)
208-
break; // Finished. (and buffer was big enough)
198+
if (err == Z_STREAM_END || (limit && stream.total_out >= limit))
199+
break; // Finished or we have enough data.
209200
//printf("err: %i size: %i avail_out: %i total_out: %i\n", err, size, stream.avail_out, stream.total_out);
210-
if(err != Z_OK) Trap_ZStream_Error(&stream, err, FALSE);
211-
//@@: may need to resize the destination buffer! But...
212-
//@@: so far let's expect that size is always correct
213-
//@@: and introduce self expanding buffers in compression port implementation
201+
if (err != Z_OK) Trap_ZStream_Error(&stream, err, FALSE);
202+
if (stream.avail_out == 0) {
203+
// expand output buffer...
204+
SERIES_TAIL(output) = stream.total_out;
205+
Expand_Series(output, AT_TAIL, len);
206+
stream.next_out = BIN_SKIP(output, stream.total_out);
207+
stream.avail_out = SERIES_REST(output) - stream.total_out;
208+
}
214209
}
215210
//printf("total_out: %i\n", stream.total_out);
216211
inflateEnd(&stream);
217212

213+
if (limit && stream.total_out > limit) {
214+
stream.total_out = limit;
215+
}
218216
SET_STR_END(output, stream.total_out);
219217
SERIES_TAIL(output) = stream.total_out;
220218

219+
if (SERIES_AVAIL(output) > 4096) // Is there wasted space?
220+
output = Copy_Series(output); // Trim it down if too big. !!! Revisit this based on mem alloc alg.
221+
221222
return output;
222223
}
223224

src/mezz/codec-gzip.reb

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ register-codec [
1313
suffixes: [%.gz]
1414
decode: function[data [binary!]] [
1515
if verbose > 0 [ identify data ]
16-
decompress/gzip data
16+
decompress data 'gzip
1717
]
1818

1919
encode: function [data [binary!]][
20-
compress/gzip/level data level
20+
compress/level data 'gzip level
2121
]
2222

2323
identify: function [data [binary!]][

src/mezz/codec-pdf.reb

+2-2
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ decompress-obj: func[obj [object!] /local p][
301301
switch obj/spec/Filter [
302302
FlateDecode [
303303
try [
304-
obj/data: decompress/deflate skip obj/data 2
304+
obj/data: decompress skip obj/data 2 'deflate
305305
obj/spec/Filter: none
306306
obj/spec/Length: length? obj/data
307307

@@ -431,7 +431,7 @@ emit-stream: func[obj [object!] /local data][
431431
300 > length? data ; if data are small enough
432432
][
433433
obj/spec/Filter: 'FlateDecode
434-
data: compress/zlib obj/data
434+
data: compress obj/data 'zlib
435435
]
436436
unless binary? data [
437437
; make sure that data are in binary, so the length is correct!

src/mezz/codec-swf.reb

+4-4
Original file line numberDiff line numberDiff line change
@@ -1253,7 +1253,7 @@ import module [
12531253
; not a SWF file
12541254
return none
12551255
]
1256-
fileSize: binary/read bin 'UI32LE
1256+
fileSize: (binary/read bin 'UI32LE) - 8
12571257
if verbose > 0 [
12581258
print [
12591259
"SWF file version:" version
@@ -1263,17 +1263,17 @@ import module [
12631263
90 "compressed using LZMA"
12641264
] compression
12651265
lf
1266-
"Data size:" fileSize - 8 "bytes"
1266+
"Data size:" fileSize "bytes"
12671267
]
12681268
]
12691269

12701270
switch compression [
12711271
67 [
1272-
binary/init bin decompress/zlib/size bin/buffer fileSize - 8
1272+
binary/init bin decompress/size bin/buffer 'zlib fileSize
12731273
]
12741274
90 [
12751275
packed: binary/read bin 'UI32LE
1276-
binary/init bin decompress/lzma/size bin/buffer fileSize - 8
1276+
binary/init bin decompress/size bin/buffer 'lzma fileSize
12771277
]
12781278
]
12791279

src/mezz/codec-zip.reb

+3-4
Original file line numberDiff line numberDiff line change
@@ -207,12 +207,11 @@ register-codec [
207207
]
208208
method: either any [
209209
none? data
210-
lesser-or-equal? size: length? data length? compressed-data: compress data
210+
lesser-or-equal? size: length? data length? compressed-data: compress data 'deflate
211211
][
212212
compressed-data: data
213213
0 ;store
214214
][
215-
compressed-data: copy/part skip compressed-data 2 skip tail compressed-data -8 ;@@ FIXME once compress/zlib will be fixed!
216215
8 ;deflate
217216
]
218217

@@ -347,10 +346,10 @@ register-codec [
347346

348347
switch/default method [
349348
8 [ ;- deflate
350-
output: decompress/deflate/size data unc-size
349+
output: decompress/size data 'deflate unc-size
351350
]
352351
14 [ ;- LZMA
353-
output: decompress/lzma/part/size (skip data 4) cmp-size unc-size
352+
output: decompress/part/size (skip data 4) 'lzma cmp-size unc-size
354353
]
355354
0 [ ;- store
356355
output: copy/part data cmp-size

src/mezz/mezz-debug.reb

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ speed?: function [
6565
insert/dup tmp "abcdefghij" 50000
6666
loop 10 [
6767
random tmp
68-
decompress compress tmp
68+
decompress compress tmp 'zlib 'zlib
6969
]
7070
calc: [(length? tmp) * 10 / secs / 1900]
7171
][

src/mezz/mezz-save.reb

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ save: function [
102102
; Checksum uncompressed data, if requested
103103
tmp: find header-data 'checksum [change next tmp checksum data: to-binary data 'sha1]
104104
; Compress the data if necessary
105-
compress [data: lib/compress data]
105+
compress [data: lib/compress data 'zlib]
106106
; File content is encoded as base-64:
107107
method = 'script [data: mold64 data]
108108
not binary? data [data: to-binary data]

src/mezz/prot-http.reb

+2-2
Original file line numberDiff line numberDiff line change
@@ -640,8 +640,8 @@ decode-result: func[
640640
either find ["gzip" "deflate"] encoding [
641641
try/except [
642642
result/2: switch encoding [
643-
"gzip" [ decompress/gzip result/2 ]
644-
"deflate" [ decompress/deflate result/2 ]
643+
"gzip" [ decompress result/2 'gzip]
644+
"deflate" [ decompress result/2 'deflate]
645645
]
646646
][
647647
sys/log/info 'HTTP ["Failed to decode data using:^[[22m" encoding]

0 commit comments

Comments
 (0)