Skip to content

Commit e46effe

Browse files
committed
FEAT: initial support for (optional) LZMA compression in COMPRESS and DECOMPRESS functions
So far it is implemented like the original COMPRESS, which appends the size of the uncompressed data to the tail of the compressed data. To include LZMA, the USE_LZMA compiler definition must be set when compiling Rebol. The LZMA source comes from LZMA SDK version 18.05 (https://www.7-zip.org/sdk.html), slightly modified for use with the Rebol source.
1 parent 8efcf41 commit e46effe

File tree

8 files changed

+6120
-26
lines changed

8 files changed

+6120
-26
lines changed

NOTICE

+4
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ ZLIB general purpose compression library:
3232
Version 1.1.2, March 19th, 1998
3333
Copyright 1995-1998 Jean-loup Gailly and Mark Adler
3434

35+
LZMA (optional):
36+
Copyright (C) 2018, Igor Pavlov
37+
Public domain - https://www.7-zip.org/sdk.html
38+
3539
JPEG decoder:
3640
Copyright 1994-1996, Thomas G. Lane.
3741
This file is part of the Independent JPEG Group's software.

make/make-settings.r

+1
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@ Defines: [
1414
USE_JPG_CODEC
1515
;USE_WAV_CODEC ;-- deprecated; using Rebol codec instead
1616
;USE_NO_INFINITY ;-- use when you don't want to support IEEE infinity
17+
USE_LZMA ;-- adds support for LZMA [de]compression
1718
]

src/boot/natives.r

+2-14
Original file line numberDiff line numberDiff line change
@@ -386,20 +386,8 @@ checksum: native [
386386
key-value [any-string! binary!] {Key to use}
387387
]
388388

389-
compress: native [
390-
{Compresses a string series and returns it.}
391-
data [binary! string!] {If string, it will be UTF8 encoded}
392-
/part length {Length of data (elements)}
393-
/gzip {Use GZIP checksum}
394-
]
395-
396-
decompress: native [
397-
{Decompresses data. Result is binary.}
398-
data [binary!] {Data to decompress}
399-
/part length {Length of compressed data (must match end marker)}
400-
/gzip {Use GZIP checksum}
401-
/limit size {Error out if result is larger than this}
402-
]
389+
;compress: native [] ; defined in %n-string.c
390+
;decompress: native [] ; defined in %n-string.c
403391

404392
construct: native [
405393
{Creates an object with scant (safe) evaluation.}

src/core/n-strings.c

+51-11
Original file line numberDiff line numberDiff line change
@@ -290,19 +290,37 @@ static struct digest {
290290
**
291291
*/ REBNATIVE(compress)
292292
/*
293-
** Binary and string (gets UTF8 converted).
294-
**
293+
// compress: native [
294+
// {Compresses data. Default is deflate with Adler32 checksum.}
295+
// data [binary! string!] {If string, it will be UTF8 encoded}
296+
// /part length {Length of data (elements)}
297+
// /gzip {Use deflate with GZIP checksum (CRC32)}
298+
// /lzma {Use LZMA compression}
299+
// ]
295300
***********************************************************************/
296301
{
302+
REBVAL *data = D_ARG(1);
303+
//REBOOL ref_part = D_REF(2);
304+
REBVAL *length = D_ARG(3);
305+
REBOOL ref_gzip = D_REF(4);
306+
REBOOL ref_lzma = D_REF(5);
307+
297308
REBSER *ser;
298309
REBCNT index;
299310
REBCNT len;
300311

301-
len = Partial1(D_ARG(1), D_ARG(3));
302-
303-
ser = Prep_Bin_Str(D_ARG(1), &index, &len); // result may be a SHARED BUFFER!
312+
len = Partial1(data, length);
313+
ser = Prep_Bin_Str(data, &index, &len); // result may be a SHARED BUFFER!
304314

305-
Set_Binary(D_RET, Compress(ser, index, (REBINT)len, D_REF(4))); // /gzip
315+
if(ref_lzma) {
316+
#ifdef USE_LZMA
317+
Set_Binary(D_RET, CompressLzma(ser, index, (REBINT)len));
318+
#else
319+
Trap0(RE_FEATURE_NA);
320+
#endif
321+
} else {
322+
Set_Binary(D_RET, Compress(ser, index, (REBINT)len, ref_gzip)); // /gzip
323+
}
306324

307325
return R_RET;
308326
}
@@ -312,19 +330,41 @@ static struct digest {
312330
**
313331
*/ REBNATIVE(decompress)
314332
/*
315-
** Binary only.
316-
**
333+
// decompress: native [
334+
// {Decompresses data. Result is binary.}
335+
// data [binary!] {Data to decompress}
336+
// /part length {Length of compressed data (must match end marker)}
337+
// /gzip {Use GZIP checksum}
338+
// /lzma {Use LZMA encoding}
339+
// /limit size {Error out if result is larger than this}
340+
// ]
317341
***********************************************************************/
318342
{
319-
REBVAL *arg = D_ARG(1);
343+
REBVAL *data = D_ARG(1);
344+
//REBOOL ref_part = D_REF(2);
345+
REBVAL *length = D_ARG(3);
346+
REBOOL ref_gzip = D_REF(4);
347+
REBOOL ref_lzma = D_REF(5);
348+
REBOOL ref_limit = D_REF(6);
349+
REBVAL *size = D_ARG(7);
350+
320351
REBINT limit = 0;
321352
REBCNT len;
322353

323354
len = Partial1(D_ARG(1), D_ARG(3));
324355

325-
if (D_REF(5)) limit = Int32s(D_ARG(6), 1); // /limit size
356+
if (ref_limit) limit = Int32s(size, 1); // /limit size
357+
358+
if (ref_lzma) {
359+
#ifdef USE_LZMA
360+
Set_Binary(D_RET, DecompressLzma(VAL_SERIES(data), VAL_INDEX(data), (REBINT)len, limit));
361+
#else
362+
Trap0(RE_FEATURE_NA);
363+
#endif
364+
} else {
365+
Set_Binary(D_RET, Decompress(VAL_SERIES(data), VAL_INDEX(data), (REBINT)len, limit, ref_gzip)); // /gzip
366+
}
326367

327-
Set_Binary(D_RET, Decompress(VAL_SERIES(arg), VAL_INDEX(arg), (REBINT)len, limit, D_REF(4))); // /gzip
328368

329369
return R_RET;
330370
}

src/core/u-compress.c

+120-1
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,19 @@
2020
************************************************************************
2121
**
2222
** Module: u-compress.c
23-
** Summary: interface to zlib compression
23+
** Summary: interface to zlib and (optionally) LZMA compression
2424
** Section: utility
2525
** Notes:
2626
**
2727
***********************************************************************/
2828

2929
#include "sys-core.h"
3030
#include "sys-zlib.h"
31+
#ifdef USE_LZMA
32+
#include "sys-lzma.h"
33+
#endif // USE_LZMA
34+
35+
3136

3237
/*
3338
* This number represents the top file size that,
@@ -134,3 +139,117 @@
134139
//ENABLE_GC;
135140
return output;
136141
}
142+
143+
#ifdef USE_LZMA
144+
145+
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED(p); return malloc(size); }
146+
static void SzFree(ISzAllocPtr p, void *address) { UNUSED(p); free(address); }
147+
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
148+
149+
/***********************************************************************
**
*/	REBSER *CompressLzma(REBSER *input, REBINT index, REBCNT len)
/*
**		Compress a binary (only) using LZMA compression.
**
**		input - series holding the bytes to compress
**		index - offset of the first byte to compress
**		len   - number of bytes to compress
**
**		Returns a new binary laid out as:
**		  [LZMA_PROPS_SIZE bytes of encoder properties]
**		  [LZMA stream, written without an end marker]
**		  [REBCNT tag holding the uncompressed length]
**
**		Traps with RE_NO_MEMORY when the encoder reports SZ_ERROR_MEM
**		and with RE_BAD_PRESS (carrying the encoder's error code) for
**		any other failure.
**
***********************************************************************/
{
	REBU64 size_in = len;
	REBU64 capacity;
	REBSER *output;
	REBYTE *dest;
	REBINT err;
	REBYTE out_size[sizeof(REBCNT)];
	CLzmaEncProps props;
	// LzmaEncode() takes its in/out lengths through SizeT pointers, so these
	// must really be SizeT objects: pointing it at a 64-bit variable is wrong
	// on targets where SizeT is 32 bits wide.
	SizeT propsSize = LZMA_PROPS_SIZE;
	SizeT packedSize;

	//@@ are these Sterling's magic numbers correct for LZMA too?
	capacity = LZMA_PROPS_SIZE + size_in + (size_in > STERLINGS_MAGIC_NUMBER ? size_in / 10 + 12 : STERLINGS_MAGIC_FIX);
	output = Make_Binary((REBCNT)capacity);

	// So far the LZMA encoder properties are hardcoded... it would be nice
	// to let the user specify these if needed.
	// Possible values:
	//   int level,         /* 0 <= level <= 9, default = 5 */
	//   unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
	//   int lc,            /* 0 <= lc <= 8, default = 3 */
	//   int lp,            /* 0 <= lp <= 4, default = 0 */
	//   int pb,            /* 0 <= pb <= 4, default = 2 */
	//   int fb,            /* 5 <= fb <= 273, default = 32 */
	//   int numThreads     /* 1 or 2, default = 2 */
	LzmaEncProps_Init(&props);
	props.level = 5;
	props.dictSize = 0;    // use default value
	props.lc = -1;         // -1 = default value
	props.lp = -1;
	props.pb = -1;
	props.fb = -1;
	props.numThreads = -1;

	dest = BIN_HEAD(output);

	// The first LZMA_PROPS_SIZE bytes of the output receive the encoded
	// properties; the compressed stream gets whatever room is left.
	packedSize = (SizeT)(capacity - LZMA_PROPS_SIZE);

	err = LzmaEncode(dest + propsSize, &packedSize, BIN_HEAD(input) + index, (SizeT)len, &props, dest, &propsSize, 0,
		NULL, &g_Alloc, &g_Alloc);
	if (err) {
		if (err == SZ_ERROR_MEM) Trap0(RE_NO_MEMORY);
		SET_INTEGER(DS_RETURN, err);
		Trap1(RE_BAD_PRESS, DS_RETURN); //!!!provide error string descriptions
	}

	// packedSize and propsSize now hold the byte counts actually written.
	SERIES_TAIL(output) = (REBCNT)(packedSize + propsSize);

	REBCNT_To_Bytes(out_size, (REBCNT)len); // Tag the size to the end.
	Append_Series(output, (REBYTE*)out_size, sizeof(REBCNT));

	if (SERIES_AVAIL(output) > 1024) // Is there wasted space?
		output = Copy_Series(output); // Trim it down if too big. !!! Revisit this based on mem alloc alg.
	return output;
}
213+
214+
/***********************************************************************
**
*/	REBSER *DecompressLzma(REBSER *input, REBCNT index, REBINT len, REBCNT limit)
/*
**		Decompress a binary (only) in the layout written by CompressLzma:
**		  [LZMA_PROPS_SIZE bytes of properties][LZMA stream][REBCNT size tag]
**
**		input - series holding the compressed data
**		index - offset of the first properties byte
**		len   - length of the compressed data including properties and
**		        size tag; a negative or too large value means "up to
**		        the tail of the series"
**		limit - when non-zero, at most this many bytes are decoded
**
**		Returns a new binary with the decoded bytes.
**
**		Traps with RE_PAST_END when the data is too short to hold the
**		properties and the size tag, with RE_NO_MEMORY when the decoder
**		reports SZ_ERROR_MEM, and with RE_BAD_PRESS (carrying the
**		decoder's error code) for any other failure.
**
**		NOTE(review): an oversized result is silently truncated to
**		`limit` here, while the native's /limit help text says
**		"Error out if result is larger than this" - confirm which
**		behavior is intended.
**
***********************************************************************/
{
	REBYTE *src = BIN_HEAD(input) + index; // first byte of the LZMA properties
	REBYTE *dest;
	REBSER *output;
	REBINT err;
	// LzmaDecode() takes its in/out lengths through SizeT pointers, so these
	// must really be SizeT objects: pointing it at a 64-bit variable is wrong
	// on targets where SizeT is 32 bits wide.
	SizeT packedSize;
	SizeT unpackSize;
	ELzmaStatus status = 0;

	if (len < 0 || (index + len > BIN_LEN(input))) len = BIN_LEN(input) - index;
	// Need at least the properties and the size tag.
	if (len < (REBINT)(LZMA_PROPS_SIZE + sizeof(REBCNT))) Trap0(RE_PAST_END); // !!! better msg needed

	// Only the bytes between the properties and the trailing size tag
	// belong to the LZMA stream.
	packedSize = (SizeT)(len - LZMA_PROPS_SIZE - sizeof(REBCNT));

	// Get the uncompressed size from the end of the data. It must be located
	// relative to `src` (head + index), not to the head of the series.
	unpackSize = (SizeT)Bytes_To_REBCNT(src + len - sizeof(REBCNT));
	if (limit > 0 && unpackSize > limit) unpackSize = limit;

	output = Make_Binary((REBCNT)unpackSize);
	dest = BIN_HEAD(output);

	err = LzmaDecode(dest, &unpackSize, src + LZMA_PROPS_SIZE, &packedSize, src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &g_Alloc);

	if (err) {
		if (err == SZ_ERROR_MEM) Trap0(RE_NO_MEMORY);
		SET_INTEGER(DS_RETURN, err);
		Trap1(RE_BAD_PRESS, DS_RETURN); //!!!provide error string descriptions
	}

	// unpackSize now holds the number of bytes actually produced.
	SET_STR_END(output, unpackSize);
	SERIES_TAIL(output) = (REBCNT)unpackSize;
	return output;
}
254+
255+
#endif //USE_LZMA

0 commit comments

Comments
 (0)