Skip to content

Commit 628234c

Browse files
committed
FEAT: support for Ascii85 (Base85) encoding
`Ascii85` is a form of binary-to-text encoding used, for example, in PDF files. It is now possible to use 85 as a `base` value in the `debase` and `enbase` functions:
```
>> enbase/base "Lion" 85
== "9PJE_"
>> debase/base "9PJE_" 85
== #{4C696F6E} ;== "Lion" if converted back to string
```
This addition is optional and requires compilation with the `INCLUDE_BASE85` define. Implements wish: Oldes/Rebol-issues#2410
1 parent 6395779 commit 628234c

File tree

5 files changed

+362
-3
lines changed

5 files changed

+362
-3
lines changed

make/make-settings.r

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Defines: [
1818
USE_MIDI_DEVICE ;-- includes MIDI device when possible (Windows & macOS)
1919

2020
;INCLUDE_TASK ;-- tasks are not implemented yet, so include it only on demand
21+
INCLUDE_BASE85 ;-- adds support for enbase/debase with base 85 (ASCII85)
2122

2223
;@@ optional fine tuning:
2324
;DO_NOT_NORMALIZE_MAP_KEYS

src/boot/natives.r

+2-2
Original file line numberDiff line numberDiff line change
@@ -406,15 +406,15 @@ debase: native [
406406
{Decodes binary-coded string (BASE-64 default) to binary value.}
407407
value [binary! string!] {The string to decode}
408408
/base {Binary base to use}
409-
base-value [integer!] {The base to convert from: 64, 16, or 2}
409+
base-value [integer!] {The base to convert from: 85, 64, 16, or 2}
410410
/url {Base 64 Decoding with URL and Filename Safe Alphabet}
411411
]
412412

413413
enbase: native [
414414
{Encodes a string into a binary-coded string (BASE-64 default).}
415415
value [binary! string!] {If string, will be UTF8 encoded}
416416
/base {Binary base to use}
417-
base-value [integer!] {The base to convert to: 64, 16, or 2}
417+
base-value [integer!] {The base to convert to: 85, 64, 16, or 2}
418418
/url {Base 64 Encoding with URL and Filename Safe Alphabet}
419419
]
420420

src/core/f-enbase.c

+286-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
***********************************************************************/
4141
{
4242
#define BIN_ERROR (REBYTE)0x80
43-
#define BIN_SPACE (REBYTE)0x40
43+
#define BIN_SPACE (REBYTE)0x55
4444
#define BIN_VALUE (REBYTE)0x3f
4545
#define IS_BIN_SPACE(c) (Debase64[c] == BIN_SPACE) /* exact match: BIN_SPACE (0x55) is a marker value, not a single flag bit */
4646

@@ -279,6 +279,124 @@
279279
/* 7F DEL */ BIN_ERROR,
280280
};
281281

282+
#ifdef INCLUDE_BASE85
#define BASE85_DIGITS 5	 /* log85 (2^32) is 4.9926740807112 */
/***********************************************************************
**
*/	static const REBYTE Debase85[128] =
/*
**		Base-85 (ASCII85) binary decoder table.
**
**		Indexed by a 7-bit character code. An entry is the digit value
**		0..84 for the characters '!' (0x21) through 'u' (0x75),
**		BIN_SPACE for a character the decoder skips, or BIN_ERROR for
**		a character that is not part of the alphabet. Note that 'z' is
**		BIN_ERROR here; the decoder handles it before the table lookup.
**
***********************************************************************/
{
	/* Control Chars */
	/* 00-07 */ BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,
	/* 08-0F */ BIN_SPACE,BIN_SPACE,BIN_SPACE,BIN_ERROR,BIN_SPACE,BIN_SPACE,BIN_ERROR,BIN_ERROR,
	/* 10-17 */ BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,
	/* 18-1F */ BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,

	/* 20    space      */ BIN_SPACE,

	/* Digits: value = character code - 0x21 */
	/* 21-28 '!' to '(' */  0,  1,  2,  3,  4,  5,  6,  7,
	/* 29-30 ')' to '0' */  8,  9, 10, 11, 12, 13, 14, 15,
	/* 31-38 '1' to '8' */ 16, 17, 18, 19, 20, 21, 22, 23,
	/* 39-40 '9' to '@' */ 24, 25, 26, 27, 28, 29, 30, 31,
	/* 41-48 'A' to 'H' */ 32, 33, 34, 35, 36, 37, 38, 39,
	/* 49-50 'I' to 'P' */ 40, 41, 42, 43, 44, 45, 46, 47,
	/* 51-58 'Q' to 'X' */ 48, 49, 50, 51, 52, 53, 54, 55,
	/* 59-60 'Y' to '`' */ 56, 57, 58, 59, 60, 61, 62, 63,
	/* 61-68 'a' to 'h' */ 64, 65, 66, 67, 68, 69, 70, 71,
	/* 69-70 'i' to 'p' */ 72, 73, 74, 75, 76, 77, 78, 79,
	/* 71-75 'q' to 'u' */ 80, 81, 82, 83, 84,

	/* 76-7A 'v' to 'z' */ BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,
	/* 7B-7F '{' to DEL */ BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR,BIN_ERROR
};
#endif
282400

283401
/***********************************************************************
284402
**
@@ -307,6 +425,22 @@
307425
};
308426

309427

428+
#ifdef INCLUDE_BASE85
/***********************************************************************
**
*/	static const REBYTE Enbase85[85] =
/*
**		Base-85 (ASCII85) binary encoder table.
**
**		Maps a digit value 0..84 to its character, '!' (0x21) through
**		'u' (0x75). The array is sized to exactly 85 entries, so the
**		string literal's terminating NUL is not stored.
**
***********************************************************************/
{
	"!\"#$%&'()*+,-./"              /*  0..14 */
	"0123456789"                    /* 15..24 */
	":;<=>?@"                       /* 25..31 */
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /* 32..57 */
	"[\\]^_`"                       /* 58..63 */
	"abcdefghijklmnopqrstu"         /* 64..84 */
};
#endif
442+
443+
310444
/***********************************************************************
311445
**
312446
*/ static REBSER *Decode_Base2(const REBYTE **src, REBCNT len, REBYTE delim)
@@ -511,6 +645,89 @@
511645
}
512646

513647

648+
#ifdef INCLUDE_BASE85
/***********************************************************************
**
*/	static REBSER *Decode_Base85(const REBYTE **src, REBCNT len, REBYTE delim)
/*
**		Decodes ASCII85 (Base85) text into a newly made binary series.
**
**		src   - address of the input pointer; it is written only on
**		        failure, where it is set just past the last input byte
**		        that was examined.
**		len   - number of input bytes to decode.
**		delim - not used by this decoder.
**
**		Returns the decoded series, or 0 (after freeing the series)
**		when the input holds a byte that is not part of the alphabet,
**		a 'z' inside of a group, or a group whose value does not fit
**		into 32 bits.
**
**		Every 5 digits produce 4 bytes, 'z' stands for 4 zero bytes and
**		characters marked BIN_SPACE in the table are skipped wherever
**		they appear. A shorter final group of N digits yields N - 1 bytes.
**
***********************************************************************/
{
	REBYTE *bp;
	const REBYTE *cp = *src;
	REBSER *ser;
	REBCNT chunk = 0;   // value of the group being collected
	REBCNT digits = 0;  // how many digits of that group were seen so far
	REBCNT zeros = 0;   // count of 'z' shortcuts in the input
	REBCNT pos = 0;     // number of bytes written to the output
	REBCNT pad;
	REBCNT n;
	REBYTE ch, d;

	// A 'z' turns 1 input byte into 4 output bytes, while any other group
	// takes up to 5 input bytes for 4 output bytes. Count the shortcuts first
	// and size the buffer for the worst case, so it never has to grow (and
	// never gets written past its end) while decoding.
	for (n = 0; n < len; n++) {
		if (cp[n] == 'z') zeros++;
	}
	ser = Make_Binary((zeros + (len - zeros + 4) / 5) * 4);
	bp = STR_HEAD(ser);

	while (len > 0) {
		ch = *cp++;
		len--;

		// Debase85 has only 128 entries; reject 8-bit input before the lookup.
		if (ch > 127) goto err;

		/* 'z' is a special way to encode 4 bytes of 0s */
		if (ch == 'z') {
			if (digits != 0) goto err; // valid only between groups
			bp[pos++] = 0u;
			bp[pos++] = 0u;
			bp[pos++] = 0u;
			bp[pos++] = 0u;
			continue;
		}

		d = Debase85[ch];
		if (d == BIN_SPACE) continue; // ignore spaces (also trailing ones)
		if (d == BIN_ERROR) goto err; /* failure - invalid character */

		if (digits == BASE85_DIGITS - 1) {
			// only the last digit can overflow 32 bits.. for example input: {s8W-"}
			if (chunk > (MAX_U32 / 85u)) goto err;
			chunk *= 85;
			if (chunk > (MAX_U32 - d)) goto err;
			chunk += d;
		}
		else chunk = chunk * 85 + d;

		if (++digits == BASE85_DIGITS) {
			bp[pos++] = (REBYTE)(chunk >> 24);
			bp[pos++] = (REBYTE)(chunk >> 16);
			bp[pos++] = (REBYTE)(chunk >>  8);
			bp[pos++] = (REBYTE)(chunk      );
			chunk = 0;
			digits = 0;
		}
	}

	if (digits != 0) {
		// Short final group: round the collected prefix up by one and scale
		// it as if the missing digits were zero, then drop one output byte
		// per missing digit.
		pad = BASE85_DIGITS - digits;
		chunk++;
		for (n = digits; n < BASE85_DIGITS; n++) {
			if (n == BASE85_DIGITS - 1 && chunk > (MAX_U32 / 85u)) goto err;
			chunk *= 85;
		}
		bp[pos  ] = (REBYTE)(chunk >> 24);
		bp[pos+1] = (REBYTE)(chunk >> 16);
		bp[pos+2] = (REBYTE)(chunk >>  8);
		bp[pos+3] = (REBYTE)(chunk      );
		pos += 4 - pad;
	}

	bp[pos] = 0; // terminator goes at the real tail, not after the padding
	ser->tail = pos;
	return ser;

err:
	Free_Series(ser);
	*src = cp;
	return 0;
}
#endif
729+
730+
514731
/***********************************************************************
515732
**
516733
*/ const REBYTE *Decode_Binary(REBVAL *value, const REBYTE *src, REBCNT len, REBINT base, REBYTE delim, REBOOL urlSafe)
@@ -531,6 +748,13 @@
531748
case 2:
532749
ser = Decode_Base2 (&src, len, delim);
533750
break;
751+
case 85:
752+
#ifdef INCLUDE_BASE85
753+
ser = Decode_Base85 (&src, len, delim);
754+
#else
755+
Trap0(RE_FEATURE_NA);
756+
#endif
757+
break;
534758
}
535759

536760
if (!ser) return 0;
@@ -674,3 +898,64 @@
674898

675899
return series;
676900
}
901+
902+
903+
#ifdef INCLUDE_BASE85
/***********************************************************************
**
*/	REBSER *Encode_Base85(REBVAL *value, REBSER *series, REBFLG brk)
/*
**		Base85 encode a given series. Must be BYTES, not UNICODE.
**
**		value  - source of the bytes (read with VAL_LEN / VAL_BIN_DATA).
**		series - handed to Prep_String together with the needed size;
**		         the series returned by Prep_String is the result.
**		brk    - currently ignored (no line breaks are inserted).
**
**		Each complete 4 byte group becomes 5 characters, or the single
**		character 'z' when all 4 bytes are zero. A trailing group of
**		N bytes (1..3) is zero filled and only its first N + 1
**		characters are kept.
**
***********************************************************************/
{
	REBYTE *out;
	REBYTE *in;
	REBCNT total;   // number of input bytes
	REBCNT rest;    // bytes in the trailing partial group (0..3)
	REBCNT full;    // bytes covered by complete groups
	REBCNT at;
	REBCNT k;
	REBCNT word;

	total = VAL_LEN(value);
	in = VAL_BIN_DATA(value);
	rest = total % 4;
	full = total - rest;

	// Worst case is 5 output characters per started group of 4 bytes:
	series = Prep_String(series, &out, ((total + 3) / 4) * 5);
	// (Note: tail not properly set yet)

	for (at = 0; at < full; at += 4) {
		word = (((REBCNT)in[at    ]) << 24u)
		     | (((REBCNT)in[at + 1]) << 16u)
		     | (((REBCNT)in[at + 2]) <<  8u)
		     |  ((REBCNT)in[at + 3]);

		if (word == 0) {
			*out++ = 'z'; /* this is a special zero character */
			continue;
		}
		// digits are produced least significant first, so fill backwards
		for (k = BASE85_DIGITS; k > 0; k--) {
			out[k - 1] = Enbase85[word % 85];
			word /= 85;
		}
		out += 5;
	}

	if (rest != 0) {
		// gather the remaining bytes into the high end of a zero filled group
		word = 0;
		for (k = 0; k < rest; k++) {
			word |= ((REBCNT)in[full + k]) << (24u - 8u * k);
		}
		for (k = BASE85_DIGITS; k > 0; k--) {
			out[k - 1] = Enbase85[word % 85];
			word /= 85;
		}
		// all 5 digits were written, but only the leading ones are kept
		out += rest + 1;
	}

	*out = 0;
	SERIES_TAIL(series) = DIFF_PTRS(out, series->data);

	return series;
}
#endif

src/core/n-strings.c

+8
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,14 @@ static struct digest {
497497
break;
498498
case 2:
499499
ser = Encode_Base2(arg, 0, FALSE);
500+
break;
501+
case 85:
502+
#ifdef INCLUDE_BASE85
503+
ser = Encode_Base85(arg, 0, FALSE);
504+
#else
505+
Trap0(RE_FEATURE_NA);
506+
#endif
507+
500508
break;
501509
default:
502510
Trap_Arg(D_ARG(3));

0 commit comments

Comments
 (0)