Skip to content

Commit 52a67db

Browse files
committed
FEAT: Allow line-comments inside binary value specification.
resolves: Oldes/Rebol-wishes#23
1 parent e0bc23b commit 52a67db

File tree

3 files changed

+127
-8
lines changed

3 files changed

+127
-8
lines changed

src/core/l-scan.c

+86-2
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,84 @@
473473
return src;
474474
}
475475

476+
/***********************************************************************
**
*/	const REBYTE *Scan_Quote_Binary(const REBYTE *src, SCAN_STATE *scan_state)
/*
**		Scan the braced body of a binary literal, removing spaces,
**		tabs, line breaks and `;` line-comments.
**
**		`src` must point at the opening '{'. The remaining bytes are
**		collected (one byte per char) into the temporary BUF_MOLD
**		series, which is reset on entry and terminated on success.
**
**		Returns a pointer just past the closing '}', or 0 when:
**		  - the first char is not '{',
**		  - the input ends (NUL) before the closing '}',
**		  - a `^` escape is invalid (Scan_Char returned -1),
**		  - a raw or escaped char is outside the 7-bit range.
**
**		On success the number of line breaks seen is added to
**		scan_state->line_count (scan_state may be NULL).
**
***********************************************************************/
{
	REBINT chr;
	REBCNT lines = 0;
	REBSER *buf = BUF_MOLD;

	RESET_TAIL(buf);

	if (*src++ != '{') return 0;

	while (*src != '}') {
		chr = *src;

		switch (chr) {

		case 0:
			return 0; // Scan_state shows error location.

		case '^':
			chr = Scan_Char(&src);
			if (chr == -1) return 0;
			// An escape like ^(130) must not be silently truncated
			// to a byte; reject it the same way as a raw non-ASCII char.
			if (chr >= 0x80) return 0;
			// Step back one char so the shared `src++` below ends up
			// where Scan_Char left the pointer.
			src--;
			break;

		case ';':
			// Line comment: drop everything up to the end of the line.
			while (chr != 0) {
				chr = *++src;
				if (chr == '^') {
					chr = Scan_Char(&src);
					if (chr == -1) return 0;
					src--;
				}
				if (chr == LF || chr == CR) {
					// Treat CR LF as a single line break (same as the
					// CR case below), otherwise the LF would be counted
					// as a second line on the next iteration.
					if (chr == CR && src[1] == LF) src++;
					goto new_line;
				}
			}
			return 0; // end of input reached inside the comment

		case CR:
			if (src[1] == LF) src++;
			// fall thru
		case LF:
		new_line:
			lines++;
			// fall thru
		case ' ':
		case TAB:
			// Whitespace is not stored in the output.
			src++;
			continue;

		default:
			// Only 7-bit chars are accepted in a binary literal.
			if (chr >= 0x80) return 0;
		}

		src++;

		if (SERIES_FULL(buf))
			Extend_Series(buf, 1);

		*BIN_SKIP(buf, buf->tail) = (REBYTE)chr;
		buf->tail++;
	}

	src++; // Skip the closing brace.

	if (scan_state) scan_state->line_count += lines;

	STR_TERM(buf);

	return src;
}
553+
476554

477555
/***********************************************************************
478556
**
@@ -929,7 +1007,11 @@
9291007
if (*cp == '{') { /* BINARY #{12343132023902902302938290382} */
9301008
scan_state->end = scan_state->begin; /* save start */
9311009
scan_state->begin = cp;
932-
cp = Scan_Quote(cp, scan_state); // stores result string in BUF_MOLD !!??
1010+
// Originally Scan_Quote was used here, collecting into BUF_MOLD, but its result was not used later.
1011+
// It was wasting resources, because Scan_Quote collects unicode (2 bytes per char).
1012+
// Scan_Quote_Binary collects ANSI and reports invalid input (like a unicode char) much sooner.
1013+
// It also skips spaces and line-comments, so these do not have to be tested by Decode_Binary later.
1014+
cp = Scan_Quote_Binary(cp, scan_state); // stores result string in BUF_MOLD !!??
9331015
scan_state->begin = scan_state->end; /* restore start */
9341016
if (cp) {
9351017
scan_state->end = cp;
@@ -1436,7 +1518,9 @@ extern REBSER *Scan_Full_Block(SCAN_STATE *scan_state, REBYTE mode_char);
14361518
break;
14371519

14381520
case TOKEN_BINARY:
1439-
Scan_Binary(bp, len, value);
1521+
// In BUF_MOLD is preprocessed ANSI result without comments and spaces
1522+
// we just still need to resolve the binary base (like `64#{`) from the input
1523+
Scan_Binary(Scan_Binary_Base(bp, len), BIN_DATA(BUF_MOLD), BIN_LEN(BUF_MOLD), value);
14401524
LABEL_SERIES(VAL_SERIES(value), "scan binary");
14411525
break;
14421526

src/core/l-types.c

+17-6
Original file line numberDiff line numberDiff line change
@@ -910,9 +910,9 @@ bad_hex: Trap0(RE_INVALID_CHARS);
910910

911911
/***********************************************************************
912912
**
913-
*/ const REBYTE *Scan_Binary(const REBYTE *cp, REBCNT len, REBVAL *value)
913+
*/ REBINT *Scan_Binary_Base(const REBYTE *cp, REBCNT len)
914914
/*
915-
** Scan and convert binary strings.
915+
** Scan for binary base
916916
**
917917
***********************************************************************/
918918
{
@@ -926,14 +926,25 @@ bad_hex: Trap0(RE_INVALID_CHARS);
926926
cp = ep;
927927
}
928928
cp++; // skip #
929-
if (*cp++ != '{') return 0;
930-
len -= 2;
929+
if (*cp++ != '{' || (len - 2) < 1) return 0;
930+
return base;
931+
}
931932

933+
/***********************************************************************
934+
**
935+
*/ const REBYTE *Scan_Binary(REBINT base, const REBYTE *cp, REBCNT len, REBVAL *value)
936+
/*
937+
** Scan and convert binary strings according to the given base (like 2, 16, 64, 85).
938+
**
939+
***********************************************************************/
940+
{
941+
//O: no need to check the base here... Decode_Binary handles any case
932942
cp = Decode_Binary(value, cp, len, base, '}', FALSE);
933943
if (!cp) return 0;
934944

935-
cp = Skip_To_Char(cp, cp + len, '}');
936-
if (!cp) return 0; // series will be gc'd
945+
//O: the check below is not needed, because the scanner already validated the input
946+
//cp = Skip_To_Char(cp, cp + len, '}');
947+
//if (!cp) return 0; // series will be gc'd
937948

938949
return cp;
939950
}

src/tests/units/lexer-test.r3

+24
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,30 @@ Rebol [
171171

172172
===end-group===
173173

174+
===start-group=== "BINARY"
175+
--test-- {binary! with spaces}
176+
--assert #{00} = first transcode/only to binary! " #{0 0}"
177+
--assert #{00} = first transcode/only to binary! "2#{0000 00 00}"
178+
--assert #{00} = first transcode/only to binary! "2#{0000^/0000}"
179+
--assert #{00} = first transcode/only to binary! "2#{0000^M0000}"
180+
--assert #{01} = first transcode/only to binary! "2#{0000^-0001}"
181+
--assert #{02} = first transcode/only to binary! "2#{0000^ 0010}"
182+
--assert #{0001} = first transcode/only to binary! "16#{00 01}"
183+
--assert #{0001} = first transcode/only to binary! "64#{AA E=}"
184+
185+
--test-- {binary! with comments inside}
186+
;@@ https://github.com/Oldes/Rebol-wishes/issues/23
187+
--assert #{00} = first transcode/only/error to binary! "#{;XXX^/00}"
188+
--assert #{00} = first transcode/only/error to binary! "#{00;XXX^/}"
189+
--assert #{0002} = first transcode/only/error to binary! "#{00;XXX^/02}"
190+
--assert #{0002} = first transcode/only/error to binary! "#{00;XXX^M02}" ;CR is also comment stopper
191+
--test-- {binary! with other valid escapes}
192+
--assert #{0003} = first transcode/only/error to binary! "#{^(30)^(30)03}"
193+
--test-- {binary! with unicode char} ; is handled early
194+
--assert error? first transcode/only/error to binary! "#{0č}"
195+
196+
===end-group===
197+
174198

175199
===start-group=== "Special tests"
176200
;if "true" <> get-env "CONTINUOUS_INTEGRATION" [

0 commit comments

Comments
 (0)