Skip to content

Commit 2e55f6a

Browse files
committed
FEAT: allow string input to transcode
related to: Oldes/Rebol-issues#688
1 parent e9990a0 commit 2e55f6a

File tree

3 files changed

+127
-84
lines changed

3 files changed

+127
-84
lines changed

src/boot/natives.reb

+2-2
Original file line numberDiff line numberDiff line change
@@ -652,8 +652,8 @@ to-rebol-file: native [
652652
]
653653

654654
transcode: native [
655-
{Translates UTF-8 binary source to values. Returns [value binary].}
656-
source [binary!] "Must be Unicode UTF-8 encoded"
655+
{Translates UTF-8 binary source to values. Returns one or several values in a block.}
656+
source [binary! string!] "UTF-8 input buffer; string argument will be UTF-8 encoded"
657657
/next "Translate next complete value (blocks as single value)"
658658
/only "Translate only a single value (blocks dissected)"
659659
/error "Do not cause errors - return error object as value in place"

src/core/l-scan.c

+31-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
** REBOL [R3] Language Interpreter and Run-time Environment
44
**
55
** Copyright 2012 REBOL Technologies
6+
** Copyright 2012-2022 Rebol Open Source Contributors
67
** REBOL is a trademark of REBOL Technologies
78
**
89
** Licensed under the Apache License, Version 2.0 (the "License");
@@ -1889,10 +1890,25 @@ extern REBSER *Scan_Full_Block(SCAN_STATE *scan_state, REBYTE mode_char);
18891890
**
18901891
***********************************************************************/
18911892
{
1893+
SCAN_STATE scan_state;
1894+
REBVAL *src = D_ARG(1);
18921895
REBSER *blk;
1893-
SCAN_STATE scan_state;
1896+
REBSER *ser;
1897+
REBYTE *bin;
1898+
REBCNT len;
1899+
1900+
if (VAL_BYTE_SIZE(src)) {
1901+
bin = VAL_BIN_DATA(src);
1902+
len = VAL_LEN(src);
1903+
} else {
1904+
// unicode string must be converted to UTF-8 first
1905+
// the result is temporary stored in the shared buffer (BUF_FORM)
1906+
ser = Encode_UTF8_String(VAL_UNI_DATA(src), VAL_LEN(src), TRUE, 0);
1907+
bin = BIN_HEAD(ser);
1908+
len = BIN_LEN(ser);
1909+
}
18941910

1895-
Init_Scan_State(&scan_state, VAL_BIN_DATA(D_ARG(1)), VAL_LEN(D_ARG(1)));
1911+
Init_Scan_State(&scan_state, bin, len);
18961912

18971913
if (D_REF(2)) SET_FLAG(scan_state.opts, SCAN_NEXT);
18981914
if (D_REF(3)) SET_FLAG(scan_state.opts, SCAN_ONLY);
@@ -1902,9 +1918,19 @@ extern REBSER *Scan_Full_Block(SCAN_STATE *scan_state, REBYTE mode_char);
19021918
DS_RELOAD(ds); // in case stack moved
19031919
Set_Block(D_RET, blk);
19041920

1905-
VAL_INDEX(D_ARG(1)) = scan_state.end - VAL_BIN(D_ARG(1));
1906-
Append_Val(blk, D_ARG(1));
1907-
1921+
if (VAL_BYTE_SIZE(src)) {
1922+
VAL_INDEX(src) = scan_state.end - VAL_BIN(src);
1923+
} else {
1924+
// the scan state used the shared buffer, to get how many codepoints
1925+
// we advanced, we must first mark end...
1926+
len = scan_state.end - bin;
1927+
bin[len+1] = 0;
1928+
// ... and count the real length advanced
1929+
len = Length_As_UTF8_Code_Points(bin);
1930+
//printf("%i\n", len);
1931+
VAL_INDEX(src) = len;
1932+
}
1933+
Append_Val(blk, src);
19081934
return R_RET;
19091935
}
19101936

src/tests/units/lexer-test.r3

+94-77
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,23 @@ Rebol [
88

99
~~~start-file~~~ "Lexer"
1010

11+
===start-group=== "TRANSCODE"
12+
--test-- "transcode basic"
13+
;@@ https://github.com/Oldes/Rebol-issues/issues/688
14+
--assert all [
15+
block? code: transcode to binary! "1 + 1"
16+
code = [1 + 1 #{}]
17+
#{31202B2031} = head last code
18+
]
19+
--assert all [
20+
block? code: transcode "1 + 1"
21+
code = [1 + 1 ""]
22+
"1 + 1" = head last code
23+
]
24+
25+
26+
===end-group===
27+
1128
===start-group=== "Invalid construction"
1229
--test-- "Invalid MAP"
1330
--assert error? err: try [load {#(x)}]
@@ -401,99 +418,99 @@ Rebol [
401418
--assert error? try [load {#[file! "ab" 2 x]}]
402419
--test-- {object!}
403420
;@@ https://github.com/Oldes/Rebol-issues/issues/864
404-
--assert block? try [transcode to-binary "#[object! [a: 1 b: 2]]"]
405-
--assert block? try [transcode/only to-binary "#[object! [a: 1 b: 2]]"]
421+
--assert block? try [transcode "#[object! [a: 1 b: 2]]"]
422+
--assert block? try [transcode/only "#[object! [a: 1 b: 2]]"]
406423
--test-- {function!}
407424
;@@ https://github.com/Oldes/Rebol-issues/issues/1114
408-
--assert function? first transcode to binary! {#[function! [[a [series!]][print a]]]}
425+
--assert function? first transcode {#[function! [[a [series!]][print a]]]}
409426

410427
--test-- {datatype!}
411428
;@@ https://github.com/Oldes/Rebol-issues/issues/2508
412-
--assert datatype? first transcode/only to binary! {#[unset!]}
413-
--assert datatype? first transcode/only to binary! {#[none!]}
414-
--assert datatype? first transcode/only to binary! {#[logic!]}
415-
--assert datatype? first transcode/only to binary! {#[integer!]}
416-
--assert datatype? first transcode/only to binary! {#[decimal!]}
417-
--assert datatype? first transcode/only to binary! {#[percent!]}
418-
--assert datatype? first transcode/only to binary! {#[money!]}
419-
--assert datatype? first transcode/only to binary! {#[char!]}
420-
--assert datatype? first transcode/only to binary! {#[pair!]}
421-
--assert datatype? first transcode/only to binary! {#[tuple!]}
422-
--assert datatype? first transcode/only to binary! {#[time!]}
423-
--assert datatype? first transcode/only to binary! {#[date!]}
424-
--assert datatype? first transcode/only to binary! {#[binary!]}
425-
--assert datatype? first transcode/only to binary! {#[string!]}
426-
--assert datatype? first transcode/only to binary! {#[file!]}
427-
--assert datatype? first transcode/only to binary! {#[email!]}
428-
--assert datatype? first transcode/only to binary! {#[ref!]}
429-
--assert datatype? first transcode/only to binary! {#[url!]}
430-
--assert datatype? first transcode/only to binary! {#[tag!]}
431-
--assert datatype? first transcode/only to binary! {#[bitset!]}
432-
--assert datatype? first transcode/only to binary! {#[image!]}
433-
--assert datatype? first transcode/only to binary! {#[vector!]}
434-
--assert datatype? first transcode/only to binary! {#[block!]}
435-
--assert datatype? first transcode/only to binary! {#[paren!]}
436-
--assert datatype? first transcode/only to binary! {#[path!]}
437-
--assert datatype? first transcode/only to binary! {#[set-path!]}
438-
--assert datatype? first transcode/only to binary! {#[get-path!]}
439-
--assert datatype? first transcode/only to binary! {#[lit-path!]}
440-
--assert datatype? first transcode/only to binary! {#[map!]}
441-
--assert datatype? first transcode/only to binary! {#[datatype!]}
442-
--assert datatype? first transcode/only to binary! {#[typeset!]}
443-
--assert datatype? first transcode/only to binary! {#[word!]}
444-
--assert datatype? first transcode/only to binary! {#[set-word!]}
445-
--assert datatype? first transcode/only to binary! {#[get-word!]}
446-
--assert datatype? first transcode/only to binary! {#[lit-word!]}
447-
--assert datatype? first transcode/only to binary! {#[refinement!]}
448-
--assert datatype? first transcode/only to binary! {#[issue!]}
449-
--assert datatype? first transcode/only to binary! {#[native!]}
450-
--assert datatype? first transcode/only to binary! {#[action!]}
451-
--assert datatype? first transcode/only to binary! {#[rebcode!]}
452-
--assert datatype? first transcode/only to binary! {#[command!]}
453-
--assert datatype? first transcode/only to binary! {#[op!]}
454-
--assert datatype? first transcode/only to binary! {#[closure!]}
455-
--assert datatype? first transcode/only to binary! {#[function!]}
456-
--assert datatype? first transcode/only to binary! {#[frame!]}
457-
--assert datatype? first transcode/only to binary! {#[object!]}
458-
--assert datatype? first transcode/only to binary! {#[module!]}
459-
--assert datatype? first transcode/only to binary! {#[error!]}
460-
--assert datatype? first transcode/only to binary! {#[task!]}
461-
--assert datatype? first transcode/only to binary! {#[port!]}
462-
--assert datatype? first transcode/only to binary! {#[gob!]}
463-
--assert datatype? first transcode/only to binary! {#[event!]}
464-
--assert datatype? first transcode/only to binary! {#[handle!]}
465-
--assert datatype? first transcode/only to binary! {#[struct!]}
466-
--assert datatype? first transcode/only to binary! {#[library!]}
467-
--assert datatype? first transcode/only to binary! {#[utype!]}
429+
--assert datatype? first transcode/only {#[unset!]}
430+
--assert datatype? first transcode/only {#[none!]}
431+
--assert datatype? first transcode/only {#[logic!]}
432+
--assert datatype? first transcode/only {#[integer!]}
433+
--assert datatype? first transcode/only {#[decimal!]}
434+
--assert datatype? first transcode/only {#[percent!]}
435+
--assert datatype? first transcode/only {#[money!]}
436+
--assert datatype? first transcode/only {#[char!]}
437+
--assert datatype? first transcode/only {#[pair!]}
438+
--assert datatype? first transcode/only {#[tuple!]}
439+
--assert datatype? first transcode/only {#[time!]}
440+
--assert datatype? first transcode/only {#[date!]}
441+
--assert datatype? first transcode/only {#[binary!]}
442+
--assert datatype? first transcode/only {#[string!]}
443+
--assert datatype? first transcode/only {#[file!]}
444+
--assert datatype? first transcode/only {#[email!]}
445+
--assert datatype? first transcode/only {#[ref!]}
446+
--assert datatype? first transcode/only {#[url!]}
447+
--assert datatype? first transcode/only {#[tag!]}
448+
--assert datatype? first transcode/only {#[bitset!]}
449+
--assert datatype? first transcode/only {#[image!]}
450+
--assert datatype? first transcode/only {#[vector!]}
451+
--assert datatype? first transcode/only {#[block!]}
452+
--assert datatype? first transcode/only {#[paren!]}
453+
--assert datatype? first transcode/only {#[path!]}
454+
--assert datatype? first transcode/only {#[set-path!]}
455+
--assert datatype? first transcode/only {#[get-path!]}
456+
--assert datatype? first transcode/only {#[lit-path!]}
457+
--assert datatype? first transcode/only {#[map!]}
458+
--assert datatype? first transcode/only {#[datatype!]}
459+
--assert datatype? first transcode/only {#[typeset!]}
460+
--assert datatype? first transcode/only {#[word!]}
461+
--assert datatype? first transcode/only {#[set-word!]}
462+
--assert datatype? first transcode/only {#[get-word!]}
463+
--assert datatype? first transcode/only {#[lit-word!]}
464+
--assert datatype? first transcode/only {#[refinement!]}
465+
--assert datatype? first transcode/only {#[issue!]}
466+
--assert datatype? first transcode/only {#[native!]}
467+
--assert datatype? first transcode/only {#[action!]}
468+
--assert datatype? first transcode/only {#[rebcode!]}
469+
--assert datatype? first transcode/only {#[command!]}
470+
--assert datatype? first transcode/only {#[op!]}
471+
--assert datatype? first transcode/only {#[closure!]}
472+
--assert datatype? first transcode/only {#[function!]}
473+
--assert datatype? first transcode/only {#[frame!]}
474+
--assert datatype? first transcode/only {#[object!]}
475+
--assert datatype? first transcode/only {#[module!]}
476+
--assert datatype? first transcode/only {#[error!]}
477+
--assert datatype? first transcode/only {#[task!]}
478+
--assert datatype? first transcode/only {#[port!]}
479+
--assert datatype? first transcode/only {#[gob!]}
480+
--assert datatype? first transcode/only {#[event!]}
481+
--assert datatype? first transcode/only {#[handle!]}
482+
--assert datatype? first transcode/only {#[struct!]}
483+
--assert datatype? first transcode/only {#[library!]}
484+
--assert datatype? first transcode/only {#[utype!]}
468485
--test-- {direct values}
469-
--assert logic? first transcode/only to binary! {#[true]}
470-
--assert logic? first transcode/only to binary! {#[false]}
471-
--assert none? first transcode/only to binary! {#[none]}
472-
--assert unset? first transcode/only to binary! {#[unset]}
486+
--assert logic? first transcode/only {#[true]}
487+
--assert logic? first transcode/only {#[false]}
488+
--assert none? first transcode/only {#[none]}
489+
--assert unset? first transcode/only {#[unset]}
473490

474491
===end-group===
475492

476493
===start-group=== "BINARY"
477494
--test-- {binary! with spaces}
478-
--assert #{00} = first transcode/only to binary! " #{0 0}"
479-
--assert #{00} = first transcode/only to binary! "2#{0000 00 00}"
480-
--assert #{00} = first transcode/only to binary! "2#{0000^/0000}"
481-
--assert #{00} = first transcode/only to binary! "2#{0000^M0000}"
482-
--assert #{01} = first transcode/only to binary! "2#{0000^-0001}"
483-
--assert #{02} = first transcode/only to binary! "2#{0000^ 0010}"
484-
--assert #{0001} = first transcode/only to binary! "16#{00 01}"
485-
--assert #{0001} = first transcode/only to binary! "64#{AA E=}"
495+
--assert #{00} = first transcode/only " #{0 0}"
496+
--assert #{00} = first transcode/only "2#{0000 00 00}"
497+
--assert #{00} = first transcode/only "2#{0000^/0000}"
498+
--assert #{00} = first transcode/only "2#{0000^M0000}"
499+
--assert #{01} = first transcode/only "2#{0000^-0001}"
500+
--assert #{02} = first transcode/only "2#{0000^ 0010}"
501+
--assert #{0001} = first transcode/only "16#{00 01}"
502+
--assert #{0001} = first transcode/only "64#{AA E=}"
486503

487504
--test-- {binary! with comments inside}
488505
;@@ https://github.com/Oldes/Rebol-wishes/issues/23
489-
--assert #{00} = first transcode/only/error to binary! "#{;XXX^/00}"
490-
--assert #{00} = first transcode/only/error to binary! "#{00;XXX^/}"
491-
--assert #{0002} = first transcode/only/error to binary! "#{00;XXX^/02}"
492-
--assert #{0002} = first transcode/only/error to binary! "#{00;XXX^M02}" ;CR is also comment stopper
506+
--assert #{00} = first transcode/only/error "#{;XXX^/00}"
507+
--assert #{00} = first transcode/only/error "#{00;XXX^/}"
508+
--assert #{0002} = first transcode/only/error "#{00;XXX^/02}"
509+
--assert #{0002} = first transcode/only/error "#{00;XXX^M02}" ;CR is also comment stopper
493510
--test-- {binary! with other valid escapes}
494-
--assert #{0003} = first transcode/only/error to binary! "#{^(30)^(30)03}"
511+
--assert #{0003} = first transcode/only/error "#{^(30)^(30)03}"
495512
--test-- {binary! with unicode char} ; is handled early
496-
--assert error? first transcode/only/error to binary! "#{0č}"
513+
--assert error? first transcode/only/error "#{0č}"
497514
--test-- "Invalid binary"
498515
;@@ https://github.com/Oldes/Rebol-issues/issues/1431
499516
--assert all [error? e: try [load {000016#{FF}}] e/id = 'invalid e/arg1 = "integer"]

0 commit comments

Comments
 (0)