Skip to content

Commit 7f46bc6

Browse files
committed
FEAT: implemented raw string syntax in the form %{...}%
resolves: Oldes/Rebol-issues#1194
1 parent 9fa0f54 commit 7f46bc6

File tree

3 files changed

+155
-30
lines changed

3 files changed

+155
-30
lines changed

src/core/l-scan.c

+61-3
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,53 @@
553553
}
554554

555555

556+
/***********************************************************************
**
*/	const REBYTE *Scan_Raw_String(const REBYTE *src, SCAN_STATE *scan_state, int num)
/*
**		Scan a raw string (its content is taken without any modifications).
**		Eliminates the need for double escaping and allows unmatched braces.
**
**		src        - first byte of the string content (NUL-terminated input).
**		scan_state - may be NULL; when given, its line_count is advanced
**		             by the number of LF bytes found, but only on success.
**		num        - number of '%' chars the closing delimiter must have.
**
**		The result will be put into the temporary MOLD_BUF unistring.
**
**		Returns a pointer just past the closing "}%..." delimiter, or 0
**		when the input ends before the delimiter is found, or when a '}'
**		is followed by more than num '%' chars.
**
**		NOTE(review): every source byte is stored as one buffer unit, so
**		multi-byte UTF-8 sequences do not appear to be decoded here;
**		confirm whether that is intended.
**
***********************************************************************/
{
	REBCNT lines = 0;
	REBSER *buf = BUF_MOLD;
	const REBYTE *bp = src;  // read-only cursor; keeps the const of src
	REBINT n;                // signed, so it compares cleanly with num
	REBINT chr;

	RESET_TAIL(buf);

	while (*bp) {
		// CR LF pairs are intentionally kept as they are (no conversion to LF).
		chr = *bp;

		if (chr == LF) {
			lines++;
		}
		else if (chr == '}' && bp[1] == '%') {
			// Count the '%' chars which directly follow the closing brace.
			n = 0;
			while (bp[n + 1] == '%') n++;

			if (n == num) {
				// success
				if (scan_state) scan_state->line_count += lines;
				UNI_TERM(buf);
				return bp + 1 + n; // Skip the brace and the ending '%' chars
			}
			// More '%' chars than were used to open the string: error.
			if (n > num) return 0;
			// Fewer '%' chars: the brace is ordinary content, fall through.
		}

		bp++;

		if (SERIES_FULL(buf))
			Extend_Series(buf, 1);

		*UNI_SKIP(buf, buf->tail) = chr;
		buf->tail++;
	}
	return 0; // end of source input without the closing delimiter
}
601+
602+
556603
/***********************************************************************
557604
**
558605
*/ const REBYTE *Scan_Item(const REBYTE *src, const REBYTE *end, REBUNI term, const REBYTE *invalid)
@@ -933,13 +980,24 @@
933980
return TOKEN_REF;
934981

935982
case LEX_SPECIAL_PERCENT:
936-
if (IS_LEX_DELIMIT(cp[1]) && cp[1] != '"' && cp[1] != '/') {
983+
if (IS_LEX_DELIMIT(cp[1]) && cp[1] != '"' && cp[1] != '/' && cp[1] != '{') {
937984
return TOKEN_WORD; // special case for having % as a word (reminder op!)
938985
} else if (cp[1] == ':' && IS_LEX_DELIMIT(cp[2])) {
939986
return TOKEN_SET;
940987
}
941-
/* %filename */
942-
cp = scan_state->end;
988+
int n = 1;
989+
while (*cp) {
990+
if (cp[n] == '{') {
991+
cp = Scan_Raw_String(cp+n+1, scan_state, n); // stores result string in BUF_MOLD
992+
if (!cp) return -TOKEN_STRING;
993+
scan_state->end = cp;
994+
return TOKEN_STRING;
995+
}
996+
if (cp[n] != '%') break;
997+
n++;
998+
}
999+
cp = scan_state->end;
1000+
/* %"file name" or %filename */
9431001
if (*cp == '"') {
9441002
cp = Scan_Quote(cp, scan_state); // stores result string in BUF_MOLD
9451003
if (!cp) return -TOKEN_FILE;

src/os/host-main.c

+78-27
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ void Host_Repl(void) {
121121
// REBOOL why_alert = TRUE;
122122

123123
#define MAX_CONT_LEVEL 1024
124+
#define INPUT_NO_STRING 0
125+
#define INPUT_SINGLE_LINE_STRING 1
126+
#define INPUT_MULTI_LINE_STRING 2
127+
#define INPUT_RAW_STRING 3
128+
124129
REBYTE cont_str[] = CONTIN_STR;
125130
REBCNT cont_level = 0;
126131
REBYTE cont_stack[MAX_CONT_LEVEL] = { 0 };
@@ -134,8 +139,10 @@ void Host_Repl(void) {
134139
REBLEN line_len;
135140

136141
REBYTE *utf8byte;
137-
BOOL inside_short_str = FALSE;
142+
int raw_str_level = 0;
138143
int long_str_level = 0;
144+
int n = 0;
145+
int state = INPUT_NO_STRING;
139146

140147
while (TRUE) {
141148
if (cont_level > 0) {
@@ -154,6 +161,7 @@ void Host_Repl(void) {
154161
cont_level = 0;
155162
input_len = 0;
156163
input[0] = 0;
164+
raw_str_level = 0;
157165
continue;
158166
}
159167
RESET_COLOR;
@@ -163,45 +171,87 @@ void Host_Repl(void) {
163171
line_len = 0;
164172
for (utf8byte = line; *utf8byte; utf8byte++) {
165173
line_len++;
166-
switch (*utf8byte) {
167-
case '^':
168-
if (*(utf8byte + 1) != 0) {
174+
if (state == INPUT_NO_STRING)
175+
{
176+
switch (*utf8byte) {
177+
case '"':
178+
state = INPUT_SINGLE_LINE_STRING;
179+
break;
180+
case '[':
181+
case '(':
182+
if (cont_level < MAX_CONT_LEVEL) cont_stack[cont_level] = *utf8byte;
183+
cont_level++;
184+
break;
185+
case ']':
186+
case ')':
187+
if (cont_level > 0) cont_level--;
188+
break;
189+
case '{':
190+
if (cont_level < MAX_CONT_LEVEL) cont_stack[cont_level] = *utf8byte;
191+
cont_level++;
192+
long_str_level++;
193+
state = INPUT_MULTI_LINE_STRING;
194+
break;
195+
case '%':
196+
n = 1;
197+
while (utf8byte[n] == '%') n++;
198+
if (utf8byte[n] == '{') {
199+
raw_str_level = n;
200+
if (cont_level < MAX_CONT_LEVEL) cont_stack[cont_level] = '{';
201+
cont_level++;
202+
state = INPUT_RAW_STRING;
203+
}
204+
line_len += n;
205+
utf8byte += n;
206+
break;
207+
}
208+
}
209+
210+
else if (state == INPUT_SINGLE_LINE_STRING)
211+
{
212+
if (*utf8byte == '^' && utf8byte[1]) {
169213
line_len++;
170214
utf8byte++;
171215
}
172-
break;
173-
case '"':
174-
if (long_str_level == 0) inside_short_str = !inside_short_str;
175-
break;
176-
case '[':
177-
case '(':
178-
if (!inside_short_str && long_str_level == 0) {
179-
if (cont_level < MAX_CONT_LEVEL) cont_stack[cont_level] = *utf8byte;
180-
cont_level++;
216+
else if (*utf8byte == '"') {
217+
state = INPUT_NO_STRING;
181218
}
182-
break;
183-
case ']':
184-
case ')':
185-
if (!inside_short_str && long_str_level == 0) {
186-
cont_level--;
219+
}
220+
221+
else if (state == INPUT_MULTI_LINE_STRING)
222+
{
223+
if (*utf8byte == '^' && utf8byte[1]) {
224+
line_len++;
225+
utf8byte++;
187226
}
188-
break;
189-
case '{':
190-
if (!inside_short_str) {
227+
else if (*utf8byte == '{') {
191228
if (cont_level < MAX_CONT_LEVEL) cont_stack[cont_level] = *utf8byte;
192229
cont_level++;
193230
long_str_level++;
194231
}
195-
break;
196-
case '}':
197-
if (!inside_short_str) {
232+
else if (*utf8byte == '}') {
198233
cont_level--;
199-
if (long_str_level > 0) long_str_level--;
234+
long_str_level--;
235+
if (long_str_level == 0) state = INPUT_NO_STRING;
236+
}
237+
}
238+
239+
else if (state == INPUT_RAW_STRING)
240+
{
241+
if (*utf8byte == '}' && utf8byte[1] == '%') {
242+
n = 1;
243+
while (utf8byte[n] == '%') n++;
244+
if (raw_str_level < n) {
245+
raw_str_level = 0;
246+
line_len += n;
247+
utf8byte += n;
248+
cont_level--;
249+
state = INPUT_NO_STRING;
250+
}
200251
}
201-
break;
202252
}
203253
}
204-
inside_short_str = FALSE;
254+
if (state == INPUT_SINGLE_LINE_STRING) state = INPUT_NO_STRING;
205255

206256
if (input_len + line_len > input_max) {
207257
// limit maximum input size to 2GB (it should be more than enough)
@@ -230,6 +280,7 @@ void Host_Repl(void) {
230280

231281
input_len = 0;
232282
cont_level = 0;
283+
raw_str_level = 0;
233284

234285
RESET_COLOR;
235286

src/tests/units/lexer-test.r3

+16
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,22 @@ Rebol [
155155

156156
===end-group===
157157

158+
159+
===start-group=== "Raw string"
160+
--test-- "rawstring %{}%"
161+
--assert "" == transcode/one "%{}%"
162+
--assert "" == transcode/one "%%{}%%"
163+
--assert "a^^b" == transcode/one "%{a^^b}%"
164+
--assert "}" == transcode/one "%{}}%"
165+
--assert "{" == transcode/one "%{{}%"
166+
--assert " %{^}% " == transcode/one "%%{ %{^}% }%%"
167+
--test-- "rawstring %{}% multiline"
168+
--assert "^/" == transcode/one rejoin ["%{" LF "}%"]
169+
--assert "^M^/" == transcode/one rejoin ["%{" CR LF "}%"]
170+
171+
===end-group===
172+
173+
158174
===start-group=== "Special % word"
159175
--test-- "valid % word cases"
160176
--assert word? try [load {%}]

0 commit comments

Comments
 (0)