Skip to content

Commit cefbc96

Browse files
committed
FIX: allow various variants of arrow-like words
resolves: Oldes/Rebol-issues#1302 resolves: Oldes/Rebol-issues#1318 resolves: Oldes/Rebol-issues#1342 resolves: Oldes/Rebol-issues#1478
1 parent c2833d9 commit cefbc96

File tree

2 files changed

+238
-28
lines changed

2 files changed

+238
-28
lines changed

src/core/l-scan.c

+110-28
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,55 @@
652652
return 0;
653653
}
654654

655+
/***********************************************************************
656+
**
657+
*/ static const REBYTE* Skip_Left_Arrow(const REBYTE* cp)
658+
/*
659+
** Skip the entire contents of a `left arrow` like words.
660+
** Zero is returned on errors.
661+
**
662+
***********************************************************************/
663+
{
664+
while (*cp == '<') cp++;
665+
while (*cp) {
666+
if (*cp == '-' || *cp == '=' || *cp == '>' || *cp == '~') {
667+
cp++;
668+
continue;
669+
}
670+
if (IS_LEX_DELIMIT(*cp)) break;
671+
if (*cp == ':') {
672+
cp++;
673+
break;
674+
}
675+
return 0;
676+
}
677+
return cp;
678+
}
679+
680+
/***********************************************************************
681+
**
682+
*/ static const REBYTE* Skip_Right_Arrow(const REBYTE* cp)
683+
/*
684+
** Skip the entire contents of a `right arrow` like words.
685+
** Zero is returned on errors.
686+
**
687+
***********************************************************************/
688+
{
689+
while (*cp) {
690+
if (*cp == '-' || *cp == '=' || *cp == '>' || *cp == '~') {
691+
cp++;
692+
continue;
693+
}
694+
if (IS_LEX_DELIMIT(*cp)) break;
695+
if (*cp == ':') {
696+
cp++;
697+
break;
698+
}
699+
return 0;
700+
}
701+
return cp;
702+
}
703+
655704

656705
/***********************************************************************
657706
**
@@ -777,6 +826,7 @@
777826
REBCNT flags;
778827
const REBYTE *cp;
779828
REBINT type;
829+
REBYTE *np = NULL;
780830

781831
flags = Prescan(scan_state);
782832
cp = scan_state->begin;
@@ -847,9 +897,9 @@
847897
if (ONLY_LEX_FLAG(flags, LEX_SPECIAL_WORD)) return type;
848898
goto scanword;
849899
}
850-
if (cp[0] == '<' || cp[0] == '>') {
851-
scan_state->end = cp+1;
852-
return -TOKEN_REFINE;
900+
if (*cp == '<' || *cp == '>') {
901+
type = TOKEN_REFINE;
902+
goto scan_arrow_word;
853903
}
854904
scan_state->end = cp;
855905
return TOKEN_WORD;
@@ -902,10 +952,8 @@
902952
// Various special cases of < << <> >> > >= <=
903953
if (cp[1] == '<' || cp[1] == '>') {
904954
cp++;
905-
if (cp[1] == '<' || cp[1] == '>' || cp[1] == '=') cp++;
906-
if (!IS_LEX_DELIMIT(cp[1])) return -TOKEN_GET;
907-
scan_state->end = cp+1;
908-
return TOKEN_GET;
955+
type = TOKEN_GET;
956+
goto scan_arrow_word;
909957
}
910958
if (cp[1] == '%' && IS_LEX_DELIMIT(cp[2])) {
911959
if (cp[2] == '"' || cp[2] == '/') { // no :%"" or :%/
@@ -922,25 +970,22 @@
922970
if (IS_LEX_NUMBER(cp[1])) return -TOKEN_LIT; // no '2nd
923971
if (cp[1] == ':') return -TOKEN_LIT; // no ':X
924972
if (ONLY_LEX_FLAG(flags, LEX_SPECIAL_WORD)) return TOKEN_LIT; /* common case */
973+
cp++;
974+
if (*cp == '<' || *cp == '>') {
975+
type = TOKEN_LIT;
976+
goto scan_arrow_word;
977+
}
925978
if (!IS_LEX_WORD(cp[1])) {
926-
// Various special cases of < << <> >> > >= <=
927-
if ((cp[1] == '-' || cp[1] == '+') && IS_LEX_NUMBER(cp[2])) return -TOKEN_WORD;
928-
if (cp[1] == '<' || cp[1] == '>') {
929-
cp++;
930-
if (cp[1] == '<' || cp[1] == '>' || cp[1] == '=') cp++;
931-
if (!IS_LEX_DELIMIT(cp[1])) return -TOKEN_LIT;
932-
scan_state->end = cp+1;
933-
return TOKEN_LIT;
934-
}
935-
if (cp[1] == '%' && IS_LEX_DELIMIT(cp[2])) {
936-
if (cp[2] == '"' || cp[2] == '/') { // no '%"" or '%/
937-
scan_state->end = cp + 3;
979+
if ((*cp == '-' || *cp == '+') && IS_LEX_NUMBER(cp[1])) return -TOKEN_WORD;
980+
if (*cp == '%' && IS_LEX_DELIMIT(cp[1])) {
981+
if (cp[1] == '"' || cp[1] == '/') { // no '%"" or '%/
982+
scan_state->end = cp + 2;
938983
return -TOKEN_LIT;
939984
}
940985
return TOKEN_LIT; // allowed '%
941986
}
942987
}
943-
if (cp[1] == '\'') return -TOKEN_LIT; // no ''foo
988+
if (*cp == '\'') return -TOKEN_LIT; // no ''foo
944989
type = TOKEN_LIT;
945990
goto scanword;
946991

@@ -954,15 +999,28 @@
954999

9551000
case LEX_SPECIAL_GREATER:
9561001
if (IS_LEX_DELIMIT(cp[1])) return TOKEN_WORD; // RAMBO 3903
957-
if (cp[1] == '>') {
958-
if (IS_LEX_DELIMIT(cp[2])) return TOKEN_WORD;
959-
return -TOKEN_WORD;
1002+
if (cp[1] == '>' || cp[1] == '=' || cp[1] == '-' || cp[1] == '~') {
1003+
np = Skip_Right_Arrow(cp);
1004+
if (np != NULL) {
1005+
scan_state->end = np;
1006+
return (np[-1] == ':' ? TOKEN_SET : TOKEN_WORD);
1007+
}
9601008
}
1009+
return -TOKEN_WORD;
1010+
9611011
case LEX_SPECIAL_LESSER:
9621012
if (IS_LEX_ANY_SPACE(cp[1]) || cp[1] == ']' || cp[1] == 0) return TOKEN_WORD; // CES.9121 Was LEX_DELIMIT - changed for </tag>
963-
if ((cp[0] == '<' && cp[1] == '<') || cp[1] == '=' || cp[1] == '>') {
964-
if (IS_LEX_DELIMIT(cp[2])) return TOKEN_WORD;
965-
return -TOKEN_WORD;
1013+
1014+
if (IS_LEX_DELIMIT(cp[2]) && (cp[1] == '>' || cp[1] == '=' || cp[1] == '<')) {
1015+
return TOKEN_WORD; // common cases: <> <= <<
1016+
}
1017+
1018+
if (cp[1] == '<' || cp[1] == '>' || cp[1] == '=' || cp[1] == '-' || cp[1] == '~') {
1019+
np = Skip_Left_Arrow(cp);
1020+
if (np != NULL) {
1021+
scan_state->end = np;
1022+
return (np[-1] == ':' ? TOKEN_SET : TOKEN_WORD);
1023+
}
9661024
}
9671025
if (GET_LEX_VALUE(*cp) == LEX_SPECIAL_GREATER) return -TOKEN_WORD;
9681026
cp = Skip_Tag(cp);
@@ -1169,8 +1227,33 @@
11691227
/*bogus: if (HAS_LEX_FLAG(flags, LEX_SPECIAL_GREATER) &&
11701228
Skip_To_Char(scan_state->begin, cp, '>')) return -TOKEN_WORD; */
11711229
scan_state->end = cp;
1172-
} else if (HAS_LEX_FLAG(flags, LEX_SPECIAL_GREATER)) return -type;
1230+
}
1231+
else if (HAS_LEX_FLAG(flags, LEX_SPECIAL_GREATER)) {
1232+
if (*cp == '=' || *cp == '-' || *cp == '~') {
1233+
np = Skip_Right_Arrow(cp);
1234+
if (np != NULL) {
1235+
scan_state->end = np;
1236+
return (np[-1] == ':' ? TOKEN_SET : type);
1237+
}
1238+
}
1239+
return -type;
1240+
}
11731241
return type;
1242+
1243+
scan_arrow_word:
1244+
// Various special cases of < << <> >> > >= <= <--- >--->
1245+
if (cp[0] == '<') {
1246+
np = Skip_Left_Arrow(cp);
1247+
if (!np) return -type;
1248+
scan_state->end = np;
1249+
return type;
1250+
}
1251+
else {
1252+
np = Skip_Right_Arrow(cp);
1253+
if (!np) return -type;
1254+
scan_state->end = np;
1255+
return type;
1256+
}
11741257
}
11751258

11761259

@@ -1822,7 +1905,6 @@ extern REBSER *Scan_Full_Block(SCAN_STATE *scan_state, REBYTE mode_char);
18221905
// (space and tab chars at tail are truncated and so accepted)
18231906
&& scan_state.end == scan_state.limit)
18241907
return Make_Word(cp, len);
1825-
18261908
return 0;
18271909
}
18281910

src/tests/units/lexer-test.r3

+128
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,134 @@ Rebol [
7676

7777
===end-group===
7878

79+
===start-group=== "Special arrow-like words"
80+
;@@ https://github.com/Oldes/Rebol-issues/issues/1302
81+
;@@ https://github.com/Oldes/Rebol-issues/issues/1318
82+
;@@ https://github.com/Oldes/Rebol-issues/issues/1342
83+
;@@ https://github.com/Oldes/Rebol-issues/issues/1478
84+
85+
--test-- "valid arrow-like words"
86+
--assert word? try [load {<-->}]
87+
--assert word? try [load {<==>}]
88+
--assert word? try [load {<-==->}]
89+
--assert word? try [load {<~~~>}]
90+
91+
--test-- "valid left-arrow-like words"
92+
--assert word? try [load {<<}]
93+
--assert word? try [load {<<<}]
94+
--assert word? try [load {<<<<}]
95+
--assert word? try [load {<<==}]
96+
--assert word? try [load {<===}]
97+
--assert word? try [load {<---}]
98+
--assert word? try [load {<~~~}]
99+
--assert all [block? b: try [load {<<<""}] parse b [word! string!]]
100+
101+
--test-- "valid right-arrow-like words"
102+
--assert word? try [load {>>}]
103+
--assert word? try [load {>>>}]
104+
--assert word? try [load {>>>>}]
105+
--assert word? try [load {==>>}]
106+
--assert word? try [load {===>}]
107+
--assert word? try [load {--->}]
108+
--assert word? try [load {~~~>}]
109+
--assert all [block? b: try [load {>>>""}] parse b [word! string!]]
110+
111+
--test-- "invalid cases"
112+
--assert error? try [load {a<}]
113+
--assert error? try [load {a>}]
114+
--assert error? try [load {a<--}]
115+
--assert error? try [load {a-->}]
116+
117+
--test-- "valid arrow-like lit-words"
118+
--assert lit-word? try [load {'<-->}]
119+
--assert lit-word? try [load {'<==>}]
120+
--assert lit-word? try [load {'<-==->}]
121+
--assert lit-word? try [load {'<~~~>}]
122+
123+
--test-- "valid left-arrow-like lit-words"
124+
--assert lit-word? try [load {'<<}]
125+
--assert lit-word? try [load {'<<<}]
126+
--assert lit-word? try [load {'<<<<}]
127+
--assert all [block? b: try [load {'<<<""}] parse b [lit-word! string!]]
128+
129+
--test-- "valid right-arrow-like lit-words"
130+
--assert lit-word? try [load {'>>}]
131+
--assert lit-word? try [load {'>>>}]
132+
--assert lit-word? try [load {'>>>>}]
133+
--assert lit-word? try [load {'==>>}]
134+
--assert lit-word? try [load {'===>}]
135+
--assert lit-word? try [load {'--->}]
136+
--assert lit-word? try [load {'~~~>}]
137+
--assert all [block? b: try [load {'>>>""}] parse b [lit-word! string!]]
138+
139+
--test-- "valid arrow-like get-words"
140+
--assert get-word? try [load {:<-->}]
141+
--assert get-word? try [load {:<==>}]
142+
--assert get-word? try [load {:<-==->}]
143+
--assert get-word? try [load {:<~~~>}]
144+
145+
--test-- "valid left-arrow-like get-words"
146+
--assert get-word? try [load {:<<}]
147+
--assert get-word? try [load {:<<<}]
148+
--assert get-word? try [load {:<<<<}]
149+
--assert all [block? b: try [load {:<<<""}] parse b [get-word! string!]]
150+
151+
--test-- "valid right-arrow-like get-words"
152+
--assert get-word? try [load {:>>}]
153+
--assert get-word? try [load {:>>>}]
154+
--assert get-word? try [load {:>>>>}]
155+
--assert get-word? try [load {:==>>}]
156+
--assert get-word? try [load {:===>}]
157+
--assert get-word? try [load {:--->}]
158+
--assert get-word? try [load {:~~~>}]
159+
--assert all [block? b: try [load {:>>>""}] parse b [get-word! string!]]
160+
161+
--test-- "valid arrow-like set-words"
162+
--assert set-word? try [load {<-->:}]
163+
--assert set-word? try [load {<==>:}]
164+
--assert set-word? try [load {<-==->:}]
165+
--assert set-word? try [load {<~~~>:}]
166+
167+
--test-- "valid left-arrow-like set-words"
168+
--assert set-word? try [load {<<:}]
169+
--assert set-word? try [load {<<<:}]
170+
--assert set-word? try [load {<<<<:}]
171+
--assert all [block? b: try [load {<<<:""}] parse b [set-word! string!]]
172+
173+
--test-- "valid right-arrow-like set-words"
174+
--assert set-word? try [load {>>:}]
175+
--assert set-word? try [load {>>>:}]
176+
--assert set-word? try [load {>>>>:}]
177+
--assert set-word? try [load {==>>:}]
178+
--assert set-word? try [load {===>:}]
179+
--assert set-word? try [load {--->:}]
180+
--assert set-word? try [load {~~~>:}]
181+
--assert all [block? b: try [load {>>>:""}] parse b [set-word! string!]]
182+
183+
--test-- "valid arrow-like refinements"
184+
--assert refinement? try [load {/<-->}]
185+
--assert refinement? try [load {/<==>}]
186+
--assert refinement? try [load {/<-==->}]
187+
--assert refinement? try [load {/<~~~>}]
188+
189+
--test-- "valid left-arrow-like refinements"
190+
--assert refinement? try [load {/<<}]
191+
--assert refinement? try [load {/<<<}]
192+
--assert refinement? try [load {/<<<<}]
193+
--assert all [block? b: try [load {/<<<""}] parse b [refinement! string!]]
194+
195+
--test-- "valid right-arrow-like refinements"
196+
--assert refinement? try [load {/>>}]
197+
--assert refinement? try [load {/>>>}]
198+
--assert refinement? try [load {/>>>>}]
199+
--assert refinement? try [load {/==>>}]
200+
--assert refinement? try [load {/===>}]
201+
--assert refinement? try [load {/--->}]
202+
--assert refinement? try [load {/~~~>}]
203+
--assert all [block? b: try [load {/>>>""}] parse b [refinement! string!]]
204+
205+
===end-group===
206+
79207
===start-group=== "Email"
80208
--test-- "valid `emails`"
81209
--assert email? load {name@where}

0 commit comments

Comments
 (0)