Skip to content

Commit 4c14cde

Browse files
committed
FEAT: initial implementation of parse's collect and keep commands.
resolves: Oldes/Rebol-issues#2471
1 parent 8549cab commit 4c14cde

File tree

4 files changed

+374
-6
lines changed

4 files changed

+374
-6
lines changed

src/boot/errors.reb

+1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ Script: [
120120
parse-variable: [{PARSE - expected a variable, not:} :arg1]
121121
parse-command: [{PARSE - command cannot be used as variable:} :arg1]
122122
parse-series: [{PARSE - input must be a series:} :arg1]
123+
parse-no-collect: {PARSE - KEEP is used without a wrapping COLLECT}
123124

124125
; bad-prompt: [{Error executing prompt block}]
125126
; bad-port-action: [{Cannot use} :arg1 {on this type port}]

src/boot/words.reb

+2
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ if
153153
fail
154154
reject
155155
while
156+
collect
157+
keep
156158
return
157159
limit
158160
??

src/core/u-parse.c

+198-6
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,19 @@ enum Parse_Flags {
3737
PF_CASED = 4, // was set as initial option
3838
};
3939

40+
typedef struct reb_parse_collect {
41+
REBVAL *result;
42+
REBSER *block;
43+
REBINT depth;
44+
} REB_PARSE_COLLECT;
45+
4046
typedef struct reb_parse {
4147
REBSER *series;
4248
REBCNT type;
4349
REBCNT flags;
4450
REBINT result;
45-
REBVAL retval;
51+
REBVAL *retval;
52+
REB_PARSE_COLLECT *collect;
4653
} REBPARSE;
4754

4855
enum parse_flags {
@@ -58,6 +65,9 @@ enum parse_flags {
5865
PF_RETURN,
5966
PF_WHILE,
6067
PF_ADVANCE, // used to report that although index was not changed, rule is suppose to advance
68+
PF_COLLECT,
69+
PF_KEEP,
70+
PF_PICK,
6171
};
6272

6373
#define MAX_PARSE_DEPTH 512
@@ -83,7 +93,7 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
8393

8494
/***********************************************************************
8595
**
86-
*/ static REBCNT Parse_Series(REBVAL *val, REBVAL *rules, REBCNT flags, REBCNT depth)
96+
*/ static REBCNT Parse_Series(REBVAL *val, REBVAL *rules, REBCNT flags, REBCNT depth, REB_PARSE_COLLECT *collect)
8797
/*
8898
***********************************************************************/
8999
{
@@ -93,6 +103,8 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
93103
parse.type = VAL_TYPE(val);
94104
parse.flags = flags;
95105
parse.result = 0;
106+
//parse.retval = NULL;
107+
parse.collect = collect;
96108

97109
return Parse_Rules_Loop(&parse, VAL_INDEX(val), rules, depth);
98110
}
@@ -655,6 +667,93 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
655667
}
656668
#endif // USE_DO_PARSE_RULE
657669

670+
/***********************************************************************
671+
**
672+
*/ static REBSER *Parse_Collect_Block(REBPARSE *parse)
673+
/*
674+
***********************************************************************/
675+
{
676+
REBVAL *val;
677+
if (parse->collect->depth == 0) Trap0(RE_PARSE_NO_COLLECT);
678+
679+
if (!parse->collect->block) {
680+
// there is no yet allocated block for collection
681+
// but the parent is on top of the stack, so we can
682+
// allocate a new block for the keep.
683+
val = DS_TOP;
684+
val = Append_Value(VAL_SERIES(val));
685+
Set_Series(REB_BLOCK, val, Make_Block(2));
686+
// and mark it for use
687+
parse->collect->block = VAL_SERIES(val);
688+
}
689+
return parse->collect->block;
690+
}
691+
692+
/***********************************************************************
693+
**
694+
*/ static void Parse_Keep(REBPARSE *parse, REBSER *series, REBCNT begin, REBCNT count, REBOOL pick)
695+
/*
696+
***********************************************************************/
697+
{
698+
REBVAL *val;
699+
REBINT i, e;
700+
REBSER *block = Parse_Collect_Block(parse);
701+
702+
ASSERT1(block, RP_MISC); // should never happen
703+
704+
if (parse->collect->depth == 0) Trap0(RE_PARSE_NO_COLLECT);
705+
706+
//printf("Keep from %i count: %i to: %x\n", begin, count, block);
707+
708+
if (count > 1) {
709+
710+
if (IS_BLOCK_INPUT(parse)) {
711+
if (pick) {
712+
Insert_Series(block, AT_TAIL, SERIES_SKIP(series, begin), count);
713+
}
714+
else {
715+
val = Append_Value(block);
716+
Set_Block(val, Copy_Block_Len(series, begin, count));
717+
}
718+
}
719+
else {
720+
if (pick) {
721+
e = begin + count;
722+
if (parse->type == REB_BINARY) {
723+
for (i = begin; i < e; i++) {
724+
val = Append_Value(block);
725+
SET_INTEGER(val, BIN_HEAD(series)[i]);
726+
}
727+
}
728+
else {
729+
for (i = begin; i < e; i++) {
730+
val = Append_Value(block);
731+
SET_CHAR(val, GET_ANY_CHAR(series, i));
732+
}
733+
}
734+
}
735+
else {
736+
val = Append_Value(block);
737+
VAL_SERIES(val) = Copy_String(series, begin, count);
738+
VAL_INDEX(val) = 0;
739+
VAL_SET(val, parse->type);
740+
}
741+
}
742+
}
743+
else if (count == 1) {
744+
val = Append_Value(block);
745+
if (IS_BLOCK_INPUT(parse)) {
746+
*val = *BLK_SKIP(series, begin);
747+
}
748+
else if (parse->type == REB_BINARY) {
749+
SET_INTEGER(val, BIN_HEAD(series)[begin]);
750+
}
751+
else {
752+
SET_CHAR(val, GET_ANY_CHAR(series, begin));
753+
}
754+
}
755+
}
756+
658757
/***********************************************************************
659758
**
660759
*/ static REBCNT Parse_Rules_Loop(REBPARSE *parse, REBCNT index, REBVAL *rules, REBCNT depth)
@@ -676,6 +775,7 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
676775
REBSER *ser;
677776
REBFLG flags;
678777
REBCNT cmd;
778+
REBSER *blk;
679779
//REBVAL *rule_head = rules;
680780

681781
CHECK_STACK(&flags);
@@ -766,6 +866,52 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
766866
SET_FLAG(flags, PF_CHANGE);
767867
continue;
768868

869+
case SYM_COLLECT:
870+
if (IS_END(rules))
871+
Trap1(RE_PARSE_END, rules - 1);
872+
//printf("COLLECT start %i\n", parse->collect->depth);
873+
// reserve a new value on stack
874+
DS_PUSH_NONE;
875+
if (parse->collect->block == NULL) {
876+
// --- FIRST collect -------------------------
877+
// allocate the resulting block on the stack, so it is GC safe
878+
Set_Series(REB_BLOCK, DS_TOP, Make_Block(2));
879+
parse->collect->result = DS_TOP;
880+
parse->collect->block = VAL_SERIES(DS_TOP);
881+
} else {
882+
// --- SUBSEQUENT collect ---------------------
883+
// store current block on stack
884+
Set_Series(REB_BLOCK, DS_TOP, parse->collect->block);
885+
// do not allocate a new one, until it is needed, else
886+
// there could be unwanted empty blocks like in case:
887+
// parse [1][collect some [collect keep integer!]]
888+
parse->collect->block = NULL;
889+
}
890+
SET_FLAG(flags, PF_COLLECT);
891+
parse->collect->depth++;
892+
continue;
893+
894+
case SYM_KEEP:
895+
if (IS_END(rules)) {
896+
Trap1(RE_PARSE_END, rules - 1);
897+
}
898+
if (IS_WORD(rules) && VAL_SYM_CANON(rules) == SYM_PICK) {
899+
SET_FLAG(flags, PF_PICK);
900+
rules++;
901+
if (IS_END(rules))
902+
Trap1(RE_PARSE_END, rules - 2);
903+
}
904+
if (IS_PAREN(rules)) {
905+
blk = Parse_Collect_Block(parse);
906+
item = Do_Block_Value_Throw(rules); // might GC
907+
Append_Val(blk, item);
908+
rules++;
909+
continue;
910+
}
911+
SET_FLAG(flags, PF_KEEP);
912+
913+
continue;
914+
769915
case SYM_RETURN:
770916
if (IS_PAREN(rules)) {
771917
item = Do_Block_Value_Throw(rules); // might GC
@@ -939,7 +1085,7 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
9391085
val = BLK_SKIP(series, index);
9401086
i = (
9411087
(ANY_BINSTR(val) || ANY_BLOCK(val))
942-
&& (Parse_Series(val, VAL_BLK_DATA(item), parse->flags, depth+1) == VAL_TAIL(val))
1088+
&& (Parse_Series(val, VAL_BLK_DATA(item), parse->flags, depth+1, &parse->collect) == VAL_TAIL(val))
9431089
) ? index+1 : NOT_FOUND;
9441090
break;
9451091
#ifdef USE_DO_PARSE_RULE
@@ -999,7 +1145,13 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
9991145
}
10001146
//if (i >= series->tail) { // OLD check: no more input
10011147
else {
1002-
if (count < mincount) index = NOT_FOUND; // was not enough
1148+
if (count < mincount) {
1149+
index = NOT_FOUND; // was not enough
1150+
// Uncomment bellow code, to have result:
1151+
// [? []] = parse ["a"][collect some [keep ('?) collect keep integer!]]
1152+
// if (GET_FLAG(flags, PF_KEEP))
1153+
// Parse_Collect_Block(parse);
1154+
}
10031155
else if (i != NOT_FOUND) index = i;
10041156
// else keep index as is.
10051157
break;
@@ -1035,6 +1187,7 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
10351187
}
10361188
else { // Success actions:
10371189
count = (begin > index) ? 0 : index - begin; // how much we advanced the input
1190+
ser = NULL;
10381191
if (GET_FLAG(flags, PF_COPY)) {
10391192
ser = (IS_BLOCK_INPUT(parse))
10401193
? Copy_Block_Len(series, begin, count)
@@ -1060,6 +1213,35 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
10601213
}
10611214
}
10621215
}
1216+
if (GET_FLAG(flags, PF_KEEP)) {
1217+
if (ser && GET_FLAG(flags, PF_COPY)) {
1218+
val = Append_Value(parse->collect->block);
1219+
if (IS_BLOCK_INPUT(parse)) {
1220+
Set_Block(val, ser);
1221+
}
1222+
else if (parse->type == REB_BINARY) {
1223+
Set_Binary(val, ser);
1224+
}
1225+
else {
1226+
VAL_SET(val, parse->type);
1227+
VAL_SERIES(val) = ser;
1228+
VAL_INDEX(val) = 0;
1229+
VAL_SERIES_SIDE(val) = 0;
1230+
}
1231+
}
1232+
else {
1233+
Parse_Keep(parse, series, begin, count, GET_FLAG(flags, PF_PICK));
1234+
}
1235+
}
1236+
if (GET_FLAG(flags, PF_COLLECT)) {
1237+
// COLLECT ends
1238+
// get the previous target block from the stack and use it
1239+
val = DS_POP;
1240+
parse->collect->block = VAL_SERIES(val);
1241+
parse->collect->depth--;
1242+
//printf("COLLECT done %i\n", parse->collect->depth);
1243+
}
1244+
10631245
if (GET_FLAG(flags, PF_RETURN)) {
10641246
ser = (IS_BLOCK_INPUT(parse))
10651247
? Copy_Block_Len(series, begin, count)
@@ -1309,6 +1491,7 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
13091491
#endif
13101492
REBCNT n;
13111493
REBOL_STATE state;
1494+
REB_PARSE_COLLECT collect;
13121495
// Let user RETURN and THROW out of the PARSE. All other errors should relay.
13131496
PUSH_STATE(state, Saved_State);
13141497
if (SET_JUMP(state)) {
@@ -1328,8 +1511,17 @@ void Print_Parse_Index(REBCNT type, REBVAL *rules, REBSER *series, REBCNT index)
13281511
Throw_Error(VAL_ERR_OBJECT(DS_RETURN));
13291512
}
13301513
SET_STATE(state, Saved_State);
1331-
n = Parse_Series(val, VAL_BLK_DATA(arg), (opts & PF_CASE) ? AM_FIND_CASE : 0, 0);
1332-
SET_LOGIC(DS_RETURN, n >= VAL_TAIL(val) && n != NOT_FOUND);
1514+
collect.depth = 0;
1515+
collect.result = NULL;
1516+
collect.block = NULL;
1517+
1518+
n = Parse_Series(val, VAL_BLK_DATA(arg), (opts & PF_CASE) ? AM_FIND_CASE : 0, 0, &collect);
1519+
if (collect.result) {
1520+
*D_RET = *collect.result;
1521+
}
1522+
else {
1523+
SET_LOGIC(DS_RETURN, n >= VAL_TAIL(val) && n != NOT_FOUND);
1524+
}
13331525
POP_STATE(state, Saved_State);
13341526
#ifdef INCLUDE_PARSE_SERIES_SPLITTING
13351527
}

0 commit comments

Comments
 (0)