Skip to content

Commit 8319d5c

Browse files
committed
FEAT: special integer notation (bit, octal, decimal and hexadecimal)
resolves: Oldes/Rebol-issues#1781
1 parent 2048a62 commit 8319d5c

File tree

4 files changed

+202
-6
lines changed

4 files changed

+202
-6
lines changed

src/core/l-scan.c

+92-5
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,58 @@
751751
return cp;
752752
}
753753

754+
/***********************************************************************
**
*/	static const REBYTE* Prescan_Spec_Integer(const REBYTE *cp, REBINT base)
/*
**		Validate the digits of a special integer notation:
**			binary:      2#0101
**			octal:       8#777
**			decimal:     10#123
**			hexadecimal: 16#FF or just 0#FF
**
**		cp   - points at the first digit, i.e. just past the `#`.
**		base - numeric base of the notation: 2, 8, 10 or 16.
**
**		Returns a pointer to the delimiter that follows the digits,
**		or 0 when:
**			- no digit was found,
**			- the digits are not followed by a delimiter (this also
**			  covers "too many digits": scanning stops at the limit
**			  and the next digit is not a delimiter),
**			- the base is not one of the supported ones.
**
**		Digit limits per base: 16 -> 16, 2 -> 64, 8 -> 22, 10 -> 18.
**		NOTE(review): 22 octal digits span 66 bits, so the largest
**		accepted octal inputs do not fit in 64 bits - confirm that the
**		wrap-around in the converter is intended.
**
***********************************************************************/
{
	REBCNT n, m;	// n: digits still allowed, m: digit limit for the base
	REBYTE lex;
	REBINT val;

	if (base == 16) {
		n = m = 16;
		while (n > 0) {
			lex = Lex_Map[*cp];
			// Stop on anything below word class, and on word-class
			// chars that carry no digit value (presumably non-hex
			// letters - verify against Lex_Map). Number-class chars
			// always pass, including '0' whose value bits are zero.
			if (lex < LEX_WORD || (!(lex & LEX_VALUE) && lex < LEX_NUMBER))
				break;
			cp++;
			n--;
		}
	}
	else if (base == 2) {
		n = m = 64; // max 64 bits
		while (n > 0 && (*cp == '0' || *cp == '1')) {
			cp++; n--;
		}
	}
	else if (base == 8) {
		n = m = 22; // max length 22 bytes
		while (n > 0) {
			val = *cp - '0';
			if (val < 0 || val > 7)
				break;
			cp++; n--;
		}
	}
	else if (base == 10) {
		n = m = 18; // max length 18 digits
		while (n > 0) {
			if (!IS_LEX_NUMBER(*cp))
				break;
			cp++; n--;
		}
	}
	else {
		// Unsupported base: without this branch n and m would be
		// read uninitialized by the check below.
		return 0;
	}

	// n < m means at least one digit was consumed.
	return (n < m && IS_LEX_DELIMIT(*cp)) ? cp : 0;
}
805+
754806

755807
/***********************************************************************
756808
**
@@ -1169,6 +1221,7 @@
11691221
}
11701222
}
11711223
if (*cp == '{') { /* BINARY #{12343132023902902302938290382} */
1224+
scan_binary:
11721225
scan_state->end = scan_state->begin; /* save start */
11731226
scan_state->begin = cp;
11741227
// Originally there was used Scan_Quote collecting into BUF_MOLD, but this was not used later.
@@ -1212,12 +1265,42 @@
12121265
if (HAS_LEX_FLAG(flags, LEX_SPECIAL_AT)) return TOKEN_EMAIL;
12131266
if (HAS_LEX_FLAG(flags, LEX_SPECIAL_POUND)) {
12141267
if (cp == scan_state->begin) { // no +2 +16 +64 allowed
1268+
REBINT base = 0;
1269+
REBYTE c0 = cp[0];
12151270
if (
1216-
(cp[0] == '6' && cp[1] == '4' && cp[2] == '#' && cp[3] == '{')
1217-
|| (cp[0] == '1' && cp[1] == '6' && cp[2] == '#' && cp[3] == '{') // rare
1218-
) {cp += 2; goto pound;}
1219-
if (cp[0] == '2' && cp[1] == '#' && cp[2] == '{')
1220-
{cp++; goto pound;} // very rare
1271+
(c0 == '6' && cp[1] == '4' && cp[2] == '#')
1272+
|| (c0 == '1' && cp[1] == '6' && cp[2] == '#') // rare
1273+
) {
1274+
cp += 3;
1275+
if (cp[0] == '{') goto scan_binary;
1276+
if (c0 == '1') base = 16;
1277+
}
1278+
else if (cp[1] == '#') {
1279+
cp += 2;
1280+
if (c0 == '2') {
1281+
if (cp[0] == '{') goto scan_binary; // very rare
1282+
base = 2;
1283+
}
1284+
else if (c0 == '0') {
1285+
base = 16;
1286+
}
1287+
else if (c0 == '8') {
1288+
base = 8;
1289+
}
1290+
}
1291+
else if (c0 == '1' && cp[1] == '0' && cp[2] == '#') // ultra rare
1292+
{
1293+
cp += 3;
1294+
base = 10;
1295+
}
1296+
1297+
if (base) {
1298+
np = Prescan_Spec_Integer(cp, base);
1299+
if (np) {
1300+
scan_state->end = np;
1301+
return TOKEN_INTEGER_SPEC;
1302+
}
1303+
}
12211304
}
12221305

12231306
#ifndef USE_NO_INFINITY
@@ -1744,6 +1827,10 @@ extern REBSER *Scan_Full_Block(SCAN_STATE *scan_state, REBYTE mode_char);
17441827
if(!MT_Map(value, value, 0)) Trap1(RE_INVALID_ARG, value);
17451828
break;
17461829

1830+
case TOKEN_INTEGER_SPEC:
1831+
Scan_Spec_Integer(bp, len, value);
1832+
break;
1833+
17471834
case TOKEN_EOF: continue;
17481835

17491836
default: ;

src/core/l-types.c

+61
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,67 @@ bad_hex: Trap0(RE_INVALID_CHARS);
910910
return ep;
911911
}
912912

913+
/***********************************************************************
**
*/	const REBYTE *Scan_Spec_Integer(const REBYTE *cp, REBINT len, REBVAL *value)
/*
**		Convert a binary (2#), octal (8#), decimal (10#) or hexadecimal
**		(0# / 16#) integer literal into an integer value.
**
**		cp    - start of the token, including its base prefix.
**		len   - length of the whole token in bytes (prefix + digits).
**		value - receives the result and is typed as REB_INTEGER.
**
**		Returns the position after the last converted byte. When the
**		prefix is not recognized nothing is consumed: the value is set
**		to 0 and cp is returned unchanged.
**
**		No validation is done here; the input is expected to be
**		pre-validated by the scanner in l-scan!
**
***********************************************************************/
{
	REBU64 accum = 0;
	REBINT radix = 0;	// 0 = prefix not recognized
	REBINT skip = 0;	// prefix length in bytes, including the '#'

	// Classify the prefix. One-character bases are decided by the first
	// byte, the two-character ones (16#, 10#) by the second byte.
	switch (*cp) {
	case '0':	// base16: 0#beaf
		radix = 16; skip = 2;
		break;
	case '2':	// base2: 2#0101
		radix = 2; skip = 2;
		break;
	case '8':	// base8: 8#140
		radix = 8; skip = 2;
		break;
	default:
		if (cp[1] == '6') {			// base16: 16#beaf
			radix = 16; skip = 3;
		}
		else if (cp[1] == '0') {	// base10: 10#123
			radix = 10; skip = 3;
		}
		break;
	}

	cp += skip;
	len -= skip;

	// Accumulate the digits, most significant first.
	switch (radix) {
	case 16:
		for (; len > 0; len--, cp++) {
			// char num encoded into lex
			accum = (accum << 4) + (Lex_Map[*cp] & LEX_VALUE);
		}
		break;
	case 2:
		for (; len > 0; len--, cp++) {
			accum *= 2;
			if (*cp == '1') accum += 1;
		}
		break;
	case 8:
		for (; len > 0; len--, cp++) {
			accum = (accum * 8) + (*cp - '0');
		}
		break;
	case 10:
		for (; len > 0; len--, cp++) {
			accum = (accum * 10) + (*cp - '0');
		}
		break;
	default:
		break;
	}

	VAL_UNT64(value) = accum;
	VAL_SET(value, REB_INTEGER);
	return cp;
}
973+
913974

914975
/***********************************************************************
915976
**

src/include/sys-scan.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ enum Value_Types {
6363
TOKEN_REFINE,
6464
TOKEN_CONSTRUCT,
6565
TOKEN_MAP,
66+
TOKEN_INTEGER_SPEC,
6667
TOKEN_MAX
6768
};
6869

@@ -99,7 +100,8 @@ const char *Token_Names[TOKEN_MAX] = {
99100
"PATH",
100101
"REFINE",
101102
"CONSTRUCT",
102-
"MAP"
103+
"MAP",
104+
"INTEGER_SPEC"
103105
};
104106
#endif
105107

src/tests/units/lexer-test.r3

+46
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,52 @@ Rebol [
438438
--test-- "-0"
439439
--assert 0 = load "-0" ;@@ https://github.com/Oldes/Rebol-issues/issues/33
440440

441+
===end-group===
442+
443+
===start-group=== "Integer (bit/octal/decimal/hexadecimal)"
444+
;@@ https://github.com/Oldes/Rebol-issues/issues/2197
445+
--test-- "base2"
446+
--assert 1 = transcode/one "2#01"
447+
--assert 1 = transcode/one "2#01[]"
448+
--assert 1 = transcode/one "2#01{}"
449+
--assert 3 = transcode/one "2#11"
450+
--assert 3 = transcode/one "2#011"
451+
--assert 3 = transcode/one "2#000011"
452+
--assert -1 = transcode/one "2#1111111111111111111111111111111111111111111111111111111111111111"
453+
--assert error? transcode/one/error "2#12"
454+
--assert error? transcode/one/error "2#11111111111111111111111111111111111111111111111111111111111111111"
455+
--assert error? transcode/one/error "-2#11"
456+
--test-- "base8"
457+
--assert 666 = transcode/one "8#1232"
458+
--assert 502 = transcode/one "8#766"
459+
--assert -1 = transcode/one "8#7777777777777777777777"
460+
--assert error? transcode/one/error "8#88"
461+
--assert error? transcode/one/error "8#77777777777777777777777"
462+
--assert error? transcode/one/error "-8#123"
463+
--test-- "base10"
464+
--assert 123 = transcode/one "10#123"
465+
--assert 999999999999999999 = transcode/one "10#999999999999999999"
466+
--assert error? transcode/one/error "10#9999999999999999999"
467+
--assert error? transcode/one/error "10#1A2"
468+
--assert error? transcode/one/error "-10#123"
469+
--test-- "base16"
470+
--assert 15 = transcode/one "0#F"
471+
--assert 15 = transcode/one "0#0F"
472+
--assert 255 = transcode/one "0#FF"
473+
--assert -1 = transcode/one "0#FFFFFFFFFFFFFFFF"
474+
--assert error? transcode/one/error "0#XA"
475+
--assert error? transcode/one/error "0#FFFFFFFFFFFFFFFFF"
476+
--assert error? transcode/one/error "-0#FF"
477+
--assert 15 = transcode/one "16#F"
478+
--assert 15 = transcode/one "16#0F"
479+
--assert 255 = transcode/one "16#FF"
480+
--assert -1 = transcode/one "16#FFFFFFFFFFFFFFFF"
481+
--assert error? transcode/one/error "16#XA"
482+
--assert error? transcode/one/error "16#FFFFFFFFFFFFFFFFF"
483+
--assert error? transcode/one/error "-16#FF"
484+
485+
486+
441487
===end-group===
442488

443489

0 commit comments

Comments
 (0)