Skip to content

Commit 62d8db7

Browse files
committed
CHANGE: using Gregg Irwin's split updates
Taken from: https://gist.github.com/greggirwin/66d7c6892fc310097cd91ab354189542
1 parent f3e2a99 commit 62d8db7

File tree

2 files changed

+131
-41
lines changed

2 files changed

+131
-41
lines changed

src/mezz/mezz-series.reb

+58-29
Original file line numberDiff line numberDiff line change
@@ -486,28 +486,55 @@ printf: func [
486486
print format :fmt :val
487487
]
488488

489-
split: func [
489+
split: function [
490490
"Split a series into pieces; fixed or variable size, fixed number, or at delimiters"
491-
series [series!] "The series to split"
492-
dlm [block! integer! char! bitset! any-string!] "Split size, delimiter(s), or rule(s)."
493-
/skip "If dlm is an integer, split into n pieces, rather than pieces of length n."
494-
/local size piece-size count mk1 mk2 res fill-val add-fill-val
491+
series [series!] "The series to split"
492+
;!! If we support /at, dlm could be any-value.
493+
dlm ;[block! integer! char! bitset! any-string! any-function!] "Split size, delimiter(s), predicate, or rule(s)."
494+
/parts "If dlm is an integer, split into n pieces, rather than pieces of length n."
495+
/at "Split into 2, at the index position if an integer or the first occurrence of the dlm"
495496
][
496-
either all [block? dlm parse dlm [some integer!]] [
497+
if any-function? :dlm [
498+
res: reduce [ copy [] copy [] ]
499+
foreach value series [
500+
append/only pick res make logic! dlm :value :value
501+
]
502+
return res
503+
]
504+
if at [
505+
return reduce either integer? dlm [
506+
[
507+
copy/part series dlm
508+
copy lib/at series dlm + 1
509+
]
510+
][
511+
;-- Without adding a /tail refinement, we don't know if they want
512+
; to split at the head or tail of the delimiter, so we'll exclude
513+
; the delimiter from the result entirely. They know what the dlm
514+
; was that they passed in, so they can add it back to either side
515+
; of the result if they want to.
516+
[
517+
copy/part series find series :dlm
518+
copy find/tail series :dlm
519+
]
520+
]
521+
]
522+
;print ['split 'parts? parts mold series mold dlm]
523+
either all [block? dlm parse dlm [some integer!]][
497524
map-each len dlm [
498525
either positive? len [
499526
copy/part series series: skip series len
500-
] [
527+
][
501528
series: skip series negate len
502-
; return unset so that nothing is added to output
503-
()
529+
() ;-- return unset so that nothing is added to output
504530
]
505531
]
506532
][
507-
size: dlm ; alias for readability
533+
size: dlm ;-- alias for readability
508534
res: collect [
509-
parse/all series case [
510-
all [integer? size skip] [
535+
;print ['split 'parts? parts mold series mold dlm newline]
536+
parse series case [
537+
all [integer? dlm parts][
511538
if size < 1 [cause-error 'Script 'invalid-arg size]
512539
count: size - 1
513540
piece-size: to integer! round/down divide length? series size
@@ -521,36 +548,38 @@ split: func [
521548
if size < 1 [cause-error 'Script 'invalid-arg size]
522549
[any [copy series 1 size skip (keep/only series)]]
523550
]
524-
'else [ ; = any [bitset? dlm any-string? dlm char? dlm]
551+
'else [ ;-- = any [bitset? dlm any-string? dlm char? dlm]
525552
[any [mk1: some [mk2: dlm break | skip] (keep/only copy/part mk1 mk2)]]
526553
]
527554
]
528555
]
529-
;-- Special processing, to handle cases where the spec'd more items in
530-
; /skip than the series contains (so we want to append empty items),
556+
;-- Special processing, to handle cases where they spec'd more items in
557+
; /parts than the series contains (so we want to append empty items),
531558
; or where the dlm was a char/string/charset and it was the last char
532559
; (so we want to append an empty field that the above rule misses).
533-
fill-val: does [copy either any-block? series [[]] [""]]
560+
fill-val: does [copy either any-block? series [ [] ][ "" ]]
534561
add-fill-val: does [append/only res fill-val]
535562
case [
536-
all [integer? size skip] [
537-
; If the result is too short, i.e., less items than 'size, add
538-
; empty items to fill it to 'size.
539-
; We loop here, because insert/dup doesn't copy the value inserted.
563+
all [integer? size parts][
564+
;-- If the result is too short, i.e., fewer items than 'size, add
565+
; empty items to fill it to 'size.
566+
; We loop here, because insert/dup doesn't copy the value inserted.
540567
if size > length? res [
541568
loop (size - length? res) [add-fill-val]
542569
]
543570
]
544-
; integer? dlm [
545-
; ]
546-
'else [ ; = any [bitset? dlm any-string? dlm char? dlm]
547-
; If the last thing in the series is a delimiter, there is an
548-
; implied empty field after it, which we add here.
571+
;-- integer? size
572+
; If they spec'd an integer size, but did not use /parts, there is
573+
; no special filing to be done. The final element may be less than
574+
; size, which is intentional.
575+
'else [ ;-- = any [bitset? dlm any-string? dlm char? dlm]
576+
;-- If the last thing in the series is a delimiter, there is an
577+
; implied empty field after it, which we add here.
549578
case [
550579
bitset? dlm [
551-
; ATTEMPT is here because LAST will return NONE for an
552-
; empty series, and finding none in a bitest is not allowed.
553-
if attempt [find dlm last series] [add-fill-val]
580+
;-- ATTEMPT is here because LAST will return NONE for an
581+
; empty series, and finding none in a bitest is not allowed.
582+
if attempt [find dlm last series][add-fill-val]
554583
]
555584
char? dlm [
556585
if dlm = last series [add-fill-val]
@@ -559,7 +588,7 @@ split: func [
559588
if all [
560589
find series dlm
561590
empty? find/last/tail series dlm
562-
] [add-fill-val]
591+
][add-fill-val]
563592
]
564593
]
565594
]

src/tests/units/series-test.r3

+73-12
Original file line numberDiff line numberDiff line change
@@ -1501,13 +1501,13 @@ Rebol [
15011501
--test-- "split block!"
15021502
;@@ https://github.com/Oldes/Rebol-issues/issues/2051
15031503
b: [a b c d e f]
1504-
--assert [[a b c d e f]] = split/skip b 1
1505-
--assert [[a b c] [d e f]] = split/skip b 2
1506-
--assert [[a b] [c d] [e f]] = split/skip b 3
1507-
--assert [[a] [b] [c] [d e f]] = split/skip b 4
1508-
--assert [[a] [b] [c] [d] [e f]] = split/skip b 5
1509-
--assert [[a] [b] [c] [d] [e] [f]] = split/skip b 6
1510-
--assert [[a] [b] [c] [d] [e] [f] []] = split/skip b 7
1504+
--assert [[a b c d e f]] = split/parts b 1
1505+
--assert [[a b c] [d e f]] = split/parts b 2
1506+
--assert [[a b] [c d] [e f]] = split/parts b 3
1507+
--assert [[a] [b] [c] [d e f]] = split/parts b 4
1508+
--assert [[a] [b] [c] [d] [e f]] = split/parts b 5
1509+
--assert [[a] [b] [c] [d] [e] [f]] = split/parts b 6
1510+
--assert [[a] [b] [c] [d] [e] [f] []] = split/parts b 7
15111511

15121512
--test-- "split string!"
15131513
;@@ https://github.com/Oldes/Rebol-issues/issues/1886
@@ -1519,13 +1519,74 @@ Rebol [
15191519
--assert ["abc" "de" "fghi" "jk"] = split "abc|de/fghi:jk" charset "|/:"
15201520
--assert ["abc" "de" "fghi" "jk"] = split "abc^M^Jde^Mfghi^Jjk" [crlf | #"^M" | newline]
15211521
--assert ["abc" "de" "fghi" "jk"] = split "abc de fghi jk" [some #" "]
1522-
--assert ["12345678" "12345678"] = split/skip "1234567812345678" 2
1523-
--assert ["12345" "67812" "345678"] = split/skip "1234567812345678" 3
1524-
--assert ["123" "456" "781" "234" "5678"] = split/skip "1234567812345678" 5
1522+
--assert ["12345678" "12345678"] = split/parts "1234567812345678" 2
1523+
--assert ["12345" "67812" "345678"] = split/parts "1234567812345678" 3
1524+
--assert ["123" "456" "781" "234" "5678"] = split/parts "1234567812345678" 5
15251525
;@@ https://github.com/Oldes/Rebol-issues/issues/573
1526-
--assert ["c" "c"] = split "c c" " "
1527-
--assert ["1,2"] = split "1,2" " "
1526+
--assert ["c" "c"] = split "c c" " "
1527+
--assert ["1,2"] = split "1,2" " "
15281528
--assert ["c" "c "] = split "c,c " ","
1529+
--test-- "split gregg 1"
1530+
;@@ https://gist.github.com/greggirwin/66d7c6892fc310097cd91ab354189542
1531+
--assert (split "1234567812345678" 4) = ["1234" "5678" "1234" "5678"]
1532+
--assert (split "1234567812345678" 3) = ["123" "456" "781" "234" "567" "8"]
1533+
--assert (split "1234567812345678" 5) = ["12345" "67812" "34567" "8"]
1534+
--assert (split/parts [1 2 3 4 5 6] 2) = [[1 2 3] [4 5 6]]
1535+
--assert (split/parts "1234567812345678" 2) = ["12345678" "12345678"]
1536+
--assert (split/parts "1234567812345678" 3) = ["12345" "67812" "345678"]
1537+
--assert (split/parts "1234567812345678" 5) = ["123" "456" "781" "234" "5678"]
1538+
1539+
--test-- "split gregg 2"
1540+
; Dlm longer than series
1541+
--assert (split/parts "123" 6) = ["1" "2" "3" "" "" ""] ;or ["1" "2" "3"]
1542+
--assert (split/parts [1 2 3] 6) = [[1] [2] [3] [] [] []] ;or [1 2 3]
1543+
1544+
--test-- "split gregg 3"
1545+
--assert (split [1 2 3 4 5 6] [2 1 3]) = [[1 2] [3] [4 5 6]]
1546+
--assert (split "1234567812345678" [4 4 2 2 1 1 1 1]) = ["1234" "5678" "12" "34" "5" "6" "7" "8"]
1547+
--assert (split first [(1 2 3 4 5 6 7 8 9)] 3) = [(1 2 3) (4 5 6) (7 8 9)]
1548+
--assert (split #{0102030405060708090A} [4 3 1 2]) = [#{01020304} #{050607} #{08} #{090A}]
1549+
--assert (split [1 2 3 4 5 6] [2 1]) = [[1 2] [3]]
1550+
--assert (split [1 2 3 4 5 6] [2 1 3 5]) = [[1 2] [3] [4 5 6] []]
1551+
--assert (split [1 2 3 4 5 6] [2 1 6]) = [[1 2] [3] [4 5 6]]
1552+
1553+
; Old design for negative skip vals
1554+
; --assert (split [1 2 3 4 5 6] [3 2 2 -2 2 -4 3]] [[1 2 3] [4 5] [6] [5 6] [3 4 5]]
1555+
; New design for negative skip vals
1556+
--assert (split [1 2 3 4 5 6] [2 -2 2]) = [[1 2] [5 6]]
1557+
1558+
--test-- "split gregg 4"
1559+
--assert (split "abc,de,fghi,jk" #",") = ["abc" "de" "fghi" "jk"]
1560+
--assert (split "abc<br>de<br>fghi<br>jk" <br>) = ["abc" "de" "fghi" "jk"]
1561+
1562+
--assert (split "a.b.c" ".") = ["a" "b" "c"]
1563+
--assert (split "c c" " ") = ["c" "c"]
1564+
--assert (split "1,2,3" " ") = ["1,2,3"]
1565+
--assert (split "1,2,3" ",") = ["1" "2" "3"]
1566+
--assert (split "1,2,3," ",") = ["1" "2" "3" ""]
1567+
--assert (split "1,2,3," charset ",.") = ["1" "2" "3" ""]
1568+
--assert (split "1.2,3." charset ",.") = ["1" "2" "3" ""]
1569+
1570+
--assert (split "-a-a" ["a"]) = ["-" "-"]
1571+
--assert (split "-a-a'" ["a"]) = ["-" "-" "'"]
1572+
1573+
--test-- "split gregg 5"
1574+
--assert (split "abc|de/fghi:jk" charset "|/:") = ["abc" "de" "fghi" "jk"]
1575+
--assert (split "abc^M^Jde^Mfghi^Jjk" [crlf | #"^M" | newline]) = ["abc" "de" "fghi" "jk"]
1576+
--assert (split "abc de fghi jk" [some #" "]) = ["abc" "de" "fghi" "jk"]
1577+
1578+
--test-- "split gregg 6"
1579+
--assert (split [1 2 3 4 5 6] :even?) = [[2 4 6] [1 3 5]]
1580+
--assert (split [1 2 3 4 5 6] :odd?) = [[1 3 5] [2 4 6]]
1581+
--assert (split [1 2.3 /a word "str" #iss x: :y] :refinement?) = [[/a] [1 2.3 word "str" #iss x: :y]]
1582+
--assert (split [1 2.3 /a word "str" #iss x: :y] :number?) = [[1 2.3] [/a word "str" #iss x: :y]]
1583+
--assert (split [1 2.3 /a word "str" #iss x: :y] :any-word?) = [[/a word #iss x: :y] [1 2.3 "str"]]
1584+
1585+
--test-- "split gregg 7"
1586+
--assert (split/at [1 2.3 /a word "str" #iss x: :y] 4) = [[1 2.3 /a word] ["str" #iss x: :y]]
1587+
;!! Splitting /at with a non-integer excludes the delimiter from the result
1588+
--assert (split/at [1 2.3 /a word "str" #iss x: :y] "str") = [[1 2.3 /a word] [#iss x: :y]]
1589+
--assert (split/at [1 2.3 /a word "str" #iss x: :y] 'word) = [[1 2.3 /a] ["str" #iss x: :y]]
15291590

15301591
===end-group===
15311592

0 commit comments

Comments
 (0)