Skip to content

Commit b241076

Browse files
committed
FEAT: enhex/uri and dehex/uri for a special short variant of space encoding
1 parent 3943708 commit b241076

File tree

3 files changed

+32
-3
lines changed

3 files changed

+32
-3
lines changed

src/boot/natives.reb

+2-1
Original file line number | Diff line number | Diff line change
@@ -511,7 +511,7 @@ dehex: native [
511511
{Converts URL-style hex encoded (%xx) strings. If input is UTF-8 encode, you should first convert it to binary!}
512512
value [any-string! binary!] {The string to dehex}
513513
/escape char [char!] {Can be used to change the default escape char #"%"}
514-
/url {Decode + as a space}
514+
/uri {Decode space from a special char (#"+" by default or #"_" when escape char is #"=")}
515515
]
516516

517517
enhex: native [
@@ -521,6 +521,7 @@ enhex: native [
521521
char [char!]
522522
/except {Can be used to specify, which chars can be left unescaped}
523523
unescaped [bitset!] {By default it is URI bitset when value is file or url, else URI-Component}
524+
/uri {Encode space using a special char (#"+" by default or #"_" when escape char is #"=")}
524525
]
525526

526527
get: native [

src/core/n-strings.c

+18-2
Original file line number | Diff line number | Diff line change
@@ -613,12 +613,13 @@ static struct digest {
613613
REBVAL *arg = D_ARG(1);
614614
// REBOOL ref_escape = D_REF(2);
615615
// REBVAL *val_escape = D_ARG(3);
616-
REBOOL as_url = D_REF(4);
616+
REBOOL as_uri = D_REF(4);
617617
REBINT len = (REBINT)VAL_LEN(arg); // due to len -= 2 below
618618
REBUNI n;
619619
REBSER *ser;
620620

621621
const REBCHR escape_char = D_REF(2) ? VAL_CHAR(D_ARG(3)) : '%';
622+
const REBCHR space_char = escape_char == '=' ? '_' : '+';
622623

623624
if (VAL_BYTE_SIZE(arg)) {
624625
REBYTE *bp = VAL_BIN_DATA(arg);
@@ -629,7 +630,12 @@ static struct digest {
629630
*dp++ = (REBYTE)n;
630631
bp += 3;
631632
len -= 2;
632-
} else {
633+
}
634+
else if (*bp == space_char && as_uri) {
635+
*dp++ = ' ';
636+
bp++;
637+
}
638+
else {
633639
*dp++ = *bp++;
634640
}
635641
}
@@ -680,10 +686,12 @@ static struct digest {
680686
// REBVAL *val_escape = D_ARG(3);
681687
REBOOL ref_bitset = D_REF(4);
682688
REBVAL *val_bitset = D_ARG(5);
689+
REBOOL no_space = D_REF(6);
683690
REBYTE encoded[4];
684691
REBCNT n, encoded_size;
685692
REBSER *ser;
686693
const REBCHR escape_char = D_REF(2) ? VAL_CHAR(D_ARG(3)) : '%';
694+
const REBCHR space_char = escape_char == '=' ? '_' : '+';
687695

688696
if (!ref_bitset) {
689697
// use bitset value from system/catalog/bitsets
@@ -710,10 +718,18 @@ static struct digest {
710718
while (bp < ep) {
711719
REBYTE c = bp[0];
712720
bp++;
721+
if (no_space) {
722+
if (c == ' ') {
723+
*dp++ = space_char;
724+
continue;
725+
}
726+
if (c == space_char) goto escaped;
727+
}
713728
if (Check_Bit_Cased(VAL_SERIES(val_bitset), c)) {
714729
*dp++ = c;
715730
continue;
716731
}
732+
escaped:
717733
*dp++ = escape_char;
718734
*dp++ = Hex_Digits[(c & 0xf0) >> 4];
719735
*dp++ = Hex_Digits[ c & 0xf];

src/tests/units/series-test.r3

+12
Original file line number | Diff line number | Diff line change
@@ -1720,6 +1720,18 @@ Rebol [
17201720
--assert "12%20%61%62" = enhex/except "12 ab" charset "12"
17211721
--assert "12 %61%62" = enhex/except "12 ab" charset "12 "
17221722

1723+
--test-- "ENHEX/uri"
1724+
--assert "a%20b%2B" = enhex "a b+"
1725+
--assert "a+b%2B" = enhex/uri "a b+"
1726+
; quoted-printable:
1727+
--assert "a=20b_" = enhex/escape "a b_" #"="
1728+
--assert "a_b=5F" = enhex/uri/escape "a b_" #"="
1729+
--test-- "DEHEX/uri"
1730+
--assert "a+b+" = dehex "a+b%2B"
1731+
--assert "a b+" = dehex/uri "a+b%2B"
1732+
; quoted-printable:
1733+
--assert "a_b_" = dehex/escape"a_b=5F" #"="
1734+
--assert "a b_" = dehex/uri/escape"a_b=5F" #"="
17231735

17241736
===end-group===
17251737

0 commit comments

Comments
 (0)