Skip to content

Commit 66bf626

Browse files
committed
FIX: enhex/uri and dehex/uri with a Unicode input
1 parent c6bbbae commit 66bf626

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed

src/core/n-strings.c

+19-4
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,12 @@ static struct digest {
652652
*dp++ = (REBUNI)n;
653653
up += 3;
654654
len -= 2;
655-
} else {
655+
}
656+
else if (*up == space_char && as_uri) {
657+
*dp++ = ' ';
658+
up++;
659+
}
660+
else {
656661
*dp++ = *up++;
657662
}
658663
}
@@ -723,13 +728,13 @@ static struct digest {
723728
*dp++ = space_char;
724729
continue;
725730
}
726-
if (c == space_char) goto escaped;
731+
if (c == space_char) goto escaped_ascii;
727732
}
728733
if (Check_Bit_Cased(VAL_SERIES(val_bitset), c)) {
729734
*dp++ = c;
730735
continue;
731736
}
732-
escaped:
737+
escaped_ascii:
733738
*dp++ = escape_char;
734739
*dp++ = Hex_Digits[(c & 0xf0) >> 4];
735740
*dp++ = Hex_Digits[ c & 0xf];
@@ -742,7 +747,16 @@ static struct digest {
742747
while (up < ep) {
743748
REBUNI c = up[0];
744749
up++;
745-
750+
if (no_space) {
751+
if (c == ' ') {
752+
*dp++ = space_char;
753+
continue;
754+
}
755+
if (c == space_char) {
756+
encoded_size = Encode_UTF8_Char(encoded, c);
757+
goto escaped_uni;
758+
}
759+
}
746760
if (c >= 0x80) {// all non-ASCII characters *must* be percent encoded
747761
encoded_size = Encode_UTF8_Char(encoded, c);
748762
} else {
@@ -753,6 +767,7 @@ static struct digest {
753767
encoded[0] = cast(REBYTE, c);
754768
encoded_size = 1;
755769
}
770+
escaped_uni:
756771
for (n = 0; n < encoded_size; ++n) {
757772
*dp++ = escape_char;
758773
*dp++ = Hex_Digits[(encoded[n] & 0xf0) >> 4];

src/tests/units/series-test.r3

+11-1
Original file line numberDiff line numberDiff line change
@@ -1723,15 +1723,25 @@ Rebol [
17231723
--test-- "ENHEX/uri"
17241724
--assert "a%20b%2B" = enhex "a b+"
17251725
--assert "a+b%2B" = enhex/uri "a b+"
1726+
--assert "a%20%C3%A1%2B" = enhex "a á+"
1727+
--assert "a+%C3%A1%2B" = enhex/uri "a á+"
17261728
; quoted-printable:
17271729
--assert "a=20b_" = enhex/escape "a b_" #"="
1728-
--assert "a_b=5F" = enhex/uri/escape "a b_" #"="
1730+
--assert "a_b=5F" = enhex/uri/escape "a b_" #"="
1731+
--assert "a=20=C3=A1_" = enhex/escape "a á_" #"="
1732+
--assert "a_=C3=A1=5F" = enhex/escape/uri "a á_" #"="
1733+
17291734
--test-- "DEHEX/uri"
17301735
--assert "a+b+" = dehex "a+b%2B"
17311736
--assert "a b+" = dehex/uri "a+b%2B"
17321737
; quoted-printable:
17331738
--assert "a_b_" = dehex/escape"a_b=5F" #"="
17341739
--assert "a b_" = dehex/uri/escape"a_b=5F" #"="
1740+
; to get proper UTF-8 results, we must use binary input (for now?)
1741+
--assert "a á+" = to string! dehex to binary! "a%20%C3%A1%2B"
1742+
--assert "a á+" = to string! dehex/uri to binary! "a+%C3%A1%2B"
1743+
--assert "a á_" = to string! dehex/escape to binary! "a=20=C3=A1_" #"="
1744+
--assert "a á_" = to string! dehex/escape/uri to binary! "a_=C3=A1=5F" #"="
17351745

17361746
===end-group===
17371747

0 commit comments

Comments
 (0)