@@ -600,10 +600,6 @@ static struct digest {
600
600
* dp ++ = (REBYTE )n ;
601
601
bp += 3 ;
602
602
len -= 2 ;
603
- }
604
- else if (as_url && * bp == '+' ) {
605
- * dp ++ = ' ' ;
606
- bp ++ ;
607
603
} else {
608
604
* dp ++ = * bp ++ ;
609
605
}
@@ -621,9 +617,6 @@ static struct digest {
621
617
* dp ++ = (REBUNI )n ;
622
618
up += 3 ;
623
619
len -= 2 ;
624
- } else if (as_url && * up == '+' ) {
625
- * dp ++ = ' ' ;
626
- up ++ ;
627
620
} else {
628
621
* dp ++ = * up ++ ;
629
622
}
@@ -644,26 +637,35 @@ static struct digest {
644
637
*/ REBNATIVE (enhex )
645
638
/*
646
639
** Works for any string.
647
- ** Compatible with encodeURIComponent http ://es5.github.io/#x15.1.3.4
640
+ ** Compatible with https://tc39.es/ecma262/#sec-encodeuri-uri
648
641
** If source is unicode (wide) string, result is ASCII.
649
642
** value [any-string! binary!] {The string to encode}
650
643
** /escape char [char!] {Can be used to change the default escape char #"%"}
651
- ** /url {Encode space as a #"+"}
644
+ ** /unescaped set [bitset!] {Can be used to specify which chars can be left unescaped}
645
+ **
652
646
**
653
647
***********************************************************************/
654
648
{
655
649
REBVAL * arg = D_ARG (1 );
656
650
// REBOOL ref_escape = D_REF(2);
657
651
// REBVAL *val_escape = D_ARG(3);
658
- REBOOL as_url = D_REF (4 );
659
- REBSER * ser ;
660
- REBINT lex ;
661
- REBCNT n ;
652
+ REBOOL ref_bitset = D_REF (4 );
653
+ REBVAL * val_bitset = D_ARG (5 );
662
654
REBYTE encoded [4 ];
663
- REBCNT encoded_size ;
664
-
655
+ REBCNT n , encoded_size ;
656
+ REBSER * ser ;
665
657
const REBCHR escape_char = D_REF (2 ) ? VAL_CHAR (D_ARG (3 )) : '%' ;
666
658
659
+ if (!ref_bitset ) {
660
+ // use bitset value from system/catalog/bitsets
661
+ // use URI bitset when value is file or url
662
+ // else use URI_COMPONENT
663
+ val_bitset = Get_Object (
664
+ Get_System (SYS_CATALOG , CAT_BITSETS ),
665
+ (IS_URL (arg ) || IS_FILE (arg )) ? CAT_BITSETS_URI : CAT_BITSETS_URI_COMPONENT
666
+ );
667
+ }
668
+
667
669
// using FORM buffer for intermediate conversion;
668
670
// counting with the worst scenario, where each single codepoint
669
671
// might need 4 bytes of UTF-8 data (%XX%XX%XX%XX)
@@ -679,20 +681,10 @@ static struct digest {
679
681
while (bp < ep ) {
680
682
REBYTE c = bp [0 ];
681
683
bp ++ ;
682
-
683
- if ((c >= 'a' && c <= 'z' )
684
- || (c >= 'A' && c <= 'Z' )
685
- || (c >= '0' && c <= '9' )
686
- || (c >= 40 && c <= 42 ) // ()*
687
- || (c == '-' || c == '.' || c == '_' || c == '!' || c == '~' || c == '\'' )
688
- ) { // leaving char as is
684
+ if (Check_Bit_Cased (VAL_SERIES (val_bitset ), c )) {
689
685
* dp ++ = c ;
690
686
continue ;
691
687
}
692
- if (as_url && c == ' ' ) {
693
- * dp ++ = '+' ;
694
- continue ;
695
- }
696
688
* dp ++ = escape_char ;
697
689
* dp ++ = Hex_Digits [(c & 0xf0 ) >> 4 ];
698
690
* dp ++ = Hex_Digits [ c & 0xf ];
@@ -709,19 +701,10 @@ static struct digest {
709
701
if (c >= 0x80 ) {// all non-ASCII characters *must* be percent encoded
710
702
encoded_size = Encode_UTF8_Char (encoded , c );
711
703
} else {
712
- if ((c >= 'a' && c <= 'z' )
713
- || (c >= 'A' && c <= 'Z' )
714
- || (c >= '0' && c <= '9' )
715
- || (c >= 40 && c <= 42 ) // ()*
716
- || (c == '-' || c == '.' || c == '_' || c == '!' || c == '~' || c == '\'' )
717
- ) { // leaving char as is
704
+ if (Check_Bit_Cased (VAL_SERIES (val_bitset ), c )) {
718
705
* dp ++ = (REBYTE )c ;
719
706
continue ;
720
707
}
721
- if (as_url && c == ' ' ) {
722
- * dp ++ = '+' ;
723
- continue ;
724
- }
725
708
encoded [0 ] = cast (REBYTE , c );
726
709
encoded_size = 1 ;
727
710
}
0 commit comments