@@ -583,12 +583,13 @@ static struct digest {
583
583
{
584
584
REBVAL * arg = D_ARG (1 );
585
585
// REBOOL ref_escape = D_REF(2);
586
- REBVAL * val_escape = D_ARG (3 );
586
+ // REBVAL *val_escape = D_ARG(3);
587
+ REBOOL as_url = D_REF (4 );
587
588
REBINT len = (REBINT )VAL_LEN (arg ); // due to len -= 2 below
588
589
REBUNI n ;
589
590
REBSER * ser ;
590
591
591
- const REBCHR escape_char = ( IS_CHAR ( val_escape )) ? VAL_CHAR (val_escape ) : '%' ;
592
+ const REBCHR escape_char = D_REF ( 2 ) ? VAL_CHAR (D_ARG ( 3 ) ) : '%' ;
592
593
593
594
if (VAL_BYTE_SIZE (arg )) {
594
595
REBYTE * bp = VAL_BIN_DATA (arg );
@@ -600,7 +601,12 @@ static struct digest {
600
601
bp += 3 ;
601
602
len -= 2 ;
602
603
}
603
- else * dp ++ = * bp ++ ;
604
+ else if (as_url && * bp == '+' ) {
605
+ * dp ++ = ' ' ;
606
+ bp ++ ;
607
+ } else {
608
+ * dp ++ = * bp ++ ;
609
+ }
604
610
}
605
611
606
612
* dp = 0 ;
@@ -615,8 +621,12 @@ static struct digest {
615
621
* dp ++ = (REBUNI )n ;
616
622
up += 3 ;
617
623
len -= 2 ;
624
+ } else if (as_url && * up == '+' ) {
625
+ * dp ++ = ' ' ;
626
+ up ++ ;
627
+ } else {
628
+ * dp ++ = * up ++ ;
618
629
}
619
- else * dp ++ = * up ++ ;
620
630
}
621
631
622
632
* dp = 0 ;
@@ -634,17 +644,26 @@ static struct digest {
634
644
*/ REBNATIVE (enhex )
635
645
/*
636
646
** Works for any string.
647
+ ** Compatible with encodeURIComponent http://es5.github.io/#x15.1.3.4
637
648
** If the source is a Unicode (wide) string, the result is still ASCII: non-ASCII characters are UTF-8 encoded and every resulting byte is escaped.
649
+ ** value [any-string! binary!] {The string to encode}
650
+ ** /escape char [char!] {Can be used to change the default escape char #"%"}
651
+ ** /url {Encode space as a #"+"}
638
652
**
639
653
***********************************************************************/
640
654
{
641
655
REBVAL * arg = D_ARG (1 );
656
+ // REBOOL ref_escape = D_REF(2);
657
+ // REBVAL *val_escape = D_ARG(3);
658
+ REBOOL as_url = D_REF (4 );
642
659
REBSER * ser ;
643
660
REBINT lex ;
644
661
REBCNT n ;
645
662
REBYTE encoded [4 ];
646
663
REBCNT encoded_size ;
647
664
665
+ const REBCHR escape_char = D_REF (2 ) ? VAL_CHAR (D_ARG (3 )) : '%' ;
666
+
648
667
// using FORM buffer for intermediate conversion;
649
668
// counting with the worst scenario, where each single codepoint
650
669
// might need 4 bytes of UTF-8 data (%XX%XX%XX%XX)
@@ -661,40 +680,22 @@ static struct digest {
661
680
REBYTE c = bp [0 ];
662
681
bp ++ ;
663
682
664
- switch (GET_LEX_CLASS (c )) {
665
- case LEX_CLASS_WORD :
666
- if ( (c >= 'a' && c <= 'z' )
667
- || (c >= 'A' && c <= 'Z' )
668
- || c == '?' || c == '!' || c == '&'
669
- || c == '*' || c == '=' || c == '~' || c == '_'
670
- ) break ; // no conversion
671
- goto byte_needs_encoding ;
672
- case LEX_CLASS_NUMBER :
673
- break ; // no conversion
674
- case LEX_CLASS_SPECIAL :
675
- lex = GET_LEX_VALUE (c );
676
- if ( lex == LEX_SPECIAL_PERCENT
677
- || lex == LEX_SPECIAL_BACKSLASH
678
- || lex == LEX_SPECIAL_LESSER
679
- || lex == LEX_SPECIAL_GREATER
680
- ) goto byte_needs_encoding ;
681
- break ; // no conversion
682
- case LEX_CLASS_DELIMIT :
683
- lex = GET_LEX_VALUE (c );
684
- if ( lex <= LEX_DELIMIT_RETURN
685
- || (lex >= LEX_DELIMIT_LEFT_BRACKET && lex <= LEX_DELIMIT_RIGHT_BRACE )
686
- || lex == LEX_DELIMIT_QUOTE
687
- ) goto byte_needs_encoding ;
688
- break ; // no conversion
683
+ if ((c >= 'a' && c <= 'z' )
684
+ || (c >= 'A' && c <= 'Z' )
685
+ || (c >= '0' && c <= '9' )
686
+ || (c >= 40 && c <= 42 ) // ()*
687
+ || (c == '-' || c == '.' || c == '_' || c == '!' || c == '~' || c == '\'' )
688
+ ) { // leaving char as is
689
+ * dp ++ = c ;
690
+ continue ;
689
691
}
690
- // leaving char as is
691
- * dp ++ = c ;
692
- continue ;
693
-
694
- byte_needs_encoding :
695
- * dp ++ = '%' ;
692
+ if (as_url && c == ' ' ) {
693
+ * dp ++ = '+' ;
694
+ continue ;
695
+ }
696
+ * dp ++ = escape_char ;
696
697
* dp ++ = Hex_Digits [(c & 0xf0 ) >> 4 ];
697
- * dp ++ = Hex_Digits [ c & 0xf ];
698
+ * dp ++ = Hex_Digits [ c & 0xf ];
698
699
}
699
700
}
700
701
else { // UNICODE variant
@@ -707,44 +708,25 @@ static struct digest {
707
708
708
709
if (c >= 0x80 ) {// all non-ASCII characters *must* be percent encoded
709
710
encoded_size = Encode_UTF8_Char (encoded , c );
710
- goto char_needs_encoding ;
711
711
} else {
712
+ if ((c >= 'a' && c <= 'z' )
713
+ || (c >= 'A' && c <= 'Z' )
714
+ || (c >= '0' && c <= '9' )
715
+ || (c >= 40 && c <= 42 ) // ()*
716
+ || (c == '-' || c == '.' || c == '_' || c == '!' || c == '~' || c == '\'' )
717
+ ) { // leaving char as is
718
+ * dp ++ = (REBYTE )c ;
719
+ continue ;
720
+ }
721
+ if (as_url && c == ' ' ) {
722
+ * dp ++ = '+' ;
723
+ continue ;
724
+ }
712
725
encoded [0 ] = cast (REBYTE , c );
713
726
encoded_size = 1 ;
714
- switch (GET_LEX_CLASS (c )) {
715
- case LEX_CLASS_WORD :
716
- if ( (c >= 'a' && c <= 'z' )
717
- || (c >= 'A' && c <= 'Z' )
718
- || c == '?' || c == '!' || c == '&'
719
- || c == '*' || c == '=' || c == '~' || c == '_'
720
- ) break ; // no conversion
721
- goto char_needs_encoding ;
722
- case LEX_CLASS_NUMBER :
723
- break ; // no conversion
724
- case LEX_CLASS_SPECIAL :
725
- lex = GET_LEX_VALUE (c );
726
- if ( lex == LEX_SPECIAL_PERCENT
727
- || lex == LEX_SPECIAL_BACKSLASH
728
- || lex == LEX_SPECIAL_LESSER
729
- || lex == LEX_SPECIAL_GREATER
730
- ) goto char_needs_encoding ;
731
- break ; // no conversion
732
- case LEX_CLASS_DELIMIT :
733
- lex = GET_LEX_VALUE (c );
734
- if ( lex <= LEX_DELIMIT_RETURN
735
- || (lex >= LEX_DELIMIT_LEFT_BRACKET && lex <= LEX_DELIMIT_RIGHT_BRACE )
736
- || lex == LEX_DELIMIT_QUOTE
737
- ) goto char_needs_encoding ;
738
- break ; // no conversion
739
- }
740
727
}
741
- // leaving char as is
742
- * dp ++ = (REBYTE )c ;
743
- continue ;
744
-
745
- char_needs_encoding :
746
728
for (n = 0 ; n < encoded_size ; ++ n ) {
747
- * dp ++ = '%' ;
729
+ * dp ++ = escape_char ;
748
730
* dp ++ = Hex_Digits [(encoded [n ] & 0xf0 ) >> 4 ];
749
731
* dp ++ = Hex_Digits [ encoded [n ] & 0xf ];
750
732
}
0 commit comments