|
28 | 28 | ***********************************************************************/
|
29 | 29 |
|
30 | 30 | #include "sys-core.h"
|
| 31 | +#include "sys-scan.h" |
31 | 32 | #include "sys-deci-funcs.h"
|
32 | 33 |
|
33 | 34 | REBCNT z_adler32_z(REBCNT adler, REBYTE *buf, REBCNT len);
|
@@ -599,6 +600,135 @@ static struct digest {
|
599 | 600 | }
|
600 | 601 |
|
601 | 602 |
|
| 603 | +/*********************************************************************** |
| 604 | +** |
| 605 | +*/ REBNATIVE(enhex) |
| 606 | +/* |
| 607 | +** Works for any string. |
| 608 | +** If source is unicode (wide) string, result is ASCII. |
| 609 | +** |
| 610 | +***********************************************************************/ |
| 611 | +{ |
| 612 | + REBVAL *arg = D_ARG(1); |
| 613 | + REBSER *ser; |
| 614 | + REBINT lex; |
| 615 | + REBCNT n; |
| 616 | + REBYTE encoded[4]; |
| 617 | + REBCNT encoded_size; |
| 618 | + |
| 619 | + // using FORM buffer for intermediate conversion; |
| 620 | + // counting with the worst scenario, where each single codepoint |
| 621 | + // might need 4 bytes of UTF-8 data (%XX%XX%XX%XX) |
| 622 | + REBYTE *dp = Reset_Buffer(BUF_FORM, 12 * VAL_LEN(arg)); |
| 623 | + |
| 624 | + if (VAL_BYTE_SIZE(arg)) { |
| 625 | + // byte size is 1, so the series should not contain chars with value over 0x80 |
| 626 | + // see: https://github.com/Oldes/Rebol3/commit/aa939ba41fd71efb9f0a97f85993c95129c7e515 |
| 627 | + |
| 628 | + REBYTE *bp = VAL_BIN_DATA(arg); |
| 629 | + REBYTE *ep = VAL_BIN_TAIL(arg); |
| 630 | + |
| 631 | + while (bp < ep) { |
| 632 | + REBYTE c = bp[0]; |
| 633 | + bp++; |
| 634 | + |
| 635 | + switch (GET_LEX_CLASS(c)) { |
| 636 | + case LEX_CLASS_WORD: |
| 637 | + if ( (c >= 'a' && c <= 'z') |
| 638 | + || (c >= 'A' && c <= 'Z') |
| 639 | + || c == '?' || c == '!' || c == '&' |
| 640 | + || c == '*' || c == '=' || c == '~' |
| 641 | + ) break; // no conversion |
| 642 | + goto byte_needs_encoding; |
| 643 | + case LEX_CLASS_NUMBER: |
| 644 | + break; // no conversion |
| 645 | + case LEX_CLASS_SPECIAL: |
| 646 | + lex = GET_LEX_VALUE(c); |
| 647 | + if ( lex == LEX_SPECIAL_PERCENT |
| 648 | + || lex == LEX_SPECIAL_BACKSLASH |
| 649 | + || lex == LEX_SPECIAL_LESSER |
| 650 | + || lex == LEX_SPECIAL_GREATER |
| 651 | + ) goto byte_needs_encoding; |
| 652 | + break; // no conversion |
| 653 | + case LEX_CLASS_DELIMIT: |
| 654 | + lex = GET_LEX_VALUE(c); |
| 655 | + if ( lex <= LEX_DELIMIT_RETURN |
| 656 | + || (lex >= LEX_DELIMIT_LEFT_BRACKET && lex <= LEX_DELIMIT_RIGHT_BRACE) |
| 657 | + || lex == LEX_DELIMIT_QUOTE |
| 658 | + ) goto byte_needs_encoding; |
| 659 | + break; // no conversion |
| 660 | + } |
| 661 | + // leaving char as is |
| 662 | + *dp++ = c; |
| 663 | + continue; |
| 664 | + |
| 665 | + byte_needs_encoding: |
| 666 | + *dp++ = '%'; |
| 667 | + *dp++ = Hex_Digits[(c & 0xf0) >> 4]; |
| 668 | + *dp++ = Hex_Digits[ c & 0xf]; |
| 669 | + } |
| 670 | + } |
| 671 | + else { // UNICODE variant |
| 672 | + REBUNI *up = VAL_UNI_DATA(arg); |
| 673 | + REBUNI *ep = (REBUNI*)VAL_UNI_TAIL(arg); |
| 674 | + |
| 675 | + while (up < ep) { |
| 676 | + REBUNI c = up[0]; |
| 677 | + up++; |
| 678 | + |
| 679 | + if (c > 0x80) {// all non-ASCII characters *must* be percent encoded |
| 680 | + encoded_size = Encode_UTF8_Char(encoded, c); |
| 681 | + goto char_needs_encoding; |
| 682 | + } else { |
| 683 | + encoded[0] = cast(REBYTE, c); |
| 684 | + encoded_size = 1; |
| 685 | + switch (GET_LEX_CLASS(c)) { |
| 686 | + case LEX_CLASS_WORD: |
| 687 | + if ( (c >= 'a' && c <= 'z') |
| 688 | + || (c >= 'A' && c <= 'Z') |
| 689 | + || c == '?' || c == '!' || c == '&' |
| 690 | + || c == '*' || c == '=' || c == '~' |
| 691 | + ) break; // no conversion |
| 692 | + goto char_needs_encoding; |
| 693 | + case LEX_CLASS_NUMBER: |
| 694 | + break; // no conversion |
| 695 | + case LEX_CLASS_SPECIAL: |
| 696 | + lex = GET_LEX_VALUE(c); |
| 697 | + if ( lex == LEX_SPECIAL_PERCENT |
| 698 | + || lex == LEX_SPECIAL_BACKSLASH |
| 699 | + || lex == LEX_SPECIAL_LESSER |
| 700 | + || lex == LEX_SPECIAL_GREATER |
| 701 | + ) goto char_needs_encoding; |
| 702 | + break; // no conversion |
| 703 | + case LEX_CLASS_DELIMIT: |
| 704 | + lex = GET_LEX_VALUE(c); |
| 705 | + if ( lex <= LEX_DELIMIT_RETURN |
| 706 | + || (lex >= LEX_DELIMIT_LEFT_BRACKET && lex <= LEX_DELIMIT_RIGHT_BRACE) |
| 707 | + || lex == LEX_DELIMIT_QUOTE |
| 708 | + ) goto char_needs_encoding; |
| 709 | + break; // no conversion |
| 710 | + } |
| 711 | + } |
| 712 | + // leaving char as is |
| 713 | + *dp++ = (REBYTE)c; |
| 714 | + continue; |
| 715 | + |
| 716 | + char_needs_encoding: |
| 717 | + for (n = 0; n < encoded_size; ++n) { |
| 718 | + *dp++ = '%'; |
| 719 | + *dp++ = Hex_Digits[(encoded[n] & 0xf0) >> 4]; |
| 720 | + *dp++ = Hex_Digits[ encoded[n] & 0xf]; |
| 721 | + } |
| 722 | + } |
| 723 | + } |
| 724 | + *dp = 0; |
| 725 | + ser = Copy_String(BUF_FORM, 0, dp - BIN_HEAD(BUF_FORM)); |
| 726 | + Set_Series(VAL_TYPE(arg), D_RET, ser); |
| 727 | + |
| 728 | + return R_RET; |
| 729 | +} |
| 730 | + |
| 731 | + |
602 | 732 | /***********************************************************************
|
603 | 733 | **
|
604 | 734 | */ REBNATIVE(deline)
|
|
0 commit comments