Skip to content

Commit 3a72825

Browse files
committed
FIX: use the replacement character for all characters outside the Basic Multilingual Plane
1 parent 3eb0b1b commit 3a72825

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

src/core/s-unicode.c

+1
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
446446
}
447447
*str = src;
448448
if (state != UTF8_ACCEPT) return 0; //UNI_REPLACEMENT_CHAR;
449+
if (codepoint >= UNI_MAX_BMP) return UNI_REPLACEMENT_CHAR;
449450
return codepoint;
450451
#else
451452
const UTF8 *source = *str;

src/tests/units/make-test.r3

+5
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,11 @@ Rebol [
737737
--assert "make bitset! #{FF}" = try [to string! quote #[bitset! #{FF}] ] ; bitset!
738738
--assert "make image! [1x1 #{FFFFFF}]" = try [to string! quote #[image! 1x1 #{FFFFFF}] ] ; image!
739739
--assert "integer! percent!" = try [to string! quote #[typeset! [integer! percent!]] ] ; typeset!
740+
741+
--test-- "to string! with chars outside the BMP"
742+
;; Rebol currently supports only 16-bit Unicode (the BMP), so code points outside it are converted to U+FFFD (UTF-8: EF BF BD).
743+
--assert #{EFBFBD} = to binary! to string! #{F09F989A}
744+
--assert #{EFBFBD} = to binary! to string! #{F09F989C}
740745
===end-group===
741746

742747
===start-group=== "make/to tag"

0 commit comments

Comments
 (0)