Skip to content

Commit 5a3cbe5

Browse files
committed
FIX: LOAD Unicode encoded text with BOM
fixes: Oldes/Rebol-issues#474 fixes: Oldes/Rebol-issues#2280
1 parent 28b47d3 commit 5a3cbe5

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

src/mezz/sys-load.reb

+11-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,17 @@ load-header: function/with [
113113
; Commented assert statements are for documentation and testing.
114114
;
115115
case/all [
116-
binary? source [tmp: assert-utf8 source]
116+
binary? source [
117+
parse source [
118+
; utf-32 & utf-16 (4-byte BOMs are tested first, because FFFE is a prefix of FFFE0000)
119+
#{0000FEFF} tmp: (tmp: iconv/to tmp 'utf-32BE 'utf8)
120+
| #{FFFE0000} tmp: (tmp: iconv/to tmp 'utf-32LE 'utf8)
121+
| #{FEFF} tmp: (tmp: iconv/to tmp 'utf-16BE 'utf8)
122+
| #{FFFE} tmp: (tmp: iconv/to tmp 'utf-16LE 'utf8)
123+
| ; utf-8 (skip the BOM if found)
124+
opt [#{EFBBBF} source:] (tmp: assert-utf8 source)
125+
]
126+
]
117127
string? source [tmp: to binary! source]
118128
not data: script? tmp [ ; no script header found
119129
return either required ['no-header] [reduce [none tmp tail tmp]]

src/tests/units/series-test.r3

+19
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,25 @@ Rebol [
786786
--assert 5367801 = checksum deline str
787787
--assert 5367801 = checksum read/string %units/files/quickbrown.bin ;converts CRLF to LF
788788

789+
--test-- "LOAD Unicode encoded text with BOM"
790+
--assert "Writer" = form load #{FEFF005700720069007400650072} ;UTF-16BE
791+
--assert "Writer" = form load #{FFFE570072006900740065007200} ;UTF-16LE
792+
--assert "ěšč" = form load #{0000feff0000011b000001610000010d} ;UTF-32BE
793+
--assert "ěšč" = form load #{fffe00001b010000610100000d010000} ;UTF-32LE
794+
--assert "esc" = form load #{0000feff000000650000007300000063} ;UTF-32BE
795+
--assert "esc" = form load #{fffe0000650000007300000063000000} ;UTF-32LE
796+
--assert [a b] = load #{0000feff000000610000002000000062}
797+
--assert [a b] = load #{fffe0000610000002000000062000000}
798+
;@@ https://github.com/Oldes/Rebol-issues/issues/2280
799+
--assert "äöü" = form load #{EFBBBFC3A4C3B6C3BC} ;UTF-8
800+
--assert "aou" = form load #{EFBBBF616F75} ;UTF-8
801+
--assert "1 2" = load #{EFBBBF2231203222}
802+
--assert "1 2" = load #{2231203222}
803+
;@@ https://github.com/Oldes/Rebol-issues/issues/474
804+
write %temp #{EFBBBF612062}
805+
--assert [a b] = load %temp
806+
delete %temp
807+
789808
===end-group===
790809

791810
===start-group=== "ICONV"

0 commit comments

Comments
 (0)