Skip to content

Commit 0bb9d95

Browse files
committed
FIX: ICONV from UTF16 with BOM does not skip the BOM on Windows
fixes: #19
1 parent 2d0ebc1 commit 0bb9d95

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

src/core/u-iconv.c

+13-2
Original file line numberDiff line numberDiff line change
@@ -611,10 +611,21 @@ static REBYTE* get_codepage_name(REBVAL *cp)
611611
return R_NONE;
612612
}
613613

614+
REBYTE *bp;
615+
614616
if (cp == 1200 || cp == 1201) { // data are already wide (UTF-16LE or UTF-16BE)
615-
dst_len = VAL_LEN(data) / 2;
617+
dst_len = src_len / 2;
616618
dst_wide = Make_Series(dst_len + 1, 2, FALSE);
617-
memcpy(BIN_HEAD(dst_wide), VAL_BIN_AT(data), VAL_LEN(data));
619+
bp = VAL_BIN_AT(data);
620+
if ( src_len >= 2 && (
621+
(0xFF == bp[0] && 0xFE == bp[1]) ||
622+
(0xFE == bp[0] && 0xFF == bp[1])
623+
)) { // skip BOM
624+
src_len -= 2;
625+
dst_len -= 1;
626+
bp += 2;
627+
}
628+
memcpy(BIN_HEAD(dst_wide), bp, src_len);
618629
dst_wide->tail = dst_len;
619630
TERM_SERIES(dst_wide);
620631
if (ref_to) {

src/tests/units/series-test.r3

+4
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,10 @@ Rebol [
661661
--assert "" = iconv #{} 28592
662662
--assert "" = iconv #{} 'utf8
663663

664+
--test-- "ICONV from UTF-16 with BOM"
665+
--assert "Writer" = iconv #{FEFF005700720069007400650072} 'UTF-16BE
666+
--assert "Writer" = iconv #{FFFE570072006900740065007200} 'UTF-16LE
667+
664668

665669
--test-- "ICONV/TO (conversion to different codepage - binary result)"
666670
bin: to binary! txt ; normally conversion is done to UTF-8

0 commit comments

Comments
 (0)