Skip to content

Commit a7d02da

Browse files
committed
FEAT: loading .txt files with unicode encodings
Text files (.txt) are now loaded as a plain string rather than as Rebol code. Unicode decoding is handled correctly when the encoding is specified by a BOM. Text files (.txt) are always saved as UTF-8. Fixes: Oldes/Rebol-issues#2424. Related to: Oldes/Rebol-issues#1937.
1 parent afebd21 commit a7d02da

File tree

4 files changed

+49
-1
lines changed

4 files changed

+49
-1
lines changed

src/core/b-init.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -666,10 +666,12 @@ extern const REBYTE Str_Banner[];
666666
}
667667

668668
if (codi->action == CODI_DECODE) {
669-
return CODI_TEXT;
669+
codi->other = (void*)Decode_UTF_String(codi->data, codi->len, -1, TRUE);
670+
return CODI_STRING;
670671
}
671672

672673
if (codi->action == CODI_ENCODE) {
674+
//O: This does not happen, because n-system.c only allows an image to be encoded!
673675
return CODI_BINARY;
674676
}
675677

src/core/n-system.c

+3
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,9 @@ char *evoke_help = "Evoke values:\n"
476476
case CODI_BLOCK:
477477
Set_Block(D_RET, codi.other);
478478
break;
479+
case CODI_STRING:
480+
Set_String(D_RET, codi.other);
481+
break;
479482

480483
default:
481484
Trap0(RE_BAD_MEDIA); // need better!!!

src/include/reb-codec.h

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ enum {
7575
CODI_IMAGE,
7676
CODI_SOUND,
7777
CODI_BLOCK,
78+
CODI_STRING, // result is in codi->other as a series (no need to copy).
7879
};
7980

8081
// Codec commands:

src/tests/units/codecs-test.r3

+42
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,48 @@ Rebol [
7373
--assert error? try [save %temp.jpg #{00}]
7474
===end-group===
7575

76+
77+
===start-group=== "TEXT codec"
78+
;- loading a *.txt file also converts CRLF to LF
79+
;- it is actually the same as using read/string
80+
;@@ https://github.com/Oldes/Rebol-issues/issues/2424
81+
--test-- "load UCS16-LE txt"
82+
--assert all [
83+
string? try [str: load %units/files/issue-2186-UTF16-LE.txt]
84+
11709824 = checksum str
85+
]
86+
--test-- "load UCS16-BE txt"
87+
--assert all [
88+
string? try [str: load %units/files/issue-2186-UTF16-BE.txt]
89+
11709824 = checksum str
90+
]
91+
--test-- "load UCS32-LE txt"
92+
--assert all [
93+
string? try [str: load %units/files/issue-2186-UTF32-LE.txt]
94+
11709824 = checksum str
95+
]
96+
--test-- "load UCS32-BE txt"
97+
--assert all [
98+
string? try [str: load %units/files/issue-2186-UTF32-BE.txt]
99+
11709824 = checksum str
100+
]
101+
--test-- "load/save issue! as .txt"
102+
;@@ https://github.com/Oldes/Rebol-issues/issues/1937
103+
;- notice that the result is a string! and not an issue!
104+
--assert "#00000002" = load save %tmp.txt #00000002
105+
--assert #00000002 = load save %tmp.reb #00000002
106+
--test-- "load/save block! as .txt"
107+
--assert {1 "aha"} = load save %tmp.txt [1 "aha"]
108+
--assert [1 "aha"] = load save %tmp.reb [1 "aha"]
109+
--test-- "load/save binary! as .txt"
110+
--assert "12" = load save %tmp.txt #{3132}
111+
--assert #{3132} = load save %tmp.reb #{3132}
112+
113+
delete %tmp.txt
114+
delete %tmp.reb
115+
116+
===end-group===
117+
76118
if find codecs 'wav [
77119
codecs/wav/verbose: 3
78120
===start-group=== "WAV codec"

0 commit comments

Comments
 (0)