Skip to content

Commit 592454e

Browse files
committed
FIX: LF to CRLF conversion when using write with string input
resolves: Oldes/Rebol-issues#2586
1 parent da19058 commit 592454e

File tree

3 files changed

+134
-29
lines changed

3 files changed

+134
-29
lines changed

src/core/p-file.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,8 @@ REBINT Mode_Syms[] = {
340340
}
341341
else if (IS_STRING(data)) {
342342
// Auto convert string to UTF-8
343-
ser = Encode_UTF8_Value(data, len, ENCF_OS_CRLF);
343+
// Convert LF to CRLF on Windows unless the /binary refinement is used.
344+
ser = Encode_UTF8_Value(data, len, (args & AM_WRITE_BINARY) ? 0 : ENCF_OS_CRLF);
344345
file->data = ser? BIN_HEAD(ser) : VAL_BIN_DATA(data); // No encoding may be needed
345346
len = SERIES_TAIL(ser);
346347
}

src/core/s-unicode.c

+77-28
Original file line numberDiff line numberDiff line change
@@ -800,47 +800,96 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
800800
REBINT n;
801801
REBYTE buf[8];
802802
REBYTE *bs = dst; // save start
803-
REBYTE *bp = (REBYTE*)src;
804-
REBUNI *up = (REBUNI*)src;
803+
REBYTE *bp;
804+
REBUNI *up;
805805
REBLEN cnt;
806806

807807
if (len) cnt = *len;
808-
else {
809-
if (uni) {
808+
if (uni) {
809+
up = (REBUNI*)src;
810+
if (!len) {
810811
// not using wcslen, because on some systems wchar_t has 4 bytes!
811812
cnt = 0;
812813
while (*up++ != 0 && cnt < (REBLEN)max) cnt++;
813814
up = (REBUNI*)src;
814-
} else
815-
cnt = LEN_BYTES(bp);
816-
}
817-
818-
for (; max > 0 && cnt > 0; cnt--) {
819-
c = uni ? *up++ : *bp++;
820-
if (c < 0x80) {
815+
}
816+
for (; max > 0 && cnt > 0; cnt--) {
817+
c = *up++;
818+
if (c < 0x80) {
821819
#if defined(TO_WINDOWS)
822-
if (ccr && c == LF) {
823-
// If there's not room, don't try to output CRLF
824-
if (2 > max) {up--; break;}
825-
*dst++ = CR;
820+
if (ccr) {
821+
if (c == CR && up[0] == LF) {
822+
*dst++ = CR;
823+
*dst++ = LF;
824+
up++;
825+
cnt--;
826+
max -= 2;
827+
continue;
828+
}
829+
if (c == LF) {
830+
// If there's not room, don't try to output CRLF
831+
if (2 > max) { up--; break; }
832+
*dst++ = CR;
833+
max--;
834+
c = LF;
835+
}
836+
}
837+
#endif
838+
*dst++ = (REBYTE)c;
826839
max--;
827-
c = LF;
828840
}
829-
#endif
830-
*dst++ = (REBYTE)c;
831-
max--;
841+
else {
842+
n = Encode_UTF8_Char(buf, c);
843+
if (n > max) { up--; break; }
844+
memcpy(dst, buf, n);
845+
dst += n;
846+
max -= n;
847+
}
832848
}
833-
else {
834-
n = Encode_UTF8_Char(buf, c);
835-
if (n > max) {up--; break;}
836-
memcpy(dst, buf, n);
837-
dst += n;
838-
max -= n;
849+
if (len) *len = dst - bs;
850+
return up - (REBUNI*)src;
851+
}
852+
else {
853+
bp = (REBYTE*)src;
854+
if (!len) cnt = LEN_BYTES(bp);
855+
for (; max > 0 && cnt > 0; cnt--) {
856+
c = *bp++;
857+
if (c < 0x80) {
858+
#if defined(TO_WINDOWS)
859+
if (ccr) {
860+
if (c == CR && bp[0] == LF) {
861+
*dst++ = CR;
862+
*dst++ = LF;
863+
bp++;
864+
cnt--;
865+
max -= 2;
866+
continue;
867+
}
868+
if (c == LF) {
869+
// If there's not room, don't try to output CRLF
870+
if (2 > max) { bp--; break; }
871+
*dst++ = CR;
872+
max--;
873+
c = LF;
874+
}
875+
}
876+
#endif
877+
*dst++ = (REBYTE)c;
878+
max--;
879+
}
880+
else {
881+
n = Encode_UTF8_Char(buf, c);
882+
if (n > max) { bp--; break; }
883+
memcpy(dst, buf, n);
884+
dst += n;
885+
max -= n;
886+
}
839887
}
888+
if (len) *len = dst - bs;
889+
return bp - (REBYTE*)src;
840890
}
841891

842-
if (len) *len = dst - bs;
843-
return uni ? up - (REBUNI*)src : bp - (REBYTE*)src;
892+
844893
}
845894

846895

@@ -928,7 +977,7 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
928977
else {
929978
REBYTE *bp = (REBYTE*)src;
930979

931-
if (Is_Not_ASCII(bp, len)) {
980+
if (ccr || Is_Not_ASCII(bp, len)) {
932981
size = Length_As_UTF8((REBUNI*)bp, len, FALSE, (REBOOL)ccr);
933982
cp = Reset_Buffer(ser, size + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
934983
Encode_UTF8(cp, size, bp, &len, FALSE, ccr);

src/tests/units/port-test.r3

+55
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,61 @@ if system/platform = 'Windows [
276276
--assert "a^/b^/c" = read/string %units/files/issue-622.txt
277277
delete %units/files/issue-622.txt
278278

279+
--test-- "read write CRLF conversion"
280+
;@@ https://github.com/Oldes/Rebol-issues/issues/2586
281+
;; These tests use #"a" and #"á" so that both plain and wide strings are exercised internally
282+
;; write/binary keeps the linefeeds without modifications
283+
--assert #{0A} = read write/binary %tmp next "a^/"
284+
--assert #{0A} = read write/binary %tmp next "á^/"
285+
--assert #{0D0A} = read write/binary %tmp next "a^M^/"
286+
--assert #{0D0A} = read write/binary %tmp next "á^M^/"
287+
--assert #{0D0D0A} = read write/binary %tmp next "a^M^M^/"
288+
--assert #{0D0D0A} = read write/binary %tmp next "á^M^M^/"
289+
--assert #{0D0A0A} = read write/binary %tmp next "a^M^/^/"
290+
--assert #{0D0A0A} = read write/binary %tmp next "á^M^/^/"
291+
;; it is possible to get the original string using implicit conversion
292+
--assert "a^/" = to string! read write/binary %tmp "a^/"
293+
--assert "á^/" = to string! read write/binary %tmp "á^/"
294+
--assert "a^M^/" = to string! read write/binary %tmp "a^M^/"
295+
--assert "á^M^/" = to string! read write/binary %tmp "á^M^/"
296+
--assert "a^M^M^/" = to string! read write/binary %tmp "a^M^M^/"
297+
--assert "á^M^M^/" = to string! read write/binary %tmp "á^M^M^/"
298+
--assert "a^M^/^/" = to string! read write/binary %tmp "a^M^/^/"
299+
--assert "á^M^/^/" = to string! read write/binary %tmp "á^M^/^/"
300+
either system/platform = 'Windows [
301+
;; on Windows by default write converts LF to CRLF (if the input is string!)
302+
--assert #{0D0A} = read write %tmp next "a^/"
303+
--assert #{0D0A} = read write %tmp next "á^/"
304+
;; when there is already a CRLF, it does not write it as CRCRLF!
305+
--assert #{0D0A} = read write %tmp next "a^M^/"
306+
--assert #{0D0A} = read write %tmp next "á^M^/"
307+
--assert #{0D0D0A} = read write %tmp next "a^M^M^/"
308+
--assert #{0D0D0A} = read write %tmp next "á^M^M^/"
309+
--assert #{0D0A0D0A} = read write %tmp next "a^M^/^/"
310+
--assert #{0D0A0D0A} = read write %tmp next "á^M^/^/"
311+
][
312+
;; on all other platforms it makes no modifications!
313+
--assert #{0A} = read write %tmp next "a^/"
314+
--assert #{0A} = read write %tmp next "á^/"
315+
--assert #{0D0A} = read write %tmp next "a^M^/"
316+
--assert #{0D0A} = read write %tmp next "á^M^/"
317+
--assert #{0D0D0A} = read write %tmp next "a^M^M^/"
318+
--assert #{0D0D0A} = read write %tmp next "á^M^M^/"
319+
--assert #{0D0A0A} = read write %tmp next "a^M^/^/"
320+
--assert #{0D0A0A} = read write %tmp next "á^M^/^/"
321+
]
322+
;; read/string converts CRLF (or plain CR) to LF
323+
--assert "^/" = read/string write/binary %tmp next "a^/"
324+
--assert "^/" = read/string write/binary %tmp next "á^/"
325+
--assert "^/" = read/string write/binary %tmp next "a^M"
326+
--assert "^/" = read/string write/binary %tmp next "á^M"
327+
--assert "^/" = read/string write/binary %tmp next "a^M^/"
328+
--assert "^/" = read/string write/binary %tmp next "á^M^/"
329+
--assert "^/^/" = read/string write/binary %tmp next "a^M^M^/"
330+
--assert "^/^/" = read/string write/binary %tmp next "á^M^M^/"
331+
--assert "^/^/" = read/string write/binary %tmp next "a^M^/^/"
332+
--assert "^/^/" = read/string write/binary %tmp next "á^M^/^/"
333+
279334

280335
--test-- "write file result - wish/2337"
281336
;@@ https://github.com/Oldes/Rebol-issues/issues/2337

0 commit comments

Comments
 (0)