Skip to content

Commit 88b4c94

Browse files
authored
fix issue with incorrect matches for some surrogate chars (#1360)
1 parent cc70bc2 commit 88b4c94

File tree

4 files changed

+49
-6
lines changed

4 files changed

+49
-6
lines changed

release-notes/CREDITS-2.x

+5
Original file line number | Diff line number | Diff line change
@@ -456,3 +456,8 @@ Jared Stehler (@jaredstehler)
456456
Zhanghao (@zhangOranges)
457457
* Contributed #1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
458458
(2.18.0)
459+
460+
Justin Gosselin (@jgosselin-accesso)
461+
* Reported #1359: Non-surrogate characters being incorrectly combined when
462+
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
463+
(2.18.2)

release-notes/VERSION-2.x

+4
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,10 @@ a pure JSON library.
1818

1919
#1353: Use fastdoubleparser 1.0.90
2020
(fixed by @pjfanning)
21+
#1359: Non-surrogate characters being incorrectly combined when
22+
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
23+
(reported by Justin G)
24+
(fixed by @pjfanning)
2125

2226
2.18.0 (26-Sep-2024)
2327

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

+5 -4
Original file line number | Diff line number | Diff line change
@@ -1509,7 +1509,7 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
15091509
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15101510
} else {
15111511
// 3- or 4-byte character
1512-
if (_isSurrogateChar(ch)) {
1512+
if (_isStartOfSurrogatePair(ch)) {
15131513
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
15141514
if (combineSurrogates && offset < end) {
15151515
char highSurrogate = (char) ch;
@@ -1557,7 +1557,7 @@ private final void _writeStringSegment2(final String text, int offset, final int
15571557
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15581558
} else {
15591559
// 3- or 4-byte character
1560-
if (_isSurrogateChar(ch)) {
1560+
if (_isStartOfSurrogatePair(ch)) {
15611561
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
15621562
if (combineSurrogates && offset < end) {
15631563
char highSurrogate = (char) ch;
@@ -2247,8 +2247,9 @@ private byte[] getHexBytes() {
22472247
}
22482248

22492249
// @since 2.18
2250-
private boolean _isSurrogateChar(int ch) {
2251-
return (ch & 0xD800) == 0xD800;
2250+
private static boolean _isStartOfSurrogatePair(final int ch) {
2251+
// In 0xD800 - 0xDBFF range?
2252+
return (ch & 0xFC00) == 0xD800;
22522253
}
22532254
}
22542255

src/test/java/com/fasterxml/jackson/core/json/Surrogate223Test.java → src/test/java/com/fasterxml/jackson/core/write/SurrogateWrite223Test.java

+35 -2
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,19 @@
1-
package com.fasterxml.jackson.core.json;
1+
package com.fasterxml.jackson.core.write;
22

33
import java.io.ByteArrayOutputStream;
44
import java.io.StringWriter;
55
import java.io.Writer;
66

77
import com.fasterxml.jackson.core.*;
8+
import com.fasterxml.jackson.core.json.JsonWriteFeature;
89

910
import org.junit.jupiter.api.Test;
1011

1112
import static org.junit.jupiter.api.Assertions.assertEquals;
1213
import static org.junit.jupiter.api.Assertions.assertFalse;
14+
import static org.junit.jupiter.api.Assertions.assertTrue;
1315

14-
class Surrogate223Test extends JUnit5TestBase
16+
class SurrogateWrite223Test extends JUnit5TestBase
1517
{
1618
private final JsonFactory DEFAULT_JSON_F = newStreamFactory();
1719

@@ -90,4 +92,35 @@ void surrogatesCharBacked() throws Exception
9092
assertToken(JsonToken.END_ARRAY, p.nextToken());
9193
p.close();
9294
}
95+
96+
//https://github.com/FasterXML/jackson-core/issues/1359
97+
@Test
98+
void checkNonSurrogates() throws Exception {
99+
JsonFactory f = JsonFactory.builder()
100+
.enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
101+
.build();
102+
ByteArrayOutputStream out = new ByteArrayOutputStream();
103+
try (JsonGenerator gen = f.createGenerator(out)) {
104+
gen.writeStartObject();
105+
106+
// Inside the BMP, beyond surrogate block; 0xFF0C - full-width comma
107+
gen.writeStringField("test_full_width", "foo" + new String(Character.toChars(0xFF0C)) + "bar");
108+
109+
// Inside the BMP, beyond surrogate block; 0xFE6A - small form percent
110+
gen.writeStringField("test_small_form", "foo" + new String(Character.toChars(0xFE6A)) + "bar");
111+
112+
// Inside the BMP, before the surrogate block; 0x3042 - Hiragana A
113+
gen.writeStringField("test_hiragana", "foo" + new String(Character.toChars(0x3042)) + "bar");
114+
115+
// Outside the BMP; 0x1F60A - emoji
116+
gen.writeStringField("test_emoji", new String(Character.toChars(0x1F60A)));
117+
118+
gen.writeEndObject();
119+
}
120+
String json = out.toString("UTF-8");
121+
assertTrue(json.contains("foo\uFF0Cbar"));
122+
assertTrue(json.contains("foo\uFE6Abar"));
123+
assertTrue(json.contains("foo\u3042bar"));
124+
assertTrue(json.contains("\"test_emoji\":\"\uD83D\uDE0A\""));
125+
}
93126
}

0 commit comments

Comments
 (0)