From 8a197e77ff6a252bdc1a12a3ee40d51962680f53 Mon Sep 17 00:00:00 2001
From: rnetuka
Date: Thu, 12 Sep 2024 13:12:11 +0200
Subject: [PATCH] Write 4-byte characters (surrogate pairs) instead of escapes

---
 .../fasterxml/jackson/core/JsonGenerator.java |  9 +++++++++
 .../jackson/core/StreamWriteFeature.java      |  9 +++++++++
 .../jackson/core/json/UTF8JsonGenerator.java  | 34 +++++++++++++++++--
 .../core/json/GeneratorFeaturesTest.java      |  1 +
 .../core/json/StringGenerationTest.java       | 12 +++++++
 5 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java b/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java
index f8ee301e2b..eba4170e92 100644
--- a/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java
+++ b/src/main/java/com/fasterxml/jackson/core/JsonGenerator.java
@@ -202,6 +202,15 @@ public enum Feature {
          */
         WRITE_BIGDECIMAL_AS_PLAIN(false),
 
+        /**
+         * Feature that determines whether the UTF-8 backed generator
+         * ({@code UTF8JsonGenerator}) writes a UTF-16 surrogate pair as
+         * a single 4-byte UTF-8 character instead of escapes.
+         *<p>
+         * Feature is disabled by default.
+         */
+        WRITE_UTF8_SURROGATES(false),
+
         // // Schema/Validity support features
 
         /**
diff --git a/src/main/java/com/fasterxml/jackson/core/StreamWriteFeature.java b/src/main/java/com/fasterxml/jackson/core/StreamWriteFeature.java
index a30c23ea9b..8f41309eeb 100644
--- a/src/main/java/com/fasterxml/jackson/core/StreamWriteFeature.java
+++ b/src/main/java/com/fasterxml/jackson/core/StreamWriteFeature.java
@@ -74,6 +74,15 @@ public enum StreamWriteFeature
      */
     WRITE_BIGDECIMAL_AS_PLAIN(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN),
 
+    /**
+     * Feature that determines whether the UTF-8 backed generator
+     * ({@code UTF8JsonGenerator}) writes a UTF-16 surrogate pair as
+     * a single 4-byte UTF-8 character instead of escapes.
+     *<p>
+     * Feature is disabled by default.
+     */
+    WRITE_UTF8_SURROGATES(JsonGenerator.Feature.WRITE_UTF8_SURROGATES),
+
     // // Schema/Validity support features
 
     /**
diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java
index de47f9b48b..bb16c8491a 100644
--- a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java
+++ b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java
@@ -3,6 +3,7 @@
 import java.io.*;
 import java.math.BigDecimal;
 import java.math.BigInteger;
+import
java.nio.charset.StandardCharsets;
 
 import com.fasterxml.jackson.core.*;
 import com.fasterxml.jackson.core.io.CharTypes;
@@ -659,6 +660,17 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
         _outputBuffer[_outputTail++] = _quoteChar;
     }
 
+    /**
+     * Checks whether given character is a UTF-16 high (leading) surrogate,
+     * that is, in range {@code 0xD800 - 0xDBFF}; only such a character can
+     * start a surrogate pair.
+     */
+    private boolean isSurrogatePair(char ch) {
+        // Masking with 0xD800 would also match low surrogates (0xDC00 - 0xDFFF)
+        // and 0xF800 - 0xFFFF, which all have those bits set
+        return Character.isHighSurrogate(ch);
+    }
+
     /*
     /**********************************************************
     /* Output method implementations, unprocessed ("raw")
@@ -1489,6 +1501,8 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
         final byte[] outputBuffer = _outputBuffer;
         final int[] escCodes = _outputEscapes;
 
+        boolean writeSurrogates = Feature.WRITE_UTF8_SURROGATES.enabledIn(_features);
+
         while (offset < end) {
             int ch = cbuf[offset++];
             if (ch <= 0x7F) {
@@ -1510,7 +1524,14 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
                 outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
                 outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
             } else {
-                outputPtr = _outputMultiByteChar(ch, outputPtr);
+                // Combine only a valid pair: high surrogate followed by a low
+                // one; anything else is handled by _outputMultiByteChar as before
+                if (writeSurrogates && isSurrogatePair((char) ch)
+                        && offset < end && Character.isLowSurrogate(cbuf[offset])) {
+                    outputPtr = _outputSurrogatePair((char) ch, cbuf[offset++], outputPtr);
+                } else {
+                    outputPtr = _outputMultiByteChar(ch, outputPtr);
+                }
             }
         }
         _outputTail = outputPtr;
@@ -1527,6 +1548,8 @@ private final void _writeStringSegment2(final String text, int offset, final int
         final byte[] outputBuffer = _outputBuffer;
         final int[] escCodes = _outputEscapes;
 
+        boolean writeSurrogates = Feature.WRITE_UTF8_SURROGATES.enabledIn(_features);
+
         while (offset < end) {
             int ch = text.charAt(offset++);
             if (ch <= 0x7F) {
@@ -1548,7 +1571,14 @@ private final void _writeStringSegment2(final String text, int offset, final int
                 outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
                 outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
             } else {
-                outputPtr =
_outputMultiByteChar(ch, outputPtr);
+                // Combine only a valid pair: high surrogate followed by a low
+                // one; anything else is handled by _outputMultiByteChar as before
+                if (writeSurrogates && offset < end
+                        && Character.isSurrogatePair((char) ch, text.charAt(offset))) {
+                    outputPtr = _outputSurrogatePair((char) ch, text.charAt(offset++), outputPtr);
+                } else {
+                    outputPtr = _outputMultiByteChar(ch, outputPtr);
+                }
             }
         }
         _outputTail = outputPtr;
@@ -2133,6 +2156,27 @@ protected final void _outputSurrogates(int surr1, int surr2) throws IOException
         bbuf[_outputTail++] = (byte) (0x80 | (c & 0x3f));
     }
 
+    /**
+     * Writes the supplementary code point denoted by given surrogate pair
+     * into output buffer as a 4-byte UTF-8 sequence.
+     *
+     * @param highSurrogate High (leading) surrogate of the pair
+     * @param lowSurrogate Low (trailing) surrogate of the pair
+     * @param outputPtr Offset in output buffer to start writing at
+     *
+     * @return Offset in output buffer after the written bytes
+     */
+    private int _outputSurrogatePair(char highSurrogate, char lowSurrogate, int outputPtr) {
+        // Encode directly; no temporary String or byte[] per pair
+        final int c = Character.toCodePoint(highSurrogate, lowSurrogate);
+        final byte[] bbuf = _outputBuffer;
+        bbuf[outputPtr++] = (byte) (0xf0 | (c >> 18));
+        bbuf[outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
+        bbuf[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
+        bbuf[outputPtr++] = (byte) (0x80 | (c & 0x3f));
+        return outputPtr;
+    }
+
     /**
      *
      * @param ch
diff --git a/src/test/java/com/fasterxml/jackson/core/json/GeneratorFeaturesTest.java b/src/test/java/com/fasterxml/jackson/core/json/GeneratorFeaturesTest.java
index 2a54323073..c504a8e224 100644
--- a/src/test/java/com/fasterxml/jackson/core/json/GeneratorFeaturesTest.java
+++ b/src/test/java/com/fasterxml/jackson/core/json/GeneratorFeaturesTest.java
@@ -28,6 +28,7 @@ void configDefaults() throws IOException
 
         assertFalse(g.isEnabled(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN));
         assertFalse(g.isEnabled(StreamWriteFeature.WRITE_BIGDECIMAL_AS_PLAIN));
+        assertFalse(g.isEnabled(StreamWriteFeature.WRITE_UTF8_SURROGATES));
 
         assertTrue(g.canOmitFields());
         assertFalse(g.canWriteBinaryNatively());
diff --git a/src/test/java/com/fasterxml/jackson/core/json/StringGenerationTest.java b/src/test/java/com/fasterxml/jackson/core/json/StringGenerationTest.java
index 5aa602374d..977ec1fdcf 100644
--- a/src/test/java/com/fasterxml/jackson/core/json/StringGenerationTest.java
+++ b/src/test/java/com/fasterxml/jackson/core/json/StringGenerationTest.java
@@ -6,6 +6,7 @@
 
 import java.util.Random;
 
+import static
com.fasterxml.jackson.core.JsonGenerator.Feature;
 import static org.junit.jupiter.api.Assertions.*;
 
 import org.junit.jupiter.api.Test;
@@ -86,6 +87,18 @@ void longerRandomMultiChunk() throws Exception
         }
     }
 
+    @Test
+    public void testWritingSurrogatePairs() throws IOException {
+        final String string = "システム\uD867\uDE3D"; // システム𩸽
+        ByteArrayOutputStream stream = new ByteArrayOutputStream();
+        try (JsonGenerator generator = FACTORY.createGenerator(stream, JsonEncoding.UTF8)
+                .enable(Feature.WRITE_UTF8_SURROGATES)) {
+            generator.writeString(string);
+        }
+        // Decode explicitly as UTF-8; no-arg toString() uses platform default charset
+        assertEquals("\"" + string + "\"", stream.toString("UTF-8"));
+    }
+
     /*
     /**********************************************************
     /* Internal methods