Skip to content

Commit

Permalink
Write 4-byte characters (surrogate pairs) instead of escapes
Browse files Browse the repository at this point in the history
  • Loading branch information
rnetuka committed Sep 12, 2024
1 parent 89b2381 commit 4c25457
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.io.CharTypes;
Expand Down Expand Up @@ -659,6 +660,10 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
_outputBuffer[_outputTail++] = _quoteChar;
}

/**
 * Tells whether {@code ch} is a UTF-16 high (leading) surrogate, that is,
 * the first half of a surrogate pair (U+D800..U+DBFF).
 *<p>
 * A plain mask test such as {@code (ch & 0xD800) == 0xD800} is not enough:
 * it also accepts low surrogates (U+DC00..U+DFFF) and the unrelated range
 * U+F800..U+FFFF, because it only verifies that four particular bits are set.
 *
 * @param ch UTF-16 code unit to inspect
 * @return {@code true} only if {@code ch} is a high surrogate
 */
private boolean isSurrogatePair(char ch) {
    return Character.isHighSurrogate(ch);
}

/*
/**********************************************************
/* Output method implementations, unprocessed ("raw")
Expand Down Expand Up @@ -1510,7 +1515,14 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
// multibyte character
if (isSurrogatePair((char) ch) && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = cbuf[offset++];
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
}
_outputTail = outputPtr;
Expand Down Expand Up @@ -1548,7 +1560,14 @@ private final void _writeStringSegment2(final String text, int offset, final int
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
// multibyte character
if (isSurrogatePair((char) ch) && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = text.charAt(offset++);
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
} else {
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
}
_outputTail = outputPtr;
Expand Down Expand Up @@ -2133,6 +2152,13 @@ protected final void _outputSurrogates(int surr1, int surr2) throws IOException
bbuf[_outputTail++] = (byte) (0x80 | (c & 0x3f));
}

/**
 * Writes the UTF-8 encoding of the two given UTF-16 code units into
 * {@code _outputBuffer}, starting at {@code outputPtr}.
 *<p>
 * A well-formed surrogate pair is combined into its supplementary code point
 * and encoded directly as four bytes, without allocating a temporary
 * {@code String} or {@code byte[]}. Anything else is handed to the JDK
 * encoder so that malformed input produces exactly the bytes it did before.
 *<p>
 * NOTE(review): no capacity check is done here; presumably callers reserve
 * enough room in {@code _outputBuffer} beforehand -- confirm against callers.
 *
 * @param highSurrogate first (leading) code unit
 * @param lowSurrogate second (trailing) code unit
 * @param outputPtr index in {@code _outputBuffer} at which to start writing
 * @return index just past the last byte written
 */
private int _outputSurrogatePair(char highSurrogate, char lowSurrogate, int outputPtr) {
    if (Character.isSurrogatePair(highSurrogate, lowSurrogate)) {
        final int codePoint = Character.toCodePoint(highSurrogate, lowSurrogate);
        // Supplementary code points (U+10000..U+10FFFF) always take 4 bytes in UTF-8
        _outputBuffer[outputPtr++] = (byte) (0xf0 | (codePoint >> 18));
        _outputBuffer[outputPtr++] = (byte) (0x80 | ((codePoint >> 12) & 0x3f));
        _outputBuffer[outputPtr++] = (byte) (0x80 | ((codePoint >> 6) & 0x3f));
        _outputBuffer[outputPtr++] = (byte) (0x80 | (codePoint & 0x3f));
        return outputPtr;
    }
    // Not a well-formed pair: keep the previous behavior by delegating to the JDK encoder
    final byte[] encoded = (String.valueOf(highSurrogate) + lowSurrogate).getBytes(StandardCharsets.UTF_8);
    System.arraycopy(encoded, 0, _outputBuffer, outputPtr, encoded.length);
    return outputPtr + encoded.length;
}

/**
*
* @param ch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,17 @@ void longerRandomMultiChunk() throws Exception
}
}

// Writes a String that ends with a supplementary character (a UTF-16 surrogate
// pair) through a UTF-8 generator and checks that the output, decoded as UTF-8,
// is the same text wrapped in double quotes.
@Test
public void testWritingSurrogatePairs() throws IOException {
    final String string = "システム\uD867\uDE3D"; // システム𩸽
    final ByteArrayOutputStream stream = new ByteArrayOutputStream();
    // try-with-resources so the generator is closed even if writing fails
    try (JsonGenerator generator = FACTORY.createGenerator(stream, JsonEncoding.UTF8)) {
        generator.writeString(string);
        generator.flush();
    }
    // Decode explicitly as UTF-8: the no-arg toString() uses the platform default
    // charset, which would make this test depend on the JVM's environment
    assertEquals("\"" + string + "\"", stream.toString("UTF-8"));
}

/*
/**********************************************************
/* Internal methods
Expand Down

0 comments on commit 4c25457

Please sign in to comment.