From ef9657353f0a0d2b57ac86c06c478629e5808fed Mon Sep 17 00:00:00 2001
From: "Aliaksandr.Asiptsou"
Date: Wed, 5 Jun 2024 17:12:59 +0200
Subject: [PATCH] Fixing the issue where multi-byte characters are split in writeCData() if first byte sits right at the end of the buffer

---
 .../fasterxml/aalto/out/ByteXmlWriter.java |  7 ++++++
 .../fasterxml/aalto/sax/TestSaxWriter.java | 25 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java b/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java
index f8ce09f..c1cacf8 100644
--- a/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java
+++ b/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java
@@ -792,6 +792,13 @@ public int writeCData(char[] cbuf, int offset, int len)
     protected int writeCDataContents(char[] cbuf, int offset, int len)
         throws IOException, XMLStreamException
     {
+        if (_surrogate != 0) {
+            outputSurrogates(_surrogate, cbuf[offset]);
+// reset the temporary surrogate storage
+            _surrogate = 0;
+            ++offset;
+            --len;
+        }
         /* Unlike with writeCharacters() and fastWriteName(), let's not
          * worry about split buffers here: this is unlikely to become
          * performance bottleneck. This allows keeping it simple; and
diff --git a/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java b/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java
index 8c80242..799a50a 100644
--- a/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java
+++ b/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java
@@ -54,4 +54,29 @@ public void testSurrogateMemory2() throws Exception {
         writer.writeEndTag(writer.constructName("testelement"));
         writer.close(false);
     }
+
+    public void testSurrogateMemory3() throws Exception {
+        // This test aims to produce the
+        // javax.xml.stream.XMLStreamException: Incomplete surrogate pair in content: first char 0xdfce, second 0x78
+        // error message. The issue was similar to the one described in testSurrogateMemory1(), except it happened in
+        // ByteXmlWriter#writeCDataContents(), where check for existing _surrogate was missing prior to the fix,
+        // as opposed to ByteXmlWriter#writeCharacters().
+        StringBuilder testText = new StringBuilder();
+        for (int i = 0; i < 511; i++)
+            testText.append('x');
+        testText.append("\uD835\uDFCE");
+        for (int i = 0; i < 512; i++)
+            testText.append('x');
+
+        WriterConfig writerConfig = new WriterConfig();
+        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+        Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
+        writer.writeStartTagStart(writer.constructName("testelement"));
+        writer.writeCData(testText.toString());
+        writer.writeStartTagEnd();
+        writer.writeEndTag(writer.constructName("testelement"));
+        writer.close(false);
+
+    }
+
 }