Skip to content

Commit cfecd0f

Browse files
committed
Fix #93: handle split surrogate for PI writes too
1 parent 45eaad6 commit cfecd0f

File tree

3 files changed

+39
-30
lines changed

3 files changed

+39
-30
lines changed

release-notes/VERSION

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Project: aalto-xml
1212
#90: Update stax2-api dep to 4.2.2 (from 4.2)
1313
#91: Multi-byte characters are split in `writeComment()` if first byte sits
1414
right at the end of the buffer
15+
#93: Multi-byte characters are split in `writePI()` if first byte sits
16+
right at the end of the buffer
1517

1618
1.3.2 (25-Apr-2022)
1719

src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,7 @@ public int writeComment(String data) throws IOException, XMLStreamException
11931193

11941194
/**
11951195
* Note: the only way to fix comment contents is to inject a space
1196-
* to split up consequtive '--' (or '-' that ends a comment).
1196+
* to split up consecutive '--' (or '-' that ends a comment).
11971197
*/
11981198
protected int writeCommentContents(char[] cbuf, int offset, int len)
11991199
throws IOException, XMLStreamException
@@ -1293,6 +1293,14 @@ public void writeDTD(WName rootName, String systemId, String publicId,
12931293
protected int writePIData(char[] cbuf, int offset, int len)
12941294
throws IOException, XMLStreamException
12951295
{
1296+
if (_surrogate != 0) {
1297+
outputSurrogates(_surrogate, cbuf[offset]);
1298+
// reset the temporary surrogate storage
1299+
_surrogate = 0;
1300+
++offset;
1301+
--len;
1302+
}
1303+
12961304
// Unlike with writeCharacters() and fastWriteName(), let's not
12971305
// worry about split buffers here: this is unlikely to become
12981306
// a performance bottleneck. This allows keeping it simple; and

src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@
77

88
public class TestSaxWriter extends base.BaseTestCase
99
{
10+
private final String TEXT_WITH_SURROGATE;
11+
{
12+
StringBuilder testText = new StringBuilder(1025);
13+
for (int i = 0; i < 511; i++) {
14+
testText.append('x');
15+
}
16+
testText.append("\uD835\uDFCE");
17+
for (int i = 0; i < 512; i++) {
18+
testText.append('x');
19+
}
20+
TEXT_WITH_SURROGATE = testText.toString();
21+
}
22+
1023
public void testSplitSurrogateWithAttributeValue() throws Exception
1124
{
1225
// This test aims to produce the
@@ -16,19 +29,11 @@ public void testSplitSurrogateWithAttributeValue() throws Exception
1629
// to also fill the next two internal reading buffers. Then, the code would try to fuse the first byte
1730
// of the original multi-byte character with the first character in the third buffer because
1831
// ByteXmlWriter#_surrogate was not set back to 0 after writing the original multi-byte character.
19-
StringBuilder testText = new StringBuilder();
20-
for (int i = 0; i < 511; i++) {
21-
testText.append('x');
22-
}
23-
testText.append("\uD835\uDFCE");
24-
for (int i = 0; i < 512; i++) {
25-
testText.append('x');
26-
}
2732
WriterConfig writerConfig = new WriterConfig();
2833
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
2934
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
3035
writer.writeStartTagStart(writer.constructName("testelement"));
31-
writer.writeAttribute(writer.constructName("testattr"), testText.toString());
36+
writer.writeAttribute(writer.constructName("testattr"), TEXT_WITH_SURROGATE);
3237
writer.writeStartTagEnd();
3338
writer.writeEndTag(writer.constructName("testelement"));
3439
writer.close(false);
@@ -61,43 +66,37 @@ public void testSplitSurrogateWithAttributeValue2() throws Exception
6166
public void testSplitSurrogateWithCData() throws Exception
6267
{
6368
// Modification of "testSplitSurrogateWithAttributeValue()" but for CDATA
64-
StringBuilder testText = new StringBuilder();
65-
for (int i = 0; i < 511; i++) {
66-
testText.append('x');
67-
}
68-
testText.append("\uD835\uDFCE");
69-
for (int i = 0; i < 512; i++) {
70-
testText.append('x');
71-
}
72-
7369
WriterConfig writerConfig = new WriterConfig();
7470
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
7571
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
7672
writer.writeStartTagStart(writer.constructName("testelement"));
77-
writer.writeCData(testText.toString());
73+
writer.writeCData(TEXT_WITH_SURROGATE);
7874
writer.writeStartTagEnd();
7975
writer.writeEndTag(writer.constructName("testelement"));
8076
writer.close(false);
8177
}
8278

83-
8479
public void testSplitSurrogateWithComment() throws Exception
8580
{
8681
// Modification of "testSplitSurrogateWithAttributeValue()" but for Comment
87-
StringBuilder testText = new StringBuilder();
88-
for (int i = 0; i < 511; i++) {
89-
testText.append('x');
90-
}
91-
testText.append("\uD835\uDFCE");
92-
for (int i = 0; i < 512; i++) {
93-
testText.append('x');
94-
}
82+
WriterConfig writerConfig = new WriterConfig();
83+
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
84+
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
85+
writer.writeStartTagStart(writer.constructName("testelement"));
86+
writer.writeComment(TEXT_WITH_SURROGATE);
87+
writer.writeStartTagEnd();
88+
writer.writeEndTag(writer.constructName("testelement"));
89+
writer.close(false);
90+
}
9591

92+
public void testSplitSurrogateWithPI() throws Exception
93+
{
94+
// Modification of "testSplitSurrogateWithAttributeValue()" but for Processing instructions
9695
WriterConfig writerConfig = new WriterConfig();
9796
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
9897
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
9998
writer.writeStartTagStart(writer.constructName("testelement"));
100-
writer.writeComment(testText.toString());
99+
writer.writePI(writer.constructName("target"), TEXT_WITH_SURROGATE);
101100
writer.writeStartTagEnd();
102101
writer.writeEndTag(writer.constructName("testelement"));
103102
writer.close(false);

0 commit comments

Comments
 (0)