From 736eb467e123be5296a93e81cf208faa6543601d Mon Sep 17 00:00:00 2001 From: Aleksei Zotov Date: Sat, 31 Aug 2024 12:52:33 -0400 Subject: [PATCH] AMQ-8122 - Fix DataByteArrayInputStreamTest --- .../util/DataByteArrayOutputStream.java | 1 - .../util/DataByteArrayInputStreamTest.java | 143 ++++++++++++------ .../disk/util/DataByteArrayOutputStream.java | 1 - 3 files changed, 99 insertions(+), 46 deletions(-) diff --git a/activemq-client/src/main/java/org/apache/activemq/util/DataByteArrayOutputStream.java b/activemq-client/src/main/java/org/apache/activemq/util/DataByteArrayOutputStream.java index 692fb94a4a..f1aaf2983c 100644 --- a/activemq-client/src/main/java/org/apache/activemq/util/DataByteArrayOutputStream.java +++ b/activemq-client/src/main/java/org/apache/activemq/util/DataByteArrayOutputStream.java @@ -210,7 +210,6 @@ public final class DataByteArrayOutputStream extends OutputStream implements Dat ensureEnoughBuffer((int)(pos + encodedsize + 2)); writeShort((int)encodedsize); - byte[] buffer = new byte[(int)encodedsize]; MarshallingSupport.writeUTFBytesToBuffer(text, (int) encodedsize, buf, pos); pos += encodedsize; } diff --git a/activemq-client/src/test/java/org/apache/activemq/util/DataByteArrayInputStreamTest.java b/activemq-client/src/test/java/org/apache/activemq/util/DataByteArrayInputStreamTest.java index 632bfa30a2..f21260d9d8 100644 --- a/activemq-client/src/test/java/org/apache/activemq/util/DataByteArrayInputStreamTest.java +++ b/activemq-client/src/test/java/org/apache/activemq/util/DataByteArrayInputStreamTest.java @@ -17,66 +17,121 @@ package org.apache.activemq.util; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import java.io.UTFDataFormatException; import org.junit.Test; +/** + * https://issues.apache.org/jira/browse/AMQ-1911 + * https://issues.apache.org/jira/browse/AMQ-8122 + */ public class DataByteArrayInputStreamTest { - /** - * https://issues.apache.org/activemq/browse/AMQ-1911 - */ @Test 
- public void testNonAscii() throws Exception { - doMarshallUnMarshallValidation("mei\u00DFen"); - - String accumulator = new String(); - - int test = 0; // int to get Supplementary chars - while(Character.isDefined(test)) { - String toTest = String.valueOf((char)test); - accumulator += toTest; - doMarshallUnMarshallValidation(toTest); - test++; - } - - int massiveThreeByteCharValue = 0x0FFF; - String toTest = String.valueOf((char)massiveThreeByteCharValue); - accumulator += toTest; - doMarshallUnMarshallValidation(String.valueOf((char)massiveThreeByteCharValue)); - - // Altogether - doMarshallUnMarshallValidation(accumulator); - - // the three byte values - char t = '\u0800'; - final char max = '\uffff'; - accumulator = String.valueOf(t); - while (t < max) { - String val = String.valueOf(t); - accumulator += val; - doMarshallUnMarshallValidation(val); - t++; - } - - // Altogether so long as it is not too big - while (accumulator.length() > 20000) { - accumulator = accumulator.substring(20000); - } - doMarshallUnMarshallValidation(accumulator); + public void testOneByteCharacters() throws Exception { + testCodePointRange(0x000000, 0x00007F); } - void doMarshallUnMarshallValidation(String value) throws Exception { + @Test + public void testTwoBytesCharacters() throws Exception { + testCodePointRange(0x000080, 0x0007FF); + } + + @Test + public void testThreeBytesCharacters() throws Exception { + testCodePointRange(0x000800, 0x00FFFF); + } + + @Test + public void testFourBytesCharacters() throws Exception { + testCodePointRange(0x010000, 0X10FFFF); + } + + @Test + public void testFourBytesCharacterEncodedAsBytes() throws Exception { + // Currently ActiveMQ does not properly support 4-bytes UTF characters. + // Ideally, this test should be failing. The current logic was kept as is + // intentionally. See https://issues.apache.org/jira/browse/AMQ-8398. 
+ + // 0xF0 0x80 0x80 0x80 (lowest bit pattern of the 4-byte form) + testInvalidCharacterBytes(new byte[]{-16, -128, -128, -128}, 4); + // 0xF7 0xBF 0xBF 0xBF (highest bit pattern of the 4-byte form) + testInvalidCharacterBytes(new byte[]{-9, -65, -65, -65}, 4); + } + + + private void testCodePointRange(int from, int to) throws Exception { + StringBuilder accumulator = new StringBuilder(); + for (int codePoint = from; codePoint <= to; codePoint++) { + String val = String.valueOf(Character.toChars(codePoint)); + accumulator.append(val); + doMarshallUnMarshallValidation(val); + } + + // truncate string to last 20k characters + if (accumulator.length() > 20_000) { + doMarshallUnMarshallValidation(accumulator.substring( + accumulator.length() - 20_000)); + } else { + doMarshallUnMarshallValidation(accumulator.toString()); + } + } + + private void doMarshallUnMarshallValidation(String value) throws Exception { DataByteArrayOutputStream out = new DataByteArrayOutputStream(); - out.writeBoolean(true); out.writeUTF(value); out.close(); DataByteArrayInputStream in = new DataByteArrayInputStream(out.getData()); - in.readBoolean(); String readBack = in.readUTF(); + assertEquals(value, readBack); } + @Test + public void testTwoBytesOutOfRangeCharacter() throws Exception { + // 0xC0 0x7F + testInvalidCharacterBytes(new byte[]{-64, 127}, 2); + // 0xDF 0xC0 + testInvalidCharacterBytes(new byte[]{-33, -64}, 2); + } + + @Test + public void testThreeBytesOutOfRangeCharacter() throws Exception { + // 0xE0 0x80 0x7F + testInvalidCharacterBytes(new byte[]{-32, -128, 127}, 3); + // 0xEF 0xBF 0xC0 + testInvalidCharacterBytes(new byte[]{-17, -65, -64}, 3); + } + + @Test + public void testFourBytesOutOfRangeCharacter() throws Exception { + // 0xF0 0x80 0x80 0x7F + testInvalidCharacterBytes(new byte[]{-16, -128, -128, 127}, 4); + // 0xF7 0xBF 0xBF 0xC0 + testInvalidCharacterBytes(new byte[]{-9, -65, -65, -64}, 4); + } + + private void testInvalidCharacterBytes(byte[] bytes, int encodedSize) throws Exception 
{ + // new String(byte[]) never yields a malformed string: the default-charset + // decoder replaces every invalid byte sequence with the replacement character. + // This round trip demonstrates that such a sanitized string is marshalled + // and unmarshalled without errors. + String val = new String(bytes); + doMarshallUnMarshallValidation(val); + + // However, a non-Java client can send an invalid sequence of bytes. + // Such data causes exceptions while unmarshalling. + DataByteArrayOutputStream out = new DataByteArrayOutputStream(); + out.writeShort(encodedSize); + out.write(bytes); + out.close(); + + DataByteArrayInputStream in = new DataByteArrayInputStream(out.getData()); + assertThrows(UTFDataFormatException.class, () -> in.readUTF()); + } + @Test public void testReadLong() throws Exception { DataByteArrayOutputStream out = new DataByteArrayOutputStream(8); @@ -87,4 +142,4 @@ public class DataByteArrayInputStreamTest { long readBack = in.readLong(); assertEquals(Long.MAX_VALUE, readBack); } -} +} \ No newline at end of file diff --git a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/DataByteArrayOutputStream.java b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/DataByteArrayOutputStream.java index 5a3fba4715..595726b8cd 100644 --- a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/DataByteArrayOutputStream.java +++ b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/DataByteArrayOutputStream.java @@ -237,7 +237,6 @@ public class DataByteArrayOutputStream extends OutputStream implements DataOutpu ensureEnoughBuffer((int)(pos + encodedsize + 2)); writeShort((int)encodedsize); - byte[] buffer = new byte[(int)encodedsize]; MarshallingSupport.writeUTFBytesToBuffer(text, (int) encodedsize, buf, pos); pos += encodedsize; onWrite();