ARTEMIS-1572 UTF-8 Encoding is not translating ASCII edge cases correctly

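Fixes the UTF-8 translation of the ASCII range 0-127: the encoder and the length calculation skipped the single-byte path for chars 0 and 0x7F, and the decoder skipped it for byte 0.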
This commit is contained in:
Francesco Nigro 2017-12-21 18:16:49 +01:00 committed by Justin Bertram
parent fee083a621
commit dab05adf6c
2 changed files with 28 additions and 3 deletions

View File

@ -103,7 +103,7 @@ public final class UTF8Util {
for (int i = 0; i < stringLength; i++) { for (int i = 0; i < stringLength; i++) {
char charAtPos = buffer.charBuffer[i]; char charAtPos = buffer.charBuffer[i];
if (charAtPos >= 1 && charAtPos < 0x7f) { if (charAtPos <= 0x7f) {
buffer.byteBuffer[charCount++] = (byte) charAtPos; buffer.byteBuffer[charCount++] = (byte) charAtPos;
} else if (charAtPos >= 0x800) { } else if (charAtPos >= 0x800) {
buffer.byteBuffer[charCount++] = (byte) (0xE0 | charAtPos >> 12 & 0x0F); buffer.byteBuffer[charCount++] = (byte) (0xE0 | charAtPos >> 12 & 0x0F);
@ -145,7 +145,7 @@ public final class UTF8Util {
while (count < size) { while (count < size) {
byte1 = buffer.byteBuffer[count++]; byte1 = buffer.byteBuffer[count++];
if (byte1 > 0 && byte1 <= 0x7F) { if (byte1 >= 0 && byte1 <= 0x7F) {
buffer.charBuffer[charCount++] = (char) byte1; buffer.charBuffer[charCount++] = (char) byte1;
} else { } else {
int c = byte1 & 0xff; int c = byte1 & 0xff;
@ -211,7 +211,7 @@ public final class UTF8Util {
for (int i = 0; i < stringLength; i++) { for (int i = 0; i < stringLength; i++) {
char c = stringBuffer.charBuffer[i]; char c = stringBuffer.charBuffer[i];
if (c >= 1 && c < 0x7f) { if (c <= 0x7f) {
calculatedLen++; calculatedLen++;
} else if (c >= 0x800) { } else if (c >= 0x800) {
calculatedLen += 3; calculatedLen += 3;

View File

@ -21,6 +21,8 @@ import java.io.ByteArrayOutputStream;
import java.io.DataInputStream; import java.io.DataInputStream;
import java.io.DataOutputStream; import java.io.DataOutputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.activemq.artemis.api.core.ActiveMQBuffer; import org.apache.activemq.artemis.api.core.ActiveMQBuffer;
import org.apache.activemq.artemis.api.core.ActiveMQBuffers; import org.apache.activemq.artemis.api.core.ActiveMQBuffers;
@ -34,6 +36,29 @@ import org.junit.Test;
public class UTF8Test extends ActiveMQTestBase { public class UTF8Test extends ActiveMQTestBase {
/**
 * Checks the encoding of a 1024-char string consisting only of the NUL
 * character (U+0000), the lowest value of the ASCII range.
 */
@Test
public void testValidateUTFWithENDChars() {
    testValidateUTFWithChars(1024, '\u0000');
}
/**
 * Checks the encoding of a 1024-char string consisting only of U+007F
 * (127, the value of {@code Byte.MAX_VALUE}), the highest value of the
 * ASCII range.
 */
@Test
public void testValidateUTFWithLastAsciiChars() {
    testValidateUTFWithChars(1024, '\u007F');
}
/**
 * Builds a string of {@code size} copies of {@code c}, writes it with
 * {@code UTF8Util.saveUTF} and asserts that the bytes following the leading
 * unsigned short (which this test reads as the encoded size) are identical to
 * the JDK's {@link StandardCharsets#UTF_8} encoding of the same string.
 *
 * @param size number of characters in the generated string
 * @param c    the character repeated throughout the string
 */
private void testValidateUTFWithChars(final int size, final char c) {
    final char[] repeated = new char[size];
    Arrays.fill(repeated, c);
    final String text = String.valueOf(repeated);

    // Reference encoding produced by the JDK.
    final byte[] jdkEncoding = text.getBytes(StandardCharsets.UTF_8);

    // 4 bytes per char of capacity, as in the original sizing of the buffer.
    final ActiveMQBuffer target = ActiveMQBuffers.fixedBuffer(4 * size);
    UTF8Util.saveUTF(target.byteBuf(), text);

    // The unsigned short read first is used as the length of the payload to compare.
    final byte[] payload = new byte[target.readUnsignedShort()];
    target.getBytes(target.readerIndex(), payload);

    Assert.assertArrayEquals(jdkEncoding, payload);
}
@Test @Test
public void testValidateUTF() throws Exception { public void testValidateUTF() throws Exception {
ActiveMQBuffer buffer = ActiveMQBuffers.fixedBuffer(60 * 1024); ActiveMQBuffer buffer = ActiveMQBuffers.fixedBuffer(60 * 1024);