AMQ-6651: Add new implementations of the writeUTF8 and readUTF8 methods that are based on Apache Harmony code. This also avoids some of the code duplication that was occurring.

This commit is contained in:
Hiram Chirino 2017-04-06 11:55:08 -04:00
parent 58046194d0
commit 172c29091e
5 changed files with 113 additions and 249 deletions

View File

@ -259,62 +259,12 @@ public final class DataByteArrayInputStream extends InputStream implements DataI
public String readUTF() throws IOException {
int length = readUnsignedShort();
char[] characters = new char[length];
int c;
int c2;
int c3;
int count = 0;
int total = pos + length;
while (pos < total) {
c = (int)buf[pos] & 0xff;
if (c > 127) {
break;
}
pos++;
characters[count++] = (char)c;
if (pos + length > buf.length) {
throw new UTFDataFormatException("bad string");
}
while (pos < total) {
c = (int)buf[pos] & 0xff;
switch (c >> 4) {
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
pos++;
characters[count++] = (char)c;
break;
case 12:
case 13:
pos += 2;
if (pos > total) {
throw new UTFDataFormatException("bad string");
}
c2 = (int)buf[pos - 1];
if ((c2 & 0xC0) != 0x80) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char)(((c & 0x1F) << 6) | (c2 & 0x3F));
break;
case 14:
pos += 3;
if (pos > total) {
throw new UTFDataFormatException("bad string");
}
c2 = (int)buf[pos - 2];
c3 = (int)buf[pos - 1];
if (((c2 & 0xC0) != 0x80) || ((c3 & 0xC0) != 0x80)) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char)(((c & 0x0F) << 12) | ((c2 & 0x3F) << 6) | ((c3 & 0x3F) << 0));
break;
default:
throw new UTFDataFormatException("bad string");
}
}
return new String(characters, 0, count);
char chararr[] = new char[length];
String result = MarshallingSupport.convertUTF8WithBuf(buf, chararr, pos, length);
pos += length;
return result;
}
}

View File

@ -202,46 +202,17 @@ public final class DataByteArrayOutputStream extends OutputStream implements Dat
}
}
public void writeUTF(String str) throws IOException {
int strlen = str.length();
int encodedsize = 0;
int c;
for (int i = 0; i < strlen; i++) {
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
encodedsize++;
} else if (c > 0x07FF) {
encodedsize += 3;
} else {
encodedsize += 2;
}
}
public void writeUTF(String text) throws IOException {
long encodedsize = MarshallingSupport.countUTFBytes(text);
if (encodedsize > 65535) {
throw new UTFDataFormatException("encoded string too long: " + encodedsize + " bytes");
}
ensureEnoughBuffer(pos + encodedsize + 2);
writeShort(encodedsize);
int i = 0;
for (i = 0; i < strlen; i++) {
c = str.charAt(i);
if (!((c >= 0x0001) && (c <= 0x007F))) {
break;
}
buf[pos++] = (byte)c;
}
for (; i < strlen; i++) {
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
buf[pos++] = (byte)c;
} else if (c > 0x07FF) {
buf[pos++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
buf[pos++] = (byte)(0x80 | ((c >> 6) & 0x3F));
buf[pos++] = (byte)(0x80 | ((c >> 0) & 0x3F));
} else {
buf[pos++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
buf[pos++] = (byte)(0x80 | ((c >> 0) & 0x3F));
}
}
ensureEnoughBuffer((int)(pos + encodedsize + 2));
writeShort((int)encodedsize);
byte[] buffer = new byte[(int)encodedsize];
MarshallingSupport.writeUTFBytesToBuffer(text, (int) encodedsize, buf, pos);
pos += encodedsize;
}
private void ensureEnoughBuffer(int newcount) {

View File

@ -296,117 +296,102 @@ public final class MarshallingSupport {
public static void writeUTF8(DataOutput dataOut, String text) throws IOException {
if (text != null) {
int strlen = text.length();
int utflen = 0;
char[] charr = new char[strlen];
int c = 0;
int count = 0;
long utfCount = countUTFBytes(text);
dataOut.writeInt((int)utfCount);
text.getChars(0, strlen, charr, 0);
for (int i = 0; i < strlen; i++) {
c = charr[i];
if ((c >= 0x0001) && (c <= 0x007F)) {
utflen++;
} else if (c > 0x07FF) {
utflen += 3;
} else {
utflen += 2;
}
}
// TODO diff: Sun code - removed
byte[] bytearr = new byte[utflen + 4]; // TODO diff: Sun code
bytearr[count++] = (byte)((utflen >>> 24) & 0xFF); // TODO diff:
// Sun code
bytearr[count++] = (byte)((utflen >>> 16) & 0xFF); // TODO diff:
// Sun code
bytearr[count++] = (byte)((utflen >>> 8) & 0xFF);
bytearr[count++] = (byte)((utflen >>> 0) & 0xFF);
for (int i = 0; i < strlen; i++) {
c = charr[i];
if ((c >= 0x0001) && (c <= 0x007F)) {
bytearr[count++] = (byte)c;
} else if (c > 0x07FF) {
bytearr[count++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
bytearr[count++] = (byte)(0x80 | ((c >> 6) & 0x3F));
bytearr[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
} else {
bytearr[count++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
bytearr[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
}
}
dataOut.write(bytearr);
byte[] buffer = new byte[(int)utfCount];
int len = writeUTFBytesToBuffer(text, (int) utfCount, buffer, 0);
dataOut.write(buffer, 0, len);
assert utfCount==len;
} else {
dataOut.writeInt(-1);
}
}
public static String readUTF8(DataInput dataIn) throws IOException {
int utflen = dataIn.readInt(); // TODO diff: Sun code
if (utflen > -1) {
StringBuffer str = new StringBuffer(utflen);
byte bytearr[] = new byte[utflen];
int c;
int char2;
int char3;
int count = 0;
dataIn.readFully(bytearr, 0, utflen);
while (count < utflen) {
c = bytearr[count] & 0xff;
switch (c >> 4) {
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
/* 0xxxxxxx */
count++;
str.append((char)c);
break;
case 12:
case 13:
/* 110x xxxx 10xx xxxx */
count += 2;
if (count > utflen) {
throw new UTFDataFormatException();
}
char2 = bytearr[count - 1];
if ((char2 & 0xC0) != 0x80) {
throw new UTFDataFormatException();
}
str.append((char)(((c & 0x1F) << 6) | (char2 & 0x3F)));
break;
case 14:
/* 1110 xxxx 10xx xxxx 10xx xxxx */
count += 3;
if (count > utflen) {
throw new UTFDataFormatException();
}
char2 = bytearr[count - 2]; // TODO diff: Sun code
char3 = bytearr[count - 1]; // TODO diff: Sun code
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
throw new UTFDataFormatException();
}
str.append((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)));
break;
default:
/* 10xx xxxx, 1111 xxxx */
throw new UTFDataFormatException();
}
/**
* From: http://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/classlib/modules/luni/src/main/java/java/io/DataOutputStream.java
*/
public static long countUTFBytes(String str) {
int utfCount = 0, length = str.length();
for (int i = 0; i < length; i++) {
int charValue = str.charAt(i);
if (charValue > 0 && charValue <= 127) {
utfCount++;
} else if (charValue <= 2047) {
utfCount += 2;
} else {
utfCount += 3;
}
// The number of chars produced may be less than utflen
return new String(str);
}
return utfCount;
}
/**
 * Encodes every UTF-16 code unit of {@code str} into {@code buffer}, starting at
 * {@code offset}, using one, two or three bytes per code unit:
 * <ul>
 * <li>{@code U+0001..U+007F} - one byte;</li>
 * <li>{@code U+0000} and {@code U+0080..U+07FF} - two bytes;</li>
 * <li>everything else (surrogate code units included) - three bytes.</li>
 * </ul>
 * No capacity check is performed here; the caller must size {@code buffer}
 * beforehand (see {@code countUTFBytes}), otherwise the array store fails with an
 * {@link ArrayIndexOutOfBoundsException}.
 * <p>
 * From: http://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/classlib/modules/luni/src/main/java/java/io/DataOutputStream.java
 *
 * @param str    the text to encode
 * @param count  not read by this method; kept for signature compatibility
 * @param buffer destination array
 * @param offset index in {@code buffer} of the first byte to write
 * @return the index just past the last byte written, i.e. {@code offset} plus the
 *         number of bytes produced (equal to the byte count only when
 *         {@code offset} is 0)
 * @throws IOException declared for callers; nothing in this body throws it
 */
public static int writeUTFBytesToBuffer(String str, long count,
                                        byte[] buffer, int offset) throws IOException {
    final int charCount = str.length();
    int cursor = offset;
    int index = 0;
    while (index < charCount) {
        final int unit = str.charAt(index);
        index++;
        if (unit > 0x07FF) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            buffer[cursor++] = (byte) (0xE0 | ((unit >> 12) & 0x0F));
            buffer[cursor++] = (byte) (0x80 | ((unit >> 6) & 0x3F));
            buffer[cursor++] = (byte) (0x80 | (unit & 0x3F));
        } else if (unit == 0 || unit > 0x007F) {
            // 110xxxxx 10xxxxxx - also used for U+0000 so no zero byte is emitted
            buffer[cursor++] = (byte) (0xC0 | ((unit >> 6) & 0x1F));
            buffer[cursor++] = (byte) (0x80 | (unit & 0x3F));
        } else {
            // 0xxxxxxx
            buffer[cursor++] = (byte) unit;
        }
    }
    return cursor;
}
/**
 * Reads a string stored as a 4-byte length followed by that many encoded bytes.
 * A negative length denotes {@code null} (the counterpart {@code writeUTF8}
 * writes {@code -1} for a {@code null} string).
 *
 * @param dataIn source to read the length prefix and payload from
 * @return the decoded string, or {@code null} when the length prefix is negative
 * @throws IOException if reading fails or the payload cannot be decoded
 */
public static String readUTF8(DataInput dataIn) throws IOException {
    final int encodedLength = dataIn.readInt();
    if (encodedLength < 0) {
        return null;
    }
    final byte[] encoded = new byte[encodedLength];
    // Decoding never yields more chars than input bytes, so this is large enough.
    final char[] decoded = new char[encodedLength];
    dataIn.readFully(encoded, 0, encodedLength);
    return convertUTF8WithBuf(encoded, decoded, 0, encodedLength);
}
/**
 * Decodes {@code utfSize} bytes of {@code buf}, starting at {@code offset}, into
 * chars using {@code out} as scratch space, and returns them as a string.
 * Accepted sequences are one byte ({@code 0xxxxxxx}), two bytes
 * ({@code 110xxxxx 10xxxxxx}) and three bytes
 * ({@code 1110xxxx 10xxxxxx 10xxxxxx}); any other lead byte, a bad continuation
 * byte, or a sequence cut off by {@code utfSize} is rejected.
 * <p>
 * From: http://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/classlib/modules/luni/src/main/java/org/apache/harmony/luni/util/Util.java
 *
 * @param buf     array holding the encoded bytes
 * @param out     work array receiving the decoded chars; needs room for at least
 *                one char per decoded sequence ({@code utfSize} is always enough)
 * @param offset  index in {@code buf} of the first encoded byte
 * @param utfSize number of encoded bytes to consume
 * @return a new string built from the decoded chars
 * @throws UTFDataFormatException if the bytes are not a valid sequence as
 *                                described above
 */
public static String convertUTF8WithBuf(byte[] buf, char[] out, int offset,
                                        int utfSize) throws UTFDataFormatException {
    int consumed = 0;
    int produced = 0;
    while (consumed < utfSize) {
        final byte lead = buf[offset + consumed];
        consumed++;
        // Park the raw (sign-extended) byte in the output slot; multi-byte
        // sequences overwrite it below with the decoded char.
        out[produced] = (char) lead;
        if (lead >= 0) {
            // 0xxxxxxx - the slot already holds the final char.
            produced++;
            continue;
        }
        switch ((lead >> 4) & 0x0F) {
            case 0xC:
            case 0xD: {
                // 110xxxxx 10xxxxxx
                if (consumed >= utfSize) {
                    throw new UTFDataFormatException();
                }
                final int second = buf[offset + consumed];
                consumed++;
                if ((second & 0xC0) != 0x80) {
                    throw new UTFDataFormatException();
                }
                out[produced] = (char) (((lead & 0x1F) << 6) | (second & 0x3F));
                produced++;
                break;
            }
            case 0xE: {
                // 1110xxxx 10xxxxxx 10xxxxxx
                if (consumed + 1 >= utfSize) {
                    throw new UTFDataFormatException();
                }
                final int second = buf[offset + consumed];
                consumed++;
                final int third = buf[offset + consumed];
                consumed++;
                if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
                    throw new UTFDataFormatException();
                }
                out[produced] = (char) (((lead & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F));
                produced++;
                break;
            }
            default:
                // 10xxxxxx (stray continuation) or 1111xxxx (unsupported lead)
                throw new UTFDataFormatException();
        }
    }
    return new String(out, 0, produced);
}
public static String propertiesToString(Properties props) throws IOException {
String result = "";
if (props != null) {

View File

@ -22,6 +22,7 @@ import java.io.InputStream;
import java.io.UTFDataFormatException;
import org.apache.activemq.util.ByteSequence;
import org.apache.activemq.util.MarshallingSupport;
/**
* Optimized ByteArrayInputStream that can be used more than once
@ -286,36 +287,13 @@ public final class DataByteArrayInputStream extends InputStream implements DataI
@Override
public String readUTF() throws IOException {
int length = readUnsignedShort();
int endPos = pos + length;
int count = 0, a;
char[] characters = new char[length];
while (pos < endPos) {
if ((characters[count] = (char) buf[pos++]) < '\u0080')
count++;
else if (((a = characters[count]) & 0xE0) == 0xC0) {
if (pos >= endPos) {
throw new UTFDataFormatException("bad string");
}
int b = buf[pos++];
if ((b & 0xC0) != 0x80) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
} else if ((a & 0xf0) == 0xe0) {
if (pos + 1 >= endPos) {
throw new UTFDataFormatException("bad string");
}
int b = buf[pos++];
int c = buf[pos++];
if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
} else {
throw new UTFDataFormatException("bad string");
}
if (pos + length > buf.length) {
throw new UTFDataFormatException("bad string");
}
return new String(characters, 0, count);
char chararr[] = new char[length];
String result = MarshallingSupport.convertUTF8WithBuf(buf, chararr, pos, length);
pos += length;
return result;
}
public int getPos() {

View File

@ -23,6 +23,7 @@ import java.io.UTFDataFormatException;
import org.apache.activemq.store.kahadb.disk.page.PageFile;
import org.apache.activemq.util.ByteSequence;
import org.apache.activemq.util.MarshallingSupport;
/**
* Optimized ByteArrayOutputStream
@ -228,38 +229,17 @@ public class DataByteArrayOutputStream extends OutputStream implements DataOutpu
}
@Override
public void writeUTF(String str) throws IOException {
int strlen = str.length();
int encodedsize = 0;
int c;
for (int i = 0; i < strlen; i++) {
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
encodedsize++;
} else if (c > 0x07FF) {
encodedsize += 3;
} else {
encodedsize += 2;
}
}
public void writeUTF(String text) throws IOException {
long encodedsize = MarshallingSupport.countUTFBytes(text);
if (encodedsize > 65535) {
throw new UTFDataFormatException("encoded string too long: " + encodedsize + " bytes");
}
ensureEnoughBuffer(pos + encodedsize + 2);
writeShort(encodedsize);
for (int i = 0; i < strlen; i++) {
int charValue = str.charAt(i);
if (charValue > 0 && charValue <= 127) {
buf[pos++] = (byte) charValue;
} else if (charValue <= 2047) {
buf[pos++] = (byte) (0xc0 | (0x1f & (charValue >> 6)));
buf[pos++] = (byte) (0x80 | (0x3f & charValue));
} else {
buf[pos++] = (byte) (0xe0 | (0x0f & (charValue >> 12)));
buf[pos++] = (byte) (0x80 | (0x3f & (charValue >> 6)));
buf[pos++] = (byte) (0x80 | (0x3f & charValue));
}
}
ensureEnoughBuffer((int)(pos + encodedsize + 2));
writeShort((int)encodedsize);
byte[] buffer = new byte[(int)encodedsize];
MarshallingSupport.writeUTFBytesToBuffer(text, (int) encodedsize, buf, pos);
pos += encodedsize;
onWrite();
}