Add new implementations of the writeUTF8 and readUTF8 methods that are based on Apache Harmony code. This also avoids some code duplication that was occurring.

(cherry picked from commit 45a1217228bc6fa9ccda49853156a5f71fe2c956)
This commit is contained in:
Hiram Chirino 2017-04-06 11:55:08 -04:00 committed by Dejan Bosanac
parent 6fe437a3d6
commit 32a4d74496
5 changed files with 113 additions and 249 deletions

View File

@ -259,62 +259,12 @@ public final class DataByteArrayInputStream extends InputStream implements DataI
public String readUTF() throws IOException {
int length = readUnsignedShort();
char[] characters = new char[length];
int c;
int c2;
int c3;
int count = 0;
int total = pos + length;
while (pos < total) {
c = (int)buf[pos] & 0xff;
if (c > 127) {
break;
}
pos++;
characters[count++] = (char)c;
if (pos + length > buf.length) {
throw new UTFDataFormatException("bad string");
}
while (pos < total) {
c = (int)buf[pos] & 0xff;
switch (c >> 4) {
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
pos++;
characters[count++] = (char)c;
break;
case 12:
case 13:
pos += 2;
if (pos > total) {
throw new UTFDataFormatException("bad string");
}
c2 = (int)buf[pos - 1];
if ((c2 & 0xC0) != 0x80) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char)(((c & 0x1F) << 6) | (c2 & 0x3F));
break;
case 14:
pos += 3;
if (pos > total) {
throw new UTFDataFormatException("bad string");
}
c2 = (int)buf[pos - 2];
c3 = (int)buf[pos - 1];
if (((c2 & 0xC0) != 0x80) || ((c3 & 0xC0) != 0x80)) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char)(((c & 0x0F) << 12) | ((c2 & 0x3F) << 6) | ((c3 & 0x3F) << 0));
break;
default:
throw new UTFDataFormatException("bad string");
}
}
return new String(characters, 0, count);
char chararr[] = new char[length];
String result = MarshallingSupport.convertUTF8WithBuf(buf, chararr, pos, length);
pos += length;
return result;
}
}

View File

@ -202,46 +202,17 @@ public final class DataByteArrayOutputStream extends OutputStream implements Dat
}
}
public void writeUTF(String str) throws IOException {
int strlen = str.length();
int encodedsize = 0;
int c;
for (int i = 0; i < strlen; i++) {
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
encodedsize++;
} else if (c > 0x07FF) {
encodedsize += 3;
} else {
encodedsize += 2;
}
}
public void writeUTF(String text) throws IOException {
long encodedsize = MarshallingSupport.countUTFBytes(text);
if (encodedsize > 65535) {
throw new UTFDataFormatException("encoded string too long: " + encodedsize + " bytes");
}
ensureEnoughBuffer(pos + encodedsize + 2);
writeShort(encodedsize);
int i = 0;
for (i = 0; i < strlen; i++) {
c = str.charAt(i);
if (!((c >= 0x0001) && (c <= 0x007F))) {
break;
}
buf[pos++] = (byte)c;
}
for (; i < strlen; i++) {
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
buf[pos++] = (byte)c;
} else if (c > 0x07FF) {
buf[pos++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
buf[pos++] = (byte)(0x80 | ((c >> 6) & 0x3F));
buf[pos++] = (byte)(0x80 | ((c >> 0) & 0x3F));
} else {
buf[pos++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
buf[pos++] = (byte)(0x80 | ((c >> 0) & 0x3F));
}
}
ensureEnoughBuffer((int)(pos + encodedsize + 2));
writeShort((int)encodedsize);
byte[] buffer = new byte[(int)encodedsize];
MarshallingSupport.writeUTFBytesToBuffer(text, (int) encodedsize, buf, pos);
pos += encodedsize;
}
private void ensureEnoughBuffer(int newcount) {

View File

@ -296,117 +296,102 @@ public final class MarshallingSupport {
public static void writeUTF8(DataOutput dataOut, String text) throws IOException {
if (text != null) {
int strlen = text.length();
int utflen = 0;
char[] charr = new char[strlen];
int c = 0;
int count = 0;
long utfCount = countUTFBytes(text);
dataOut.writeInt((int)utfCount);
text.getChars(0, strlen, charr, 0);
for (int i = 0; i < strlen; i++) {
c = charr[i];
if ((c >= 0x0001) && (c <= 0x007F)) {
utflen++;
} else if (c > 0x07FF) {
utflen += 3;
} else {
utflen += 2;
}
}
// TODO diff: Sun code - removed
byte[] bytearr = new byte[utflen + 4]; // TODO diff: Sun code
bytearr[count++] = (byte)((utflen >>> 24) & 0xFF); // TODO diff:
// Sun code
bytearr[count++] = (byte)((utflen >>> 16) & 0xFF); // TODO diff:
// Sun code
bytearr[count++] = (byte)((utflen >>> 8) & 0xFF);
bytearr[count++] = (byte)((utflen >>> 0) & 0xFF);
for (int i = 0; i < strlen; i++) {
c = charr[i];
if ((c >= 0x0001) && (c <= 0x007F)) {
bytearr[count++] = (byte)c;
} else if (c > 0x07FF) {
bytearr[count++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
bytearr[count++] = (byte)(0x80 | ((c >> 6) & 0x3F));
bytearr[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
} else {
bytearr[count++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
bytearr[count++] = (byte)(0x80 | ((c >> 0) & 0x3F));
}
}
dataOut.write(bytearr);
byte[] buffer = new byte[(int)utfCount];
int len = writeUTFBytesToBuffer(text, (int) utfCount, buffer, 0);
dataOut.write(buffer, 0, len);
assert utfCount==len;
} else {
dataOut.writeInt(-1);
}
}
public static String readUTF8(DataInput dataIn) throws IOException {
int utflen = dataIn.readInt(); // TODO diff: Sun code
if (utflen > -1) {
StringBuffer str = new StringBuffer(utflen);
byte bytearr[] = new byte[utflen];
int c;
int char2;
int char3;
int count = 0;
dataIn.readFully(bytearr, 0, utflen);
while (count < utflen) {
c = bytearr[count] & 0xff;
switch (c >> 4) {
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
/* 0xxxxxxx */
count++;
str.append((char)c);
break;
case 12:
case 13:
/* 110x xxxx 10xx xxxx */
count += 2;
if (count > utflen) {
throw new UTFDataFormatException();
}
char2 = bytearr[count - 1];
if ((char2 & 0xC0) != 0x80) {
throw new UTFDataFormatException();
}
str.append((char)(((c & 0x1F) << 6) | (char2 & 0x3F)));
break;
case 14:
/* 1110 xxxx 10xx xxxx 10xx xxxx */
count += 3;
if (count > utflen) {
throw new UTFDataFormatException();
}
char2 = bytearr[count - 2]; // TODO diff: Sun code
char3 = bytearr[count - 1]; // TODO diff: Sun code
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
throw new UTFDataFormatException();
}
str.append((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)));
break;
default:
/* 10xx xxxx, 1111 xxxx */
throw new UTFDataFormatException();
}
/**
* From: http://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/classlib/modules/luni/src/main/java/java/io/DataOutputStream.java
*/
public static long countUTFBytes(String str) {
int utfCount = 0, length = str.length();
for (int i = 0; i < length; i++) {
int charValue = str.charAt(i);
if (charValue > 0 && charValue <= 127) {
utfCount++;
} else if (charValue <= 2047) {
utfCount += 2;
} else {
utfCount += 3;
}
// The number of chars produced may be less than utflen
return new String(str);
}
return utfCount;
}
/**
 * Encodes every char of {@code str} into {@code buffer}, starting at {@code offset}, using
 * one, two or three bytes per char.
 * <p>
 * From: http://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/classlib/modules/luni/src/main/java/java/io/DataOutputStream.java
 *
 * @param str    the text to encode
 * @param count  not read by this method; kept so the signature stays unchanged
 * @param buffer destination array; no capacity check is made here, so it must already be
 *               large enough to hold the encoded form
 * @param offset index in {@code buffer} at which the first byte is stored
 * @return the index just past the last byte written (i.e. {@code offset} plus the number of
 *         bytes produced), not the byte count itself
 * @throws IOException declared by the signature; nothing in this body raises it
 */
public static int writeUTFBytesToBuffer(String str, long count,
        byte[] buffer, int offset) throws IOException {
    int cursor = offset;
    for (char ch : str.toCharArray()) {
        if (ch > 2047) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            buffer[cursor++] = (byte) (0xe0 | ((ch >> 12) & 0x0f));
            buffer[cursor++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
            buffer[cursor++] = (byte) (0x80 | (ch & 0x3f));
        } else if (ch == 0 || ch > 127) {
            // 110xxxxx 10xxxxxx -- the NUL char also takes this two-byte form
            buffer[cursor++] = (byte) (0xc0 | ((ch >> 6) & 0x1f));
            buffer[cursor++] = (byte) (0x80 | (ch & 0x3f));
        } else {
            // 0xxxxxxx
            buffer[cursor++] = (byte) ch;
        }
    }
    return cursor;
}
/**
 * Reads a length-prefixed string from {@code dataIn}: a 4-byte int giving the number of
 * encoded bytes, followed by that many bytes which are decoded by
 * {@code convertUTF8WithBuf}.
 *
 * @param dataIn the source to read from
 * @return the decoded string, or {@code null} when the length prefix is negative
 * @throws IOException if reading from {@code dataIn} fails or the bytes cannot be decoded
 */
public static String readUTF8(DataInput dataIn) throws IOException {
    final int encodedLength = dataIn.readInt();
    if (encodedLength < 0) {
        // A negative length prefix stands for "no string".
        return null;
    }
    final byte[] encoded = new byte[encodedLength];
    // One char per encoded byte is the most the decoder can produce.
    final char[] decoded = new char[encodedLength];
    dataIn.readFully(encoded, 0, encodedLength);
    return convertUTF8WithBuf(encoded, decoded, 0, encodedLength);
}
/**
 * Decodes {@code utfSize} bytes of {@code buf}, beginning at {@code offset}, into a string.
 * One-, two- and three-byte sequences are accepted; any other lead byte is rejected.
 * <p>
 * From: http://svn.apache.org/repos/asf/harmony/enhanced/java/trunk/classlib/modules/luni/src/main/java/org/apache/harmony/luni/util/Util.java
 *
 * @param buf     array holding the encoded bytes
 * @param out     scratch array that receives the decoded chars; it is written to directly and
 *                needs one slot per decoded char (never more than {@code utfSize})
 * @param offset  index in {@code buf} of the first encoded byte
 * @param utfSize number of encoded bytes to consume
 * @return a new string built from the chars decoded into {@code out}
 * @throws UTFDataFormatException if a lead byte does not start a one-, two- or three-byte
 *         sequence, if a sequence would run past {@code utfSize}, or if a continuation byte
 *         does not have the form {@code 10xxxxxx}
 */
public static String convertUTF8WithBuf(byte[] buf, char[] out, int offset,
        int utfSize) throws UTFDataFormatException {
    int consumed = 0;
    int produced = 0;
    while (consumed < utfSize) {
        final int lead = buf[offset + consumed++];
        // Store the raw byte first; a multi-byte sequence overwrites this slot once decoded.
        out[produced] = (char) lead;
        if (lead >= 0) {
            // 0xxxxxxx: the byte is the char.
            produced++;
            continue;
        }
        switch ((lead >> 4) & 0x0F) {
            case 0xC:
            case 0xD: {
                // 110xxxxx 10xxxxxx
                if (consumed >= utfSize) {
                    throw new UTFDataFormatException();
                }
                final int second = buf[offset + consumed++];
                if ((second & 0xC0) != 0x80) {
                    throw new UTFDataFormatException();
                }
                out[produced++] = (char) (((lead & 0x1F) << 6) | (second & 0x3F));
                break;
            }
            case 0xE: {
                // 1110xxxx 10xxxxxx 10xxxxxx
                if (consumed + 1 >= utfSize) {
                    throw new UTFDataFormatException();
                }
                final int second = buf[offset + consumed++];
                final int third = buf[offset + consumed++];
                if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
                    throw new UTFDataFormatException();
                }
                out[produced++] =
                        (char) (((lead & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F));
                break;
            }
            default:
                // 10xxxxxx or 1111xxxx cannot start a sequence here.
                throw new UTFDataFormatException();
        }
    }
    return new String(out, 0, produced);
}
public static String propertiesToString(Properties props) throws IOException {
String result = "";
if (props != null) {

View File

@ -22,6 +22,7 @@ import java.io.InputStream;
import java.io.UTFDataFormatException;
import org.apache.activemq.util.ByteSequence;
import org.apache.activemq.util.MarshallingSupport;
/**
* Optimized ByteArrayInputStream that can be used more than once
@ -286,36 +287,13 @@ public final class DataByteArrayInputStream extends InputStream implements DataI
@Override
public String readUTF() throws IOException {
int length = readUnsignedShort();
int endPos = pos + length;
int count = 0, a;
char[] characters = new char[length];
while (pos < endPos) {
if ((characters[count] = (char) buf[pos++]) < '\u0080')
count++;
else if (((a = characters[count]) & 0xE0) == 0xC0) {
if (pos >= endPos) {
throw new UTFDataFormatException("bad string");
}
int b = buf[pos++];
if ((b & 0xC0) != 0x80) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
} else if ((a & 0xf0) == 0xe0) {
if (pos + 1 >= endPos) {
throw new UTFDataFormatException("bad string");
}
int b = buf[pos++];
int c = buf[pos++];
if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
throw new UTFDataFormatException("bad string");
}
characters[count++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
} else {
throw new UTFDataFormatException("bad string");
}
if (pos + length > buf.length) {
throw new UTFDataFormatException("bad string");
}
return new String(characters, 0, count);
char chararr[] = new char[length];
String result = MarshallingSupport.convertUTF8WithBuf(buf, chararr, pos, length);
pos += length;
return result;
}
public int getPos() {

View File

@ -23,6 +23,7 @@ import java.io.UTFDataFormatException;
import org.apache.activemq.store.kahadb.disk.page.PageFile;
import org.apache.activemq.util.ByteSequence;
import org.apache.activemq.util.MarshallingSupport;
/**
* Optimized ByteArrayOutputStream
@ -228,38 +229,17 @@ public class DataByteArrayOutputStream extends OutputStream implements DataOutpu
}
@Override
public void writeUTF(String str) throws IOException {
int strlen = str.length();
int encodedsize = 0;
int c;
for (int i = 0; i < strlen; i++) {
c = str.charAt(i);
if ((c >= 0x0001) && (c <= 0x007F)) {
encodedsize++;
} else if (c > 0x07FF) {
encodedsize += 3;
} else {
encodedsize += 2;
}
}
public void writeUTF(String text) throws IOException {
long encodedsize = MarshallingSupport.countUTFBytes(text);
if (encodedsize > 65535) {
throw new UTFDataFormatException("encoded string too long: " + encodedsize + " bytes");
}
ensureEnoughBuffer(pos + encodedsize + 2);
writeShort(encodedsize);
for (int i = 0; i < strlen; i++) {
int charValue = str.charAt(i);
if (charValue > 0 && charValue <= 127) {
buf[pos++] = (byte) charValue;
} else if (charValue <= 2047) {
buf[pos++] = (byte) (0xc0 | (0x1f & (charValue >> 6)));
buf[pos++] = (byte) (0x80 | (0x3f & charValue));
} else {
buf[pos++] = (byte) (0xe0 | (0x0f & (charValue >> 12)));
buf[pos++] = (byte) (0x80 | (0x3f & (charValue >> 6)));
buf[pos++] = (byte) (0x80 | (0x3f & charValue));
}
}
ensureEnoughBuffer((int)(pos + encodedsize + 2));
writeShort((int)encodedsize);
byte[] buffer = new byte[(int)encodedsize];
MarshallingSupport.writeUTFBytesToBuffer(text, (int) encodedsize, buf, pos);
pos += encodedsize;
onWrite();
}