mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-09 14:34:43 +00:00
Improve serialization (stream) of UTF strings, note, requires flush when upgrading, closes #1595.
This commit is contained in:
parent
ef9c96faa6
commit
7966716673
@ -23,7 +23,6 @@ import org.elasticsearch.common.BytesHolder;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.io.UTFDataFormatException;
|
||||
|
||||
/**
|
||||
*
|
||||
@ -135,82 +134,4 @@ public class BytesStreamInput extends StreamInput {
|
||||
public void close() throws IOException {
|
||||
// nothing to do here...
|
||||
}
|
||||
|
||||
public String readUTF() throws IOException {
|
||||
int utflen = readInt();
|
||||
if (utflen == 0) {
|
||||
return "";
|
||||
}
|
||||
if (chararr.length < utflen) {
|
||||
chararr = new char[utflen * 2];
|
||||
}
|
||||
char[] chararr = this.chararr;
|
||||
byte[] bytearr = buf;
|
||||
int endPos = pos + utflen;
|
||||
|
||||
int c, char2, char3;
|
||||
int count = pos;
|
||||
int chararr_count = 0;
|
||||
|
||||
while (count < endPos) {
|
||||
c = (int) bytearr[count] & 0xff;
|
||||
if (c > 127) break;
|
||||
count++;
|
||||
chararr[chararr_count++] = (char) c;
|
||||
}
|
||||
|
||||
while (count < endPos) {
|
||||
c = (int) bytearr[count] & 0xff;
|
||||
switch (c >> 4) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
/* 0xxxxxxx*/
|
||||
count++;
|
||||
chararr[chararr_count++] = (char) c;
|
||||
break;
|
||||
case 12:
|
||||
case 13:
|
||||
/* 110x xxxx 10xx xxxx*/
|
||||
count += 2;
|
||||
if (count > endPos)
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input: partial character at end");
|
||||
char2 = (int) bytearr[count - 1];
|
||||
if ((char2 & 0xC0) != 0x80)
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input around byte " + count);
|
||||
chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
|
||||
(char2 & 0x3F));
|
||||
break;
|
||||
case 14:
|
||||
/* 1110 xxxx 10xx xxxx 10xx xxxx */
|
||||
count += 3;
|
||||
if (count > endPos)
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input: partial character at end");
|
||||
char2 = (int) bytearr[count - 2];
|
||||
char3 = (int) bytearr[count - 1];
|
||||
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input around byte " + (count - 1));
|
||||
chararr[chararr_count++] = (char) (((c & 0x0F) << 12) |
|
||||
((char2 & 0x3F) << 6) |
|
||||
((char3 & 0x3F) << 0));
|
||||
break;
|
||||
default:
|
||||
/* 10xx xxxx, 1111 xxxx */
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input around byte " + count);
|
||||
}
|
||||
}
|
||||
pos += utflen;
|
||||
// The number of chars produced may be less than utflen
|
||||
return new String(chararr, 0, chararr_count);
|
||||
}
|
||||
}
|
||||
|
@ -118,62 +118,4 @@ public class BytesStreamOutput extends StreamOutput implements BytesStream {
|
||||
public int size() {
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Writes a string.
|
||||
*/
|
||||
// Override here since we can work on the byte array directly!
|
||||
public void writeUTF(String str) throws IOException {
|
||||
int strlen = str.length();
|
||||
int utflen = 0;
|
||||
int c = 0;
|
||||
|
||||
/* use charAt instead of copying String to char array */
|
||||
for (int i = 0; i < strlen; i++) {
|
||||
c = str.charAt(i);
|
||||
if ((c >= 0x0001) && (c <= 0x007F)) {
|
||||
utflen++;
|
||||
} else if (c > 0x07FF) {
|
||||
utflen += 3;
|
||||
} else {
|
||||
utflen += 2;
|
||||
}
|
||||
}
|
||||
|
||||
int newcount = count + utflen + 4;
|
||||
if (newcount > buf.length) {
|
||||
buf = Arrays.copyOf(buf, Math.max(buf.length << 1, newcount));
|
||||
}
|
||||
|
||||
byte[] bytearr = this.buf;
|
||||
|
||||
// same as writeInt
|
||||
bytearr[count++] = (byte) (utflen >> 24);
|
||||
bytearr[count++] = (byte) (utflen >> 16);
|
||||
bytearr[count++] = (byte) (utflen >> 8);
|
||||
bytearr[count++] = (byte) (utflen);
|
||||
|
||||
int i = 0;
|
||||
for (i = 0; i < strlen; i++) {
|
||||
c = str.charAt(i);
|
||||
if (!((c >= 0x0001) && (c <= 0x007F))) break;
|
||||
bytearr[count++] = (byte) c;
|
||||
}
|
||||
|
||||
for (; i < strlen; i++) {
|
||||
c = str.charAt(i);
|
||||
if ((c >= 0x0001) && (c <= 0x007F)) {
|
||||
bytearr[count++] = (byte) c;
|
||||
|
||||
} else if (c > 0x07FF) {
|
||||
bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
} else {
|
||||
bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ import java.lang.ref.SoftReference;
|
||||
public class CachedStreamInput {
|
||||
|
||||
static class Entry {
|
||||
char[] chars = new char[80];
|
||||
final HandlesStreamInput handles;
|
||||
final LZFStreamInput lzf;
|
||||
|
||||
@ -73,4 +74,12 @@ public class CachedStreamInput {
|
||||
entry.handles.reset(entry.lzf);
|
||||
return entry.handles;
|
||||
}
|
||||
|
||||
public static char[] getCharArray(int size) {
|
||||
Entry entry = instance();
|
||||
if (entry.chars.length < size) {
|
||||
entry.chars = new char[size];
|
||||
}
|
||||
return entry.chars;
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,6 @@ import org.elasticsearch.common.Nullable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UTFDataFormatException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
@ -32,12 +31,6 @@ import java.util.*;
|
||||
*/
|
||||
public abstract class StreamInput extends InputStream {
|
||||
|
||||
/**
|
||||
* working arrays initialized on demand by readUTF
|
||||
*/
|
||||
private byte bytearr[] = new byte[80];
|
||||
protected char chararr[] = new char[80];
|
||||
|
||||
/**
|
||||
* Reads and returns a single byte.
|
||||
*/
|
||||
@ -154,35 +147,12 @@ public abstract class StreamInput extends InputStream {
|
||||
return i | ((b & 0x7FL) << 56);
|
||||
}
|
||||
|
||||
// COPIED from DataInputStream
|
||||
|
||||
public String readUTF() throws IOException {
|
||||
int utflen = readInt();
|
||||
if (utflen == 0) {
|
||||
return "";
|
||||
}
|
||||
if (bytearr.length < utflen) {
|
||||
bytearr = new byte[utflen * 2];
|
||||
chararr = new char[utflen * 2];
|
||||
}
|
||||
char[] chararr = this.chararr;
|
||||
byte[] bytearr = this.bytearr;
|
||||
|
||||
int c, char2, char3;
|
||||
int count = 0;
|
||||
int chararr_count = 0;
|
||||
|
||||
readBytes(bytearr, 0, utflen);
|
||||
|
||||
while (count < utflen) {
|
||||
c = (int) bytearr[count] & 0xff;
|
||||
if (c > 127) break;
|
||||
count++;
|
||||
chararr[chararr_count++] = (char) c;
|
||||
}
|
||||
|
||||
while (count < utflen) {
|
||||
c = (int) bytearr[count] & 0xff;
|
||||
int charCount = readVInt();
|
||||
char[] chars = CachedStreamInput.getCharArray(charCount);
|
||||
int c, charIndex = 0;
|
||||
while (charIndex < charCount) {
|
||||
c = readByte() & 0xff;
|
||||
switch (c >> 4) {
|
||||
case 0:
|
||||
case 1:
|
||||
@ -192,47 +162,18 @@ public abstract class StreamInput extends InputStream {
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
/* 0xxxxxxx*/
|
||||
count++;
|
||||
chararr[chararr_count++] = (char) c;
|
||||
chars[charIndex++] = (char) c;
|
||||
break;
|
||||
case 12:
|
||||
case 13:
|
||||
/* 110x xxxx 10xx xxxx*/
|
||||
count += 2;
|
||||
if (count > utflen)
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input: partial character at end");
|
||||
char2 = (int) bytearr[count - 1];
|
||||
if ((char2 & 0xC0) != 0x80)
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input around byte " + count);
|
||||
chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
|
||||
(char2 & 0x3F));
|
||||
chars[charIndex++] = (char) ((c & 0x1F) << 6 | readByte() & 0x3F);
|
||||
break;
|
||||
case 14:
|
||||
/* 1110 xxxx 10xx xxxx 10xx xxxx */
|
||||
count += 3;
|
||||
if (count > utflen)
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input: partial character at end");
|
||||
char2 = (int) bytearr[count - 2];
|
||||
char3 = (int) bytearr[count - 1];
|
||||
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input around byte " + (count - 1));
|
||||
chararr[chararr_count++] = (char) (((c & 0x0F) << 12) |
|
||||
((char2 & 0x3F) << 6) |
|
||||
((char3 & 0x3F) << 0));
|
||||
chars[charIndex++] = (char) ((c & 0x0F) << 12 | (readByte() & 0x3F) << 6 | (readByte() & 0x3F) << 0);
|
||||
break;
|
||||
default:
|
||||
/* 10xx xxxx, 1111 xxxx */
|
||||
throw new UTFDataFormatException(
|
||||
"malformed input around byte " + count);
|
||||
}
|
||||
}
|
||||
// The number of chars produced may be less than utflen
|
||||
return new String(chararr, 0, chararr_count);
|
||||
return new String(chars, 0, charCount);
|
||||
}
|
||||
|
||||
|
||||
|
@ -33,11 +33,6 @@ import java.util.Map;
|
||||
*/
|
||||
public abstract class StreamOutput extends OutputStream {
|
||||
|
||||
/**
|
||||
* bytearr is initialized on demand by writeUTF
|
||||
*/
|
||||
private byte[] bytearr = null;
|
||||
|
||||
/**
|
||||
* Writes a single byte.
|
||||
*/
|
||||
@ -138,55 +133,22 @@ public abstract class StreamOutput extends OutputStream {
|
||||
* Writes a string.
|
||||
*/
|
||||
public void writeUTF(String str) throws IOException {
|
||||
int strlen = str.length();
|
||||
int utflen = 0;
|
||||
int c, count = 0;
|
||||
|
||||
/* use charAt instead of copying String to char array */
|
||||
for (int i = 0; i < strlen; i++) {
|
||||
int charCount = str.length();
|
||||
writeVInt(charCount);
|
||||
int c;
|
||||
for (int i = 0; i < charCount; i++) {
|
||||
c = str.charAt(i);
|
||||
if ((c >= 0x0001) && (c <= 0x007F)) {
|
||||
utflen++;
|
||||
if (c <= 0x007F) {
|
||||
writeByte((byte) c);
|
||||
} else if (c > 0x07FF) {
|
||||
utflen += 3;
|
||||
writeByte((byte) (0xE0 | c >> 12 & 0x0F));
|
||||
writeByte((byte) (0x80 | c >> 6 & 0x3F));
|
||||
writeByte((byte) (0x80 | c >> 0 & 0x3F));
|
||||
} else {
|
||||
utflen += 2;
|
||||
writeByte((byte) (0xC0 | c >> 6 & 0x1F));
|
||||
writeByte((byte) (0x80 | c >> 0 & 0x3F));
|
||||
}
|
||||
}
|
||||
|
||||
if (this.bytearr == null || (this.bytearr.length < (utflen + 4)))
|
||||
this.bytearr = new byte[(utflen * 2) + 4];
|
||||
byte[] bytearr = this.bytearr;
|
||||
|
||||
// same as writeInt
|
||||
bytearr[count++] = (byte) (utflen >> 24);
|
||||
bytearr[count++] = (byte) (utflen >> 16);
|
||||
bytearr[count++] = (byte) (utflen >> 8);
|
||||
bytearr[count++] = (byte) (utflen);
|
||||
|
||||
int i = 0;
|
||||
for (i = 0; i < strlen; i++) {
|
||||
c = str.charAt(i);
|
||||
if (!((c >= 0x0001) && (c <= 0x007F))) break;
|
||||
bytearr[count++] = (byte) c;
|
||||
}
|
||||
|
||||
for (; i < strlen; i++) {
|
||||
c = str.charAt(i);
|
||||
if ((c >= 0x0001) && (c <= 0x007F)) {
|
||||
bytearr[count++] = (byte) c;
|
||||
|
||||
} else if (c > 0x07FF) {
|
||||
bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
} else {
|
||||
bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
|
||||
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
|
||||
}
|
||||
}
|
||||
writeBytes(bytearr, 0, utflen + 4);
|
||||
// return utflen + 2;
|
||||
}
|
||||
|
||||
public void writeFloat(float v) throws IOException {
|
||||
|
Loading…
x
Reference in New Issue
Block a user