SQL: Support larger strings in binary protocol (elastic/x-pack-elasticsearch#3130)
While we're fairly sure we're going to remove the binary protocol in the long run, we're also fairly sure we're going to release the first version of SQL with the binary protocol. One big problem with it is that it blows up when it attempts to serialize fairly long strings. These long strings are actually quite common in the CLI. They are also possible in JDBC. I say "fairly long strings" because exactly how long the strings has to be is kind of funky. It is based on the number of bytes that it takes to encode the string, and the strings are encoded in a utf-8-like encoding of utf-16 encoded string documented here: https://docs.oracle.com/javase/7/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String) Anyway, this fixes the protocol for these "fairly long strings" by chunking the strings and adding an extra 4 byte integer before each string to count the number of chunks. After that 4 byte integer the strings are serialized using the "normal" DataInput/DataOutput encoding, the funny utf-8-like encoding of the utf-16 encoded string. relates elastic/x-pack-elasticsearch#3018 Original commit: elastic/x-pack-elasticsearch@11f0d59f20
This commit is contained in:
parent
df802b40c8
commit
193f22b97f
|
@ -34,6 +34,23 @@ import java.io.IOException;
|
|||
return version;
|
||||
}
|
||||
|
||||
/**
|
||||
* Override the built-in {@link DataInput#readUTF()}
|
||||
* to support strings that need more than 65535 charcters.
|
||||
*/
|
||||
@Override
|
||||
public String readUTF() throws IOException {
|
||||
int splits = delegate.readInt();
|
||||
if (splits == 0) {
|
||||
return delegate.readUTF();
|
||||
}
|
||||
StringBuilder b = new StringBuilder(SqlDataOutput.WORST_CASE_SPLIT * splits);
|
||||
for (int i = 0; i < splits; i++) {
|
||||
b.append(delegate.readUTF());
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFully(byte[] b) throws IOException {
|
||||
delegate.readFully(b);
|
||||
|
@ -103,9 +120,4 @@ import java.io.IOException;
|
|||
public String readLine() throws IOException {
|
||||
return delegate.readLine();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String readUTF() throws IOException {
|
||||
return delegate.readUTF();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,6 +35,34 @@ public final class SqlDataOutput implements DataOutput {
|
|||
return version;
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum size of a string to submit to {@link #delegate}'s
|
||||
* {@link DataOutput#writeUTF(String)}. The {@code 65535} is the
|
||||
* number of bytes that the string can be encoded to. The {@code 3}
|
||||
* is the "worst case" for the number of bytes used to encode each
|
||||
* char.
|
||||
*/
|
||||
static final int WORST_CASE_SPLIT = 65535 / 3;
|
||||
/**
|
||||
* Override the built-in {@link DataOutput#writeUTF(String)}
|
||||
* to support strings that need more than 65535 charcters.
|
||||
*/
|
||||
@Override
|
||||
public void writeUTF(String s) throws IOException {
|
||||
int splits = s.length() / WORST_CASE_SPLIT + 1;
|
||||
delegate.writeInt(splits);
|
||||
|
||||
int start = 0;
|
||||
while (true) {
|
||||
int end = Math.min(s.length(), start + WORST_CASE_SPLIT);
|
||||
delegate.writeUTF(s.substring(start, end));
|
||||
if (end == s.length()) {
|
||||
break;
|
||||
}
|
||||
start = end;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(int b) throws IOException {
|
||||
delegate.write(b);
|
||||
|
@ -99,9 +127,4 @@ public final class SqlDataOutput implements DataOutput {
|
|||
public void writeChars(String s) throws IOException {
|
||||
delegate.writeChars(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeUTF(String s) throws IOException {
|
||||
delegate.writeUTF(s);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.sql.protocol.shared;
|
||||
|
||||
import org.apache.http.client.entity.DeflateInputStream;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
public class SqlDataInputOutputTests extends ESTestCase {
|
||||
public void testSmallString() throws IOException {
|
||||
assertRoundTripString("t");
|
||||
assertRoundTripString("test");
|
||||
assertRoundTripString(randomAlphaOfLengthBetween(500, 1000));
|
||||
}
|
||||
|
||||
public void testLargeAscii() throws IOException {
|
||||
assertRoundTripString(randomAlphaOfLengthBetween(65535, 655350));
|
||||
}
|
||||
|
||||
public void testUnicode() throws IOException {
|
||||
assertRoundTripString(randomRealisticUnicodeOfLengthBetween(65535 / 3, 65535));
|
||||
assertRoundTripString(randomRealisticUnicodeOfLengthBetween(65535, 655350));
|
||||
}
|
||||
|
||||
/**
|
||||
* Round trip a string using {@link SqlDataOutput#writeUTF(String)}
|
||||
* and {@link SqlDataInput#readUTF()}.
|
||||
*/
|
||||
private void assertRoundTripString(String string) throws IOException {
|
||||
try (BytesStreamOutput out = new BytesStreamOutput()) {
|
||||
SqlDataOutput sout = new SqlDataOutput(new DataOutputStream(out), 0);
|
||||
sout.writeUTF(string);
|
||||
try (StreamInput in = out.bytes().streamInput()) {
|
||||
SqlDataInput sin = new SqlDataInput(new DataInputStream(in), 0);
|
||||
assertEquals(string, sin.readUTF());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue