Better detection of CBOR
CBOR has an optional special header that, when present, allows for exact detection. Also, since we know which formats we support in ES, we can support the object major type case. Closes #7640
This commit is contained in:
parent
94f82368f0
commit
31b63b1a84
8
pom.xml
8
pom.xml
|
@ -234,28 +234,28 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-core</artifactId>
|
<artifactId>jackson-core</artifactId>
|
||||||
<version>2.4.2</version>
|
<version>2.4.4</version>
|
||||||
<scope>compile</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
||||||
<artifactId>jackson-dataformat-smile</artifactId>
|
<artifactId>jackson-dataformat-smile</artifactId>
|
||||||
<version>2.4.2</version>
|
<version>2.4.4</version>
|
||||||
<scope>compile</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
||||||
<artifactId>jackson-dataformat-yaml</artifactId>
|
<artifactId>jackson-dataformat-yaml</artifactId>
|
||||||
<version>2.4.2</version>
|
<version>2.4.4</version>
|
||||||
<scope>compile</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
||||||
<artifactId>jackson-dataformat-cbor</artifactId>
|
<artifactId>jackson-dataformat-cbor</artifactId>
|
||||||
<version>2.4.2</version>
|
<version>2.4.4</version>
|
||||||
<scope>compile</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
|
@ -208,11 +208,11 @@ public class XContentFactory {
|
||||||
* Guesses the content type based on the provided input stream.
|
* Guesses the content type based on the provided input stream.
|
||||||
*/
|
*/
|
||||||
public static XContentType xContentType(InputStream si) throws IOException {
|
public static XContentType xContentType(InputStream si) throws IOException {
|
||||||
int first = si.read();
|
byte first = (byte) si.read();
|
||||||
if (first == -1) {
|
if (first == -1) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
int second = si.read();
|
byte second = (byte) si.read();
|
||||||
if (second == -1) {
|
if (second == -1) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -231,9 +231,26 @@ public class XContentFactory {
|
||||||
return XContentType.YAML;
|
return XContentType.YAML;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (first == (CBORConstants.BYTE_OBJECT_INDEFINITE & 0xff)){
|
// CBOR logic similar to CBORFactory#hasCBORFormat
|
||||||
|
if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){
|
||||||
return XContentType.CBOR;
|
return XContentType.CBOR;
|
||||||
}
|
}
|
||||||
|
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first)) {
|
||||||
|
// Actually, specific "self-describe tag" is a very good indicator
|
||||||
|
int third = si.read();
|
||||||
|
if (third == -1) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (first == (byte) 0xD9 && second == (byte) 0xD9 && third == (byte) 0xF7) {
|
||||||
|
return XContentType.CBOR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// for small objects, some encoders just encode as major type object, we can safely
|
||||||
|
// say it's CBOR since it doesn't contradict SMILE or JSON, and it's a last resort
|
||||||
|
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, first)) {
|
||||||
|
return XContentType.CBOR;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 2; i < GUESS_HEADER_LENGTH; i++) {
|
for (int i = 2; i < GUESS_HEADER_LENGTH; i++) {
|
||||||
int val = si.read();
|
int val = si.read();
|
||||||
if (val == -1) {
|
if (val == -1) {
|
||||||
|
@ -279,9 +296,23 @@ public class XContentFactory {
|
||||||
if (length > 2 && first == '-' && bytes.get(1) == '-' && bytes.get(2) == '-') {
|
if (length > 2 && first == '-' && bytes.get(1) == '-' && bytes.get(2) == '-') {
|
||||||
return XContentType.YAML;
|
return XContentType.YAML;
|
||||||
}
|
}
|
||||||
if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){
|
// CBOR logic similar to CBORFactory#hasCBORFormat
|
||||||
|
if (first == CBORConstants.BYTE_OBJECT_INDEFINITE && length > 1){
|
||||||
return XContentType.CBOR;
|
return XContentType.CBOR;
|
||||||
}
|
}
|
||||||
|
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first) && length > 2) {
|
||||||
|
// Actually, specific "self-describe tag" is a very good indicator
|
||||||
|
if (first == (byte) 0xD9 && bytes.get(1) == (byte) 0xD9 && bytes.get(2) == (byte) 0xF7) {
|
||||||
|
return XContentType.CBOR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// for small objects, some encoders just encode as major type object, we can safely
|
||||||
|
// say it's CBOR since it doesn't contradict SMILE or JSON, and it's a last resort
|
||||||
|
if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, first)) {
|
||||||
|
return XContentType.CBOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
// a last chance for JSON
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
if (bytes.get(i) == '{') {
|
if (bytes.get(i) == '{') {
|
||||||
return XContentType.JSON;
|
return XContentType.JSON;
|
||||||
|
|
|
@ -19,11 +19,14 @@
|
||||||
|
|
||||||
package org.elasticsearch.common.xcontent;
|
package org.elasticsearch.common.xcontent;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
|
||||||
|
import com.fasterxml.jackson.dataformat.smile.SmileConstants;
|
||||||
import org.elasticsearch.common.bytes.BytesArray;
|
import org.elasticsearch.common.bytes.BytesArray;
|
||||||
import org.elasticsearch.common.io.stream.BytesStreamInput;
|
import org.elasticsearch.common.io.stream.BytesStreamInput;
|
||||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
@ -69,4 +72,35 @@ public class XContentFactoryTests extends ElasticsearchTestCase {
|
||||||
assertThat(XContentFactory.xContentType(builder.string()), equalTo(type));
|
assertThat(XContentFactory.xContentType(builder.string()), equalTo(type));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCBORBasedOnMajorObjectDetection() {
|
||||||
|
// for {"foo" => 5}, the Perl encoder, for example, generates:
|
||||||
|
byte[] bytes = new byte[] {(byte) 0xA1, (byte) 0x43, (byte) 0x66, (byte) 6f, (byte) 6f, (byte) 0x5};
|
||||||
|
assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR));
|
||||||
|
//assertThat(((Number) XContentHelper.convertToMap(bytes, true).v2().get("foo")).intValue(), equalTo(5));
|
||||||
|
|
||||||
|
// this is for {"foo" : 5} in python CBOR
|
||||||
|
bytes = new byte[] {(byte) 0xA1, (byte) 0x63, (byte) 0x66, (byte) 0x6f, (byte) 0x6f, (byte) 0x5};
|
||||||
|
assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR));
|
||||||
|
assertThat(((Number) XContentHelper.convertToMap(bytes, true).v2().get("foo")).intValue(), equalTo(5));
|
||||||
|
|
||||||
|
// also make sure major type check doesn't collide with SMILE and JSON, just in case
|
||||||
|
assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, SmileConstants.HEADER_BYTE_1), equalTo(false));
|
||||||
|
assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '{'), equalTo(false));
|
||||||
|
assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) ' '), equalTo(false));
|
||||||
|
assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '-'), equalTo(false));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCBORBasedOnMagicHeaderDetection() {
|
||||||
|
byte[] bytes = new byte[] {(byte) 0xd9, (byte) 0xd9, (byte) 0xf7};
|
||||||
|
assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEmptyStream() throws Exception {
|
||||||
|
ByteArrayInputStream is = new ByteArrayInputStream(new byte[0]);
|
||||||
|
assertNull(XContentFactory.xContentType(is));
|
||||||
|
|
||||||
|
is = new ByteArrayInputStream(new byte[] {(byte) 1});
|
||||||
|
assertNull(XContentFactory.xContentType(is));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue