diff --git a/pom.xml b/pom.xml
index 30fd9468946..9ed258b5ffb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -234,28 +234,28 @@
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-core</artifactId>
-            <version>2.4.2</version>
+            <version>2.4.4</version>
             <scope>compile</scope>
         </dependency>
 
         <dependency>
             <groupId>com.fasterxml.jackson.dataformat</groupId>
             <artifactId>jackson-dataformat-smile</artifactId>
-            <version>2.4.2</version>
+            <version>2.4.4</version>
             <scope>compile</scope>
         </dependency>
 
         <dependency>
             <groupId>com.fasterxml.jackson.dataformat</groupId>
             <artifactId>jackson-dataformat-yaml</artifactId>
-            <version>2.4.2</version>
+            <version>2.4.4</version>
             <scope>compile</scope>
         </dependency>
 
         <dependency>
             <groupId>com.fasterxml.jackson.dataformat</groupId>
             <artifactId>jackson-dataformat-cbor</artifactId>
-            <version>2.4.2</version>
+            <version>2.4.4</version>
             <scope>compile</scope>
         </dependency>
 
diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java b/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java
index ca3e2f7bcd2..a8fb28e6f6a 100644
--- a/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java
+++ b/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java
@@ -231,9 +231,28 @@ public class XContentFactory {
                 return XContentType.YAML;
             }
         }
+        // CBOR logic similar to CBORFactory#hasCBORFormat
         if (first == (CBORConstants.BYTE_OBJECT_INDEFINITE & 0xff)){
             return XContentType.CBOR;
         }
+        // first/second/third are raw InputStream#read() results (0..255, -1 only at end of stream),
+        // so they are compared with unsigned literals and narrowed only where a byte is required
+        if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, (byte) first)) {
+            // Actually, specific "self-describe tag" is a very good indicator
+            int third = si.read();
+            if (third == -1) {
+                return null;
+            }
+            if (first == 0xD9 && second == 0xD9 && third == 0xF7) {
+                return XContentType.CBOR;
+            }
+        }
+        // for small objects, some encoders just encode as major type object, we can safely
+        // say it's CBOR since it doesn't contradict SMILE or JSON, and it's a last resort
+        if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) first)) {
+            return XContentType.CBOR;
+        }
+
         for (int i = 2; i < GUESS_HEADER_LENGTH; i++) {
             int val = si.read();
             if (val == -1) {
@@ -279,9 +298,23 @@ public class XContentFactory {
         if (length > 2 && first == '-' && bytes.get(1) == '-' && bytes.get(2) == '-') {
             return XContentType.YAML;
         }
-        if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){
+        // CBOR logic similar to CBORFactory#hasCBORFormat
+        if (first == CBORConstants.BYTE_OBJECT_INDEFINITE && length > 1){
             return XContentType.CBOR;
         }
+        if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first) && length > 2) {
+            // Actually, specific "self-describe tag" is a very good indicator
+            if (first == (byte) 0xD9 && bytes.get(1) == (byte) 0xD9 && bytes.get(2) == (byte) 0xF7) {
+                return XContentType.CBOR;
+            }
+        }
+        // for small objects, some encoders just encode as major type object, we can safely
+        // say it's CBOR since it doesn't contradict SMILE or JSON, and it's a last resort
+        if (CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, first)) {
+            return XContentType.CBOR;
+        }
+
+        // a last chance for JSON
         for (int i = 0; i < length; i++) {
             if (bytes.get(i) == '{') {
                 return XContentType.JSON;
diff --git a/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java b/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java
index 14710b26af4..1bd403ab82d 100644
--- a/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java
+++ b/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java
@@ -19,11 +19,14 @@
 
 package org.elasticsearch.common.xcontent;
 
+import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
+import com.fasterxml.jackson.dataformat.smile.SmileConstants;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.io.stream.BytesStreamInput;
 import org.elasticsearch.test.ElasticsearchTestCase;
 import org.junit.Test;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 
 import static org.hamcrest.Matchers.equalTo;
@@ -69,4 +72,45 @@ public class XContentFactoryTests extends ElasticsearchTestCase {
             assertThat(XContentFactory.xContentType(builder.string()), equalTo(type));
         }
     }
+
+    public void testCBORBasedOnMajorObjectDetection() {
+        // for this {"foo" => 5} perl encoder for example generates:
+        byte[] bytes = new byte[] {(byte) 0xA1, (byte) 0x43, (byte) 0x66, (byte) 0x6f, (byte) 0x6f, (byte) 0x5};
+        assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR));
+        //assertThat(((Number) XContentHelper.convertToMap(bytes, true).v2().get("foo")).intValue(), equalTo(5));
+
+        // this is for {"foo" : 5} in python CBOR
+        bytes = new byte[] {(byte) 0xA1, (byte) 0x63, (byte) 0x66, (byte) 0x6f, (byte) 0x6f, (byte) 0x5};
+        assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR));
+        assertThat(((Number) XContentHelper.convertToMap(bytes, true).v2().get("foo")).intValue(), equalTo(5));
+
+        // also make sure major type check doesn't collide with SMILE and JSON, just in case
+        assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, SmileConstants.HEADER_BYTE_1), equalTo(false));
+        assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '{'), equalTo(false));
+        assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) ' '), equalTo(false));
+        assertThat(CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, (byte) '-'), equalTo(false));
+    }
+
+    public void testCBORBasedOnMagicHeaderDetection() {
+        byte[] bytes = new byte[] {(byte) 0xd9, (byte) 0xd9, (byte) 0xf7};
+        assertThat(XContentFactory.xContentType(bytes), equalTo(XContentType.CBOR));
+    }
+
+    public void testCBORDetectionFromStream() throws Exception {
+        // the self-describe tag must also be recognised when the content arrives as a stream
+        ByteArrayInputStream is = new ByteArrayInputStream(new byte[] {(byte) 0xd9, (byte) 0xd9, (byte) 0xf7});
+        assertThat(XContentFactory.xContentType(is), equalTo(XContentType.CBOR));
+
+        // 0xBF 0xFF is an empty indefinite-length object: the 0xFF data byte is not end-of-stream
+        is = new ByteArrayInputStream(new byte[] {(byte) 0xBF, (byte) 0xFF});
+        assertThat(XContentFactory.xContentType(is), equalTo(XContentType.CBOR));
+    }
+
+    public void testEmptyStream() throws Exception {
+        ByteArrayInputStream is = new ByteArrayInputStream(new byte[0]);
+        assertNull(XContentFactory.xContentType(is));
+
+        is = new ByteArrayInputStream(new byte[] {(byte) 1});
+        assertNull(XContentFactory.xContentType(is));
+    }
 }