From 7523d0b150ff7b05aa37b5e6edf8216341671b38 Mon Sep 17 00:00:00 2001 From: Chris Earle Date: Mon, 24 Nov 2014 12:19:20 -0500 Subject: [PATCH] Update to Jackson 2.4.3 - Update pom to 2.4.3 from 2.4.2 - Enable the CBOR data header (aka tag) from the CBOR Generator to provide binary identification like the Smile format - Check for the CBOR header and ensure that the data sent in represents a "major type" that is an object - Cleans up `JsonVsCborTests` unused imports --- pom.xml | 9 +- .../common/xcontent/XContentFactory.java | 120 ++++++++++++------ .../common/xcontent/cbor/CborXContent.java | 4 + .../common/xcontent/XContentFactoryTests.java | 54 ++++++-- .../common/xcontent/cbor/JsonVsCborTests.java | 5 - 5 files changed, 133 insertions(+), 59 deletions(-) diff --git a/pom.xml b/pom.xml index 9f8eaa99a44..d771d51e304 100644 --- a/pom.xml +++ b/pom.xml @@ -31,6 +31,7 @@ + 2.4.3 5.0.0 5.0.0-snapshot-1641343 auto @@ -226,28 +227,28 @@ com.fasterxml.jackson.core jackson-core - 2.4.2 + ${jackson.version} compile com.fasterxml.jackson.dataformat jackson-dataformat-smile - 2.4.2 + ${jackson.version} compile com.fasterxml.jackson.dataformat jackson-dataformat-yaml - 2.4.2 + ${jackson.version} compile com.fasterxml.jackson.dataformat jackson-dataformat-cbor - 2.4.2 + ${jackson.version} compile diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java b/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java index ca3e2f7bcd2..2b2d3a3ab63 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java +++ b/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java @@ -157,9 +157,9 @@ public class XContentFactory { return XContentType.YAML; } - // CBOR is not supported + // CBOR is not supported because it is a binary-only format - for (int i = 0; i < length; i++) { + for (int i = 1; i < length; i++) { char c = content.charAt(i); if (c == '{') { return XContentType.JSON; @@ -208,41 +208,48 @@ public class XContentFactory { * Guesses the content type based on the provided input stream. */ public static XContentType xContentType(InputStream si) throws IOException { + // Need minimum of 3 bytes for everything (except really JSON) int first = si.read(); - if (first == -1) { - return null; - } int second = si.read(); - if (second == -1) { + int third = si.read(); + + // Cannot short circuit based on third (or later fourth) because "{}" is valid JSON + if (first == -1 || second == -1) { return null; } - if (first == SmileConstants.HEADER_BYTE_1 && second == SmileConstants.HEADER_BYTE_2) { - int third = si.read(); - if (third == SmileConstants.HEADER_BYTE_3) { - return XContentType.SMILE; - } + + if (first == SmileConstants.HEADER_BYTE_1 && second == SmileConstants.HEADER_BYTE_2 && + third == SmileConstants.HEADER_BYTE_3) { + return XContentType.SMILE; } - if (first == '{' || second == '{') { + if (first == '-' && second == '-' && third == '-') { + return XContentType.YAML; + } + + // Need 4 bytes for CBOR + int fourth = si.read(); + + if (first == '{' || second == '{' || third == '{' || fourth == '{') { return XContentType.JSON; } - if (first == '-' && second == '-') { - int third = si.read(); - if (third == '-') { - return XContentType.YAML; - } - } - if (first == (CBORConstants.BYTE_OBJECT_INDEFINITE & 0xff)){ - return XContentType.CBOR; - } - for (int i = 2; i < GUESS_HEADER_LENGTH; i++) { - int val = si.read(); - if (val == -1) { - return null; - } - if (val == '{') { - return XContentType.JSON; + + // ensure that we don't only have two or three bytes (definitely not CBOR and everything else was checked) + if (third != -1 && fourth != -1) { + if (isCBORObjectHeader((byte)first, (byte)second, (byte)third, (byte)fourth)) { + return XContentType.CBOR; + } + + for (int i = 4; i < GUESS_HEADER_LENGTH; i++) { + int val = si.read(); + if (val == -1) { + return null; + } + if (val == '{') { + return XContentType.JSON; + } } } + return null; } @@ -273,20 +280,53 @@ public class XContentFactory { if (first == '{') { return XContentType.JSON; } - if (length > 2 && first == SmileConstants.HEADER_BYTE_1 && bytes.get(1) == SmileConstants.HEADER_BYTE_2 && bytes.get(2) == SmileConstants.HEADER_BYTE_3) { - return XContentType.SMILE; - } - if (length > 2 && first == '-' && bytes.get(1) == '-' && bytes.get(2) == '-') { - return XContentType.YAML; - } - if (first == CBORConstants.BYTE_OBJECT_INDEFINITE){ - return XContentType.CBOR; - } - for (int i = 0; i < length; i++) { - if (bytes.get(i) == '{') { - return XContentType.JSON; + if (length > 2) { + byte second = bytes.get(1); + byte third = bytes.get(2); + + if (first == SmileConstants.HEADER_BYTE_1 && second == SmileConstants.HEADER_BYTE_2 && third == SmileConstants.HEADER_BYTE_3) { + return XContentType.SMILE; + } + if (first == '-' && second == '-' && third == '-') { + return XContentType.YAML; + } + if (length > 3 && isCBORObjectHeader(first, second, third, bytes.get(3))) { + return XContentType.CBOR; + } + // note: technically this only needs length >= 2, but if the string is just " {", then it's not JSON, but + // " {}" is JSON (3 characters) + for (int i = 1; i < length; i++) { + if (bytes.get(i) == '{') { + return XContentType.JSON; + } } } return null; } + + /** + * Determine if the specified bytes represent a CBOR encoded stream. + *

+ * This performs two checks to verify that it is indeed valid/usable CBOR data: + *

    + *
  1. Checks the first three bytes for the + * {@link CBORConstants#TAG_ID_SELF_DESCRIBE self-identifying CBOR tag (header)}
  2. + *
  3. Checks that the fourth byte represents a major type that is an object, as opposed to some other type (e.g., + * text)
  4. + *
+ * + * @param first The first byte of the header + * @param second The second byte of the header + * @param third The third byte of the header + * @param fourth The fourth byte represents the first byte of the CBOR data, indicating the data's type + * + * @return {@code true} if a CBOR byte stream is detected. {@code false} otherwise. + */ + private static boolean isCBORObjectHeader(byte first, byte second, byte third, byte fourth) { + // Determine if it uses the ID TAG (0xd9f7), then see if it starts with an object (equivalent to + // checking in JSON if it starts with '{') + return CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_TAG, first) && + (((second << 8) & 0xff00) | (third & 0xff)) == CBORConstants.TAG_ID_SELF_DESCRIBE && + CBORConstants.hasMajorType(CBORConstants.MAJOR_TYPE_OBJECT, fourth); + } } diff --git a/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java b/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java index 06f8605ec24..5fa6c7b1da7 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java +++ b/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java @@ -21,6 +21,8 @@ package org.elasticsearch.common.xcontent.cbor; import com.fasterxml.jackson.core.JsonEncoding; import com.fasterxml.jackson.dataformat.cbor.CBORFactory; +import com.fasterxml.jackson.dataformat.cbor.CBORGenerator; + import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.FastStringReader; @@ -43,6 +45,8 @@ public class CborXContent implements XContent { static { cborFactory = new CBORFactory(); cborFactory.configure(CBORFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); // this trips on many mappings now... + // Enable prefixing the entire byte stream with a CBOR header ("tag") + cborFactory.configure(CBORGenerator.Feature.WRITE_TYPE_HEADER, true); cborXContent = new CborXContent(); } diff --git a/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java b/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java index e7629332253..b790206cc68 100644 --- a/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java +++ b/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java @@ -29,37 +29,45 @@ import java.io.IOException; import static org.hamcrest.Matchers.equalTo; /** - * + * Tests {@link XContentFactory} type generation. */ public class XContentFactoryTests extends ElasticsearchTestCase { @Test public void testGuessJson() throws IOException { - testGuessType(XContentType.JSON); + assertType(XContentType.JSON); } @Test public void testGuessSmile() throws IOException { - testGuessType(XContentType.SMILE); + assertType(XContentType.SMILE); } @Test public void testGuessYaml() throws IOException { - testGuessType(XContentType.YAML); + assertType(XContentType.YAML); } @Test public void testGuessCbor() throws IOException { - testGuessType(XContentType.CBOR); + assertType(XContentType.CBOR); } - private void testGuessType(XContentType type) throws IOException { - XContentBuilder builder = XContentFactory.contentBuilder(type); - builder.startObject(); - builder.field("field1", "value1"); - builder.endObject(); + private void assertType(XContentType type) throws IOException { + for (XContentBuilder builder : generateBuilders(type)) { + assertBuilderType(builder, type); + } + } + /** + * Assert the {@code builder} maps to the appropriate {@code type}. + * + * @param builder Builder to check. + * @param type Type to match. + * @throws IOException if any error occurs while checking the builder + */ + private void assertBuilderType(XContentBuilder builder, XContentType type) throws IOException { assertThat(XContentFactory.xContentType(builder.bytes()), equalTo(type)); BytesArray bytesArray = builder.bytes().toBytesArray(); assertThat(XContentFactory.xContentType(new BytesStreamInput(bytesArray.array(), bytesArray.arrayOffset(), bytesArray.length(), false)), equalTo(type)); @@ -69,4 +77,30 @@ public class XContentFactoryTests extends ElasticsearchTestCase { assertThat(XContentFactory.xContentType(builder.string()), equalTo(type)); } } + + /** + * Generate builders to test various use cases to check. + * + * @param type The type to use. + * @return Never {@code null} array of unique {@link XContentBuilder}s testing different edge cases. + * @throws IOException if any error occurs while generating builders + */ + private XContentBuilder[] generateBuilders(XContentType type) throws IOException { + XContentBuilder[] builders = new XContentBuilder[] { + XContentFactory.contentBuilder(type), XContentFactory.contentBuilder(type) + }; + + // simple object + builders[0].startObject(); + builders[0].field("field1", "value1"); + builders[0].startObject("object1"); + builders[0].field("field2", "value2"); + builders[0].endObject(); + builders[0].endObject(); + + // empty object + builders[1].startObject().endObject(); + + return builders; + } } diff --git a/src/test/java/org/elasticsearch/common/xcontent/cbor/JsonVsCborTests.java b/src/test/java/org/elasticsearch/common/xcontent/cbor/JsonVsCborTests.java index bda1c31a3f3..f4982cd614b 100644 --- a/src/test/java/org/elasticsearch/common/xcontent/cbor/JsonVsCborTests.java +++ b/src/test/java/org/elasticsearch/common/xcontent/cbor/JsonVsCborTests.java @@ -19,10 +19,6 @@ package org.elasticsearch.common.xcontent.cbor; -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.dataformat.cbor.CBORFactory; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentGenerator; @@ -31,7 +27,6 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.test.ElasticsearchTestCase; import org.junit.Test; -import java.io.ByteArrayOutputStream; import java.io.IOException; import static org.hamcrest.Matchers.equalTo;