[PARSER] Clarify XContentParser/Builder interface for binary vs. utf8 values

Today we have very confusing naming since some methods names claim to read binary but in fact read utf-16 and convert to utf-8 under the hood. This commit clarifies the interfaces and adds additional documentation. Closes #7367
2025-02-17 10:25:15 +00:00 · 2014-08-21 11:24:32 +02:00 · 2014-08-21 11:24:32 +02:00 · c4bed91262
commit c4bed91262
parent b5b1960a2b
10 changed files with 114 additions and 21 deletions
--- a/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java
+++ b/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java
@ -512,6 +512,30 @@ public final class XContentBuilder implements BytesStream, Releasable {
        return this;
    }

+    /**
+     * Writes the binary content of the given BytesRef
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
+    public XContentBuilder field(String name, BytesRef value) throws IOException {
+        field(name);
+        generator.writeBinary(value.bytes, value.offset, value.length);
+        return this;
+    }
+
+    /**
+     * Writes the binary content of the given BytesRef
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
+    public XContentBuilder field(XContentBuilderString name, BytesRef value) throws IOException {
+        field(name);
+        generator.writeBinary(value.bytes, value.offset, value.length);
+        return this;
+    }
+
+    /**
+     * Writes the binary content of the given BytesReference
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
    public XContentBuilder field(String name, BytesReference value) throws IOException {
        field(name);
        if (!value.hasArray()) {
@ -521,6 +545,10 @@ public final class XContentBuilder implements BytesStream, Releasable {
        return this;
    }

+    /**
+     * Writes the binary content of the given BytesReference
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
    public XContentBuilder field(XContentBuilderString name, BytesReference value) throws IOException {
        field(name);
        if (!value.hasArray()) {
@ -530,12 +558,26 @@ public final class XContentBuilder implements BytesStream, Releasable {
        return this;
    }

+    /**
+     * Writes the binary content of the given BytesRef as UTF-8 bytes
+     * Use {@link XContentParser#utf8Bytes()} to read the value back
+     */
    public XContentBuilder utf8Field(XContentBuilderString name, BytesRef value) throws IOException {
        field(name);
        generator.writeUTF8String(value.bytes, value.offset, value.length);
        return this;
    }

+    /**
+     * Writes the binary content of the given BytesRef as UTF-8 bytes
+     * Use {@link XContentParser#utf8Bytes()} to read the value back
+     */
+    public XContentBuilder utf8Field(String name, BytesRef value) throws IOException {
+        field(name);
+        generator.writeUTF8String(value.bytes, value.offset, value.length);
+        return this;
+    }
+
    public XContentBuilder field(String name, Text value) throws IOException {
        field(name);
        if (value.hasBytes() && value.bytes().hasArray()) {
@ -989,6 +1031,22 @@ public final class XContentBuilder implements BytesStream, Releasable {
        return this;
    }

+    /**
+     * Writes the binary content of the given BytesRef
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
+    public XContentBuilder value(BytesRef value) throws IOException {
+        if (value == null) {
+            return nullValue();
+        }
+        generator.writeBinary(value.bytes, value.offset, value.length);
+        return this;
+    }
+
+    /**
+     * Writes the binary content of the given BytesReference
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
    public XContentBuilder value(BytesReference value) throws IOException {
        if (value == null) {
            return nullValue();
@ -1173,6 +1231,9 @@ public final class XContentBuilder implements BytesStream, Releasable {
                bytes = bytes.toBytesArray();
            }
            generator.writeBinary(bytes.array(), bytes.arrayOffset(), bytes.length());
+        } else if (value instanceof BytesRef) {
+            BytesRef bytes = (BytesRef) value;
+            generator.writeBinary(bytes.bytes, bytes.offset, bytes.length);
        } else if (value instanceof Text) {
            Text text = (Text) value;
            if (text.hasBytes() && text.bytes().hasArray()) {
--- a/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java
+++ b/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java
@ -22,7 +22,6 @@ package org.elasticsearch.common.xcontent;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lease.Releasable;

-import java.io.Closeable;
 import java.io.IOException;
 import java.util.Map;

@ -139,9 +138,17 @@ public interface XContentParser extends Releasable {

    String textOrNull() throws IOException;

-    BytesRef bytesOrNull() throws IOException;
+    /**
+     * Returns a BytesRef holding UTF-8 bytes or null if a null value is {@link Token#VALUE_NULL}.
+     * This method should be used to read text only binary content should be read through {@link #binaryValue()}
+     */
+    BytesRef utf8BytesOrNull() throws IOException;

-    BytesRef bytes() throws IOException;
+    /**
+     * Returns a BytesRef holding UTF-8 bytes.
+     * This method should be used to read text only binary content should be read through {@link #binaryValue()}
+     */
+    BytesRef utf8Bytes() throws IOException;

    Object objectText() throws IOException;

@ -196,5 +203,32 @@ public interface XContentParser extends Releasable {

    boolean booleanValue() throws IOException;

+    /**
+     * Reads a plain binary value that was written via one of the following methods:
+     *
+     * <li>
+     *     <ul>{@link XContentBuilder#field(String, org.apache.lucene.util.BytesRef)}</ul>
+     *     <ul>{@link XContentBuilder#field(String, org.elasticsearch.common.bytes.BytesReference)}</ul>
+     *     <ul>{@link XContentBuilder#field(String, byte[], int, int)}}</ul>
+     *     <ul>{@link XContentBuilder#field(String, byte[])}}</ul>
+     * </li>
+     *
+     * as well as via their <code>XContentBuilderString</code> variants of the separated value methods.
+     * Note: Do not use this method to read values written with:
+     * <li>
+     *     <ul>{@link XContentBuilder#utf8Field(XContentBuilderString, org.apache.lucene.util.BytesRef)}</ul>
+     *     <ul>{@link XContentBuilder#utf8Field(String, org.apache.lucene.util.BytesRef)}</ul>
+     * </li>
+     *
+     * these methods write UTF-8 encoded strings and must be read through:
+     * <li>
+     *     <ul>{@link XContentParser#utf8Bytes()}</ul>
+     *     <ul>{@link XContentParser#utf8BytesOrNull()}}</ul>
+     *     <ul>{@link XContentParser#text()} ()}</ul>
+     *     <ul>{@link XContentParser#textOrNull()} ()}</ul>
+     *     <ul>{@link XContentParser#textCharacters()} ()}}</ul>
+     * </li>
+     *
+     */
    byte[] binaryValue() throws IOException;
 }
--- a/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java
+++ b/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java
@ -22,6 +22,7 @@ package org.elasticsearch.common.xcontent.json;
 import com.fasterxml.jackson.core.JsonParser;
 import com.fasterxml.jackson.core.JsonToken;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.UnicodeUtil;
 import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.common.xcontent.XContentType;
@ -86,7 +87,7 @@ public class JsonXContentParser extends AbstractXContentParser {
    }

    @Override
-    public BytesRef bytes() throws IOException {
+    public BytesRef utf8Bytes() throws IOException {
        BytesRef bytes = new BytesRef();
        UnicodeUtil.UTF16toUTF8(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength(), bytes);
        return bytes;
@ -114,7 +115,7 @@ public class JsonXContentParser extends AbstractXContentParser {
    public Object objectBytes() throws IOException {
        JsonToken currentToken = parser.getCurrentToken();
        if (currentToken == JsonToken.VALUE_STRING) {
-            return bytes();
+            return utf8Bytes();
        } else if (currentToken == JsonToken.VALUE_NUMBER_INT || currentToken == JsonToken.VALUE_NUMBER_FLOAT) {
            return parser.getNumberValue();
        } else if (currentToken == JsonToken.VALUE_TRUE) {
@ -124,7 +125,8 @@ public class JsonXContentParser extends AbstractXContentParser {
        } else if (currentToken == JsonToken.VALUE_NULL) {
            return null;
        } else {
-            return bytes();
+            //TODO should this really do UTF-8 conversion?
+            return utf8Bytes();
        }
    }

@ -185,11 +187,7 @@ public class JsonXContentParser extends AbstractXContentParser {

    @Override
    public void close() {
-        try {
-            parser.close();
-        } catch (IOException e) {
-            // ignore
-        }
+        IOUtils.closeWhileHandlingException(parser);
    }

    private NumberType convertNumberType(JsonParser.NumberType numberType) {
--- a/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java
+++ b/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java
@ -196,11 +196,11 @@ public abstract class AbstractXContentParser implements XContentParser {


    @Override
-    public BytesRef bytesOrNull() throws IOException {
+    public BytesRef utf8BytesOrNull() throws IOException {
        if (currentToken() == Token.VALUE_NULL) {
            return null;
        }
-        return bytes();
+        return utf8Bytes();
    }

    @Override
--- a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
@ -284,7 +284,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
                        payload = payloadBuilder.bytes().toBytesRef();
                        payloadBuilder.close();
                    } else if (token.isValue()) {
-                        payload = parser.bytesOrNull();
+                        payload = parser.utf8BytesOrNull();
                    } else {
                        throw new MapperException("payload doesn't support type " + token);
                    }
--- a/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java
+++ b/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java
@ -65,7 +65,7 @@ public class IdsFilterParser implements FilterParser {
                if ("values".equals(currentFieldName)) {
                    idsProvided = true;
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        BytesRef value = parser.bytesOrNull();
+                        BytesRef value = parser.utf8BytesOrNull();
                        if (value == null) {
                            throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
                        }
--- a/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java
+++ b/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java
@ -70,7 +70,7 @@ public class IdsQueryParser implements QueryParser {
                if ("values".equals(currentFieldName)) {
                    idsProvided = true;
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        BytesRef value = parser.bytesOrNull();
+                        BytesRef value = parser.utf8BytesOrNull();
                        if (value == null) {
                            throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
                        }
--- a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java
+++ b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java
@ -59,7 +59,7 @@ public class TypeFilterParser implements FilterParser {
        if (token != XContentParser.Token.VALUE_STRING) {
            throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name");
        }
-        BytesRef type = parser.bytes();
+        BytesRef type = parser.utf8Bytes();
        // move to the next token
        parser.nextToken();

--- a/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
+++ b/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
@ -61,7 +61,7 @@ public final class SuggestParseElement implements SearchParseElement {
                fieldName = parser.currentName();
            } else if (token.isValue()) {
                if ("text".equals(fieldName)) {
-                    globalText = parser.bytes();
+                    globalText = parser.utf8Bytes();
                } else {
                    throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
                }
@ -75,7 +75,7 @@ public final class SuggestParseElement implements SearchParseElement {
                        fieldName = parser.currentName();
                    } else if (token.isValue()) {
                        if ("text".equals(fieldName)) {
-                            suggestText = parser.bytes();
+                            suggestText = parser.utf8Bytes();
                        } else {
                            throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
                        }
--- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java
+++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java
@ -117,9 +117,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
                            fieldName = parser.currentName();
                        } else if (token.isValue()) {
                            if ("pre_tag".equals(fieldName) || "preTag".equals(fieldName)) {
-                                suggestion.setPreTag(parser.bytes());
+                                suggestion.setPreTag(parser.utf8Bytes());
                            } else if ("post_tag".equals(fieldName) || "postTag".equals(fieldName)) {
-                                suggestion.setPostTag(parser.bytes());
+                                suggestion.setPostTag(parser.utf8Bytes());
                            } else {
                                throw new ElasticsearchIllegalArgumentException(
                                    "suggester[phrase][highlight] doesn't support field [" + fieldName + "]");