[PARSER] Clarify XContentParser/Builder interface for binary vs. utf8 values

Today we have very confusing naming since some methods names claim to
read binary but in fact read utf-16 and convert to utf-8 under the hood.
This commit clarifies the interfaces and adds additional documentation.

Closes #7367
This commit is contained in:
Simon Willnauer 2014-08-21 11:24:32 +02:00
parent b5b1960a2b
commit c4bed91262
10 changed files with 114 additions and 21 deletions

View File

@ -512,6 +512,30 @@ public final class XContentBuilder implements BytesStream, Releasable {
return this;
}
/**
* Writes the binary content of the given BytesRef
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(String name, BytesRef value) throws IOException {
field(name);
generator.writeBinary(value.bytes, value.offset, value.length);
return this;
}
/**
* Writes the binary content of the given BytesRef
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(XContentBuilderString name, BytesRef value) throws IOException {
field(name);
generator.writeBinary(value.bytes, value.offset, value.length);
return this;
}
/**
* Writes the binary content of the given BytesReference
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(String name, BytesReference value) throws IOException {
field(name);
if (!value.hasArray()) {
@ -521,6 +545,10 @@ public final class XContentBuilder implements BytesStream, Releasable {
return this;
}
/**
* Writes the binary content of the given BytesReference
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(XContentBuilderString name, BytesReference value) throws IOException {
field(name);
if (!value.hasArray()) {
@ -530,12 +558,26 @@ public final class XContentBuilder implements BytesStream, Releasable {
return this;
}
/**
* Writes the binary content of the given BytesRef as UTF-8 bytes
* Use {@link XContentParser#utf8Bytes()} to read the value back
*/
public XContentBuilder utf8Field(XContentBuilderString name, BytesRef value) throws IOException {
field(name);
generator.writeUTF8String(value.bytes, value.offset, value.length);
return this;
}
/**
* Writes the binary content of the given BytesRef as UTF-8 bytes
* Use {@link XContentParser#utf8Bytes()} to read the value back
*/
public XContentBuilder utf8Field(String name, BytesRef value) throws IOException {
field(name);
generator.writeUTF8String(value.bytes, value.offset, value.length);
return this;
}
public XContentBuilder field(String name, Text value) throws IOException {
field(name);
if (value.hasBytes() && value.bytes().hasArray()) {
@ -989,6 +1031,22 @@ public final class XContentBuilder implements BytesStream, Releasable {
return this;
}
/**
* Writes the binary content of the given BytesRef
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder value(BytesRef value) throws IOException {
if (value == null) {
return nullValue();
}
generator.writeBinary(value.bytes, value.offset, value.length);
return this;
}
/**
* Writes the binary content of the given BytesReference
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder value(BytesReference value) throws IOException {
if (value == null) {
return nullValue();
@ -1173,6 +1231,9 @@ public final class XContentBuilder implements BytesStream, Releasable {
bytes = bytes.toBytesArray();
}
generator.writeBinary(bytes.array(), bytes.arrayOffset(), bytes.length());
} else if (value instanceof BytesRef) {
BytesRef bytes = (BytesRef) value;
generator.writeBinary(bytes.bytes, bytes.offset, bytes.length);
} else if (value instanceof Text) {
Text text = (Text) value;
if (text.hasBytes() && text.bytes().hasArray()) {

View File

@ -22,7 +22,6 @@ package org.elasticsearch.common.xcontent;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lease.Releasable;
import java.io.Closeable;
import java.io.IOException;
import java.util.Map;
@ -139,9 +138,17 @@ public interface XContentParser extends Releasable {
String textOrNull() throws IOException;
BytesRef bytesOrNull() throws IOException;
/**
* Returns a BytesRef holding UTF-8 bytes or null if a null value is {@link Token#VALUE_NULL}.
* This method should be used to read text only binary content should be read through {@link #binaryValue()}
*/
BytesRef utf8BytesOrNull() throws IOException;
BytesRef bytes() throws IOException;
/**
* Returns a BytesRef holding UTF-8 bytes.
* This method should be used to read text only binary content should be read through {@link #binaryValue()}
*/
BytesRef utf8Bytes() throws IOException;
Object objectText() throws IOException;
@ -196,5 +203,32 @@ public interface XContentParser extends Releasable {
boolean booleanValue() throws IOException;
/**
* Reads a plain binary value that was written via one of the following methods:
*
* <li>
* <ul>{@link XContentBuilder#field(String, org.apache.lucene.util.BytesRef)}</ul>
* <ul>{@link XContentBuilder#field(String, org.elasticsearch.common.bytes.BytesReference)}</ul>
* <ul>{@link XContentBuilder#field(String, byte[], int, int)}}</ul>
* <ul>{@link XContentBuilder#field(String, byte[])}}</ul>
* </li>
*
* as well as via their <code>XContentBuilderString</code> variants of the separated value methods.
* Note: Do not use this method to read values written with:
* <li>
* <ul>{@link XContentBuilder#utf8Field(XContentBuilderString, org.apache.lucene.util.BytesRef)}</ul>
* <ul>{@link XContentBuilder#utf8Field(String, org.apache.lucene.util.BytesRef)}</ul>
* </li>
*
* these methods write UTF-8 encoded strings and must be read through:
* <li>
* <ul>{@link XContentParser#utf8Bytes()}</ul>
* <ul>{@link XContentParser#utf8BytesOrNull()}}</ul>
* <ul>{@link XContentParser#text()} ()}</ul>
* <ul>{@link XContentParser#textOrNull()} ()}</ul>
* <ul>{@link XContentParser#textCharacters()} ()}}</ul>
* </li>
*
*/
byte[] binaryValue() throws IOException;
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.common.xcontent.json;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.xcontent.XContentType;
@ -86,7 +87,7 @@ public class JsonXContentParser extends AbstractXContentParser {
}
@Override
public BytesRef bytes() throws IOException {
public BytesRef utf8Bytes() throws IOException {
BytesRef bytes = new BytesRef();
UnicodeUtil.UTF16toUTF8(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength(), bytes);
return bytes;
@ -114,7 +115,7 @@ public class JsonXContentParser extends AbstractXContentParser {
public Object objectBytes() throws IOException {
JsonToken currentToken = parser.getCurrentToken();
if (currentToken == JsonToken.VALUE_STRING) {
return bytes();
return utf8Bytes();
} else if (currentToken == JsonToken.VALUE_NUMBER_INT || currentToken == JsonToken.VALUE_NUMBER_FLOAT) {
return parser.getNumberValue();
} else if (currentToken == JsonToken.VALUE_TRUE) {
@ -124,7 +125,8 @@ public class JsonXContentParser extends AbstractXContentParser {
} else if (currentToken == JsonToken.VALUE_NULL) {
return null;
} else {
return bytes();
//TODO should this really do UTF-8 conversion?
return utf8Bytes();
}
}
@ -185,11 +187,7 @@ public class JsonXContentParser extends AbstractXContentParser {
@Override
public void close() {
try {
parser.close();
} catch (IOException e) {
// ignore
}
IOUtils.closeWhileHandlingException(parser);
}
private NumberType convertNumberType(JsonParser.NumberType numberType) {

View File

@ -196,11 +196,11 @@ public abstract class AbstractXContentParser implements XContentParser {
@Override
public BytesRef bytesOrNull() throws IOException {
public BytesRef utf8BytesOrNull() throws IOException {
if (currentToken() == Token.VALUE_NULL) {
return null;
}
return bytes();
return utf8Bytes();
}
@Override

View File

@ -284,7 +284,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
payload = payloadBuilder.bytes().toBytesRef();
payloadBuilder.close();
} else if (token.isValue()) {
payload = parser.bytesOrNull();
payload = parser.utf8BytesOrNull();
} else {
throw new MapperException("payload doesn't support type " + token);
}

View File

@ -65,7 +65,7 @@ public class IdsFilterParser implements FilterParser {
if ("values".equals(currentFieldName)) {
idsProvided = true;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
BytesRef value = parser.bytesOrNull();
BytesRef value = parser.utf8BytesOrNull();
if (value == null) {
throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
}

View File

@ -70,7 +70,7 @@ public class IdsQueryParser implements QueryParser {
if ("values".equals(currentFieldName)) {
idsProvided = true;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
BytesRef value = parser.bytesOrNull();
BytesRef value = parser.utf8BytesOrNull();
if (value == null) {
throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
}

View File

@ -59,7 +59,7 @@ public class TypeFilterParser implements FilterParser {
if (token != XContentParser.Token.VALUE_STRING) {
throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name");
}
BytesRef type = parser.bytes();
BytesRef type = parser.utf8Bytes();
// move to the next token
parser.nextToken();

View File

@ -61,7 +61,7 @@ public final class SuggestParseElement implements SearchParseElement {
fieldName = parser.currentName();
} else if (token.isValue()) {
if ("text".equals(fieldName)) {
globalText = parser.bytes();
globalText = parser.utf8Bytes();
} else {
throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
}
@ -75,7 +75,7 @@ public final class SuggestParseElement implements SearchParseElement {
fieldName = parser.currentName();
} else if (token.isValue()) {
if ("text".equals(fieldName)) {
suggestText = parser.bytes();
suggestText = parser.utf8Bytes();
} else {
throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
}

View File

@ -117,9 +117,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
fieldName = parser.currentName();
} else if (token.isValue()) {
if ("pre_tag".equals(fieldName) || "preTag".equals(fieldName)) {
suggestion.setPreTag(parser.bytes());
suggestion.setPreTag(parser.utf8Bytes());
} else if ("post_tag".equals(fieldName) || "postTag".equals(fieldName)) {
suggestion.setPostTag(parser.bytes());
suggestion.setPostTag(parser.utf8Bytes());
} else {
throw new ElasticsearchIllegalArgumentException(
"suggester[phrase][highlight] doesn't support field [" + fieldName + "]");