mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-07 13:38:49 +00:00
[PARSER] Clarify XContentParser/Builder interface for binary vs. utf8 values
Today we have very confusing naming since some methods names claim to read binary but in fact read utf-16 and convert to utf-8 under the hood. This commit clarifies the interfaces and adds additional documentation. Closes #7367
This commit is contained in:
parent
b5b1960a2b
commit
c4bed91262
@ -512,6 +512,30 @@ public final class XContentBuilder implements BytesStream, Releasable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesRef
|
||||
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
|
||||
*/
|
||||
public XContentBuilder field(String name, BytesRef value) throws IOException {
|
||||
field(name);
|
||||
generator.writeBinary(value.bytes, value.offset, value.length);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesRef
|
||||
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
|
||||
*/
|
||||
public XContentBuilder field(XContentBuilderString name, BytesRef value) throws IOException {
|
||||
field(name);
|
||||
generator.writeBinary(value.bytes, value.offset, value.length);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesReference
|
||||
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
|
||||
*/
|
||||
public XContentBuilder field(String name, BytesReference value) throws IOException {
|
||||
field(name);
|
||||
if (!value.hasArray()) {
|
||||
@ -521,6 +545,10 @@ public final class XContentBuilder implements BytesStream, Releasable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesReference
|
||||
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
|
||||
*/
|
||||
public XContentBuilder field(XContentBuilderString name, BytesReference value) throws IOException {
|
||||
field(name);
|
||||
if (!value.hasArray()) {
|
||||
@ -530,12 +558,26 @@ public final class XContentBuilder implements BytesStream, Releasable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesRef as UTF-8 bytes
|
||||
* Use {@link XContentParser#utf8Bytes()} to read the value back
|
||||
*/
|
||||
public XContentBuilder utf8Field(XContentBuilderString name, BytesRef value) throws IOException {
|
||||
field(name);
|
||||
generator.writeUTF8String(value.bytes, value.offset, value.length);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesRef as UTF-8 bytes
|
||||
* Use {@link XContentParser#utf8Bytes()} to read the value back
|
||||
*/
|
||||
public XContentBuilder utf8Field(String name, BytesRef value) throws IOException {
|
||||
field(name);
|
||||
generator.writeUTF8String(value.bytes, value.offset, value.length);
|
||||
return this;
|
||||
}
|
||||
|
||||
public XContentBuilder field(String name, Text value) throws IOException {
|
||||
field(name);
|
||||
if (value.hasBytes() && value.bytes().hasArray()) {
|
||||
@ -989,6 +1031,22 @@ public final class XContentBuilder implements BytesStream, Releasable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesRef
|
||||
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
|
||||
*/
|
||||
public XContentBuilder value(BytesRef value) throws IOException {
|
||||
if (value == null) {
|
||||
return nullValue();
|
||||
}
|
||||
generator.writeBinary(value.bytes, value.offset, value.length);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the binary content of the given BytesReference
|
||||
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
|
||||
*/
|
||||
public XContentBuilder value(BytesReference value) throws IOException {
|
||||
if (value == null) {
|
||||
return nullValue();
|
||||
@ -1173,6 +1231,9 @@ public final class XContentBuilder implements BytesStream, Releasable {
|
||||
bytes = bytes.toBytesArray();
|
||||
}
|
||||
generator.writeBinary(bytes.array(), bytes.arrayOffset(), bytes.length());
|
||||
} else if (value instanceof BytesRef) {
|
||||
BytesRef bytes = (BytesRef) value;
|
||||
generator.writeBinary(bytes.bytes, bytes.offset, bytes.length);
|
||||
} else if (value instanceof Text) {
|
||||
Text text = (Text) value;
|
||||
if (text.hasBytes() && text.bytes().hasArray()) {
|
||||
|
@ -22,7 +22,6 @@ package org.elasticsearch.common.xcontent;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.lease.Releasable;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
@ -139,9 +138,17 @@ public interface XContentParser extends Releasable {
|
||||
|
||||
String textOrNull() throws IOException;
|
||||
|
||||
BytesRef bytesOrNull() throws IOException;
|
||||
/**
|
||||
* Returns a BytesRef holding UTF-8 bytes or null if a null value is {@link Token#VALUE_NULL}.
|
||||
* This method should be used to read text only binary content should be read through {@link #binaryValue()}
|
||||
*/
|
||||
BytesRef utf8BytesOrNull() throws IOException;
|
||||
|
||||
BytesRef bytes() throws IOException;
|
||||
/**
|
||||
* Returns a BytesRef holding UTF-8 bytes.
|
||||
* This method should be used to read text only binary content should be read through {@link #binaryValue()}
|
||||
*/
|
||||
BytesRef utf8Bytes() throws IOException;
|
||||
|
||||
Object objectText() throws IOException;
|
||||
|
||||
@ -196,5 +203,32 @@ public interface XContentParser extends Releasable {
|
||||
|
||||
boolean booleanValue() throws IOException;
|
||||
|
||||
/**
|
||||
* Reads a plain binary value that was written via one of the following methods:
|
||||
*
|
||||
* <li>
|
||||
* <ul>{@link XContentBuilder#field(String, org.apache.lucene.util.BytesRef)}</ul>
|
||||
* <ul>{@link XContentBuilder#field(String, org.elasticsearch.common.bytes.BytesReference)}</ul>
|
||||
* <ul>{@link XContentBuilder#field(String, byte[], int, int)}}</ul>
|
||||
* <ul>{@link XContentBuilder#field(String, byte[])}}</ul>
|
||||
* </li>
|
||||
*
|
||||
* as well as via their <code>XContentBuilderString</code> variants of the separated value methods.
|
||||
* Note: Do not use this method to read values written with:
|
||||
* <li>
|
||||
* <ul>{@link XContentBuilder#utf8Field(XContentBuilderString, org.apache.lucene.util.BytesRef)}</ul>
|
||||
* <ul>{@link XContentBuilder#utf8Field(String, org.apache.lucene.util.BytesRef)}</ul>
|
||||
* </li>
|
||||
*
|
||||
* these methods write UTF-8 encoded strings and must be read through:
|
||||
* <li>
|
||||
* <ul>{@link XContentParser#utf8Bytes()}</ul>
|
||||
* <ul>{@link XContentParser#utf8BytesOrNull()}}</ul>
|
||||
* <ul>{@link XContentParser#text()} ()}</ul>
|
||||
* <ul>{@link XContentParser#textOrNull()} ()}</ul>
|
||||
* <ul>{@link XContentParser#textCharacters()} ()}}</ul>
|
||||
* </li>
|
||||
*
|
||||
*/
|
||||
byte[] binaryValue() throws IOException;
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ package org.elasticsearch.common.xcontent.json;
|
||||
import com.fasterxml.jackson.core.JsonParser;
|
||||
import com.fasterxml.jackson.core.JsonToken;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
@ -86,7 +87,7 @@ public class JsonXContentParser extends AbstractXContentParser {
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef bytes() throws IOException {
|
||||
public BytesRef utf8Bytes() throws IOException {
|
||||
BytesRef bytes = new BytesRef();
|
||||
UnicodeUtil.UTF16toUTF8(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength(), bytes);
|
||||
return bytes;
|
||||
@ -114,7 +115,7 @@ public class JsonXContentParser extends AbstractXContentParser {
|
||||
public Object objectBytes() throws IOException {
|
||||
JsonToken currentToken = parser.getCurrentToken();
|
||||
if (currentToken == JsonToken.VALUE_STRING) {
|
||||
return bytes();
|
||||
return utf8Bytes();
|
||||
} else if (currentToken == JsonToken.VALUE_NUMBER_INT || currentToken == JsonToken.VALUE_NUMBER_FLOAT) {
|
||||
return parser.getNumberValue();
|
||||
} else if (currentToken == JsonToken.VALUE_TRUE) {
|
||||
@ -124,7 +125,8 @@ public class JsonXContentParser extends AbstractXContentParser {
|
||||
} else if (currentToken == JsonToken.VALUE_NULL) {
|
||||
return null;
|
||||
} else {
|
||||
return bytes();
|
||||
//TODO should this really do UTF-8 conversion?
|
||||
return utf8Bytes();
|
||||
}
|
||||
}
|
||||
|
||||
@ -185,11 +187,7 @@ public class JsonXContentParser extends AbstractXContentParser {
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
try {
|
||||
parser.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
IOUtils.closeWhileHandlingException(parser);
|
||||
}
|
||||
|
||||
private NumberType convertNumberType(JsonParser.NumberType numberType) {
|
||||
|
@ -196,11 +196,11 @@ public abstract class AbstractXContentParser implements XContentParser {
|
||||
|
||||
|
||||
@Override
|
||||
public BytesRef bytesOrNull() throws IOException {
|
||||
public BytesRef utf8BytesOrNull() throws IOException {
|
||||
if (currentToken() == Token.VALUE_NULL) {
|
||||
return null;
|
||||
}
|
||||
return bytes();
|
||||
return utf8Bytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -284,7 +284,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
|
||||
payload = payloadBuilder.bytes().toBytesRef();
|
||||
payloadBuilder.close();
|
||||
} else if (token.isValue()) {
|
||||
payload = parser.bytesOrNull();
|
||||
payload = parser.utf8BytesOrNull();
|
||||
} else {
|
||||
throw new MapperException("payload doesn't support type " + token);
|
||||
}
|
||||
|
@ -65,7 +65,7 @@ public class IdsFilterParser implements FilterParser {
|
||||
if ("values".equals(currentFieldName)) {
|
||||
idsProvided = true;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
BytesRef value = parser.bytesOrNull();
|
||||
BytesRef value = parser.utf8BytesOrNull();
|
||||
if (value == null) {
|
||||
throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
|
||||
}
|
||||
|
@ -70,7 +70,7 @@ public class IdsQueryParser implements QueryParser {
|
||||
if ("values".equals(currentFieldName)) {
|
||||
idsProvided = true;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
BytesRef value = parser.bytesOrNull();
|
||||
BytesRef value = parser.utf8BytesOrNull();
|
||||
if (value == null) {
|
||||
throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ public class TypeFilterParser implements FilterParser {
|
||||
if (token != XContentParser.Token.VALUE_STRING) {
|
||||
throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name");
|
||||
}
|
||||
BytesRef type = parser.bytes();
|
||||
BytesRef type = parser.utf8Bytes();
|
||||
// move to the next token
|
||||
parser.nextToken();
|
||||
|
||||
|
@ -61,7 +61,7 @@ public final class SuggestParseElement implements SearchParseElement {
|
||||
fieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
if ("text".equals(fieldName)) {
|
||||
globalText = parser.bytes();
|
||||
globalText = parser.utf8Bytes();
|
||||
} else {
|
||||
throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
|
||||
}
|
||||
@ -75,7 +75,7 @@ public final class SuggestParseElement implements SearchParseElement {
|
||||
fieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
if ("text".equals(fieldName)) {
|
||||
suggestText = parser.bytes();
|
||||
suggestText = parser.utf8Bytes();
|
||||
} else {
|
||||
throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
|
||||
}
|
||||
|
@ -117,9 +117,9 @@ public final class PhraseSuggestParser implements SuggestContextParser {
|
||||
fieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
if ("pre_tag".equals(fieldName) || "preTag".equals(fieldName)) {
|
||||
suggestion.setPreTag(parser.bytes());
|
||||
suggestion.setPreTag(parser.utf8Bytes());
|
||||
} else if ("post_tag".equals(fieldName) || "postTag".equals(fieldName)) {
|
||||
suggestion.setPostTag(parser.bytes());
|
||||
suggestion.setPostTag(parser.utf8Bytes());
|
||||
} else {
|
||||
throw new ElasticsearchIllegalArgumentException(
|
||||
"suggester[phrase][highlight] doesn't support field [" + fieldName + "]");
|
||||
|
Loading…
x
Reference in New Issue
Block a user