Use the non-analyzed token stream optimization everywhere
The string type already has an optimization that reuses a StringTokenStream kept in a thread local when a non-analyzed field is indexed, instead of creating the token stream each time. We should apply this across the board, in every place where we create a field from a String. Also, move the logic into a dedicated XStringField so the StringTokenStream reuse can be shared rather than copied around. Closes #6001
parent 12f43fbbc0
commit 23f200bc0e
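For orientation, the change applied throughout the mappers boils down to the following call-site swap (an illustrative sketch, not an excerpt from the diff; names.indexName(), value and fieldType stand in for whatever each mapper passes):

// Before: a plain Field builds its not-analyzed TokenStream anew each time
// tokenStream(Analyzer) is called (per the commit message above).
fields.add(new Field(names.indexName(), value, fieldType));

// After: XStringField hands back a reusable thread-local StringTokenStream
// for indexed, not-tokenized values, avoiding that repeated creation.
fields.add(new XStringField(names.indexName(), value, fieldType));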
@@ -51,3 +51,6 @@ java.lang.Math#abs(long)
 @defaultMessage Use Long.compare instead we are on Java7
 com.google.common.primitives.Longs#compare(long,long)
+
+@defaultMessage we have an optimized XStringField to reduce analysis creation overhead
+org.apache.lucene.document.Field#<init>(java.lang.String,java.lang.String,org.apache.lucene.document.FieldType)
@@ -0,0 +1,62 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lucene.document;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.CloseableThreadLocal;
+
+import java.io.IOException;
+
+/**
+ * A string/text field that optimizes the case for non analyzed fields to reuse a thread local token
+ * stream (instead of creating it each time). This reduces analysis chain overhead and object creation
+ * (which is significant, yay Attributes).
+ * <p/>
+ * Not to be confused with Lucene StringField, this handles analyzed text as well, and relies on providing
+ * the FieldType. Couldn't come up with a good name for this that is different from Text/String...
+ */
+public class XStringField extends Field {
+
+    private static final CloseableThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new CloseableThreadLocal<StringTokenStream>() {
+        @Override
+        protected StringTokenStream initialValue() {
+            return new StringTokenStream();
+        }
+    };
+
+    public XStringField(String name, String value, FieldType fieldType) {
+        super(name, fieldType);
+        fieldsData = value;
+    }
+
+    @Override
+    public TokenStream tokenStream(Analyzer analyzer) throws IOException {
+        if (!fieldType().indexed()) {
+            return null;
+        }
+        // Only use the cached TokenStream if the value is indexed and not-tokenized
+        if (fieldType().tokenized()) {
+            return super.tokenStream(analyzer);
+        }
+        StringTokenStream nonAnalyzedTokenStream = NOT_ANALYZED_TOKENSTREAM.get();
+        nonAnalyzedTokenStream.setValue((String) fieldsData);
+        return nonAnalyzedTokenStream;
+    }
+}
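A minimal usage sketch for the new class (not part of the commit; the class name, field name and value below are made up, and it assumes the Lucene 4.x FieldType API that this code targets):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;

public class XStringFieldUsageSketch {
    public static void main(String[] args) {
        FieldType ft = new FieldType();
        ft.setIndexed(true);
        ft.setTokenized(false);   // not analyzed: takes the thread-local StringTokenStream path
        ft.setStored(true);
        ft.setIndexOptions(IndexOptions.DOCS_ONLY);
        ft.freeze();

        Document doc = new Document();
        // tokenStream(...) on this field reuses the per-thread StringTokenStream
        // instead of creating a new one for every field instance
        doc.add(new XStringField("status", "published", ft));
    }
}

For analyzed fields (tokenized() == true) the class simply falls back to Field's normal tokenStream(Analyzer) behavior, so it is safe to use in both cases.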
@@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.core;
 
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.queries.TermFilter;
 import org.apache.lucene.search.Filter;

@@ -221,7 +222,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper<Boolean> {
         if (value == null) {
             return;
         }
-        fields.add(new Field(names.indexName(), value ? "T" : "F", fieldType));
+        fields.add(new XStringField(names.indexName(), value ? "T" : "F", fieldType));
     }
 
     @Override
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;

@@ -387,18 +388,11 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
                 surfaceForm, weight, payload);
     }
 
-    private static final class SuggestField extends Field {
+    private static final class SuggestField extends XStringField {
         private final BytesRef payload;
         private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
         private final ContextMapping.Context ctx;
 
-        public SuggestField(String name, ContextMapping.Context ctx, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
-            super(name, value, type);
-            this.payload = payload;
-            this.toFiniteStrings = toFiniteStrings;
-            this.ctx = ctx;
-        }
-
         public SuggestField(String name, ContextMapping.Context ctx, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
             super(name, value, type);
             this.payload = payload;
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.BytesRef;

@@ -286,7 +287,7 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
         }
 
         if (fieldType.indexed() || fieldType.stored()) {
-            Field field = new StringField(names.indexName(), valueAndBoost.value(), fieldType);
+            Field field = new XStringField(names.indexName(), valueAndBoost.value(), fieldType);
             field.setBoost(valueAndBoost.boost());
             fields.add(field);
         }
@@ -385,86 +386,6 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
             }
         }
 
-    /** Extension of {@link Field} supporting reuse of a cached TokenStream for not-tokenized values. */
-    static class StringField extends Field {
-
-        public StringField(String name, String value, FieldType fieldType) {
-            super(name, fieldType);
-            fieldsData = value;
-        }
-
-        @Override
-        public TokenStream tokenStream(Analyzer analyzer) throws IOException {
-            if (!fieldType().indexed()) {
-                return null;
-            }
-            // Only use the cached TokenStream if the value is indexed and not-tokenized
-            if (fieldType().tokenized()) {
-                return super.tokenStream(analyzer);
-            }
-            return NOT_ANALYZED_TOKENSTREAM.get().setValue((String) fieldsData);
-        }
-    }
-
-    private static final ThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new ThreadLocal<StringTokenStream>() {
-        @Override
-        protected StringTokenStream initialValue() {
-            return new StringTokenStream();
-        }
-    };
-
-
-    // Copied from Field.java
-    static final class StringTokenStream extends TokenStream {
-        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
-        private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
-        private boolean used = false;
-        private String value = null;
-
-        /**
-         * Creates a new TokenStream that returns a String as single token.
-         * <p>Warning: Does not initialize the value, you must call
-         * {@link #setValue(String)} afterwards!
-         */
-        StringTokenStream() {
-        }
-
-        /** Sets the string value. */
-        StringTokenStream setValue(String value) {
-            this.value = value;
-            return this;
-        }
-
-        @Override
-        public boolean incrementToken() {
-            if (used) {
-                return false;
-            }
-            clearAttributes();
-            termAttribute.append(value);
-            offsetAttribute.setOffset(0, value.length());
-            used = true;
-            return true;
-        }
-
-        @Override
-        public void end() {
-            final int finalOffset = value.length();
-            offsetAttribute.setOffset(finalOffset, finalOffset);
-            value = null;
-        }
-
-        @Override
-        public void reset() {
-            used = false;
-        }
-
-        @Override
-        public void close() {
-            value = null;
-        }
-    }
-
     /**
      * Parsed value and boost to be returned from {@link #parseCreateFieldForString}.
      */
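Note the small design change in this move: the removed copy above kept the cached stream in a plain ThreadLocal, while XStringField uses Lucene's CloseableThreadLocal, whose per-thread values can be released explicitly via close(). A minimal sketch of that pattern in isolation (illustrative only; the class and the StringBuilder scratch value are made up):

import org.apache.lucene.util.CloseableThreadLocal;

public class CloseableThreadLocalSketch {
    // Per-thread scratch object, created lazily via initialValue() and reused
    // on every call from the same thread instead of being re-allocated.
    private static final CloseableThreadLocal<StringBuilder> SCRATCH = new CloseableThreadLocal<StringBuilder>() {
        @Override
        protected StringBuilder initialValue() {
            return new StringBuilder();
        }
    };

    static String render(Object value) {
        StringBuilder sb = SCRATCH.get();
        sb.setLength(0);
        return sb.append(value).toString();
    }

    public static void main(String[] args) {
        System.out.println(render("reuse me"));
        SCRATCH.close();   // drop all per-thread instances when the owner shuts down
    }
}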
@@ -24,6 +24,7 @@ import com.carrotsearch.hppc.cursors.ObjectCursor;
 import com.google.common.base.Objects;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.util.BytesRef;

@@ -570,7 +571,7 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
         }
 
         if (fieldType.indexed() || fieldType.stored()) {
-            Field field = new Field(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
+            Field field = new XStringField(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
             context.doc().add(field);
         }
         if (enableGeoHash) {
@@ -23,6 +23,7 @@ import com.google.common.collect.Iterables;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.TermsFilter;

@@ -313,7 +314,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
         } // else we are in the pre/post parse phase
 
         if (fieldType.indexed() || fieldType.stored()) {
-            fields.add(new Field(names.indexName(), context.id(), fieldType));
+            fields.add(new XStringField(names.indexName(), context.id(), fieldType));
         }
         if (hasDocValues()) {
             fields.add(new BinaryDocValuesField(names.indexName(), new BytesRef(context.id())));
@@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Strings;

@@ -185,7 +186,7 @@ public class IndexFieldMapper extends AbstractFieldMapper<String> implements Int
         if (!enabledState.enabled) {
             return;
         }
-        fields.add(new Field(names.indexName(), context.index(), fieldType));
+        fields.add(new XStringField(names.indexName(), context.index(), fieldType));
     }
 
     @Override
@@ -20,6 +20,7 @@ package org.elasticsearch.index.mapper.internal;
 
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.TermFilter;

@@ -188,7 +189,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
             // we are in the parsing of _parent phase
             String parentId = context.parser().text();
             context.sourceToParse().parent(parentId);
-            fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
+            fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
         } else {
             // otherwise, we are running it post processing of the xcontent
             String parsedParentId = context.doc().get(Defaults.NAME);

@@ -199,7 +200,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
                 throw new MapperParsingException("No parent id provided, not within the document, and not externally");
             }
             // we did not add it in the parsing phase, add it now
-            fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
+            fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
         } else if (parentId != null && !parsedParentId.equals(Uid.createUid(context.stringBuilder(), type, parentId))) {
             throw new MapperParsingException("Parent id mismatch, document value is [" + Uid.createUid(parsedParentId).id() + "], while external value is [" + parentId + "]");
         }
@@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Strings;

@@ -226,7 +227,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper<String> implements I
                     context.ignoredValue(names.indexName(), routing);
                     return;
                 }
-                fields.add(new Field(names.indexName(), routing, fieldType));
+                fields.add(new XStringField(names.indexName(), routing, fieldType));
             }
         }
     }
@@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.TermFilter;

@@ -181,7 +182,7 @@ public class TypeFieldMapper extends AbstractFieldMapper<String> implements Inte
         if (!fieldType.indexed() && !fieldType.stored()) {
             return;
         }
-        fields.add(new Field(names.indexName(), context.type(), fieldType));
+        fields.add(new XStringField(names.indexName(), context.type(), fieldType));
         if (hasDocValues()) {
             fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(context.type())));
         }
@@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;

@@ -153,7 +154,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal
             // we need to go over the docs and add it...
             for (int i = 1; i < context.docs().size(); i++) {
                 final Document doc = context.docs().get(i);
-                doc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
+                doc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
             }
         }
     }

@@ -175,7 +176,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal
 
     @Override
     protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
-        Field uid = new Field(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
+        Field uid = new XStringField(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
         context.uid(uid);
         fields.add(uid);
         if (hasDocValues()) {
@@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.object;
 
 import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.TermFilter;

@@ -432,12 +433,12 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll {
                 // we also rely on this for UidField#loadVersion
 
                 // this is a deeply nested field
-                nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
+                nestedDoc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
             }
             // the type of the nested doc starts with __, so we can identify that its a nested one in filters
             // note, we don't prefix it with the type of the doc since it allows us to execute a nested query
             // across types (for example, with similar nested objects)
-            nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
+            nestedDoc.add(new XStringField(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
             restoreDoc = context.switchDoc(nestedDoc);
             context.addDoc(nestedDoc);
         }
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.ngram.NGramTokenizerFactory;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.highlight.Encoder;

@@ -66,7 +67,7 @@ public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder
         List<Object> values = lookup.source().extractRawValues(mapper.names().sourcePath());
         Field[] fields = new Field[values.size()];
         for (int i = 0; i < values.size(); i++) {
-            fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
+            fields[i] = new XStringField(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
         }
         return fields;
     }
@@ -20,6 +20,7 @@ package org.elasticsearch.search.highlight.vectorhighlight;
 
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.XStringField;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.vectorhighlight.BoundaryScanner;

@@ -58,7 +59,7 @@ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
         }
         Field[] fields = new Field[values.size()];
         for (int i = 0; i < values.size(); i++) {
-            fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
+            fields[i] = new XStringField(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
         }
         return fields;
     }