Use non analyzed token stream optimization everywhere

In the string type, we have an optimization to reuse the StringTokenStream on a thread local when a non analyzed field is used (instead of creating it each time). We should use this across the board on all places where we create a field with a String.
Also, move to a specific XStringField so that we can reuse the StringTokenStream instead of copying it.
closes #6001
This commit is contained in:
Shay Banon 2014-04-30 14:52:35 -04:00
parent 12f43fbbc0
commit 23f200bc0e
15 changed files with 94 additions and 103 deletions

View File

@ -51,3 +51,6 @@ java.lang.Math#abs(long)
@defaultMessage Use Long.compare instead we are on Java7
com.google.common.primitives.Longs#compare(long,long)
@defaultMessage we have an optimized XStringField to reduce analysis creation overhead
org.apache.lucene.document.Field#<init>(java.lang.String,java.lang.String,org.apache.lucene.document.FieldType)

View File

@ -0,0 +1,62 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.CloseableThreadLocal;
import java.io.IOException;
/**
* A string/text field that optimizes the case for non analyzed fields to reuse a thread local token
* stream (instead of creating it each time). This reduces analysis chain overhead and object creation
* (which is significant, yay Attributes).
* <p/>
* Not to be confused with Lucene StringField, this handles analyzed text as well, and relies on providing
* the FieldType. Couldn't come up with a good name for this that is different from Text/String...
*/
public class XStringField extends Field {

    // One reusable single-token stream per thread for the not-analyzed case,
    // so we avoid rebuilding the attribute chain for every field instance.
    private static final CloseableThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new CloseableThreadLocal<StringTokenStream>() {
        @Override
        protected StringTokenStream initialValue() {
            return new StringTokenStream();
        }
    };

    /**
     * Creates a new string field.
     *
     * @param name      the field name
     * @param value     the string value to index and/or store
     * @param fieldType describes how the value is indexed/stored/tokenized
     */
    public XStringField(String name, String value, FieldType fieldType) {
        super(name, fieldType);
        // Assign the value directly instead of delegating to the
        // Field(String, String, FieldType) constructor.
        fieldsData = value;
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer) throws IOException {
        final FieldType type = fieldType();
        if (!type.indexed()) {
            // Non-indexed fields produce no token stream.
            return null;
        }
        if (type.tokenized()) {
            // Analyzed field: fall back to the regular analysis chain.
            return super.tokenStream(analyzer);
        }
        // Indexed but not tokenized: reuse the cached per-thread stream.
        final StringTokenStream nonAnalyzed = NOT_ANALYZED_TOKENSTREAM.get();
        nonAnalyzed.setValue((String) fieldsData);
        return nonAnalyzed;
    }
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.core;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.Filter;
@ -221,7 +222,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper<Boolean> {
if (value == null) {
return;
}
fields.add(new Field(names.indexName(), value ? "T" : "F", fieldType));
fields.add(new XStringField(names.indexName(), value ? "T" : "F", fieldType));
}
@Override

View File

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
@ -387,18 +388,11 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
surfaceForm, weight, payload);
}
private static final class SuggestField extends Field {
private static final class SuggestField extends XStringField {
private final BytesRef payload;
private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
private final ContextMapping.Context ctx;
public SuggestField(String name, ContextMapping.Context ctx, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
super(name, value, type);
this.payload = payload;
this.toFiniteStrings = toFiniteStrings;
this.ctx = ctx;
}
public SuggestField(String name, ContextMapping.Context ctx, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
super(name, value, type);
this.payload = payload;

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
@ -286,7 +287,7 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
}
if (fieldType.indexed() || fieldType.stored()) {
Field field = new StringField(names.indexName(), valueAndBoost.value(), fieldType);
Field field = new XStringField(names.indexName(), valueAndBoost.value(), fieldType);
field.setBoost(valueAndBoost.boost());
fields.add(field);
}
@ -385,86 +386,6 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
}
}
/** Extension of {@link Field} supporting reuse of a cached TokenStream for not-tokenized values. */
static class StringField extends Field {
// NOTE(review): assigns fieldsData directly instead of calling the
// Field(String, String, FieldType) constructor, avoiding a redundant path.
public StringField(String name, String value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value;
}
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
// Non-indexed fields have no token stream at all.
if (!fieldType().indexed()) {
return null;
}
// Only use the cached TokenStream if the value is indexed and not-tokenized
if (fieldType().tokenized()) {
return super.tokenStream(analyzer);
}
// Not tokenized: reuse the per-thread single-token stream rather than
// allocating a new TokenStream (and its attributes) per field.
return NOT_ANALYZED_TOKENSTREAM.get().setValue((String) fieldsData);
}
}
// Per-thread cached StringTokenStream reused for every not-tokenized value.
// NOTE(review): assumes the stream is fully consumed (and setValue called
// again) before the same thread indexes the next not-analyzed field — confirm
// against the indexing chain.
private static final ThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new ThreadLocal<StringTokenStream>() {
@Override
protected StringTokenStream initialValue() {
return new StringTokenStream();
}
};
// Copied from Field.java
static final class StringTokenStream extends TokenStream {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
// Single-token stream: 'used' flips to true after the one token is emitted.
private boolean used = false;
private String value = null;
/**
* Creates a new TokenStream that returns a String as single token.
* <p>Warning: Does not initialize the value, you must call
* {@link #setValue(String)} afterwards!
*/
StringTokenStream() {
}
/** Sets the string value. */
StringTokenStream setValue(String value) {
this.value = value;
return this;
}
@Override
public boolean incrementToken() {
if (used) {
return false;
}
// Emit the whole value as one token spanning [0, value.length()).
clearAttributes();
termAttribute.append(value);
offsetAttribute.setOffset(0, value.length());
used = true;
return true;
}
@Override
public void end() {
// Final offset is the end of the single token; value is released here,
// so callers must setValue() again before any reuse of this instance.
// NOTE(review): does not call super.end() — confirm against the
// TokenStream end() contract for the Lucene version in use.
final int finalOffset = value.length();
offsetAttribute.setOffset(finalOffset, finalOffset);
value = null;
}
@Override
public void reset() {
// Only re-arms the single token; does not restore a value nulled by end().
used = false;
}
@Override
public void close() {
value = null;
}
}
/**
* Parsed value and boost to be returned from {@link #parseCreateFieldForString}.
*/

View File

@ -24,6 +24,7 @@ import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.base.Objects;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
@ -570,7 +571,7 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
}
if (fieldType.indexed() || fieldType.stored()) {
Field field = new Field(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
Field field = new XStringField(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
context.doc().add(field);
}
if (enableGeoHash) {

View File

@ -23,6 +23,7 @@ import com.google.common.collect.Iterables;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
@ -313,7 +314,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
} // else we are in the pre/post parse phase
if (fieldType.indexed() || fieldType.stored()) {
fields.add(new Field(names.indexName(), context.id(), fieldType));
fields.add(new XStringField(names.indexName(), context.id(), fieldType));
}
if (hasDocValues()) {
fields.add(new BinaryDocValuesField(names.indexName(), new BytesRef(context.id())));

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
@ -185,7 +186,7 @@ public class IndexFieldMapper extends AbstractFieldMapper<String> implements Int
if (!enabledState.enabled) {
return;
}
fields.add(new Field(names.indexName(), context.index(), fieldType));
fields.add(new XStringField(names.indexName(), context.index(), fieldType));
}
@Override

View File

@ -20,6 +20,7 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
@ -188,7 +189,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
// we are in the parsing of _parent phase
String parentId = context.parser().text();
context.sourceToParse().parent(parentId);
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else {
// otherwise, we are running it post processing of the xcontent
String parsedParentId = context.doc().get(Defaults.NAME);
@ -199,7 +200,7 @@ public class ParentFieldMapper extends AbstractFieldMapper<Uid> implements Inter
throw new MapperParsingException("No parent id provided, not within the document, and not externally");
}
// we did not add it in the parsing phase, add it now
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else if (parentId != null && !parsedParentId.equals(Uid.createUid(context.stringBuilder(), type, parentId))) {
throw new MapperParsingException("Parent id mismatch, document value is [" + Uid.createUid(parsedParentId).id() + "], while external value is [" + parentId + "]");
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
@ -226,7 +227,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper<String> implements I
context.ignoredValue(names.indexName(), routing);
return;
}
fields.add(new Field(names.indexName(), routing, fieldType));
fields.add(new XStringField(names.indexName(), routing, fieldType));
}
}
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
@ -181,7 +182,7 @@ public class TypeFieldMapper extends AbstractFieldMapper<String> implements Inte
if (!fieldType.indexed() && !fieldType.stored()) {
return;
}
fields.add(new Field(names.indexName(), context.type(), fieldType));
fields.add(new XStringField(names.indexName(), context.type(), fieldType));
if (hasDocValues()) {
fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(context.type())));
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
@ -153,7 +154,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal
// we need to go over the docs and add it...
for (int i = 1; i < context.docs().size(); i++) {
final Document doc = context.docs().get(i);
doc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
doc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
}
}
}
@ -175,7 +176,7 @@ public class UidFieldMapper extends AbstractFieldMapper<Uid> implements Internal
@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
Field uid = new Field(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
Field uid = new XStringField(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
context.uid(uid);
fields.add(uid);
if (hasDocValues()) {

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.object;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
@ -432,12 +433,12 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll {
// we also rely on this for UidField#loadVersion
// this is a deeply nested field
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
nestedDoc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
}
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
nestedDoc.add(new XStringField(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
restoreDoc = context.switchDoc(nestedDoc);
context.addDoc(nestedDoc);
}

View File

@ -27,6 +27,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ngram.NGramTokenizerFactory;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.highlight.Encoder;
@ -66,7 +67,7 @@ public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder
List<Object> values = lookup.source().extractRawValues(mapper.names().sourcePath());
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
fields[i] = new XStringField(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
}
return fields;
}

View File

@ -20,6 +20,7 @@ package org.elasticsearch.search.highlight.vectorhighlight;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
@ -58,7 +59,7 @@ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
}
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
fields[i] = new XStringField(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
}
return fields;
}