diff --git a/core-signatures.txt b/core-signatures.txt index 9be24648b6d..8cc2aad3daa 100644 --- a/core-signatures.txt +++ b/core-signatures.txt @@ -51,3 +51,6 @@ java.lang.Math#abs(long) @defaultMessage Use Long.compare instead we are on Java7 com.google.common.primitives.Longs#compare(long,long) + +@defaultMessage we have an optimized XStringField to reduce analysis creation overhead +org.apache.lucene.document.Field#<init>(java.lang.String,java.lang.String,org.apache.lucene.document.FieldType) diff --git a/src/main/java/org/apache/lucene/document/XStringField.java b/src/main/java/org/apache/lucene/document/XStringField.java new file mode 100644 index 00000000000..7a562c7dfbe --- /dev/null +++ b/src/main/java/org/apache/lucene/document/XStringField.java @@ -0,0 +1,62 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.lucene.document; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.util.CloseableThreadLocal; + +import java.io.IOException; + +/** + * A string/text field that optimizes the case for non analyzed fields to reuse a thread local token + * stream (instead of creating it each time). 
This reduces analysis chain overhead and object creation + * (which is significant, yay Attributes). + * <p/>
+ * Not to be confused with Lucene StringField, this handles analyzed text as well, and relies on providing + * the FieldType. Couldn't come up with a good name for this that is different from Text/String... + */ +public class XStringField extends Field { + + private static final CloseableThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new CloseableThreadLocal<StringTokenStream>() { + @Override + protected StringTokenStream initialValue() { + return new StringTokenStream(); + } + }; + + public XStringField(String name, String value, FieldType fieldType) { + super(name, fieldType); + fieldsData = value; + } + + @Override + public TokenStream tokenStream(Analyzer analyzer) throws IOException { + if (!fieldType().indexed()) { + return null; + } + // Only use the cached TokenStream if the value is indexed and not-tokenized + if (fieldType().tokenized()) { + return super.tokenStream(analyzer); + } + StringTokenStream nonAnalyzedTokenStream = NOT_ANALYZED_TOKENSTREAM.get(); + nonAnalyzedTokenStream.setValue((String) fieldsData); + return nonAnalyzedTokenStream; + } +} diff --git a/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java index fa7a28a4858..ff5afc05244 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.queries.TermFilter; import org.apache.lucene.search.Filter; @@ -221,7 +222,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper { if (value == null) { return; } - fields.add(new Field(names.indexName(), value ? "T" : "F", fieldType)); + fields.add(new XStringField(names.indexName(), value ? 
"T" : "F", fieldType)); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index 32959213eb4..d003cce4c49 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; @@ -387,18 +388,11 @@ public class CompletionFieldMapper extends AbstractFieldMapper { surfaceForm, weight, payload); } - private static final class SuggestField extends Field { + private static final class SuggestField extends XStringField { private final BytesRef payload; private final CompletionTokenStream.ToFiniteStrings toFiniteStrings; private final ContextMapping.Context ctx; - public SuggestField(String name, ContextMapping.Context ctx, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { - super(name, value, type); - this.payload = payload; - this.toFiniteStrings = toFiniteStrings; - this.ctx = ctx; - } - public SuggestField(String name, ContextMapping.Context ctx, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { super(name, value, type); this.payload = payload; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java index a108fcb4139..cc5f323c660 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java +++ 
b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.util.BytesRef; @@ -286,7 +287,7 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al } if (fieldType.indexed() || fieldType.stored()) { - Field field = new StringField(names.indexName(), valueAndBoost.value(), fieldType); + Field field = new XStringField(names.indexName(), valueAndBoost.value(), fieldType); field.setBoost(valueAndBoost.boost()); fields.add(field); } @@ -385,86 +386,6 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al } } - /** Extension of {@link Field} supporting reuse of a cached TokenStream for not-tokenized values. 
*/ - static class StringField extends Field { - - public StringField(String name, String value, FieldType fieldType) { - super(name, fieldType); - fieldsData = value; - } - - @Override - public TokenStream tokenStream(Analyzer analyzer) throws IOException { - if (!fieldType().indexed()) { - return null; - } - // Only use the cached TokenStream if the value is indexed and not-tokenized - if (fieldType().tokenized()) { - return super.tokenStream(analyzer); - } - return NOT_ANALYZED_TOKENSTREAM.get().setValue((String) fieldsData); - } - } - - private static final ThreadLocal NOT_ANALYZED_TOKENSTREAM = new ThreadLocal() { - @Override - protected StringTokenStream initialValue() { - return new StringTokenStream(); - } - }; - - - // Copied from Field.java - static final class StringTokenStream extends TokenStream { - private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); - private boolean used = false; - private String value = null; - - /** - * Creates a new TokenStream that returns a String as single token. - *

Warning: Does not initialize the value, you must call - * {@link #setValue(String)} afterwards! - */ - StringTokenStream() { - } - - /** Sets the string value. */ - StringTokenStream setValue(String value) { - this.value = value; - return this; - } - - @Override - public boolean incrementToken() { - if (used) { - return false; - } - clearAttributes(); - termAttribute.append(value); - offsetAttribute.setOffset(0, value.length()); - used = true; - return true; - } - - @Override - public void end() { - final int finalOffset = value.length(); - offsetAttribute.setOffset(finalOffset, finalOffset); - value = null; - } - - @Override - public void reset() { - used = false; - } - - @Override - public void close() { - value = null; - } - } - /** * Parsed value and boost to be returned from {@link #parseCreateFieldForString}. */ diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java index a0c4f0c0b22..87356dc2c3f 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java @@ -24,6 +24,7 @@ import com.carrotsearch.hppc.cursors.ObjectCursor; import com.google.common.base.Objects; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.util.BytesRef; @@ -570,7 +571,7 @@ public class GeoPointFieldMapper extends AbstractFieldMapper implement } if (fieldType.indexed() || fieldType.stored()) { - Field field = new Field(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType); + Field field = new XStringField(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType); context.doc().add(field); } if 
(enableGeoHash) { diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java index ae9cb5211b1..63e10bcf983 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java @@ -23,6 +23,7 @@ import com.google.common.collect.Iterables; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.TermsFilter; @@ -313,7 +314,7 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern } // else we are in the pre/post parse phase if (fieldType.indexed() || fieldType.stored()) { - fields.add(new Field(names.indexName(), context.id(), fieldType)); + fields.add(new XStringField(names.indexName(), context.id(), fieldType)); } if (hasDocValues()) { fields.add(new BinaryDocValuesField(names.indexName(), new BytesRef(context.id()))); diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java index b59934d9548..b2d9d6f4499 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; @@ -185,7 +186,7 @@ public class 
IndexFieldMapper extends AbstractFieldMapper implements Int if (!enabledState.enabled) { return; } - fields.add(new Field(names.indexName(), context.index(), fieldType)); + fields.add(new XStringField(names.indexName(), context.index(), fieldType)); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java index 590b105b0e0..886e4e0afe0 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.TermFilter; @@ -188,7 +189,7 @@ public class ParentFieldMapper extends AbstractFieldMapper implements Inter // we are in the parsing of _parent phase String parentId = context.parser().text(); context.sourceToParse().parent(parentId); - fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType)); + fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType)); } else { // otherwise, we are running it post processing of the xcontent String parsedParentId = context.doc().get(Defaults.NAME); @@ -199,7 +200,7 @@ public class ParentFieldMapper extends AbstractFieldMapper implements Inter throw new MapperParsingException("No parent id provided, not within the document, and not externally"); } // we did not add it in the parsing phase, add it now - fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType)); + fields.add(new XStringField(names.indexName(), 
Uid.createUid(context.stringBuilder(), type, parentId), fieldType)); } else if (parentId != null && !parsedParentId.equals(Uid.createUid(context.stringBuilder(), type, parentId))) { throw new MapperParsingException("Parent id mismatch, document value is [" + Uid.createUid(parsedParentId).id() + "], while external value is [" + parentId + "]"); } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java index c94e58c8631..ce8bda06145 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; @@ -226,7 +227,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I context.ignoredValue(names.indexName(), routing); return; } - fields.add(new Field(names.indexName(), routing, fieldType)); + fields.add(new XStringField(names.indexName(), routing, fieldType)); } } } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java index 8fc64a9b682..eb168a47c01 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; +import 
org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.TermFilter; @@ -181,7 +182,7 @@ public class TypeFieldMapper extends AbstractFieldMapper implements Inte if (!fieldType.indexed() && !fieldType.stored()) { return; } - fields.add(new Field(names.indexName(), context.type(), fieldType)); + fields.add(new XStringField(names.indexName(), context.type(), fieldType)); if (hasDocValues()) { fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(context.type()))); } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java index 29e5b1de1f8..23449ff1b5e 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; @@ -153,7 +154,7 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal // we need to go over the docs and add it... 
for (int i = 1; i < context.docs().size(); i++) { final Document doc = context.docs().get(i); - doc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE)); + doc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE)); } } } @@ -175,7 +176,7 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal @Override protected void parseCreateField(ParseContext context, List fields) throws IOException { - Field uid = new Field(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE); + Field uid = new XStringField(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE); context.uid(uid); fields.add(uid); if (hasDocValues()) { diff --git a/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java b/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java index 4c1ae32f68f..ddeb282647e 100644 --- a/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.object; import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import org.apache.lucene.document.Field; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.queries.TermFilter; @@ -432,12 +433,12 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { // we also rely on this for UidField#loadVersion // this is a deeply nested field - nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); + nestedDoc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); } // the type of the nested doc starts with __, so we can identify that its a nested one in 
filters // note, we don't prefix it with the type of the doc since it allows us to execute a nested query // across types (for example, with similar nested objects) - nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE)); + nestedDoc.add(new XStringField(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE)); restoreDoc = context.switchDoc(nestedDoc); context.addDoc(nestedDoc); } diff --git a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java index 7404a72af95..efb75008c1f 100644 --- a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java +++ b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.ngram.NGramTokenizerFactory; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.highlight.Encoder; @@ -66,7 +67,7 @@ public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder List values = lookup.source().extractRawValues(mapper.names().sourcePath()); Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { - fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED); + fields[i] = new XStringField(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; } diff --git a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java 
b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java index abfd907af29..d1b46391ca0 100644 --- a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java +++ b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.highlight.vectorhighlight; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; +import org.apache.lucene.document.XStringField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.vectorhighlight.BoundaryScanner; @@ -58,7 +59,7 @@ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder { } Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { - fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED); + fields[i] = new XStringField(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; }