properly reuse indices analyzers

In AnalysisService, don't wrap the indices-level analyzers we already have in a new NamedAnalyzer, since that effectively creates a new analyzer instance (with a per-field reuse strategy) and we lose much of the benefit of reusing analyzers at the indices / node level.

Now the indices-level analyzers return a NamedAnalyzer directly. NamedAnalyzer also uses the non per-field (global) reuse strategy, since that is really the common case for it (there is no need for per-field reuse there).
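To make the reuse-strategy point concrete, here is a small self-contained sketch (a toy model, not Lucene or Elasticsearch code; every name in it is invented, and real Lucene keeps these caches per thread): a per-field strategy caches one set of token-stream components per field name, while a global strategy caches a single set, so wrapping an analyzer that never varies by field with a per-field strategy only multiplies the cached components.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

public final class ReuseStrategySketch {

    // Toy stand-in for a reuse strategy: decides how token-stream components are cached.
    interface ReuseStrategy {
        Object components(String field, Supplier<Object> factory);
    }

    // Per-field reuse: one cached components instance per field name.
    static final class PerFieldReuse implements ReuseStrategy {
        private final Map<String, Object> perField = new HashMap<>();
        public Object components(String field, Supplier<Object> factory) {
            return perField.computeIfAbsent(field, f -> factory.get());
        }
    }

    // Global reuse: a single cached components instance, whatever the field.
    static final class GlobalReuse implements ReuseStrategy {
        private Object cached;
        public Object components(String field, Supplier<Object> factory) {
            if (cached == null) {
                cached = factory.get();
            }
            return cached;
        }
    }

    public static void main(String[] args) {
        final int[] built = {0};
        Supplier<Object> factory = () -> {
            built[0]++;
            return new Object();
        };
        String[] fields = {"title", "body", "tags"};

        ReuseStrategy global = new GlobalReuse();
        for (String field : fields) {
            global.components(field, factory);
        }
        System.out.println("global reuse built " + built[0] + " components");    // prints 1

        built[0] = 0;
        ReuseStrategy perField = new PerFieldReuse();
        for (String field : fields) {
            perField.components(field, factory);
        }
        System.out.println("per-field reuse built " + built[0] + " components"); // prints 3
    }
}

Since a NamedAnalyzer always delegates to one fixed analyzer, the global strategy is enough, which is exactly what the NamedAnalyzer change below switches to (super(new GlobalReuseStrategy())).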

Also, try to reuse numeric analyzers globally instead of creating them per numeric mapper. Although those analyzers are not used during indexing (we have a custom numeric field for that), they can still be used at search time, for example when a query string targets a numeric field that has no specific query implementation in its mapper.
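For the numeric analyzers, the sharing pattern added below boils down to a small static table of pre-built, globally scoped instances keyed by precision step, with a fallback for uncommon steps. A self-contained sketch (simplified: the real Numeric*Analyzer classes key Trove TIntObjectHashMaps of NamedAnalyzer; the stub type and names here are invented):

import java.util.HashMap;
import java.util.Map;

public final class SharedNumericAnalyzerSketch {

    // Stand-in for the real numeric analyzers; only the precision step matters here.
    static final class NumericAnalyzerStub {
        final int precisionStep;
        NumericAnalyzerStub(int precisionStep) {
            this.precisionStep = precisionStep;
        }
    }

    // Pre-built, shared instances for the common precision steps,
    // mirroring the static builtIn maps added in the diffs below.
    private static final Map<Integer, NumericAnalyzerStub> BUILT_IN = new HashMap<>();

    static {
        BUILT_IN.put(Integer.MAX_VALUE, new NumericAnalyzerStub(Integer.MAX_VALUE));
        for (int step = 0; step <= 64; step += 4) {
            BUILT_IN.put(step, new NumericAnalyzerStub(step));
        }
    }

    // Hand out the shared instance when one exists; fall back to a fresh,
    // effectively index-scoped instance for uncommon precision steps.
    static NumericAnalyzerStub buildAnalyzer(int precisionStep) {
        NumericAnalyzerStub shared = BUILT_IN.get(precisionStep);
        return shared != null ? shared : new NumericAnalyzerStub(precisionStep);
    }

    public static void main(String[] args) {
        // two mappers asking for the same common precision step share one instance
        System.out.println(buildAnalyzer(4) == buildAnalyzer(4));  // prints true
        // an unusual precision step gets a fresh instance each time
        System.out.println(buildAnalyzer(7) == buildAnalyzer(7));  // prints false
    }
}

The mapper classes then just call the shared lookup (for example NumericLongAnalyzer.buildNamedAnalyzer(precisionStep)) instead of constructing new NamedAnalyzer/NumericLongAnalyzer pairs per mapper, as the mapper diffs below show.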
Shay Banon 2013-07-05 19:07:08 -07:00
parent 8d9c84f84e
commit b9a2fbd874
16 changed files with 142 additions and 96 deletions

View File

@@ -22,15 +22,8 @@ package org.apache.lucene.analysis;
import java.io.Reader;
/**
* Extension to {@link Analyzer} suitable for Analyzers which wrap
* other Analyzers.
* <p/>
* {@link #getWrappedAnalyzer(String)} allows the Analyzer
* to wrap multiple Analyzers which are selected on a per field basis.
* <p/>
* {@link #wrapComponents(String, Analyzer.TokenStreamComponents)} allows the
* TokenStreamComponents of the wrapped Analyzer to then be wrapped
* (such as adding a new {@link TokenFilter} to form new TokenStreamComponents.
* Similar to Lucene {@link AnalyzerWrapper}, but actually allows setting the reuse strategy...
* // TODO: add the ability to set it to Lucene...
*/
public abstract class CustomAnalyzerWrapper extends Analyzer {
@@ -38,8 +31,8 @@ public abstract class CustomAnalyzerWrapper extends Analyzer {
* Creates a new CustomAnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of
* the wrapped Analyzers are unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed
*/
protected CustomAnalyzerWrapper() {
super(new PerFieldReuseStrategy());
protected CustomAnalyzerWrapper(ReuseStrategy reuseStrategy) {
super(reuseStrategy);
}
/**
@@ -56,8 +49,7 @@ public abstract class CustomAnalyzerWrapper extends Analyzer {
* Analyzer, to form new components. It is through this method that new
* TokenFilters can be added by AnalyzerWrappers.
*
* @param fieldName Name of the field which is to be analyzed
* @param components TokenStreamComponents taken from the wrapped Analyzer
* @return Wrapped / altered TokenStreamComponents.
*/

View File

@@ -43,11 +43,8 @@ import static com.google.common.collect.Maps.newHashMap;
public class AnalysisService extends AbstractIndexComponent implements CloseableComponent {
private final ImmutableMap<String, NamedAnalyzer> analyzers;
private final ImmutableMap<String, TokenizerFactory> tokenizers;
private final ImmutableMap<String, CharFilterFactory> charFilters;
private final ImmutableMap<String, TokenFilterFactory> tokenFilters;
private final NamedAnalyzer defaultAnalyzer;
@@ -225,7 +222,13 @@ public class AnalysisService extends AbstractIndexComponent implements Closeable
if (analyzerF == null) {
throw new ElasticSearchIllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer");
}
NamedAnalyzer analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF);
NamedAnalyzer analyzer;
// if we got a named analyzer back, use it...
if (analyzerF instanceof NamedAnalyzer) {
analyzer = (NamedAnalyzer) analyzerF;
} else {
analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF);
}
analyzers.put(analyzerFactory.name(), analyzer);
analyzers.put(Strings.toCamelCase(analyzerFactory.name()), analyzer);
String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias");

View File

@@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.CustomAnalyzerWrapper;
/**
@@ -29,19 +30,31 @@ import org.apache.lucene.analysis.CustomAnalyzerWrapper;
public class NamedAnalyzer extends CustomAnalyzerWrapper {
private final String name;
private final AnalyzerScope scope;
private final Analyzer analyzer;
private final int positionOffsetGap;
public NamedAnalyzer(NamedAnalyzer analyzer, int positionOffsetGap) {
this(analyzer.name(), analyzer.scope(), analyzer.analyzer(), positionOffsetGap);
}
public NamedAnalyzer(String name, Analyzer analyzer) {
this(name, AnalyzerScope.INDEX, analyzer);
}
public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer) {
this(name, scope, analyzer, Integer.MIN_VALUE);
}
public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionOffsetGap) {
// our named analyzer always wraps a non per-field analyzer, so there is no need for per-field reuse here
super(new GlobalReuseStrategy());
// TODO: would be nice to pick the reuse strategy based on the analyzer...
assert !(analyzer instanceof AnalyzerWrapper); // this is the only one in Lucene currently that uses PerFieldStrategy, make sure we don't wrap it
this.name = name;
this.scope = scope;
this.analyzer = analyzer;
this.positionOffsetGap = positionOffsetGap;
}
/**
@@ -75,6 +88,14 @@ public class NamedAnalyzer extends CustomAnalyzerWrapper {
return components;
}
@Override
public int getPositionIncrementGap(String fieldName) {
if (positionOffsetGap != Integer.MIN_VALUE) {
return positionOffsetGap;
}
return super.getPositionIncrementGap(fieldName);
}
@Override
public String toString() {
return "analyzer name[" + name + "], analyzer [" + analyzer + "]";

View File

@@ -1,37 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
/**
*/
public class NamedCustomAnalyzer extends NamedAnalyzer {
private final int positionOffsetGap;
public NamedCustomAnalyzer(NamedAnalyzer analyzer, int positionOffsetGap) {
super(analyzer.name(), analyzer.scope(), analyzer.analyzer());
this.positionOffsetGap = positionOffsetGap;
}
@Override
public int getPositionIncrementGap(String fieldName) {
return positionOffsetGap;
}
}

View File

@@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;
import gnu.trove.map.hash.TIntObjectHashMap;
import org.apache.lucene.util.NumericUtils;
import java.io.IOException;
@@ -29,6 +30,24 @@ import java.io.Reader;
*/
public class NumericDoubleAnalyzer extends NumericAnalyzer<NumericDoubleTokenizer> {
private final static TIntObjectHashMap<NamedAnalyzer> builtIn;
static {
builtIn = new TIntObjectHashMap<NamedAnalyzer>();
builtIn.put(Integer.MAX_VALUE, new NamedAnalyzer("_double/max", AnalyzerScope.GLOBAL, new NumericDoubleAnalyzer(Integer.MAX_VALUE)));
for (int i = 0; i <= 64; i += 4) {
builtIn.put(i, new NamedAnalyzer("_double/" + i, AnalyzerScope.GLOBAL, new NumericDoubleAnalyzer(i)));
}
}
public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
NamedAnalyzer namedAnalyzer = builtIn.get(precisionStep);
if (namedAnalyzer == null) {
namedAnalyzer = new NamedAnalyzer("_double/" + precisionStep, AnalyzerScope.INDEX, new NumericDoubleAnalyzer(precisionStep));
}
return namedAnalyzer;
}
private final int precisionStep;
public NumericDoubleAnalyzer() {

View File

@@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;
import gnu.trove.map.hash.TIntObjectHashMap;
import org.apache.lucene.util.NumericUtils;
import java.io.IOException;
@@ -29,6 +30,24 @@ import java.io.Reader;
*/
public class NumericFloatAnalyzer extends NumericAnalyzer<NumericFloatTokenizer> {
private final static TIntObjectHashMap<NamedAnalyzer> builtIn;
static {
builtIn = new TIntObjectHashMap<NamedAnalyzer>();
builtIn.put(Integer.MAX_VALUE, new NamedAnalyzer("_float/max", AnalyzerScope.GLOBAL, new NumericFloatAnalyzer(Integer.MAX_VALUE)));
for (int i = 0; i <= 64; i += 4) {
builtIn.put(i, new NamedAnalyzer("_float/" + i, AnalyzerScope.GLOBAL, new NumericFloatAnalyzer(i)));
}
}
public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
NamedAnalyzer namedAnalyzer = builtIn.get(precisionStep);
if (namedAnalyzer == null) {
namedAnalyzer = new NamedAnalyzer("_float/" + precisionStep, AnalyzerScope.INDEX, new NumericFloatAnalyzer(precisionStep));
}
return namedAnalyzer;
}
private final int precisionStep;
public NumericFloatAnalyzer() {

View File

@@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;
import gnu.trove.map.hash.TIntObjectHashMap;
import org.apache.lucene.util.NumericUtils;
import java.io.IOException;
@@ -29,6 +30,24 @@ import java.io.Reader;
*/
public class NumericIntegerAnalyzer extends NumericAnalyzer<NumericIntegerTokenizer> {
private final static TIntObjectHashMap<NamedAnalyzer> builtIn;
static {
builtIn = new TIntObjectHashMap<NamedAnalyzer>();
builtIn.put(Integer.MAX_VALUE, new NamedAnalyzer("_int/max", AnalyzerScope.GLOBAL, new NumericIntegerAnalyzer(Integer.MAX_VALUE)));
for (int i = 0; i <= 64; i += 4) {
builtIn.put(i, new NamedAnalyzer("_int/" + i, AnalyzerScope.GLOBAL, new NumericIntegerAnalyzer(i)));
}
}
public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
NamedAnalyzer namedAnalyzer = builtIn.get(precisionStep);
if (namedAnalyzer == null) {
namedAnalyzer = new NamedAnalyzer("_int/" + precisionStep, AnalyzerScope.INDEX, new NumericIntegerAnalyzer(precisionStep));
}
return namedAnalyzer;
}
private final int precisionStep;
public NumericIntegerAnalyzer() {

View File

@@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;
import gnu.trove.map.hash.TIntObjectHashMap;
import org.apache.lucene.util.NumericUtils;
import java.io.IOException;
@@ -29,6 +30,24 @@ import java.io.Reader;
*/
public class NumericLongAnalyzer extends NumericAnalyzer<NumericLongTokenizer> {
private final static TIntObjectHashMap<NamedAnalyzer> builtIn;
static {
builtIn = new TIntObjectHashMap<NamedAnalyzer>();
builtIn.put(Integer.MAX_VALUE, new NamedAnalyzer("_long/max", AnalyzerScope.GLOBAL, new NumericLongAnalyzer(Integer.MAX_VALUE)));
for (int i = 0; i <= 64; i += 4) {
builtIn.put(i, new NamedAnalyzer("_long/" + i, AnalyzerScope.GLOBAL, new NumericLongAnalyzer(i)));
}
}
public static NamedAnalyzer buildNamedAnalyzer(int precisionStep) {
NamedAnalyzer namedAnalyzer = builtIn.get(precisionStep);
if (namedAnalyzer == null) {
namedAnalyzer = new NamedAnalyzer("_long/" + precisionStep, AnalyzerScope.INDEX, new NumericLongAnalyzer(precisionStep));
}
return namedAnalyzer;
}
private final int precisionStep;
public NumericLongAnalyzer() {

View File

@@ -24,32 +24,29 @@ import org.apache.lucene.analysis.Analyzer;
/**
*
*/
public class PreBuiltAnalyzerProvider<T extends Analyzer> implements AnalyzerProvider<T> {
public class PreBuiltAnalyzerProvider implements AnalyzerProvider<NamedAnalyzer> {
private final String name;
private final NamedAnalyzer analyzer;
private final AnalyzerScope scope;
private final T analyzer;
public PreBuiltAnalyzerProvider(String name, AnalyzerScope scope, T analyzer) {
this.name = name;
this.scope = scope;
this.analyzer = analyzer;
public PreBuiltAnalyzerProvider(String name, AnalyzerScope scope, Analyzer analyzer) {
// we create the named analyzer here so the resources associated with it are shared,
// instead of wrapping the shared analyzer with a new NamedAnalyzer each time, which
// would prevent those resources from being shared...
this.analyzer = new NamedAnalyzer(name, scope, analyzer);
}
@Override
public String name() {
return name;
return analyzer.name();
}
@Override
public AnalyzerScope scope() {
return scope;
return analyzer.scope();
}
@Override
public T get() {
public NamedAnalyzer get() {
return analyzer;
}
}

View File

@@ -30,15 +30,15 @@ public class PreBuiltAnalyzerProviderFactory implements AnalyzerProviderFactory
private final PreBuiltAnalyzerProvider analyzerProvider;
public PreBuiltAnalyzerProviderFactory(String name, AnalyzerScope scope, Analyzer analyzer) {
this(new PreBuiltAnalyzerProvider<Analyzer>(name, scope, analyzer));
this(new PreBuiltAnalyzerProvider(name, scope, analyzer));
}
public PreBuiltAnalyzerProviderFactory(PreBuiltAnalyzerProvider<Analyzer> analyzerProvider) {
public PreBuiltAnalyzerProviderFactory(PreBuiltAnalyzerProvider analyzerProvider) {
this.analyzerProvider = analyzerProvider;
}
@Override
public AnalyzerProvider<Analyzer> create(String name, Settings settings) {
public AnalyzerProvider create(String name, Settings settings) {
return analyzerProvider;
}

View File

@@ -36,7 +36,6 @@ import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericDoubleAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
@@ -120,9 +119,9 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
protected DoubleFieldMapper(Names names, int precisionStep, float boost, FieldType fieldType,
Double nullValue, Explicit<Boolean> ignoreMalformed,
PostingsFormatProvider provider, SimilarityProvider similarity, @Nullable Settings fieldDataSettings) {
super(names, precisionStep, boost, fieldType,
ignoreMalformed, new NamedAnalyzer("_double/" + precisionStep, new NumericDoubleAnalyzer(precisionStep)),
new NamedAnalyzer("_double/max", new NumericDoubleAnalyzer(Integer.MAX_VALUE)), provider, similarity, fieldDataSettings);
super(names, precisionStep, boost, fieldType, ignoreMalformed,
NumericDoubleAnalyzer.buildNamedAnalyzer(precisionStep), NumericDoubleAnalyzer.buildNamedAnalyzer(Integer.MAX_VALUE),
provider, similarity, fieldDataSettings);
this.nullValue = nullValue;
this.nullValueAsString = nullValue == null ? null : nullValue.toString();
}

View File

@@ -37,7 +37,6 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericFloatAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
@@ -119,9 +118,9 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
protected FloatFieldMapper(Names names, int precisionStep, float boost, FieldType fieldType,
Float nullValue, Explicit<Boolean> ignoreMalformed, PostingsFormatProvider provider, SimilarityProvider similarity, @Nullable Settings fieldDataSettings) {
super(names, precisionStep, boost, fieldType,
ignoreMalformed, new NamedAnalyzer("_float/" + precisionStep, new NumericFloatAnalyzer(precisionStep)),
new NamedAnalyzer("_float/max", new NumericFloatAnalyzer(Integer.MAX_VALUE)), provider, similarity, fieldDataSettings);
super(names, precisionStep, boost, fieldType, ignoreMalformed,
NumericFloatAnalyzer.buildNamedAnalyzer(precisionStep), NumericFloatAnalyzer.buildNamedAnalyzer(Integer.MAX_VALUE),
provider, similarity, fieldDataSettings);
this.nullValue = nullValue;
this.nullValueAsString = nullValue == null ? null : nullValue.toString();
}

View File

@@ -37,7 +37,6 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericIntegerAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
@@ -119,9 +118,9 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
protected IntegerFieldMapper(Names names, int precisionStep, float boost, FieldType fieldType,
Integer nullValue, Explicit<Boolean> ignoreMalformed,
PostingsFormatProvider provider, SimilarityProvider similarity, @Nullable Settings fieldDataSettings) {
super(names, precisionStep, boost, fieldType,
ignoreMalformed, new NamedAnalyzer("_int/" + precisionStep, new NumericIntegerAnalyzer(precisionStep)),
new NamedAnalyzer("_int/max", new NumericIntegerAnalyzer(Integer.MAX_VALUE)), provider, similarity, fieldDataSettings);
super(names, precisionStep, boost, fieldType, ignoreMalformed,
NumericIntegerAnalyzer.buildNamedAnalyzer(precisionStep), NumericIntegerAnalyzer.buildNamedAnalyzer(Integer.MAX_VALUE),
provider, similarity, fieldDataSettings);
this.nullValue = nullValue;
this.nullValueAsString = nullValue == null ? null : nullValue.toString();
}

View File

@@ -37,7 +37,6 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericLongAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
@@ -120,9 +119,9 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
protected LongFieldMapper(Names names, int precisionStep, float boost, FieldType fieldType,
Long nullValue, Explicit<Boolean> ignoreMalformed,
PostingsFormatProvider provider, SimilarityProvider similarity, @Nullable Settings fieldDataSettings) {
super(names, precisionStep, boost, fieldType,
ignoreMalformed, new NamedAnalyzer("_long/" + precisionStep, new NumericLongAnalyzer(precisionStep)),
new NamedAnalyzer("_long/max", new NumericLongAnalyzer(Integer.MAX_VALUE)), provider, similarity, fieldDataSettings);
super(names, precisionStep, boost, fieldType, ignoreMalformed,
NumericLongAnalyzer.buildNamedAnalyzer(precisionStep), NumericLongAnalyzer.buildNamedAnalyzer(Integer.MAX_VALUE),
provider, similarity, fieldDataSettings);
this.nullValue = nullValue;
this.nullValueAsString = nullValue == null ? null : nullValue.toString();
}

View File

@@ -35,7 +35,6 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NamedCustomAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*;
@@ -121,9 +120,9 @@ public class StringFieldMapper extends AbstractFieldMapper<String> implements Al
@Override
public StringFieldMapper build(BuilderContext context) {
if (positionOffsetGap > 0) {
indexAnalyzer = new NamedCustomAnalyzer(indexAnalyzer, positionOffsetGap);
searchAnalyzer = new NamedCustomAnalyzer(searchAnalyzer, positionOffsetGap);
searchQuotedAnalyzer = new NamedCustomAnalyzer(searchQuotedAnalyzer, positionOffsetGap);
indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionOffsetGap);
searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionOffsetGap);
searchQuotedAnalyzer = new NamedAnalyzer(searchQuotedAnalyzer, positionOffsetGap);
}
// if the field is not analyzed, then by default, we should omit norms and have docs only
// index options, as probably what the user really wants

View File

@@ -33,7 +33,6 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericFloatAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
@@ -122,9 +121,9 @@ public class BoostFieldMapper extends NumberFieldMapper<Float> implements Intern
protected BoostFieldMapper(String name, String indexName, int precisionStep, float boost, FieldType fieldType,
Float nullValue, PostingsFormatProvider provider, @Nullable Settings fieldDataSettings) {
super(new Names(name, indexName, indexName, name), precisionStep, boost, fieldType,
Defaults.IGNORE_MALFORMED, new NamedAnalyzer("_float/" + precisionStep, new NumericFloatAnalyzer(precisionStep)),
new NamedAnalyzer("_float/max", new NumericFloatAnalyzer(Integer.MAX_VALUE)), provider, null, fieldDataSettings);
super(new Names(name, indexName, indexName, name), precisionStep, boost, fieldType, Defaults.IGNORE_MALFORMED,
NumericFloatAnalyzer.buildNamedAnalyzer(precisionStep), NumericFloatAnalyzer.buildNamedAnalyzer(Integer.MAX_VALUE),
provider, null, fieldDataSettings);
this.nullValue = nullValue;
}