Improve similarity integration. (#29187)

This improves the way similarities are plugged in in order to:
 - reject the classic similarity on 7.x indices and emit a deprecation
   warning otherwise
 - reject unknown parameters on 7.x indices and emit a deprecation
   warning otherwise

Even though this breaks the plugin API, I'd like to backport to 7.x so
that users can get deprecation warnings when they are doing something
that will become unsupported in the future.

Closes #23208
Closes #29035
This commit is contained in:
Adrien Grand 2018-04-03 16:45:25 +02:00 committed by GitHub
parent 8cdd950056
commit 569d0c0e89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 516 additions and 816 deletions

View File

@ -82,20 +82,6 @@ This similarity has the following options:
Type name: `BM25`
[float]
[[classic-similarity]]
==== Classic similarity
The classic similarity that is based on the TF/IDF model. This
similarity has the following option:
`discount_overlaps`::
Determines whether overlap tokens (Tokens with
0 position increment) are ignored when computing norm. By default this
is true, meaning overlap tokens do not count when computing norms.
Type name: `classic`
[float]
[[dfr]]
==== DFR similarity
@ -541,7 +527,7 @@ PUT /index
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}
@ -563,7 +549,7 @@ PUT /index/_settings
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}

View File

@ -44,13 +44,9 @@ PUT my_index
"default_field": { <1>
"type": "text"
},
"classic_field": {
"type": "text",
"similarity": "classic" <2>
},
"boolean_sim_field": {
"type": "text",
"similarity": "boolean" <3>
"similarity": "boolean" <2>
}
}
}
@ -59,5 +55,4 @@ PUT my_index
--------------------------------------------------
// CONSOLE
<1> The `default_field` uses the `BM25` similarity.
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
<3> The `boolean_sim_field` uses the `boolean` similarity.
<2> The `boolean_sim_field` uses the `boolean` similarity.

View File

@ -24,3 +24,16 @@ the index setting `index.mapping.nested_objects.limit`.
==== The `update_all_types` option has been removed
This option is useless now that all indices have at most one type.
==== The `classic` similarity has been removed
The `classic` similarity relied on coordination factors for scoring to be good
in presence of stopwords in the query. This feature has been removed from
Lucene, which means that the `classic` similarity now produces scores of lower
quality. It is advised to switch to `BM25` instead, which is widely accepted
as a better alternative.
==== Similarities fail when unsupported options are provided
An error will now be thrown when unknown configuration options are provided
to similarities. Such unknown parameters were ignored before.

View File

@ -336,9 +336,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

View File

@ -87,7 +87,7 @@ public class LegacyHasChildQueryBuilderTests extends AbstractQueryTestCase<HasCh
@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
similarity = randomFrom("classic", "BM25");
similarity = randomFrom("boolean", "BM25");
// TODO: use a single type when inner hits have been changed to work with join field,
// this test randomly generates queries with inner hits
mapperService.merge(PARENT_TYPE, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(PARENT_TYPE,
@ -323,9 +323,7 @@ public class LegacyHasChildQueryBuilderTests extends AbstractQueryTestCase<HasCh
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

View File

@ -20,7 +20,9 @@ package org.elasticsearch.cluster.metadata;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
@ -31,8 +33,8 @@ import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.indices.mapper.MapperRegistry;
import org.elasticsearch.script.ScriptService;
import java.util.AbstractMap;
import java.util.Collection;
@ -142,14 +144,15 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
final Map<String, SimilarityProvider.Factory> similarityMap = new AbstractMap<String, SimilarityProvider.Factory>() {
final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarityMap
= new AbstractMap<String, TriFunction<Settings, Version, ScriptService, Similarity>>() {
@Override
public boolean containsKey(Object key) {
return true;
}
@Override
public SimilarityProvider.Factory get(Object key) {
public TriFunction<Settings, Version, ScriptService, Similarity> get(Object key) {
assert key instanceof String : "key must be a string but was: " + key.getClass();
return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY);
}
@ -157,7 +160,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
// this entrySet impl isn't fully correct but necessary as SimilarityService will iterate
// over all similarities
@Override
public Set<Entry<String, SimilarityProvider.Factory>> entrySet() {
public Set<Entry<String, TriFunction<Settings, Version, ScriptService, Similarity>>> entrySet() {
return Collections.emptySet();
}
};

View File

@ -19,9 +19,13 @@
package org.elasticsearch.index;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.Version;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
@ -39,9 +43,6 @@ import org.elasticsearch.index.shard.IndexEventListener;
import org.elasticsearch.index.shard.IndexSearcherWrapper;
import org.elasticsearch.index.shard.IndexingOperationListener;
import org.elasticsearch.index.shard.SearchOperationListener;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.store.IndexStore;
import org.elasticsearch.indices.IndicesQueryCache;
@ -68,10 +69,10 @@ import java.util.function.Function;
/**
* IndexModule represents the central extension point for index level custom implementations like:
* <ul>
* <li>{@link SimilarityProvider} - New {@link SimilarityProvider} implementations can be registered through
* {@link #addSimilarity(String, SimilarityProvider.Factory)} while existing Providers can be referenced through Settings under the
* <li>{@link Similarity} - New {@link Similarity} implementations can be registered through
* {@link #addSimilarity(String, TriFunction)} while existing Providers can be referenced through Settings under the
* {@link IndexModule#SIMILARITY_SETTINGS_PREFIX} prefix along with the "type" value. For example, to reference the
* {@link BM25SimilarityProvider}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
* {@link BM25Similarity}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
* <li>{@link IndexStore} - Custom {@link IndexStore} instances can be registered via {@link #addIndexStore(String, Function)}</li>
* <li>{@link IndexEventListener} - Custom {@link IndexEventListener} instances can be registered via
* {@link #addIndexEventListener(IndexEventListener)}</li>
@ -107,7 +108,7 @@ public final class IndexModule {
final SetOnce<EngineFactory> engineFactory = new SetOnce<>();
private SetOnce<IndexSearcherWrapperFactory> indexSearcherWrapper = new SetOnce<>();
private final Set<IndexEventListener> indexEventListeners = new HashSet<>();
private final Map<String, SimilarityProvider.Factory> similarities = new HashMap<>();
private final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities = new HashMap<>();
private final Map<String, Function<IndexSettings, IndexStore>> storeTypes = new HashMap<>();
private final SetOnce<BiFunction<IndexSettings, IndicesQueryCache, QueryCache>> forceQueryCacheProvider = new SetOnce<>();
private final List<SearchOperationListener> searchOperationListeners = new ArrayList<>();
@ -246,12 +247,17 @@ public final class IndexModule {
/**
* Registers the given {@link SimilarityProvider} with the given name
* Registers the given {@link Similarity} with the given name.
* The function takes as parameters:<ul>
* <li>settings for this similarity
* <li>version of Elasticsearch when the index was created
* <li>ScriptService, for script-based similarities
* </ul>
*
* @param name Name of the SimilarityProvider
* @param similarity SimilarityProvider to register
*/
public void addSimilarity(String name, SimilarityProvider.Factory similarity) {
public void addSimilarity(String name, TriFunction<Settings, Version, ScriptService, Similarity> similarity) {
ensureNotFrozen();
if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) {
throw new IllegalArgumentException("similarity for name: [" + name + " is already registered");

View File

@ -1,82 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.NormalizationH1;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.elasticsearch.common.settings.Settings;
/**
 * Base class for {@link SimilarityProvider} implementations. It stores the provider name and
 * offers shared parsing of the {@code normalization} setting.
 */
public abstract class AbstractSimilarityProvider implements SimilarityProvider {

    /** Shared instance returned when the {@code normalization} setting is {@code "no"}. */
    protected static final Normalization NO_NORMALIZATION = new Normalization.NoNormalization();

    private final String name;

    /**
     * Creates a provider carrying the given name.
     *
     * @param name name reported by {@link #name()}
     */
    protected AbstractSimilarityProvider(String name) {
        this.name = name;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String name() {
        return name;
    }

    /**
     * Builds the {@link Normalization} selected by the {@code normalization} setting.
     * <p>
     * Recognized values are {@code no}, {@code h1}, {@code h2}, {@code h3} and {@code z}. The
     * parameter of each is read from {@code normalization.h1.c} (default 1),
     * {@code normalization.h2.c} (default 1), {@code normalization.h3.c} (default 800) and
     * {@code normalization.z.z} (default 0.30) respectively.
     *
     * @param settings settings of the similarity
     * @return the configured {@link Normalization}
     * @throws IllegalArgumentException if the setting is absent or holds an unrecognized value
     */
    protected Normalization parseNormalization(Settings settings) {
        final String kind = settings.get("normalization");
        // A string switch cannot take null, so an absent setting skips straight to the failure below.
        if (kind != null) {
            switch (kind) {
                case "no":
                    return NO_NORMALIZATION;
                case "h1":
                    return new NormalizationH1(settings.getAsFloat("normalization.h1.c", 1f));
                case "h2":
                    return new NormalizationH2(settings.getAsFloat("normalization.h2.c", 1f));
                case "h3":
                    return new NormalizationH3(settings.getAsFloat("normalization.h3.c", 800f));
                case "z":
                    return new NormalizationZ(settings.getAsFloat("normalization.z.z", 0.30f));
                default:
                    break;
            }
        }
        throw new IllegalArgumentException("Unsupported Normalization [" + kind + "]");
    }
}

View File

@ -1,59 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
/**
 * {@link SimilarityProvider} backed by a {@link BM25Similarity}.
 * <p>
 * Settings read by this provider:
 * <ul>
 * <li>{@code k1} (default 1.2)</li>
 * <li>{@code b} (default 0.75)</li>
 * <li>{@code discount_overlaps} (default true)</li>
 * </ul>
 * @see BM25Similarity For more information about configuration
 */
public class BM25SimilarityProvider extends AbstractSimilarityProvider {

    private final BM25Similarity similarity;

    /**
     * @param name          name of this provider
     * @param settings      settings of the similarity
     * @param indexSettings index-level settings; not read by this constructor
     */
    public BM25SimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        this.similarity = fromSettings(settings);
    }

    /** Reads every option first, then instantiates and configures the similarity. */
    private static BM25Similarity fromSettings(Settings settings) {
        final float k1 = settings.getAsFloat("k1", 1.2f);
        final float b = settings.getAsFloat("b", 0.75f);
        final boolean ignoreOverlaps = settings.getAsBoolean("discount_overlaps", true);
        final BM25Similarity bm25 = new BM25Similarity(k1, b);
        bm25.setDiscountOverlaps(ignoreOverlaps);
        return bm25;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Similarity get() {
        return this.similarity;
    }
}

View File

@ -1,48 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.elasticsearch.common.settings.Settings;
/**
 * {@link SimilarityProvider} backed by a {@link BooleanSimilarity}, a simple similarity
 * that scores each term with its query boost only. It suits fields where a full-text
 * ranking algorithm is unnecessary and the only thing that matters is whether the
 * query terms matched.
 */
public class BooleanSimilarityProvider extends AbstractSimilarityProvider {

    private final BooleanSimilarity similarity;

    /**
     * @param name          name of this provider
     * @param settings      settings of the similarity; not read by this constructor
     * @param indexSettings index-level settings; not read by this constructor
     */
    public BooleanSimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        this.similarity = new BooleanSimilarity();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public BooleanSimilarity get() {
        return this.similarity;
    }
}

View File

@ -1,52 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.elasticsearch.common.settings.Settings;
/**
 * {@link SimilarityProvider} backed by a {@link ClassicSimilarity}.
 * <p>
 * Settings read by this provider:
 * <ul>
 * <li>{@code discount_overlaps} (default true)</li>
 * </ul>
 * @see ClassicSimilarity For more information about configuration
 */
public class ClassicSimilarityProvider extends AbstractSimilarityProvider {

    private final ClassicSimilarity similarity;

    /**
     * @param name          name of this provider
     * @param settings      settings of the similarity
     * @param indexSettings index-level settings; not read by this constructor
     */
    public ClassicSimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        final ClassicSimilarity classic = new ClassicSimilarity();
        classic.setDiscountOverlaps(settings.getAsBoolean("discount_overlaps", true));
        this.similarity = classic;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ClassicSimilarity get() {
        return this.similarity;
    }
}

View File

@ -1,79 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.DFISimilarity;
import org.apache.lucene.search.similarities.Independence;
import org.apache.lucene.search.similarities.IndependenceChiSquared;
import org.apache.lucene.search.similarities.IndependenceSaturated;
import org.apache.lucene.search.similarities.IndependenceStandardized;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
import java.util.HashMap;
import java.util.Map;
import static java.util.Collections.unmodifiableMap;
/**
 * {@link SimilarityProvider} backed by a {@link DFISimilarity}.
 * <p>
 * Settings read by this provider:
 * <ul>
 * <li>{@code independence_measure}: one of {@code standardized}, {@code saturated},
 * {@code chisquared}</li>
 * <li>{@code discount_overlaps} (default true)</li>
 * </ul>
 * @see DFISimilarity For more information about configuration
 */
public class DFISimilarityProvider extends AbstractSimilarityProvider {

    // the "basic models" of divergence from independence, keyed by their setting value
    private static final Map<String, Independence> INDEPENDENCE_MEASURES = independenceMeasures();

    private final DFISimilarity similarity;

    /**
     * @param name          name of this provider
     * @param settings      settings of the similarity
     * @param indexSettings index-level settings; not read by this constructor
     * @throws IllegalArgumentException if {@code independence_measure} is absent or unrecognized
     */
    public DFISimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        // discount_overlaps is read before the measure is resolved, as in the original order.
        final boolean ignoreOverlaps = settings.getAsBoolean("discount_overlaps", true);
        final DFISimilarity dfi = new DFISimilarity(parseIndependence(settings));
        dfi.setDiscountOverlaps(ignoreOverlaps);
        this.similarity = dfi;
    }

    /** Builds the read-only lookup table of supported independence measures. */
    private static Map<String, Independence> independenceMeasures() {
        final Map<String, Independence> byName = new HashMap<>();
        byName.put("standardized", new IndependenceStandardized());
        byName.put("saturated", new IndependenceSaturated());
        byName.put("chisquared", new IndependenceChiSquared());
        return unmodifiableMap(byName);
    }

    /** Resolves the {@code independence_measure} setting, failing on a missing or unknown value. */
    private Independence parseIndependence(Settings settings) {
        final String requested = settings.get("independence_measure");
        final Independence resolved = INDEPENDENCE_MEASURES.get(requested);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + requested + "]");
    }

    @Override
    public Similarity get() {
        return this.similarity;
    }
}

View File

@ -1,123 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelBE;
import org.apache.lucene.search.similarities.BasicModelD;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
import org.apache.lucene.search.similarities.BasicModelIn;
import org.apache.lucene.search.similarities.BasicModelIne;
import org.apache.lucene.search.similarities.BasicModelP;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
import java.util.HashMap;
import java.util.Map;
import static java.util.Collections.unmodifiableMap;
/**
 * {@link SimilarityProvider} backed by a {@link DFRSimilarity}.
 * <p>
 * Settings read by this provider:
 * <ul>
 * <li>{@code basic_model}: one of {@code be}, {@code d}, {@code g}, {@code if}, {@code in},
 * {@code ine}, {@code p}</li>
 * <li>{@code after_effect}: one of {@code no}, {@code b}, {@code l}</li>
 * <li>{@code normalization}</li>
 * </ul>
 * @see DFRSimilarity For more information about configuration
 */
public class DFRSimilarityProvider extends AbstractSimilarityProvider {

    private static final Map<String, BasicModel> BASIC_MODELS = basicModels();
    private static final Map<String, AfterEffect> AFTER_EFFECTS = afterEffects();

    private final DFRSimilarity similarity;

    /**
     * @param name          name of this provider
     * @param settings      settings of the similarity
     * @param indexSettings index-level settings; not read by this constructor
     * @throws IllegalArgumentException if any of the three options is absent or unrecognized
     */
    public DFRSimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        // Arguments are evaluated left to right: basic model, then after effect, then normalization.
        this.similarity = new DFRSimilarity(
            parseBasicModel(settings),
            parseAfterEffect(settings),
            parseNormalization(settings));
    }

    /** Builds the read-only lookup table of supported basic models. */
    private static Map<String, BasicModel> basicModels() {
        final Map<String, BasicModel> byName = new HashMap<>();
        byName.put("be", new BasicModelBE());
        byName.put("d", new BasicModelD());
        byName.put("g", new BasicModelG());
        byName.put("if", new BasicModelIF());
        byName.put("in", new BasicModelIn());
        byName.put("ine", new BasicModelIne());
        byName.put("p", new BasicModelP());
        return unmodifiableMap(byName);
    }

    /** Builds the read-only lookup table of supported after effects. */
    private static Map<String, AfterEffect> afterEffects() {
        final Map<String, AfterEffect> byName = new HashMap<>();
        byName.put("no", new AfterEffect.NoAfterEffect());
        byName.put("b", new AfterEffectB());
        byName.put("l", new AfterEffectL());
        return unmodifiableMap(byName);
    }

    /**
     * Resolves the {@code basic_model} setting to a {@link BasicModel}.
     *
     * @param settings Settings to parse
     * @return {@link BasicModel} referred to in the Settings
     * @throws IllegalArgumentException if the setting is absent or unrecognized
     */
    protected BasicModel parseBasicModel(Settings settings) {
        final String requested = settings.get("basic_model");
        final BasicModel resolved = BASIC_MODELS.get(requested);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported BasicModel [" + requested + "]");
    }

    /**
     * Resolves the {@code after_effect} setting to an {@link AfterEffect}.
     *
     * @param settings Settings to parse
     * @return {@link AfterEffect} referred to in the Settings
     * @throws IllegalArgumentException if the setting is absent or unrecognized
     */
    protected AfterEffect parseAfterEffect(Settings settings) {
        final String requested = settings.get("after_effect");
        final AfterEffect resolved = AFTER_EFFECTS.get(requested);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported AfterEffect [" + requested + "]");
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Similarity get() {
        return this.similarity;
    }
}

View File

@ -1,113 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.Distribution;
import org.apache.lucene.search.similarities.DistributionLL;
import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.Lambda;
import org.apache.lucene.search.similarities.LambdaDF;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
import java.util.HashMap;
import java.util.Map;
import static java.util.Collections.unmodifiableMap;
/**
 * {@link SimilarityProvider} backed by an {@link IBSimilarity}.
 * <p>
 * Settings read by this provider:
 * <ul>
 * <li>{@code distribution}: one of {@code ll}, {@code spl}</li>
 * <li>{@code lambda}: one of {@code df}, {@code ttf}</li>
 * <li>{@code normalization}</li>
 * </ul>
 * @see IBSimilarity For more information about configuration
 */
public class IBSimilarityProvider extends AbstractSimilarityProvider {

    private static final Map<String, Distribution> DISTRIBUTIONS = distributions();
    private static final Map<String, Lambda> LAMBDAS = lambdas();

    private final IBSimilarity similarity;

    /**
     * @param name          name of this provider
     * @param settings      settings of the similarity
     * @param indexSettings index-level settings; not read by this constructor
     * @throws IllegalArgumentException if any of the three options is absent or unrecognized
     */
    public IBSimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        // Arguments are evaluated left to right: distribution, then lambda, then normalization.
        this.similarity = new IBSimilarity(
            parseDistribution(settings),
            parseLambda(settings),
            parseNormalization(settings));
    }

    /** Builds the read-only lookup table of supported distributions. */
    private static Map<String, Distribution> distributions() {
        final Map<String, Distribution> byName = new HashMap<>();
        byName.put("ll", new DistributionLL());
        byName.put("spl", new DistributionSPL());
        return unmodifiableMap(byName);
    }

    /** Builds the read-only lookup table of supported lambdas. */
    private static Map<String, Lambda> lambdas() {
        final Map<String, Lambda> byName = new HashMap<>();
        byName.put("df", new LambdaDF());
        byName.put("ttf", new LambdaTTF());
        return unmodifiableMap(byName);
    }

    /**
     * Resolves the {@code distribution} setting to a {@link Distribution}.
     *
     * @param settings Settings to parse
     * @return {@link Distribution} referred to in the Settings
     * @throws IllegalArgumentException if the setting is absent or unrecognized
     */
    protected Distribution parseDistribution(Settings settings) {
        final String requested = settings.get("distribution");
        final Distribution resolved = DISTRIBUTIONS.get(requested);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported Distribution [" + requested + "]");
    }

    /**
     * Resolves the {@code lambda} setting to a {@link Lambda}.
     *
     * @param settings Settings to parse
     * @return {@link Lambda} referred to in the Settings
     * @throws IllegalArgumentException if the setting is absent or unrecognized
     */
    protected Lambda parseLambda(Settings settings) {
        final String requested = settings.get("lambda");
        final Lambda resolved = LAMBDAS.get(requested);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported Lambda [" + requested + "]");
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Similarity get() {
        return this.similarity;
    }
}

View File

@ -1,52 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
/**
 * {@link SimilarityProvider} backed by an {@link LMDirichletSimilarity}.
 * <p>
 * Settings read by this provider:
 * <ul>
 * <li>{@code mu} (default 2000)</li>
 * </ul>
 * @see LMDirichletSimilarity For more information about configuration
 */
public class LMDirichletSimilarityProvider extends AbstractSimilarityProvider {

    private final LMDirichletSimilarity similarity;

    /**
     * @param name          name of this provider
     * @param settings      settings of the similarity
     * @param indexSettings index-level settings; not read by this constructor
     */
    public LMDirichletSimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        this.similarity = fromSettings(settings);
    }

    /** Instantiates the similarity with the configured (or default) {@code mu}. */
    private static LMDirichletSimilarity fromSettings(Settings settings) {
        final float smoothing = settings.getAsFloat("mu", 2000f);
        return new LMDirichletSimilarity(smoothing);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Similarity get() {
        return this.similarity;
    }
}

View File

@ -1,52 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
/**
 * {@link SimilarityProvider} backed by a single {@link LMJelinekMercerSimilarity} that is
 * built once, when the provider is constructed.
 * <p>
 * Recognised settings:
 * <ul>
 *     <li>{@code lambda}: handed to the {@link LMJelinekMercerSimilarity} constructor,
 *     {@code 0.1} when the setting is absent</li>
 * </ul>
 * @see LMJelinekMercerSimilarity For more information about configuration
 */
public class LMJelinekMercerSimilarityProvider extends AbstractSimilarityProvider {

    /** The similarity built from the provider settings; returned by every call to {@link #get()}. */
    private final LMJelinekMercerSimilarity similarity;

    /**
     * @param name          name of this provider, forwarded to the superclass
     * @param settings      settings of this similarity, read for {@code lambda}
     * @param indexSettings index-level settings; not read by this constructor
     */
    public LMJelinekMercerSimilarityProvider(String name, Settings settings, Settings indexSettings) {
        super(name);
        this.similarity = new LMJelinekMercerSimilarity(settings.getAsFloat("lambda", 0.1f));
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Similarity get() {
        return similarity;
    }
}

View File

@ -20,6 +20,8 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService;
@ -27,13 +29,11 @@ import org.elasticsearch.script.SimilarityScript;
import org.elasticsearch.script.SimilarityWeightScript;
/** Provider of scripted similarities. */
public class ScriptedSimilarityProvider extends AbstractSimilarityProvider {
final class ScriptedSimilarityProvider implements TriFunction<Settings, Version, ScriptService, Similarity> {
private final ScriptedSimilarity scriptedSimilarity;
public ScriptedSimilarityProvider(String name, Settings settings, Settings indexSettings, ScriptService scriptService) {
super(name);
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
@Override
public Similarity apply(Settings settings, Version indexCreatedVersion, ScriptService scriptService) {
boolean discountOverlaps = settings.getAsBoolean(SimilarityProviders.DISCOUNT_OVERLAPS, true);
Settings scriptSettings = settings.getAsSettings("script");
Script script = Script.parse(scriptSettings);
SimilarityScript.Factory scriptFactory = scriptService.compile(script, SimilarityScript.CONTEXT);
@ -44,15 +44,10 @@ public class ScriptedSimilarityProvider extends AbstractSimilarityProvider {
weightScript = Script.parse(weightScriptSettings);
weightScriptFactory = scriptService.compile(weightScript, SimilarityWeightScript.CONTEXT);
}
scriptedSimilarity = new ScriptedSimilarity(
return new ScriptedSimilarity(
weightScript == null ? null : weightScript.toString(),
weightScriptFactory == null ? null : weightScriptFactory::newInstance,
script.toString(), scriptFactory::newInstance, discountOverlaps);
}
@Override
public Similarity get() {
return scriptedSimilarity;
}
}

View File

@ -20,32 +20,32 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.script.ScriptService;
/**
* Provider for {@link Similarity} instances
* Wrapper around a {@link Similarity} and its name.
*/
public interface SimilarityProvider {
public final class SimilarityProvider {
/**
* Returns the name associated with the Provider
*
* @return Name of the Provider
*/
String name();
private final String name;
private final Similarity similarity;
/**
* Returns the {@link Similarity} the Provider is for
*
* @return Provided {@link Similarity}
*/
Similarity get();
/** Factory of {@link SimilarityProvider} */
@FunctionalInterface
interface Factory {
/** Create a new {@link SimilarityProvider}. */
SimilarityProvider create(String name, Settings settings, Settings indexSettings, ScriptService scriptService);
public SimilarityProvider(String name, Similarity similarity) {
this.name = name;
this.similarity = similarity;
}
/**
* Return the name of this {@link Similarity}.
*/
public String name() {
return name;
}
/**
* Return the wrapped {@link Similarity}.
*/
public Similarity get() {
return similarity;
}
}

View File

@ -0,0 +1,300 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelBE;
import org.apache.lucene.search.similarities.BasicModelD;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
import org.apache.lucene.search.similarities.BasicModelIn;
import org.apache.lucene.search.similarities.BasicModelIne;
import org.apache.lucene.search.similarities.BasicModelP;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.DFISimilarity;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.Distribution;
import org.apache.lucene.search.similarities.DistributionLL;
import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.Independence;
import org.apache.lucene.search.similarities.IndependenceChiSquared;
import org.apache.lucene.search.similarities.IndependenceSaturated;
import org.apache.lucene.search.similarities.IndependenceStandardized;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.Lambda;
import org.apache.lucene.search.similarities.LambdaDF;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.NormalizationH1;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.elasticsearch.Version;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static java.util.Collections.unmodifiableMap;
/**
 * Static factories that turn the {@link Settings} of a similarity into the matching
 * Lucene similarity instance, validating the setting names along the way.
 */
final class SimilarityProviders {

    private SimilarityProviders() {} // no instantiation

    private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityProviders.class));

    /** Name of the boolean setting shared by the similarities that support discounting overlaps. */
    static final String DISCOUNT_OVERLAPS = "discount_overlaps";

    // Lookup tables from the setting value to the shared component instance.
    // They are initialized in the same order as they are declared.
    private static final Map<String, BasicModel> BASIC_MODELS = basicModels();
    private static final Map<String, AfterEffect> AFTER_EFFECTS = afterEffects();
    private static final Map<String, Independence> INDEPENDENCE_MEASURES = independenceMeasures();
    private static final Map<String, Distribution> DISTRIBUTIONS = distributions();
    private static final Map<String, Lambda> LAMBDAS = lambdas();

    /** Builds the read-only table of values accepted for {@code basic_model}. */
    private static Map<String, BasicModel> basicModels() {
        final Map<String, BasicModel> byName = new HashMap<>();
        byName.put("be", new BasicModelBE());
        byName.put("d", new BasicModelD());
        byName.put("g", new BasicModelG());
        byName.put("if", new BasicModelIF());
        byName.put("in", new BasicModelIn());
        byName.put("ine", new BasicModelIne());
        byName.put("p", new BasicModelP());
        return unmodifiableMap(byName);
    }

    /** Builds the read-only table of values accepted for {@code after_effect}. */
    private static Map<String, AfterEffect> afterEffects() {
        final Map<String, AfterEffect> byName = new HashMap<>();
        byName.put("no", new AfterEffect.NoAfterEffect());
        byName.put("b", new AfterEffectB());
        byName.put("l", new AfterEffectL());
        return unmodifiableMap(byName);
    }

    /** Builds the read-only table of values accepted for {@code independence_measure}. */
    private static Map<String, Independence> independenceMeasures() {
        final Map<String, Independence> byName = new HashMap<>();
        byName.put("standardized", new IndependenceStandardized());
        byName.put("saturated", new IndependenceSaturated());
        byName.put("chisquared", new IndependenceChiSquared());
        return unmodifiableMap(byName);
    }

    /** Builds the read-only table of values accepted for {@code distribution}. */
    private static Map<String, Distribution> distributions() {
        final Map<String, Distribution> byName = new HashMap<>();
        byName.put("ll", new DistributionLL());
        byName.put("spl", new DistributionSPL());
        return unmodifiableMap(byName);
    }

    /** Builds the read-only table of values accepted for {@code lambda} of the IB similarity. */
    private static Map<String, Lambda> lambdas() {
        final Map<String, Lambda> byName = new HashMap<>();
        byName.put("df", new LambdaDF());
        byName.put("ttf", new LambdaTTF());
        return unmodifiableMap(byName);
    }

    /**
     * Resolves the {@code basic_model} setting to a {@link BasicModel}.
     *
     * @param settings Settings to parse
     * @return {@link BasicModel} referred to in the Settings
     * @throws IllegalArgumentException if the setting is missing or not a known model
     */
    private static BasicModel parseBasicModel(Settings settings) {
        final String name = settings.get("basic_model");
        final BasicModel resolved = BASIC_MODELS.get(name);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported BasicModel [" + name + "], expected one of " + BASIC_MODELS.keySet());
    }

    /**
     * Resolves the {@code after_effect} setting to an {@link AfterEffect}.
     *
     * @param settings Settings to parse
     * @return {@link AfterEffect} referred to in the Settings
     * @throws IllegalArgumentException if the setting is missing or not a known after effect
     */
    private static AfterEffect parseAfterEffect(Settings settings) {
        final String name = settings.get("after_effect");
        final AfterEffect resolved = AFTER_EFFECTS.get(name);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported AfterEffect [" + name + "], expected one of " + AFTER_EFFECTS.keySet());
    }

    /**
     * Resolves the {@code normalization} setting to a new {@link Normalization}. The
     * parameter of the selected normalization is read from {@code normalization.h1.c},
     * {@code normalization.h2.c}, {@code normalization.h3.c} or {@code normalization.z.z}.
     *
     * @param settings Settings to parse
     * @return {@link Normalization} referred to in the Settings
     * @throws IllegalArgumentException if the setting is missing or not a known normalization
     */
    private static Normalization parseNormalization(Settings settings) {
        final String kind = settings.get("normalization");
        // A String switch throws on null, so a missing setting falls through to the error below.
        if (kind != null) {
            switch (kind) {
                case "no":
                    return new Normalization.NoNormalization();
                case "h1":
                    return new NormalizationH1(settings.getAsFloat("normalization.h1.c", 1f));
                case "h2":
                    return new NormalizationH2(settings.getAsFloat("normalization.h2.c", 1f));
                case "h3":
                    return new NormalizationH3(settings.getAsFloat("normalization.h3.c", 800f));
                case "z":
                    return new NormalizationZ(settings.getAsFloat("normalization.z.z", 0.30f));
                default:
                    break;
            }
        }
        throw new IllegalArgumentException("Unsupported Normalization [" + kind + "]");
    }

    /**
     * Resolves the {@code independence_measure} setting to an {@link Independence}.
     *
     * @param settings Settings to parse
     * @return {@link Independence} referred to in the Settings
     * @throws IllegalArgumentException if the setting is missing or not a known measure
     */
    private static Independence parseIndependence(Settings settings) {
        final String name = settings.get("independence_measure");
        final Independence resolved = INDEPENDENCE_MEASURES.get(name);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "], expected one of "
            + INDEPENDENCE_MEASURES.keySet());
    }

    /**
     * Resolves the {@code distribution} setting to a {@link Distribution}.
     *
     * @param settings Settings to parse
     * @return {@link Distribution} referred to in the Settings
     * @throws IllegalArgumentException if the setting is missing or not a known distribution
     */
    private static Distribution parseDistribution(Settings settings) {
        final String name = settings.get("distribution");
        final Distribution resolved = DISTRIBUTIONS.get(name);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported Distribution [" + name + "]");
    }

    /**
     * Resolves the {@code lambda} setting to a {@link Lambda}.
     *
     * @param settings Settings to parse
     * @return {@link Lambda} referred to in the Settings
     * @throws IllegalArgumentException if the setting is missing or not a known lambda
     */
    private static Lambda parseLambda(Settings settings) {
        final String name = settings.get("lambda");
        final Lambda resolved = LAMBDAS.get(name);
        if (resolved != null) {
            return resolved;
        }
        throw new IllegalArgumentException("Unsupported Lambda [" + name + "]");
    }

    /**
     * Checks that {@code settings} only contains keys listed in {@code supportedSettings};
     * the {@code type} key is always tolerated.
     *
     * @param type              similarity type name, only used in the message
     * @param version           version compared against {@link Version#V_7_0_0_alpha1} to pick the reaction
     * @param settings          settings of the similarity being created
     * @param supportedSettings keys the similarity understands
     * @throws IllegalArgumentException if unknown keys are present and {@code version} is on or after
     *                                  {@link Version#V_7_0_0_alpha1}; on earlier versions a deprecation
     *                                  warning is logged instead
     */
    static void assertSettingsIsSubsetOf(String type, Version version, Settings settings, String... supportedSettings) {
        final Set<String> unknownSettings = new HashSet<>(settings.keySet());
        unknownSettings.removeAll(Arrays.asList(supportedSettings));
        unknownSettings.remove("type"); // used to figure out which sim this is
        if (unknownSettings.isEmpty()) {
            return;
        }
        final String message = "Unknown settings for similarity of type [" + type + "]: " + unknownSettings;
        if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
            throw new IllegalArgumentException(message);
        }
        DEPRECATION_LOGGER.deprecated(message);
    }

    /**
     * Creates a {@link BM25Similarity} from {@code k1} (default 1.2), {@code b} (default 0.75)
     * and {@code discount_overlaps} (default true).
     */
    public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
        // All three settings are read before the similarity is constructed.
        final float k1 = settings.getAsFloat("k1", 1.2f);
        final float b = settings.getAsFloat("b", 0.75f);
        final boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);

        final BM25Similarity bm25 = new BM25Similarity(k1, b);
        bm25.setDiscountOverlaps(discountOverlaps);
        return bm25;
    }

    /**
     * Creates a {@link BooleanSimilarity}; no setting other than {@code type} is accepted.
     */
    public static BooleanSimilarity createBooleanSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("boolean", indexCreatedVersion, settings);
        return new BooleanSimilarity();
    }

    /**
     * Creates a {@link ClassicSimilarity} from {@code discount_overlaps} (default true).
     */
    public static ClassicSimilarity createClassicSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("classic", indexCreatedVersion, settings, DISCOUNT_OVERLAPS);
        final boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);

        final ClassicSimilarity classic = new ClassicSimilarity();
        classic.setDiscountOverlaps(discountOverlaps);
        return classic;
    }

    /**
     * Creates a {@link DFRSimilarity} from {@code basic_model}, {@code after_effect} and
     * {@code normalization} (plus the normalization parameter settings).
     */
    public static DFRSimilarity createDfrSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("DFR", indexCreatedVersion, settings,
            "basic_model", "after_effect", "normalization",
            "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z");
        final BasicModel basicModel = parseBasicModel(settings);
        final AfterEffect afterEffect = parseAfterEffect(settings);
        final Normalization normalization = parseNormalization(settings);
        return new DFRSimilarity(basicModel, afterEffect, normalization);
    }

    /**
     * Creates a {@link DFISimilarity} from {@code independence_measure}.
     */
    public static DFISimilarity createDfiSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("DFI", indexCreatedVersion, settings, "independence_measure");
        final Independence independence = parseIndependence(settings);
        return new DFISimilarity(independence);
    }

    /**
     * Creates an {@link IBSimilarity} from {@code distribution}, {@code lambda} and
     * {@code normalization} (plus the normalization parameter settings).
     */
    public static IBSimilarity createIBSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("IB", indexCreatedVersion, settings, "distribution", "lambda", "normalization",
            "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z");
        final Distribution distribution = parseDistribution(settings);
        final Lambda lambda = parseLambda(settings);
        final Normalization normalization = parseNormalization(settings);
        return new IBSimilarity(distribution, lambda, normalization);
    }

    /**
     * Creates an {@link LMDirichletSimilarity} from {@code mu} (default 2000).
     */
    public static LMDirichletSimilarity createLMDirichletSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("LMDirichlet", indexCreatedVersion, settings, "mu");
        return new LMDirichletSimilarity(settings.getAsFloat("mu", 2000f));
    }

    /**
     * Creates an {@link LMJelinekMercerSimilarity} from {@code lambda} (default 0.1).
     */
    public static LMJelinekMercerSimilarity createLMJelinekMercerSimilarity(Settings settings, Version indexCreatedVersion) {
        assertSettingsIsSubsetOf("LMJelinekMercer", indexCreatedVersion, settings, "lambda");
        return new LMJelinekMercerSimilarity(settings.getAsFloat("lambda", 0.1f));
    }
}

View File

@ -19,8 +19,13 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
@ -34,45 +39,84 @@ import org.elasticsearch.script.ScriptService;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Supplier;
public final class SimilarityService extends AbstractIndexComponent {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityService.class));
public static final String DEFAULT_SIMILARITY = "BM25";
private final Similarity defaultSimilarity;
private final Map<String, SimilarityProvider> similarities;
private static final Map<String, SimilarityProvider.Factory> DEFAULTS;
public static final Map<String, SimilarityProvider.Factory> BUILT_IN;
private static final String CLASSIC_SIMILARITY = "classic";
private static final Map<String, Function<Version, Supplier<Similarity>>> DEFAULTS;
public static final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> BUILT_IN;
static {
Map<String, SimilarityProvider.Factory> defaults = new HashMap<>();
defaults.put("classic",
(name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings));
defaults.put("BM25",
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
defaults.put("boolean",
(name, settings, indexSettings, scriptService) -> new BooleanSimilarityProvider(name, settings, indexSettings));
Map<String, Function<Version, Supplier<Similarity>>> defaults = new HashMap<>();
defaults.put(CLASSIC_SIMILARITY, version -> {
if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
return () -> {
throw new IllegalArgumentException("The [classic] similarity may not be used anymore. Please use the [BM25] "
+ "similarity or build a custom [scripted] similarity instead.");
};
} else {
final ClassicSimilarity similarity = SimilarityProviders.createClassicSimilarity(Settings.EMPTY, version);
return () -> {
DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally "
+ "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity "
+ "instead.");
return similarity;
};
}
});
defaults.put("BM25", version -> {
final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
return () -> similarity;
});
defaults.put("boolean", version -> {
final Similarity similarity = new BooleanSimilarity();
return () -> similarity;
});
Map<String, SimilarityProvider.Factory> builtIn = new HashMap<>(defaults);
Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> builtIn = new HashMap<>();
builtIn.put(CLASSIC_SIMILARITY,
(settings, version, script) -> {
if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
throw new IllegalArgumentException("The [classic] similarity may not be used anymore. Please use the [BM25] "
+ "similarity or build a custom [scripted] similarity instead.");
} else {
DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally "
+ "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity "
+ "instead.");
return SimilarityProviders.createClassicSimilarity(settings, version);
}
});
builtIn.put("BM25",
(settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version));
builtIn.put("boolean",
(settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version));
builtIn.put("DFR",
(name, settings, indexSettings, scriptService) -> new DFRSimilarityProvider(name, settings, indexSettings));
(settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version));
builtIn.put("IB",
(name, settings, indexSettings, scriptService) -> new IBSimilarityProvider(name, settings, indexSettings));
(settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version));
builtIn.put("LMDirichlet",
(name, settings, indexSettings, scriptService) -> new LMDirichletSimilarityProvider(name, settings, indexSettings));
(settings, version, scriptService) -> SimilarityProviders.createLMDirichletSimilarity(settings, version));
builtIn.put("LMJelinekMercer",
(name, settings, indexSettings, scriptService) -> new LMJelinekMercerSimilarityProvider(name, settings, indexSettings));
(settings, version, scriptService) -> SimilarityProviders.createLMJelinekMercerSimilarity(settings, version));
builtIn.put("DFI",
(name, settings, indexSettings, scriptService) -> new DFISimilarityProvider(name, settings, indexSettings));
builtIn.put("scripted", ScriptedSimilarityProvider::new);
(settings, version, scriptService) -> SimilarityProviders.createDfiSimilarity(settings, version));
builtIn.put("scripted", new ScriptedSimilarityProvider());
DEFAULTS = Collections.unmodifiableMap(defaults);
BUILT_IN = Collections.unmodifiableMap(builtIn);
}
private final Similarity defaultSimilarity;
private final Map<String, Supplier<Similarity>> similarities;
public SimilarityService(IndexSettings indexSettings, ScriptService scriptService,
Map<String, SimilarityProvider.Factory> similarities) {
Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities) {
super(indexSettings);
Map<String, SimilarityProvider> providers = new HashMap<>(similarities.size());
Map<String, Supplier<Similarity>> providers = new HashMap<>(similarities.size());
Map<String, Settings> similaritySettings = this.indexSettings.getSettings().getGroups(IndexModule.SIMILARITY_SETTINGS_PREFIX);
for (Map.Entry<String, Settings> entry : similaritySettings.entrySet()) {
String name = entry.getKey();
if (BUILT_IN.containsKey(name)) {
@ -85,14 +129,13 @@ public final class SimilarityService extends AbstractIndexComponent {
} else if ((similarities.containsKey(typeName) || BUILT_IN.containsKey(typeName)) == false) {
throw new IllegalArgumentException("Unknown Similarity type [" + typeName + "] for [" + name + "]");
}
SimilarityProvider.Factory defaultFactory = BUILT_IN.get(typeName);
SimilarityProvider.Factory factory = similarities.getOrDefault(typeName, defaultFactory);
providers.put(name, factory.create(name, providerSettings, indexSettings.getSettings(), scriptService));
TriFunction<Settings, Version, ScriptService, Similarity> defaultFactory = BUILT_IN.get(typeName);
TriFunction<Settings, Version, ScriptService, Similarity> factory = similarities.getOrDefault(typeName, defaultFactory);
final Similarity similarity = factory.apply(providerSettings, indexSettings.getIndexVersionCreated(), scriptService);
providers.put(name, () -> similarity);
}
Map<String, SimilarityProvider> providerMapping = addSimilarities(similaritySettings, indexSettings.getSettings(), scriptService,
DEFAULTS);
for (Map.Entry<String, SimilarityProvider> entry : providerMapping.entrySet()) {
providers.put(entry.getKey(), entry.getValue());
for (Map.Entry<String, Function<Version, Supplier<Similarity>>> entry : DEFAULTS.entrySet()) {
providers.put(entry.getKey(), entry.getValue().apply(indexSettings.getIndexVersionCreated()));
}
this.similarities = providers;
defaultSimilarity = (providers.get("default") != null) ? providers.get("default").get()
@ -108,25 +151,16 @@ public final class SimilarityService extends AbstractIndexComponent {
defaultSimilarity;
}
private Map<String, SimilarityProvider> addSimilarities(Map<String, Settings> similaritySettings, Settings indexSettings,
ScriptService scriptService, Map<String, SimilarityProvider.Factory> similarities) {
Map<String, SimilarityProvider> providers = new HashMap<>(similarities.size());
for (Map.Entry<String, SimilarityProvider.Factory> entry : similarities.entrySet()) {
String name = entry.getKey();
SimilarityProvider.Factory factory = entry.getValue();
Settings providerSettings = similaritySettings.get(name);
if (providerSettings == null) {
providerSettings = Settings.Builder.EMPTY_SETTINGS;
}
providers.put(name, factory.create(name, providerSettings, indexSettings, scriptService));
}
return providers;
}
public SimilarityProvider getSimilarity(String name) {
return similarities.get(name);
Supplier<Similarity> sim = similarities.get(name);
if (sim == null) {
return null;
}
return new SimilarityProvider(name, sim.get());
}
/**
 * Returns the value of the {@code defaultSimilarity} field.
 * Package-private: exposed for testing only.
 */
// for testing
Similarity getDefaultSimilarity() {
return defaultSimilarity;
}

View File

@ -59,7 +59,6 @@ import org.elasticsearch.index.shard.IndexSearcherWrapper;
import org.elasticsearch.index.shard.IndexingOperationListener;
import org.elasticsearch.index.shard.SearchOperationListener;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.store.IndexStore;
import org.elasticsearch.indices.IndicesModule;
@ -287,17 +286,8 @@ public class IndexModuleTests extends ESTestCase {
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
module.addSimilarity("test_similarity", (string, providerSettings, indexLevelSettings, scriptService) -> new SimilarityProvider() {
@Override
public String name() {
return string;
}
@Override
public Similarity get() {
return new TestSimilarity(providerSettings.get("key"));
}
});
module.addSimilarity("test_similarity",
(providerSettings, indexCreatedVersion, scriptService) -> new TestSimilarity(providerSettings.get("key")));
IndexService indexService = newIndexService(module);
SimilarityService similarityService = indexService.similarityService();

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.test.ESTestCase;
@ -50,10 +50,10 @@ public class SimilarityServiceTests extends ESTestCase {
}
public void testOverrideDefaultSimilarity() {
Settings settings = Settings.builder().put("index.similarity.default.type", "classic")
Settings settings = Settings.builder().put("index.similarity.default.type", "boolean")
.build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
assertTrue(service.getDefaultSimilarity() instanceof BooleanSimilarity);
}
}

View File

@ -33,6 +33,8 @@ import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
@ -60,7 +62,24 @@ public class SimilarityTests extends ESSingleNodeTestCase {
/**
 * On an index created with default settings, the {@code BM25} and {@code boolean}
 * similarities resolve by name, {@code default} resolves to {@code null}, and asking
 * for {@code classic} fails with an {@link IllegalArgumentException}.
 */
public void testResolveDefaultSimilarities() {
    final SimilarityService service = createIndex("foo").similarityService();

    assertThat(service.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
    assertThat(service.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
    assertThat(service.getSimilarity("default"), equalTo(null));

    // Looking the classic similarity up is expected to throw rather than return a provider.
    final String expectedMessage = "The [classic] similarity may not be used anymore. Please use the [BM25] similarity or build a custom [scripted] "
        + "similarity instead.";
    final IllegalArgumentException failure = expectThrows(IllegalArgumentException.class,
        () -> service.getSimilarity("classic"));
    assertEquals(expectedMessage, failure.getMessage());
}
public void testResolveDefaultSimilaritiesOn6xIndex() {
Settings indexSettings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden
.build();
SimilarityService similarityService = createIndex("foo", indexSettings).similarityService();
assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class));
assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally "
+ "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity "
+ "instead.");
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
assertThat(similarityService.getSimilarity("default"), equalTo(null));
@ -76,15 +95,27 @@ public class SimilarityTests extends ESSingleNodeTestCase {
Settings indexSettings = Settings.builder()
.put("index.similarity.my_similarity.type", "classic")
.put("index.similarity.my_similarity.discount_overlaps", false)
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden
.build();
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(ClassicSimilarity.class));
ClassicSimilarity similarity = (ClassicSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
}
/**
 * Creating an index that declares a custom similarity of type {@code classic}, without
 * pinning the index-created version, is expected to fail with an
 * {@link IllegalArgumentException} carrying the "may not be used anymore" message.
 */
public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException {
    final Settings.Builder builder = Settings.builder();
    builder.put("index.similarity.my_similarity.type", "classic");
    builder.put("index.similarity.my_similarity.discount_overlaps", false);
    final Settings indexSettings = builder.build();

    final String expectedMessage = "The [classic] similarity may not be used anymore. Please use the [BM25] similarity or build a custom [scripted] "
        + "similarity instead.";
    final IllegalArgumentException failure = expectThrows(IllegalArgumentException.class,
        () -> createIndex("foo", indexSettings));
    assertEquals(expectedMessage, failure.getMessage());
}
public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
@ -100,7 +131,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
.build();
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(BM25Similarity.class));
BM25Similarity similarity = (BM25Similarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getK1(), equalTo(2.0f));
@ -119,8 +150,8 @@ public class SimilarityTests extends ESSingleNodeTestCase {
DocumentMapper documentMapper = indexService.mapperService()
.documentMapperParser()
.parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(),
instanceOf(BooleanSimilarityProvider.class));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(),
instanceOf(BooleanSimilarity.class));
}
public void testResolveSimilaritiesFromMapping_DFR() throws IOException {
@ -139,7 +170,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
.build();
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(DFRSimilarity.class));
DFRSimilarity similarity = (DFRSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getBasicModel(), instanceOf(BasicModelG.class));
@ -164,7 +195,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
.build();
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(IBSimilarity.class));
IBSimilarity similarity = (IBSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getDistribution(), instanceOf(DistributionSPL.class));
@ -187,7 +218,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
MappedFieldType fieldType = documentMapper.mappers().getMapper("field1").fieldType();
assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class));
assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class));
DFISimilarity similarity = (DFISimilarity) fieldType.similarity().get();
assertThat(similarity.getIndependence(), instanceOf(IndependenceChiSquared.class));
}
@ -205,7 +236,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
.build();
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMDirichletSimilarity.class));
LMDirichletSimilarity similarity = (LMDirichletSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getMu(), equalTo(3000f));
@ -224,7 +255,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
.build();
IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMJelinekMercerSimilarity.class));
LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getLambda(), equalTo(0.7f));
@ -245,4 +276,14 @@ public class SimilarityTests extends ESSingleNodeTestCase {
assertThat(e.getMessage(), equalTo("Unknown Similarity type [unknown_similarity] for field [field1]"));
}
}
/**
 * Checks that index creation fails with an {@link IllegalArgumentException} when a
 * {@code BM25} similarity is configured with the extra setting {@code z}.
 */
public void testUnknownParameters() throws IOException {
    Settings settingsWithUnknownKey = Settings.builder()
        .put("index.similarity.my_similarity.type", "BM25")
        .put("index.similarity.my_similarity.z", 2.0f)
        .build();

    String expectedMessage = "Unknown settings for similarity of type [BM25]: [z]";

    IllegalArgumentException failure =
        expectThrows(IllegalArgumentException.class, () -> createIndex("foo", settingsWithUnknownKey));
    assertEquals(expectedMessage, failure.getMessage());
}
}

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.indices;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags;
@ -49,7 +50,6 @@ import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.IndexShardState;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.ShardPath;
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
import org.elasticsearch.indices.IndicesService.ShardDeletionCheckResult;
import org.elasticsearch.plugins.MapperPlugin;
import org.elasticsearch.plugins.Plugin;
@ -106,7 +106,7 @@ public class IndicesServiceTests extends ESSingleNodeTestCase {
public void onIndexModule(IndexModule indexModule) {
super.onIndexModule(indexModule);
indexModule.addSimilarity("fake-similarity",
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
(settings, indexCreatedVersion, scriptService) -> new BM25Similarity());
}
}
@ -375,8 +375,8 @@ public class IndicesServiceTests extends ESSingleNodeTestCase {
.build();
MapperService mapperService = indicesService.createIndexMapperService(indexMetaData);
assertNotNull(mapperService.documentMapperParser().parserContext("type").typeParser("fake-mapper"));
assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test"),
instanceOf(BM25SimilarityProvider.class));
assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test").get(),
instanceOf(BM25Similarity.class));
}
public void testStatsByShardDoesNotDieFromExpectedExceptions() {

View File

@ -46,7 +46,7 @@ public class SimilarityIT extends ESIntegTestCase {
.field("type", "text")
.endObject()
.startObject("field2")
.field("similarity", "classic")
.field("similarity", "boolean")
.field("type", "text")
.endObject()
.endObject()
@ -68,9 +68,9 @@ public class SimilarityIT extends ESIntegTestCase {
assertThat(bm25SearchResponse.getHits().getTotalHits(), equalTo(1L));
float bm25Score = bm25SearchResponse.getHits().getHits()[0].getScore();
SearchResponse defaultSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet();
assertThat(defaultSearchResponse.getHits().getTotalHits(), equalTo(1L));
float defaultScore = defaultSearchResponse.getHits().getHits()[0].getScore();
SearchResponse booleanSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet();
assertThat(booleanSearchResponse.getHits().getTotalHits(), equalTo(1L));
float defaultScore = booleanSearchResponse.getHits().getHits()[0].getScore();
assertThat(bm25Score, not(equalTo(defaultScore)));
}

View File

@ -20,13 +20,14 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
@ -123,17 +124,17 @@ public abstract class FieldTypeTestCase extends ESTestCase {
new Modifier("similarity", false) {
@Override
public void modify(MappedFieldType ft) {
ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS));
ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity()));
}
},
new Modifier("similarity", false) {
@Override
public void modify(MappedFieldType ft) {
ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS));
ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity()));
}
@Override
public void normalizeOther(MappedFieldType other) {
other.setSimilarity(new BM25SimilarityProvider("bar", Settings.EMPTY, INDEX_SETTINGS));
other.setSimilarity(new SimilarityProvider("bar", new BM25Similarity()));
}
},
new Modifier("eager_global_ordinals", true) {