Improve similarity integration. (#29187)
This improves the way similarities are plugged in in order to: - reject the classic similarity on 7.x indices and emit a deprecation warning otherwise - reject unknown parameters on 7.x indices and emit a deprecation warning otherwise Even though this breaks the plugin API, I'd like to backport to 7.x so that users can get deprecation warnings when they are doing something that will become unsupported in the future. Closes #23208 Closes #29035
This commit is contained in:
parent
8cdd950056
commit
569d0c0e89
|
@ -82,20 +82,6 @@ This similarity has the following options:
|
|||
|
||||
Type name: `BM25`
|
||||
|
||||
[float]
|
||||
[[classic-similarity]]
|
||||
==== Classic similarity
|
||||
|
||||
The classic similarity that is based on the TF/IDF model. This
|
||||
similarity has the following option:
|
||||
|
||||
`discount_overlaps`::
|
||||
Determines whether overlap tokens (Tokens with
|
||||
0 position increment) are ignored when computing norm. By default this
|
||||
is true, meaning overlap tokens do not count when computing norms.
|
||||
|
||||
Type name: `classic`
|
||||
|
||||
[float]
|
||||
[[dfr]]
|
||||
==== DFR similarity
|
||||
|
@ -541,7 +527,7 @@ PUT /index
|
|||
"index": {
|
||||
"similarity": {
|
||||
"default": {
|
||||
"type": "classic"
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -563,7 +549,7 @@ PUT /index/_settings
|
|||
"index": {
|
||||
"similarity": {
|
||||
"default": {
|
||||
"type": "classic"
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,13 +44,9 @@ PUT my_index
|
|||
"default_field": { <1>
|
||||
"type": "text"
|
||||
},
|
||||
"classic_field": {
|
||||
"type": "text",
|
||||
"similarity": "classic" <2>
|
||||
},
|
||||
"boolean_sim_field": {
|
||||
"type": "text",
|
||||
"similarity": "boolean" <3>
|
||||
"similarity": "boolean" <2>
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,5 +55,4 @@ PUT my_index
|
|||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
<1> The `default_field` uses the `BM25` similarity.
|
||||
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
|
||||
<3> The `boolean_sim_field` uses the `boolean` similarity.
|
||||
<2> The `boolean_sim_field` uses the `boolean` similarity.
|
||||
|
|
|
@ -24,3 +24,16 @@ the index setting `index.mapping.nested_objects.limit`.
|
|||
==== The `update_all_types` option has been removed
|
||||
|
||||
This option is useless now that all indices have at most one type.
|
||||
|
||||
=== The `classic` similarity has been removed
|
||||
|
||||
The `classic` similarity relied on coordination factors for scoring to be good
|
||||
in the presence of stopwords in the query. This feature has been removed from
|
||||
Lucene, which means that the `classic` similarity now produces scores of lower
|
||||
quality. It is advised to switch to `BM25` instead, which is widely accepted
|
||||
as a better alternative.
|
||||
|
||||
=== Similarities fail when unsupported options are provided
|
||||
|
||||
An error will now be thrown when unknown configuration options are provided
|
||||
to similarities. Such unknown parameters were ignored before.
|
||||
|
|
|
@ -336,9 +336,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
|
|||
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
|
||||
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
|
||||
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
|
||||
.create(similarity, Settings.EMPTY,
|
||||
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
|
||||
.get();
|
||||
.apply(Settings.EMPTY, Version.CURRENT, null);
|
||||
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
|
||||
}
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ public class LegacyHasChildQueryBuilderTests extends AbstractQueryTestCase<HasCh
|
|||
|
||||
@Override
|
||||
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
|
||||
similarity = randomFrom("classic", "BM25");
|
||||
similarity = randomFrom("boolean", "BM25");
|
||||
// TODO: use a single type when inner hits have been changed to work with join field,
|
||||
// this test randomly generates queries with inner hits
|
||||
mapperService.merge(PARENT_TYPE, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(PARENT_TYPE,
|
||||
|
@ -323,9 +323,7 @@ public class LegacyHasChildQueryBuilderTests extends AbstractQueryTestCase<HasCh
|
|||
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
|
||||
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
|
||||
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
|
||||
.create(similarity, Settings.EMPTY,
|
||||
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
|
||||
.get();
|
||||
.apply(Settings.EMPTY, Version.CURRENT, null);
|
||||
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,9 @@ package org.elasticsearch.cluster.metadata;
|
|||
|
||||
import org.apache.logging.log4j.message.ParameterizedMessage;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.TriFunction;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.settings.IndexScopedSettings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -31,8 +33,8 @@ import org.elasticsearch.index.analysis.IndexAnalyzers;
|
|||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.similarity.SimilarityService;
|
||||
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||
import org.elasticsearch.indices.mapper.MapperRegistry;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.Collection;
|
||||
|
@ -142,14 +144,15 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
|
|||
|
||||
IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
|
||||
|
||||
final Map<String, SimilarityProvider.Factory> similarityMap = new AbstractMap<String, SimilarityProvider.Factory>() {
|
||||
final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarityMap
|
||||
= new AbstractMap<String, TriFunction<Settings, Version, ScriptService, Similarity>>() {
|
||||
@Override
|
||||
public boolean containsKey(Object key) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimilarityProvider.Factory get(Object key) {
|
||||
public TriFunction<Settings, Version, ScriptService, Similarity> get(Object key) {
|
||||
assert key instanceof String : "key must be a string but was: " + key.getClass();
|
||||
return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY);
|
||||
}
|
||||
|
@ -157,7 +160,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
|
|||
// this entrySet impl isn't fully correct but necessary as SimilarityService will iterate
|
||||
// over all similarities
|
||||
@Override
|
||||
public Set<Entry<String, SimilarityProvider.Factory>> entrySet() {
|
||||
public Set<Entry<String, TriFunction<Settings, Version, ScriptService, Similarity>>> entrySet() {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -19,9 +19,13 @@
|
|||
|
||||
package org.elasticsearch.index;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.util.SetOnce;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.TriFunction;
|
||||
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
|
||||
import org.elasticsearch.common.settings.Setting;
|
||||
import org.elasticsearch.common.settings.Setting.Property;
|
||||
|
@ -39,9 +43,6 @@ import org.elasticsearch.index.shard.IndexEventListener;
|
|||
import org.elasticsearch.index.shard.IndexSearcherWrapper;
|
||||
import org.elasticsearch.index.shard.IndexingOperationListener;
|
||||
import org.elasticsearch.index.shard.SearchOperationListener;
|
||||
import org.elasticsearch.index.shard.ShardId;
|
||||
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
|
||||
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||
import org.elasticsearch.index.similarity.SimilarityService;
|
||||
import org.elasticsearch.index.store.IndexStore;
|
||||
import org.elasticsearch.indices.IndicesQueryCache;
|
||||
|
@ -68,10 +69,10 @@ import java.util.function.Function;
|
|||
/**
|
||||
* IndexModule represents the central extension point for index level custom implementations like:
|
||||
* <ul>
|
||||
* <li>{@link SimilarityProvider} - New {@link SimilarityProvider} implementations can be registered through
|
||||
* {@link #addSimilarity(String, SimilarityProvider.Factory)} while existing Providers can be referenced through Settings under the
|
||||
* <li>{@link Similarity} - New {@link Similarity} implementations can be registered through
|
||||
* {@link #addSimilarity(String, TriFunction)} while existing Providers can be referenced through Settings under the
|
||||
* {@link IndexModule#SIMILARITY_SETTINGS_PREFIX} prefix along with the "type" value. For example, to reference the
|
||||
* {@link BM25SimilarityProvider}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
|
||||
* {@link BM25Similarity}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
|
||||
* <li>{@link IndexStore} - Custom {@link IndexStore} instances can be registered via {@link #addIndexStore(String, Function)}</li>
|
||||
* <li>{@link IndexEventListener} - Custom {@link IndexEventListener} instances can be registered via
|
||||
* {@link #addIndexEventListener(IndexEventListener)}</li>
|
||||
|
@ -107,7 +108,7 @@ public final class IndexModule {
|
|||
final SetOnce<EngineFactory> engineFactory = new SetOnce<>();
|
||||
private SetOnce<IndexSearcherWrapperFactory> indexSearcherWrapper = new SetOnce<>();
|
||||
private final Set<IndexEventListener> indexEventListeners = new HashSet<>();
|
||||
private final Map<String, SimilarityProvider.Factory> similarities = new HashMap<>();
|
||||
private final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities = new HashMap<>();
|
||||
private final Map<String, Function<IndexSettings, IndexStore>> storeTypes = new HashMap<>();
|
||||
private final SetOnce<BiFunction<IndexSettings, IndicesQueryCache, QueryCache>> forceQueryCacheProvider = new SetOnce<>();
|
||||
private final List<SearchOperationListener> searchOperationListeners = new ArrayList<>();
|
||||
|
@ -246,12 +247,17 @@ public final class IndexModule {
|
|||
|
||||
|
||||
/**
|
||||
* Registers the given {@link SimilarityProvider} with the given name
|
||||
* Registers the given {@link Similarity} with the given name.
|
||||
* The function takes as parameters:<ul>
|
||||
* <li>settings for this similarity
|
||||
* <li>version of Elasticsearch when the index was created
|
||||
* <li>ScriptService, for script-based similarities
|
||||
* </ul>
|
||||
*
|
||||
* @param name Name of the SimilarityProvider
|
||||
* @param similarity SimilarityProvider to register
|
||||
*/
|
||||
public void addSimilarity(String name, SimilarityProvider.Factory similarity) {
|
||||
public void addSimilarity(String name, TriFunction<Settings, Version, ScriptService, Similarity> similarity) {
|
||||
ensureNotFrozen();
|
||||
if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) {
|
||||
throw new IllegalArgumentException("similarity for name: [" + name + " is already registered");
|
||||
|
|
|
@ -1,82 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.Normalization;
|
||||
import org.apache.lucene.search.similarities.NormalizationH1;
|
||||
import org.apache.lucene.search.similarities.NormalizationH2;
|
||||
import org.apache.lucene.search.similarities.NormalizationH3;
|
||||
import org.apache.lucene.search.similarities.NormalizationZ;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* Abstract implementation of {@link SimilarityProvider} providing common behaviour
|
||||
*/
|
||||
public abstract class AbstractSimilarityProvider implements SimilarityProvider {
|
||||
|
||||
protected static final Normalization NO_NORMALIZATION = new Normalization.NoNormalization();
|
||||
|
||||
private final String name;
|
||||
|
||||
/**
|
||||
* Creates a new AbstractSimilarityProvider with the given name
|
||||
*
|
||||
* @param name Name of the Provider
|
||||
*/
|
||||
protected AbstractSimilarityProvider(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public String name() {
|
||||
return this.name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link Normalization}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link Normalization} referred to in the Settings
|
||||
*/
|
||||
protected Normalization parseNormalization(Settings settings) {
|
||||
String normalization = settings.get("normalization");
|
||||
|
||||
if ("no".equals(normalization)) {
|
||||
return NO_NORMALIZATION;
|
||||
} else if ("h1".equals(normalization)) {
|
||||
float c = settings.getAsFloat("normalization.h1.c", 1f);
|
||||
return new NormalizationH1(c);
|
||||
} else if ("h2".equals(normalization)) {
|
||||
float c = settings.getAsFloat("normalization.h2.c", 1f);
|
||||
return new NormalizationH2(c);
|
||||
} else if ("h3".equals(normalization)) {
|
||||
float c = settings.getAsFloat("normalization.h3.c", 800f);
|
||||
return new NormalizationH3(c);
|
||||
} else if ("z".equals(normalization)) {
|
||||
float z = settings.getAsFloat("normalization.z.z", 0.30f);
|
||||
return new NormalizationZ(z);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unsupported Normalization [" + normalization + "]");
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for the {@link BM25Similarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>k1</li>
|
||||
* <li>b</li>
|
||||
* <li>discount_overlaps</li>
|
||||
* </ul>
|
||||
* @see BM25Similarity For more information about configuration
|
||||
*/
|
||||
public class BM25SimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final BM25Similarity similarity;
|
||||
|
||||
public BM25SimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
float k1 = settings.getAsFloat("k1", 1.2f);
|
||||
float b = settings.getAsFloat("b", 0.75f);
|
||||
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||
|
||||
this.similarity = new BM25Similarity(k1, b);
|
||||
this.similarity.setDiscountOverlaps(discountOverlaps);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.BooleanSimilarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for the {@link BooleanSimilarity},
|
||||
* which is a simple similarity that gives terms a score equal
|
||||
* to their query boost only. This is useful in situations where
|
||||
* a field does not need to be scored by a full-text ranking
|
||||
* algorithm, but rather all that matters is whether the query
|
||||
* terms matched or not.
|
||||
*/
|
||||
public class BooleanSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final BooleanSimilarity similarity = new BooleanSimilarity();
|
||||
|
||||
public BooleanSimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public BooleanSimilarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for {@link ClassicSimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>discount_overlaps</li>
|
||||
* </ul>
|
||||
* @see ClassicSimilarity For more information about configuration
|
||||
*/
|
||||
public class ClassicSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final ClassicSimilarity similarity = new ClassicSimilarity();
|
||||
|
||||
public ClassicSimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||
this.similarity.setDiscountOverlaps(discountOverlaps);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public ClassicSimilarity get() {
|
||||
return similarity;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,79 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.DFISimilarity;
|
||||
import org.apache.lucene.search.similarities.Independence;
|
||||
import org.apache.lucene.search.similarities.IndependenceChiSquared;
|
||||
import org.apache.lucene.search.similarities.IndependenceSaturated;
|
||||
import org.apache.lucene.search.similarities.IndependenceStandardized;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for the {@link DFISimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>independence_measure</li>
|
||||
* <li>discount_overlaps</li>
|
||||
* </ul>
|
||||
* @see DFISimilarity For more information about configuration
|
||||
*/
|
||||
public class DFISimilarityProvider extends AbstractSimilarityProvider {
|
||||
// the "basic models" of divergence from independence
|
||||
private static final Map<String, Independence> INDEPENDENCE_MEASURES;
|
||||
static {
|
||||
Map<String, Independence> measures = new HashMap<>();
|
||||
measures.put("standardized", new IndependenceStandardized());
|
||||
measures.put("saturated", new IndependenceSaturated());
|
||||
measures.put("chisquared", new IndependenceChiSquared());
|
||||
INDEPENDENCE_MEASURES = unmodifiableMap(measures);
|
||||
}
|
||||
|
||||
private final DFISimilarity similarity;
|
||||
|
||||
public DFISimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||
Independence measure = parseIndependence(settings);
|
||||
this.similarity = new DFISimilarity(measure);
|
||||
this.similarity.setDiscountOverlaps(discountOverlaps);
|
||||
}
|
||||
|
||||
private Independence parseIndependence(Settings settings) {
|
||||
String name = settings.get("independence_measure");
|
||||
Independence measure = INDEPENDENCE_MEASURES.get(name);
|
||||
if (measure == null) {
|
||||
throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "]");
|
||||
}
|
||||
return measure;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -1,123 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.AfterEffect;
|
||||
import org.apache.lucene.search.similarities.AfterEffectB;
|
||||
import org.apache.lucene.search.similarities.AfterEffectL;
|
||||
import org.apache.lucene.search.similarities.BasicModel;
|
||||
import org.apache.lucene.search.similarities.BasicModelBE;
|
||||
import org.apache.lucene.search.similarities.BasicModelD;
|
||||
import org.apache.lucene.search.similarities.BasicModelG;
|
||||
import org.apache.lucene.search.similarities.BasicModelIF;
|
||||
import org.apache.lucene.search.similarities.BasicModelIn;
|
||||
import org.apache.lucene.search.similarities.BasicModelIne;
|
||||
import org.apache.lucene.search.similarities.BasicModelP;
|
||||
import org.apache.lucene.search.similarities.DFRSimilarity;
|
||||
import org.apache.lucene.search.similarities.Normalization;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for {@link DFRSimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>basic_model</li>
|
||||
* <li>after_effect</li>
|
||||
* <li>normalization</li>
|
||||
* </ul>
|
||||
* @see DFRSimilarity For more information about configuration
|
||||
*/
|
||||
public class DFRSimilarityProvider extends AbstractSimilarityProvider {
|
||||
private static final Map<String, BasicModel> BASIC_MODELS;
|
||||
private static final Map<String, AfterEffect> AFTER_EFFECTS;
|
||||
|
||||
static {
|
||||
Map<String, BasicModel> models = new HashMap<>();
|
||||
models.put("be", new BasicModelBE());
|
||||
models.put("d", new BasicModelD());
|
||||
models.put("g", new BasicModelG());
|
||||
models.put("if", new BasicModelIF());
|
||||
models.put("in", new BasicModelIn());
|
||||
models.put("ine", new BasicModelIne());
|
||||
models.put("p", new BasicModelP());
|
||||
BASIC_MODELS = unmodifiableMap(models);
|
||||
|
||||
Map<String, AfterEffect> effects = new HashMap<>();
|
||||
effects.put("no", new AfterEffect.NoAfterEffect());
|
||||
effects.put("b", new AfterEffectB());
|
||||
effects.put("l", new AfterEffectL());
|
||||
AFTER_EFFECTS = unmodifiableMap(effects);
|
||||
}
|
||||
|
||||
private final DFRSimilarity similarity;
|
||||
|
||||
public DFRSimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
BasicModel basicModel = parseBasicModel(settings);
|
||||
AfterEffect afterEffect = parseAfterEffect(settings);
|
||||
Normalization normalization = parseNormalization(settings);
|
||||
this.similarity = new DFRSimilarity(basicModel, afterEffect, normalization);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link BasicModel}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link BasicModel} referred to in the Settings
|
||||
*/
|
||||
protected BasicModel parseBasicModel(Settings settings) {
|
||||
String basicModel = settings.get("basic_model");
|
||||
BasicModel model = BASIC_MODELS.get(basicModel);
|
||||
if (model == null) {
|
||||
throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "]");
|
||||
}
|
||||
return model;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link AfterEffect}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link AfterEffect} referred to in the Settings
|
||||
*/
|
||||
protected AfterEffect parseAfterEffect(Settings settings) {
|
||||
String afterEffect = settings.get("after_effect");
|
||||
AfterEffect effect = AFTER_EFFECTS.get(afterEffect);
|
||||
if (effect == null) {
|
||||
throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "]");
|
||||
}
|
||||
return effect;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -1,113 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.Distribution;
|
||||
import org.apache.lucene.search.similarities.DistributionLL;
|
||||
import org.apache.lucene.search.similarities.DistributionSPL;
|
||||
import org.apache.lucene.search.similarities.IBSimilarity;
|
||||
import org.apache.lucene.search.similarities.Lambda;
|
||||
import org.apache.lucene.search.similarities.LambdaDF;
|
||||
import org.apache.lucene.search.similarities.LambdaTTF;
|
||||
import org.apache.lucene.search.similarities.Normalization;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for {@link IBSimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>distribution</li>
|
||||
* <li>lambda</li>
|
||||
* <li>normalization</li>
|
||||
* </ul>
|
||||
* @see IBSimilarity For more information about configuration
|
||||
*/
|
||||
public class IBSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private static final Map<String, Distribution> DISTRIBUTIONS;
|
||||
private static final Map<String, Lambda> LAMBDAS;
|
||||
|
||||
static {
|
||||
Map<String, Distribution> distributions = new HashMap<>();
|
||||
distributions.put("ll", new DistributionLL());
|
||||
distributions.put("spl", new DistributionSPL());
|
||||
DISTRIBUTIONS = unmodifiableMap(distributions);
|
||||
|
||||
Map<String, Lambda> lamdas = new HashMap<>();
|
||||
lamdas.put("df", new LambdaDF());
|
||||
lamdas.put("ttf", new LambdaTTF());
|
||||
LAMBDAS = unmodifiableMap(lamdas);
|
||||
}
|
||||
|
||||
private final IBSimilarity similarity;
|
||||
|
||||
public IBSimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
Distribution distribution = parseDistribution(settings);
|
||||
Lambda lambda = parseLambda(settings);
|
||||
Normalization normalization = parseNormalization(settings);
|
||||
this.similarity = new IBSimilarity(distribution, lambda, normalization);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link Distribution}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link Normalization} referred to in the Settings
|
||||
*/
|
||||
protected Distribution parseDistribution(Settings settings) {
|
||||
String rawDistribution = settings.get("distribution");
|
||||
Distribution distribution = DISTRIBUTIONS.get(rawDistribution);
|
||||
if (distribution == null) {
|
||||
throw new IllegalArgumentException("Unsupported Distribution [" + rawDistribution + "]");
|
||||
}
|
||||
return distribution;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link Lambda}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link Normalization} referred to in the Settings
|
||||
*/
|
||||
protected Lambda parseLambda(Settings settings) {
|
||||
String rawLambda = settings.get("lambda");
|
||||
Lambda lambda = LAMBDAS.get(rawLambda);
|
||||
if (lambda == null) {
|
||||
throw new IllegalArgumentException("Unsupported Lambda [" + rawLambda + "]");
|
||||
}
|
||||
return lambda;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for {@link LMDirichletSimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>mu</li>
|
||||
* </ul>
|
||||
* @see LMDirichletSimilarity For more information about configuration
|
||||
*/
|
||||
public class LMDirichletSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final LMDirichletSimilarity similarity;
|
||||
|
||||
public LMDirichletSimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
float mu = settings.getAsFloat("mu", 2000f);
|
||||
this.similarity = new LMDirichletSimilarity(mu);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for {@link LMJelinekMercerSimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>lambda</li>
|
||||
* </ul>
|
||||
* @see LMJelinekMercerSimilarity For more information about configuration
|
||||
*/
|
||||
public class LMJelinekMercerSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final LMJelinekMercerSimilarity similarity;
|
||||
|
||||
public LMJelinekMercerSimilarityProvider(String name, Settings settings, Settings indexSettings) {
|
||||
super(name);
|
||||
float lambda = settings.getAsFloat("lambda", 0.1f);
|
||||
this.similarity = new LMJelinekMercerSimilarity(lambda);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -20,6 +20,8 @@
|
|||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.TriFunction;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.script.Script;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
|
@ -27,13 +29,11 @@ import org.elasticsearch.script.SimilarityScript;
|
|||
import org.elasticsearch.script.SimilarityWeightScript;
|
||||
|
||||
/** Provider of scripted similarities. */
|
||||
public class ScriptedSimilarityProvider extends AbstractSimilarityProvider {
|
||||
final class ScriptedSimilarityProvider implements TriFunction<Settings, Version, ScriptService, Similarity> {
|
||||
|
||||
private final ScriptedSimilarity scriptedSimilarity;
|
||||
|
||||
public ScriptedSimilarityProvider(String name, Settings settings, Settings indexSettings, ScriptService scriptService) {
|
||||
super(name);
|
||||
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||
@Override
|
||||
public Similarity apply(Settings settings, Version indexCreatedVersion, ScriptService scriptService) {
|
||||
boolean discountOverlaps = settings.getAsBoolean(SimilarityProviders.DISCOUNT_OVERLAPS, true);
|
||||
Settings scriptSettings = settings.getAsSettings("script");
|
||||
Script script = Script.parse(scriptSettings);
|
||||
SimilarityScript.Factory scriptFactory = scriptService.compile(script, SimilarityScript.CONTEXT);
|
||||
|
@ -44,15 +44,10 @@ public class ScriptedSimilarityProvider extends AbstractSimilarityProvider {
|
|||
weightScript = Script.parse(weightScriptSettings);
|
||||
weightScriptFactory = scriptService.compile(weightScript, SimilarityWeightScript.CONTEXT);
|
||||
}
|
||||
scriptedSimilarity = new ScriptedSimilarity(
|
||||
return new ScriptedSimilarity(
|
||||
weightScript == null ? null : weightScript.toString(),
|
||||
weightScriptFactory == null ? null : weightScriptFactory::newInstance,
|
||||
script.toString(), scriptFactory::newInstance, discountOverlaps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return scriptedSimilarity;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -20,32 +20,32 @@
|
|||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
|
||||
/**
|
||||
* Provider for {@link Similarity} instances
|
||||
* Wrapper around a {@link Similarity} and its name.
|
||||
*/
|
||||
public interface SimilarityProvider {
|
||||
public final class SimilarityProvider {
|
||||
|
||||
/**
|
||||
* Returns the name associated with the Provider
|
||||
*
|
||||
* @return Name of the Provider
|
||||
*/
|
||||
String name();
|
||||
private final String name;
|
||||
private final Similarity similarity;
|
||||
|
||||
/**
|
||||
* Returns the {@link Similarity} the Provider is for
|
||||
*
|
||||
* @return Provided {@link Similarity}
|
||||
*/
|
||||
Similarity get();
|
||||
|
||||
/** Factory of {@link SimilarityProvider} */
|
||||
@FunctionalInterface
|
||||
interface Factory {
|
||||
/** Create a new {@link SimilarityProvider}. */
|
||||
SimilarityProvider create(String name, Settings settings, Settings indexSettings, ScriptService scriptService);
|
||||
public SimilarityProvider(String name, Similarity similarity) {
|
||||
this.name = name;
|
||||
this.similarity = similarity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the name of this {@link Similarity}.
|
||||
*/
|
||||
public String name() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the wrapped {@link Similarity}.
|
||||
*/
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,300 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.AfterEffect;
|
||||
import org.apache.lucene.search.similarities.AfterEffectB;
|
||||
import org.apache.lucene.search.similarities.AfterEffectL;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.BasicModel;
|
||||
import org.apache.lucene.search.similarities.BasicModelBE;
|
||||
import org.apache.lucene.search.similarities.BasicModelD;
|
||||
import org.apache.lucene.search.similarities.BasicModelG;
|
||||
import org.apache.lucene.search.similarities.BasicModelIF;
|
||||
import org.apache.lucene.search.similarities.BasicModelIn;
|
||||
import org.apache.lucene.search.similarities.BasicModelIne;
|
||||
import org.apache.lucene.search.similarities.BasicModelP;
|
||||
import org.apache.lucene.search.similarities.BooleanSimilarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.DFISimilarity;
|
||||
import org.apache.lucene.search.similarities.DFRSimilarity;
|
||||
import org.apache.lucene.search.similarities.Distribution;
|
||||
import org.apache.lucene.search.similarities.DistributionLL;
|
||||
import org.apache.lucene.search.similarities.DistributionSPL;
|
||||
import org.apache.lucene.search.similarities.IBSimilarity;
|
||||
import org.apache.lucene.search.similarities.Independence;
|
||||
import org.apache.lucene.search.similarities.IndependenceChiSquared;
|
||||
import org.apache.lucene.search.similarities.IndependenceSaturated;
|
||||
import org.apache.lucene.search.similarities.IndependenceStandardized;
|
||||
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
||||
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
||||
import org.apache.lucene.search.similarities.Lambda;
|
||||
import org.apache.lucene.search.similarities.LambdaDF;
|
||||
import org.apache.lucene.search.similarities.LambdaTTF;
|
||||
import org.apache.lucene.search.similarities.Normalization;
|
||||
import org.apache.lucene.search.similarities.NormalizationH1;
|
||||
import org.apache.lucene.search.similarities.NormalizationH2;
|
||||
import org.apache.lucene.search.similarities.NormalizationH3;
|
||||
import org.apache.lucene.search.similarities.NormalizationZ;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
final class SimilarityProviders {
|
||||
|
||||
private SimilarityProviders() {} // no instantiation
|
||||
|
||||
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityProviders.class));
|
||||
static final String DISCOUNT_OVERLAPS = "discount_overlaps";
|
||||
|
||||
private static final Map<String, BasicModel> BASIC_MODELS;
|
||||
private static final Map<String, AfterEffect> AFTER_EFFECTS;
|
||||
|
||||
static {
|
||||
Map<String, BasicModel> models = new HashMap<>();
|
||||
models.put("be", new BasicModelBE());
|
||||
models.put("d", new BasicModelD());
|
||||
models.put("g", new BasicModelG());
|
||||
models.put("if", new BasicModelIF());
|
||||
models.put("in", new BasicModelIn());
|
||||
models.put("ine", new BasicModelIne());
|
||||
models.put("p", new BasicModelP());
|
||||
BASIC_MODELS = unmodifiableMap(models);
|
||||
|
||||
Map<String, AfterEffect> effects = new HashMap<>();
|
||||
effects.put("no", new AfterEffect.NoAfterEffect());
|
||||
effects.put("b", new AfterEffectB());
|
||||
effects.put("l", new AfterEffectL());
|
||||
AFTER_EFFECTS = unmodifiableMap(effects);
|
||||
}
|
||||
|
||||
private static final Map<String, Independence> INDEPENDENCE_MEASURES;
|
||||
static {
|
||||
Map<String, Independence> measures = new HashMap<>();
|
||||
measures.put("standardized", new IndependenceStandardized());
|
||||
measures.put("saturated", new IndependenceSaturated());
|
||||
measures.put("chisquared", new IndependenceChiSquared());
|
||||
INDEPENDENCE_MEASURES = unmodifiableMap(measures);
|
||||
}
|
||||
|
||||
private static final Map<String, Distribution> DISTRIBUTIONS;
|
||||
private static final Map<String, Lambda> LAMBDAS;
|
||||
|
||||
static {
|
||||
Map<String, Distribution> distributions = new HashMap<>();
|
||||
distributions.put("ll", new DistributionLL());
|
||||
distributions.put("spl", new DistributionSPL());
|
||||
DISTRIBUTIONS = unmodifiableMap(distributions);
|
||||
|
||||
Map<String, Lambda> lamdas = new HashMap<>();
|
||||
lamdas.put("df", new LambdaDF());
|
||||
lamdas.put("ttf", new LambdaTTF());
|
||||
LAMBDAS = unmodifiableMap(lamdas);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link BasicModel}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link BasicModel} referred to in the Settings
|
||||
*/
|
||||
private static BasicModel parseBasicModel(Settings settings) {
|
||||
String basicModel = settings.get("basic_model");
|
||||
BasicModel model = BASIC_MODELS.get(basicModel);
|
||||
if (model == null) {
|
||||
throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "], expected one of " + BASIC_MODELS.keySet());
|
||||
}
|
||||
return model;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link AfterEffect}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link AfterEffect} referred to in the Settings
|
||||
*/
|
||||
private static AfterEffect parseAfterEffect(Settings settings) {
|
||||
String afterEffect = settings.get("after_effect");
|
||||
AfterEffect effect = AFTER_EFFECTS.get(afterEffect);
|
||||
if (effect == null) {
|
||||
throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "], expected one of " + AFTER_EFFECTS.keySet());
|
||||
}
|
||||
return effect;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link Normalization}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link Normalization} referred to in the Settings
|
||||
*/
|
||||
private static Normalization parseNormalization(Settings settings) {
|
||||
String normalization = settings.get("normalization");
|
||||
|
||||
if ("no".equals(normalization)) {
|
||||
return new Normalization.NoNormalization();
|
||||
} else if ("h1".equals(normalization)) {
|
||||
float c = settings.getAsFloat("normalization.h1.c", 1f);
|
||||
return new NormalizationH1(c);
|
||||
} else if ("h2".equals(normalization)) {
|
||||
float c = settings.getAsFloat("normalization.h2.c", 1f);
|
||||
return new NormalizationH2(c);
|
||||
} else if ("h3".equals(normalization)) {
|
||||
float c = settings.getAsFloat("normalization.h3.c", 800f);
|
||||
return new NormalizationH3(c);
|
||||
} else if ("z".equals(normalization)) {
|
||||
float z = settings.getAsFloat("normalization.z.z", 0.30f);
|
||||
return new NormalizationZ(z);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unsupported Normalization [" + normalization + "]");
|
||||
}
|
||||
}
|
||||
|
||||
private static Independence parseIndependence(Settings settings) {
|
||||
String name = settings.get("independence_measure");
|
||||
Independence measure = INDEPENDENCE_MEASURES.get(name);
|
||||
if (measure == null) {
|
||||
throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "], expected one of "
|
||||
+ INDEPENDENCE_MEASURES.keySet());
|
||||
}
|
||||
return measure;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link Distribution}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link Normalization} referred to in the Settings
|
||||
*/
|
||||
private static Distribution parseDistribution(Settings settings) {
|
||||
String rawDistribution = settings.get("distribution");
|
||||
Distribution distribution = DISTRIBUTIONS.get(rawDistribution);
|
||||
if (distribution == null) {
|
||||
throw new IllegalArgumentException("Unsupported Distribution [" + rawDistribution + "]");
|
||||
}
|
||||
return distribution;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given Settings and creates the appropriate {@link Lambda}
|
||||
*
|
||||
* @param settings Settings to parse
|
||||
* @return {@link Normalization} referred to in the Settings
|
||||
*/
|
||||
private static Lambda parseLambda(Settings settings) {
|
||||
String rawLambda = settings.get("lambda");
|
||||
Lambda lambda = LAMBDAS.get(rawLambda);
|
||||
if (lambda == null) {
|
||||
throw new IllegalArgumentException("Unsupported Lambda [" + rawLambda + "]");
|
||||
}
|
||||
return lambda;
|
||||
}
|
||||
|
||||
static void assertSettingsIsSubsetOf(String type, Version version, Settings settings, String... supportedSettings) {
|
||||
Set<String> unknownSettings = new HashSet<>(settings.keySet());
|
||||
unknownSettings.removeAll(Arrays.asList(supportedSettings));
|
||||
unknownSettings.remove("type"); // used to figure out which sim this is
|
||||
if (unknownSettings.isEmpty() == false) {
|
||||
if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
throw new IllegalArgumentException("Unknown settings for similarity of type [" + type + "]: " + unknownSettings);
|
||||
} else {
|
||||
DEPRECATION_LOGGER.deprecated("Unknown settings for similarity of type [" + type + "]: " + unknownSettings);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
|
||||
|
||||
float k1 = settings.getAsFloat("k1", 1.2f);
|
||||
float b = settings.getAsFloat("b", 0.75f);
|
||||
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
|
||||
|
||||
BM25Similarity similarity = new BM25Similarity(k1, b);
|
||||
similarity.setDiscountOverlaps(discountOverlaps);
|
||||
return similarity;
|
||||
}
|
||||
|
||||
public static BooleanSimilarity createBooleanSimilarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("boolean", indexCreatedVersion, settings);
|
||||
return new BooleanSimilarity();
|
||||
}
|
||||
|
||||
public static ClassicSimilarity createClassicSimilarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("classic", indexCreatedVersion, settings, DISCOUNT_OVERLAPS);
|
||||
|
||||
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
|
||||
|
||||
ClassicSimilarity similarity = new ClassicSimilarity();
|
||||
similarity.setDiscountOverlaps(discountOverlaps);
|
||||
return similarity;
|
||||
}
|
||||
|
||||
public static DFRSimilarity createDfrSimilarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("DFR", indexCreatedVersion, settings,
|
||||
"basic_model", "after_effect", "normalization",
|
||||
"normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z");
|
||||
|
||||
|
||||
return new DFRSimilarity(
|
||||
parseBasicModel(settings),
|
||||
parseAfterEffect(settings),
|
||||
parseNormalization(settings));
|
||||
}
|
||||
|
||||
public static DFISimilarity createDfiSimilarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("DFI", indexCreatedVersion, settings, "independence_measure");
|
||||
|
||||
return new DFISimilarity(parseIndependence(settings));
|
||||
}
|
||||
|
||||
public static IBSimilarity createIBSimilarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("IB", indexCreatedVersion, settings, "distribution", "lambda", "normalization",
|
||||
"normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z");
|
||||
|
||||
return new IBSimilarity(
|
||||
parseDistribution(settings),
|
||||
parseLambda(settings),
|
||||
parseNormalization(settings));
|
||||
}
|
||||
|
||||
public static LMDirichletSimilarity createLMDirichletSimilarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("LMDirichlet", indexCreatedVersion, settings, "mu");
|
||||
|
||||
float mu = settings.getAsFloat("mu", 2000f);
|
||||
return new LMDirichletSimilarity(mu);
|
||||
}
|
||||
|
||||
public static LMJelinekMercerSimilarity createLMJelinekMercerSimilarity(Settings settings, Version indexCreatedVersion) {
|
||||
assertSettingsIsSubsetOf("LMJelinekMercer", indexCreatedVersion, settings, "lambda");
|
||||
|
||||
float lambda = settings.getAsFloat("lambda", 0.1f);
|
||||
return new LMJelinekMercerSimilarity(lambda);
|
||||
}
|
||||
}
|
|
@ -19,8 +19,13 @@
|
|||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.BooleanSimilarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.TriFunction;
|
||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -34,45 +39,84 @@ import org.elasticsearch.script.ScriptService;
|
|||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
public final class SimilarityService extends AbstractIndexComponent {
|
||||
|
||||
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityService.class));
|
||||
public static final String DEFAULT_SIMILARITY = "BM25";
|
||||
private final Similarity defaultSimilarity;
|
||||
private final Map<String, SimilarityProvider> similarities;
|
||||
private static final Map<String, SimilarityProvider.Factory> DEFAULTS;
|
||||
public static final Map<String, SimilarityProvider.Factory> BUILT_IN;
|
||||
private static final String CLASSIC_SIMILARITY = "classic";
|
||||
private static final Map<String, Function<Version, Supplier<Similarity>>> DEFAULTS;
|
||||
public static final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> BUILT_IN;
|
||||
static {
|
||||
Map<String, SimilarityProvider.Factory> defaults = new HashMap<>();
|
||||
defaults.put("classic",
|
||||
(name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings));
|
||||
defaults.put("BM25",
|
||||
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
|
||||
defaults.put("boolean",
|
||||
(name, settings, indexSettings, scriptService) -> new BooleanSimilarityProvider(name, settings, indexSettings));
|
||||
Map<String, Function<Version, Supplier<Similarity>>> defaults = new HashMap<>();
|
||||
defaults.put(CLASSIC_SIMILARITY, version -> {
|
||||
if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
return () -> {
|
||||
throw new IllegalArgumentException("The [classic] similarity may not be used anymore. Please use the [BM25] "
|
||||
+ "similarity or build a custom [scripted] similarity instead.");
|
||||
};
|
||||
} else {
|
||||
final ClassicSimilarity similarity = SimilarityProviders.createClassicSimilarity(Settings.EMPTY, version);
|
||||
return () -> {
|
||||
DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally "
|
||||
+ "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity "
|
||||
+ "instead.");
|
||||
return similarity;
|
||||
};
|
||||
}
|
||||
});
|
||||
defaults.put("BM25", version -> {
|
||||
final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
|
||||
return () -> similarity;
|
||||
});
|
||||
defaults.put("boolean", version -> {
|
||||
final Similarity similarity = new BooleanSimilarity();
|
||||
return () -> similarity;
|
||||
});
|
||||
|
||||
Map<String, SimilarityProvider.Factory> builtIn = new HashMap<>(defaults);
|
||||
Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> builtIn = new HashMap<>();
|
||||
builtIn.put(CLASSIC_SIMILARITY,
|
||||
(settings, version, script) -> {
|
||||
if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
throw new IllegalArgumentException("The [classic] similarity may not be used anymore. Please use the [BM25] "
|
||||
+ "similarity or build a custom [scripted] similarity instead.");
|
||||
} else {
|
||||
DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally "
|
||||
+ "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity "
|
||||
+ "instead.");
|
||||
return SimilarityProviders.createClassicSimilarity(settings, version);
|
||||
}
|
||||
});
|
||||
builtIn.put("BM25",
|
||||
(settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version));
|
||||
builtIn.put("boolean",
|
||||
(settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version));
|
||||
builtIn.put("DFR",
|
||||
(name, settings, indexSettings, scriptService) -> new DFRSimilarityProvider(name, settings, indexSettings));
|
||||
(settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version));
|
||||
builtIn.put("IB",
|
||||
(name, settings, indexSettings, scriptService) -> new IBSimilarityProvider(name, settings, indexSettings));
|
||||
(settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version));
|
||||
builtIn.put("LMDirichlet",
|
||||
(name, settings, indexSettings, scriptService) -> new LMDirichletSimilarityProvider(name, settings, indexSettings));
|
||||
(settings, version, scriptService) -> SimilarityProviders.createLMDirichletSimilarity(settings, version));
|
||||
builtIn.put("LMJelinekMercer",
|
||||
(name, settings, indexSettings, scriptService) -> new LMJelinekMercerSimilarityProvider(name, settings, indexSettings));
|
||||
(settings, version, scriptService) -> SimilarityProviders.createLMJelinekMercerSimilarity(settings, version));
|
||||
builtIn.put("DFI",
|
||||
(name, settings, indexSettings, scriptService) -> new DFISimilarityProvider(name, settings, indexSettings));
|
||||
builtIn.put("scripted", ScriptedSimilarityProvider::new);
|
||||
(settings, version, scriptService) -> SimilarityProviders.createDfiSimilarity(settings, version));
|
||||
builtIn.put("scripted", new ScriptedSimilarityProvider());
|
||||
DEFAULTS = Collections.unmodifiableMap(defaults);
|
||||
BUILT_IN = Collections.unmodifiableMap(builtIn);
|
||||
}
|
||||
|
||||
private final Similarity defaultSimilarity;
|
||||
private final Map<String, Supplier<Similarity>> similarities;
|
||||
|
||||
public SimilarityService(IndexSettings indexSettings, ScriptService scriptService,
|
||||
Map<String, SimilarityProvider.Factory> similarities) {
|
||||
Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities) {
|
||||
super(indexSettings);
|
||||
Map<String, SimilarityProvider> providers = new HashMap<>(similarities.size());
|
||||
Map<String, Supplier<Similarity>> providers = new HashMap<>(similarities.size());
|
||||
Map<String, Settings> similaritySettings = this.indexSettings.getSettings().getGroups(IndexModule.SIMILARITY_SETTINGS_PREFIX);
|
||||
|
||||
for (Map.Entry<String, Settings> entry : similaritySettings.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
if (BUILT_IN.containsKey(name)) {
|
||||
|
@ -85,14 +129,13 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
} else if ((similarities.containsKey(typeName) || BUILT_IN.containsKey(typeName)) == false) {
|
||||
throw new IllegalArgumentException("Unknown Similarity type [" + typeName + "] for [" + name + "]");
|
||||
}
|
||||
SimilarityProvider.Factory defaultFactory = BUILT_IN.get(typeName);
|
||||
SimilarityProvider.Factory factory = similarities.getOrDefault(typeName, defaultFactory);
|
||||
providers.put(name, factory.create(name, providerSettings, indexSettings.getSettings(), scriptService));
|
||||
TriFunction<Settings, Version, ScriptService, Similarity> defaultFactory = BUILT_IN.get(typeName);
|
||||
TriFunction<Settings, Version, ScriptService, Similarity> factory = similarities.getOrDefault(typeName, defaultFactory);
|
||||
final Similarity similarity = factory.apply(providerSettings, indexSettings.getIndexVersionCreated(), scriptService);
|
||||
providers.put(name, () -> similarity);
|
||||
}
|
||||
Map<String, SimilarityProvider> providerMapping = addSimilarities(similaritySettings, indexSettings.getSettings(), scriptService,
|
||||
DEFAULTS);
|
||||
for (Map.Entry<String, SimilarityProvider> entry : providerMapping.entrySet()) {
|
||||
providers.put(entry.getKey(), entry.getValue());
|
||||
for (Map.Entry<String, Function<Version, Supplier<Similarity>>> entry : DEFAULTS.entrySet()) {
|
||||
providers.put(entry.getKey(), entry.getValue().apply(indexSettings.getIndexVersionCreated()));
|
||||
}
|
||||
this.similarities = providers;
|
||||
defaultSimilarity = (providers.get("default") != null) ? providers.get("default").get()
|
||||
|
@ -108,25 +151,16 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
defaultSimilarity;
|
||||
}
|
||||
|
||||
private Map<String, SimilarityProvider> addSimilarities(Map<String, Settings> similaritySettings, Settings indexSettings,
|
||||
ScriptService scriptService, Map<String, SimilarityProvider.Factory> similarities) {
|
||||
Map<String, SimilarityProvider> providers = new HashMap<>(similarities.size());
|
||||
for (Map.Entry<String, SimilarityProvider.Factory> entry : similarities.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
SimilarityProvider.Factory factory = entry.getValue();
|
||||
Settings providerSettings = similaritySettings.get(name);
|
||||
if (providerSettings == null) {
|
||||
providerSettings = Settings.Builder.EMPTY_SETTINGS;
|
||||
}
|
||||
providers.put(name, factory.create(name, providerSettings, indexSettings, scriptService));
|
||||
}
|
||||
return providers;
|
||||
}
|
||||
|
||||
|
||||
public SimilarityProvider getSimilarity(String name) {
|
||||
return similarities.get(name);
|
||||
Supplier<Similarity> sim = similarities.get(name);
|
||||
if (sim == null) {
|
||||
return null;
|
||||
}
|
||||
return new SimilarityProvider(name, sim.get());
|
||||
}
|
||||
|
||||
// for testing
|
||||
Similarity getDefaultSimilarity() {
|
||||
return defaultSimilarity;
|
||||
}
|
||||
|
|
|
@ -59,7 +59,6 @@ import org.elasticsearch.index.shard.IndexSearcherWrapper;
|
|||
import org.elasticsearch.index.shard.IndexingOperationListener;
|
||||
import org.elasticsearch.index.shard.SearchOperationListener;
|
||||
import org.elasticsearch.index.shard.ShardId;
|
||||
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||
import org.elasticsearch.index.similarity.SimilarityService;
|
||||
import org.elasticsearch.index.store.IndexStore;
|
||||
import org.elasticsearch.indices.IndicesModule;
|
||||
|
@ -287,17 +286,8 @@ public class IndexModuleTests extends ESTestCase {
|
|||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
|
||||
module.addSimilarity("test_similarity", (string, providerSettings, indexLevelSettings, scriptService) -> new SimilarityProvider() {
|
||||
@Override
|
||||
public String name() {
|
||||
return string;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return new TestSimilarity(providerSettings.get("key"));
|
||||
}
|
||||
});
|
||||
module.addSimilarity("test_similarity",
|
||||
(providerSettings, indexCreatedVersion, scriptService) -> new TestSimilarity(providerSettings.get("key")));
|
||||
|
||||
IndexService indexService = newIndexService(module);
|
||||
SimilarityService similarityService = indexService.similarityService();
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.BooleanSimilarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
@ -50,10 +50,10 @@ public class SimilarityServiceTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testOverrideDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().put("index.similarity.default.type", "classic")
|
||||
Settings settings = Settings.builder().put("index.similarity.default.type", "boolean")
|
||||
.build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
|
||||
assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
|
||||
assertTrue(service.getDefaultSimilarity() instanceof BooleanSimilarity);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
|||
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
||||
import org.apache.lucene.search.similarities.LambdaTTF;
|
||||
import org.apache.lucene.search.similarities.NormalizationH2;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -60,7 +62,24 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
|
||||
public void testResolveDefaultSimilarities() {
|
||||
SimilarityService similarityService = createIndex("foo").similarityService();
|
||||
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
|
||||
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
|
||||
assertThat(similarityService.getSimilarity("default"), equalTo(null));
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> similarityService.getSimilarity("classic"));
|
||||
assertEquals("The [classic] similarity may not be used anymore. Please use the [BM25] similarity or build a custom [scripted] "
|
||||
+ "similarity instead.", e.getMessage());
|
||||
}
|
||||
|
||||
public void testResolveDefaultSimilaritiesOn6xIndex() {
|
||||
Settings indexSettings = Settings.builder()
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden
|
||||
.build();
|
||||
SimilarityService similarityService = createIndex("foo", indexSettings).similarityService();
|
||||
assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class));
|
||||
assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally "
|
||||
+ "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity "
|
||||
+ "instead.");
|
||||
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
|
||||
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
|
||||
assertThat(similarityService.getSimilarity("default"), equalTo(null));
|
||||
|
@ -76,15 +95,27 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
Settings indexSettings = Settings.builder()
|
||||
.put("index.similarity.my_similarity.type", "classic")
|
||||
.put("index.similarity.my_similarity.discount_overlaps", false)
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden
|
||||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(ClassicSimilarity.class));
|
||||
|
||||
ClassicSimilarity similarity = (ClassicSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
|
||||
}
|
||||
|
||||
public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException {
|
||||
Settings indexSettings = Settings.builder()
|
||||
.put("index.similarity.my_similarity.type", "classic")
|
||||
.put("index.similarity.my_similarity.discount_overlaps", false)
|
||||
.build();
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> createIndex("foo", indexSettings));
|
||||
assertEquals("The [classic] similarity may not be used anymore. Please use the [BM25] similarity or build a custom [scripted] "
|
||||
+ "similarity instead.", e.getMessage());
|
||||
}
|
||||
|
||||
public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
|
||||
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
|
@ -100,7 +131,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(BM25Similarity.class));
|
||||
|
||||
BM25Similarity similarity = (BM25Similarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||
assertThat(similarity.getK1(), equalTo(2.0f));
|
||||
|
@ -119,8 +150,8 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
DocumentMapper documentMapper = indexService.mapperService()
|
||||
.documentMapperParser()
|
||||
.parse("type", new CompressedXContent(mapping));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(),
|
||||
instanceOf(BooleanSimilarityProvider.class));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(),
|
||||
instanceOf(BooleanSimilarity.class));
|
||||
}
|
||||
|
||||
public void testResolveSimilaritiesFromMapping_DFR() throws IOException {
|
||||
|
@ -139,7 +170,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(DFRSimilarity.class));
|
||||
|
||||
DFRSimilarity similarity = (DFRSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||
assertThat(similarity.getBasicModel(), instanceOf(BasicModelG.class));
|
||||
|
@ -164,7 +195,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(IBSimilarity.class));
|
||||
|
||||
IBSimilarity similarity = (IBSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||
assertThat(similarity.getDistribution(), instanceOf(DistributionSPL.class));
|
||||
|
@ -187,7 +218,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
MappedFieldType fieldType = documentMapper.mappers().getMapper("field1").fieldType();
|
||||
assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class));
|
||||
assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class));
|
||||
DFISimilarity similarity = (DFISimilarity) fieldType.similarity().get();
|
||||
assertThat(similarity.getIndependence(), instanceOf(IndependenceChiSquared.class));
|
||||
}
|
||||
|
@ -205,7 +236,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMDirichletSimilarity.class));
|
||||
|
||||
LMDirichletSimilarity similarity = (LMDirichletSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||
assertThat(similarity.getMu(), equalTo(3000f));
|
||||
|
@ -224,7 +255,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class));
|
||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMJelinekMercerSimilarity.class));
|
||||
|
||||
LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||
assertThat(similarity.getLambda(), equalTo(0.7f));
|
||||
|
@ -245,4 +276,14 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
assertThat(e.getMessage(), equalTo("Unknown Similarity type [unknown_similarity] for field [field1]"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testUnknownParameters() throws IOException {
|
||||
Settings indexSettings = Settings.builder()
|
||||
.put("index.similarity.my_similarity.type", "BM25")
|
||||
.put("index.similarity.my_similarity.z", 2.0f)
|
||||
.build();
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> createIndex("foo", indexSettings));
|
||||
assertEquals("Unknown settings for similarity of type [BM25]: [z]", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.indices;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags;
|
||||
|
@ -49,7 +50,6 @@ import org.elasticsearch.index.shard.IndexShard;
|
|||
import org.elasticsearch.index.shard.IndexShardState;
|
||||
import org.elasticsearch.index.shard.ShardId;
|
||||
import org.elasticsearch.index.shard.ShardPath;
|
||||
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
|
||||
import org.elasticsearch.indices.IndicesService.ShardDeletionCheckResult;
|
||||
import org.elasticsearch.plugins.MapperPlugin;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
|
@ -106,7 +106,7 @@ public class IndicesServiceTests extends ESSingleNodeTestCase {
|
|||
public void onIndexModule(IndexModule indexModule) {
|
||||
super.onIndexModule(indexModule);
|
||||
indexModule.addSimilarity("fake-similarity",
|
||||
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
|
||||
(settings, indexCreatedVersion, scriptService) -> new BM25Similarity());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -375,8 +375,8 @@ public class IndicesServiceTests extends ESSingleNodeTestCase {
|
|||
.build();
|
||||
MapperService mapperService = indicesService.createIndexMapperService(indexMetaData);
|
||||
assertNotNull(mapperService.documentMapperParser().parserContext("type").typeParser("fake-mapper"));
|
||||
assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test"),
|
||||
instanceOf(BM25SimilarityProvider.class));
|
||||
assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test").get(),
|
||||
instanceOf(BM25Similarity.class));
|
||||
}
|
||||
|
||||
public void testStatsByShardDoesNotDieFromExpectedExceptions() {
|
||||
|
|
|
@ -46,7 +46,7 @@ public class SimilarityIT extends ESIntegTestCase {
|
|||
.field("type", "text")
|
||||
.endObject()
|
||||
.startObject("field2")
|
||||
.field("similarity", "classic")
|
||||
.field("similarity", "boolean")
|
||||
.field("type", "text")
|
||||
.endObject()
|
||||
.endObject()
|
||||
|
@ -68,9 +68,9 @@ public class SimilarityIT extends ESIntegTestCase {
|
|||
assertThat(bm25SearchResponse.getHits().getTotalHits(), equalTo(1L));
|
||||
float bm25Score = bm25SearchResponse.getHits().getHits()[0].getScore();
|
||||
|
||||
SearchResponse defaultSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet();
|
||||
assertThat(defaultSearchResponse.getHits().getTotalHits(), equalTo(1L));
|
||||
float defaultScore = defaultSearchResponse.getHits().getHits()[0].getScore();
|
||||
SearchResponse booleanSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet();
|
||||
assertThat(booleanSearchResponse.getHits().getTotalHits(), equalTo(1L));
|
||||
float defaultScore = booleanSearchResponse.getHits().getHits()[0].getScore();
|
||||
|
||||
assertThat(bm25Score, not(equalTo(defaultScore)));
|
||||
}
|
||||
|
|
|
@ -20,13 +20,14 @@ package org.elasticsearch.index.mapper;
|
|||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.analysis.AnalyzerScope;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
|
||||
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -123,17 +124,17 @@ public abstract class FieldTypeTestCase extends ESTestCase {
|
|||
new Modifier("similarity", false) {
|
||||
@Override
|
||||
public void modify(MappedFieldType ft) {
|
||||
ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS));
|
||||
ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity()));
|
||||
}
|
||||
},
|
||||
new Modifier("similarity", false) {
|
||||
@Override
|
||||
public void modify(MappedFieldType ft) {
|
||||
ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS));
|
||||
ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity()));
|
||||
}
|
||||
@Override
|
||||
public void normalizeOther(MappedFieldType other) {
|
||||
other.setSimilarity(new BM25SimilarityProvider("bar", Settings.EMPTY, INDEX_SETTINGS));
|
||||
other.setSimilarity(new SimilarityProvider("bar", new BM25Similarity()));
|
||||
}
|
||||
},
|
||||
new Modifier("eager_global_ordinals", true) {
|
||||
|
|
Loading…
Reference in New Issue