Change default similarity to BM25
The default similarity was set to `classic`, which refers to TF/IDF, and had not been changed after the upgrade to Lucene 6. Though, could moving to BM25 have some downside for queries that rely on the coordination factor (match_query, multi_match_query)? Relates #18944
This commit is contained in:
parent
82f7bfad98
commit
423291b6bc
|
@ -36,7 +36,7 @@ import java.util.function.BiFunction;
|
|||
|
||||
public final class SimilarityService extends AbstractIndexComponent {
|
||||
|
||||
public final static String DEFAULT_SIMILARITY = "classic";
|
||||
public final static String DEFAULT_SIMILARITY = "BM25";
|
||||
private final Similarity defaultSimilarity;
|
||||
private final Similarity baseSimilarity;
|
||||
private final Map<String, SimilarityProvider> similarities;
|
||||
|
@ -121,8 +121,8 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
return similarities.get(name);
|
||||
}
|
||||
|
||||
public SimilarityProvider getDefaultSimilarity() {
|
||||
return similarities.get("default");
|
||||
Similarity getDefaultSimilarity() {
|
||||
return defaultSimilarity;
|
||||
}
|
||||
|
||||
static class PerFieldSimilarity extends PerFieldSimilarityWrapper {
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
*/
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -27,7 +29,15 @@ import org.elasticsearch.test.IndexSettingsModule;
|
|||
|
||||
import java.util.Collections;
|
||||
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
public class SimilarityServiceTests extends ESTestCase {
|
||||
public void testDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
|
||||
}
|
||||
|
||||
// Tests #16594
|
||||
public void testOverrideBuiltInSimilarity() {
|
||||
|
@ -53,10 +63,10 @@ public class SimilarityServiceTests extends ESTestCase {
|
|||
}
|
||||
|
||||
// Tests #16594
|
||||
public void testDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().put("index.similarity.default.type", "BM25").build();
|
||||
public void testOverrideDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().put("index.similarity.default.type", "classic").build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
assertTrue(service.getDefaultSimilarity() instanceof BM25SimilarityProvider);
|
||||
assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,25 +47,11 @@ Here we configure the DFRSimilarity so it can be referenced as
|
|||
[float]
|
||||
=== Available similarities
|
||||
|
||||
[float]
|
||||
[[classic-similarity]]
|
||||
==== Classic similarity
|
||||
|
||||
The classic similarity that is based on the TF/IDF model. This
|
||||
similarity has the following option:
|
||||
|
||||
`discount_overlaps`::
|
||||
Determines whether overlap tokens (Tokens with
|
||||
0 position increment) are ignored when computing norm. By default this
|
||||
is true, meaning overlap tokens do not count when computing norms.
|
||||
|
||||
Type name: `classic`
|
||||
|
||||
[float]
|
||||
[[bm25]]
|
||||
==== BM25 similarity
|
||||
==== BM25 similarity (*default*)
|
||||
|
||||
Another TF/IDF based similarity that has built-in tf normalization and
|
||||
TF/IDF based similarity that has built-in tf normalization and
|
||||
is supposed to work better for short fields (like names). See
|
||||
http://en.wikipedia.org/wiki/Okapi_BM25[Okapi_BM25] for more details.
|
||||
This similarity has the following options:
|
||||
|
@ -86,6 +72,20 @@ This similarity has the following options:
|
|||
|
||||
Type name: `BM25`
|
||||
|
||||
[float]
|
||||
[[classic-similarity]]
|
||||
==== Classic similarity
|
||||
|
||||
The classic similarity that is based on the TF/IDF model. This
|
||||
similarity has the following option:
|
||||
|
||||
`discount_overlaps`::
|
||||
Determines whether overlap tokens (Tokens with
|
||||
0 position increment) are ignored when computing norm. By default this
|
||||
is true, meaning overlap tokens do not count when computing norms.
|
||||
|
||||
Type name: `classic`
|
||||
|
||||
[float]
|
||||
[[drf]]
|
||||
==== DFR similarity
|
||||
|
@ -178,5 +178,5 @@ You can change the default similarity for all fields by putting the following se
|
|||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
index.similarity.default.type: BM25
|
||||
index.similarity.default.type: classic
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -196,3 +196,7 @@ The <<search-request-preference,search preference>> `_prefer_node` has
|
|||
been superseded by `_prefer_nodes`. By specifying a single node,
|
||||
`_prefer_nodes` provides the same functionality as `_prefer_node` but
|
||||
also supports specifying multiple nodes.
|
||||
|
||||
==== Default similarity
|
||||
|
||||
The default similarity has been changed to `BM25`.
|
||||
|
|
Loading…
Reference in New Issue