diff --git a/core/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/core/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 865c0eb685b..4ba2e0d0a38 100644 --- a/core/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/core/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -36,7 +36,7 @@ import java.util.function.BiFunction; public final class SimilarityService extends AbstractIndexComponent { - public final static String DEFAULT_SIMILARITY = "classic"; + public final static String DEFAULT_SIMILARITY = "BM25"; private final Similarity defaultSimilarity; private final Similarity baseSimilarity; private final Map similarities; @@ -121,8 +121,8 @@ public final class SimilarityService extends AbstractIndexComponent { return similarities.get(name); } - public SimilarityProvider getDefaultSimilarity() { - return similarities.get("default"); + Similarity getDefaultSimilarity() { + return defaultSimilarity; } static class PerFieldSimilarity extends PerFieldSimilarityWrapper { diff --git a/core/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/core/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index edb337fd4e6..57d025128d8 100644 --- a/core/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/core/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -18,6 +18,8 @@ */ package org.elasticsearch.index.similarity; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.ClassicSimilarity; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; @@ -27,7 +29,15 @@ import org.elasticsearch.test.IndexSettingsModule; import java.util.Collections; +import static org.hamcrest.Matchers.instanceOf; + public class SimilarityServiceTests 
extends ESTestCase { + public void testDefaultSimilarity() { + Settings settings = Settings.builder().build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); + SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap()); + assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class)); + } // Tests #16594 public void testOverrideBuiltInSimilarity() { @@ -53,10 +63,10 @@ public class SimilarityServiceTests extends ESTestCase { } // Tests #16594 - public void testDefaultSimilarity() { - Settings settings = Settings.builder().put("index.similarity.default.type", "BM25").build(); + public void testOverrideDefaultSimilarity() { + Settings settings = Settings.builder().put("index.similarity.default.type", "classic").build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap()); - assertTrue(service.getDefaultSimilarity() instanceof BM25SimilarityProvider); + assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity); } } diff --git a/docs/reference/index-modules/similarity.asciidoc b/docs/reference/index-modules/similarity.asciidoc index f1250ddf6c1..1833b45a9ba 100644 --- a/docs/reference/index-modules/similarity.asciidoc +++ b/docs/reference/index-modules/similarity.asciidoc @@ -47,25 +47,11 @@ Here we configure the DFRSimilarity so it can be referenced as [float] === Available similarities -[float] -[[classic-similarity]] -==== Classic similarity - -The classic similarity that is based on the TF/IDF model. This -similarity has the following option: - -`discount_overlaps`:: - Determines whether overlap tokens (Tokens with - 0 position increment) are ignored when computing norm. By default this - is true, meaning overlap tokens do not count when computing norms. 
- -Type name: `classic` - [float] [[bm25]] -==== BM25 similarity +==== BM25 similarity (*default*) -Another TF/IDF based similarity that has built-in tf normalization and +TF/IDF based similarity that has built-in tf normalization and is supposed to work better for short fields (like names). See http://en.wikipedia.org/wiki/Okapi_BM25[Okapi_BM25] for more details. This similarity has the following options: @@ -86,6 +72,20 @@ This similarity has the following options: Type name: `BM25` +[float] +[[classic-similarity]] +==== Classic similarity + +The classic similarity that is based on the TF/IDF model. This +similarity has the following option: + +`discount_overlaps`:: + Determines whether overlap tokens (Tokens with + 0 position increment) are ignored when computing norm. By default this + is true, meaning overlap tokens do not count when computing norms. + +Type name: `classic` + [float] [[drf]] ==== DFR similarity @@ -178,5 +178,5 @@ You can change the default similarity for all fields by putting the following se [source,js] -------------------------------------------------- -index.similarity.default.type: BM25 +index.similarity.default.type: classic -------------------------------------------------- diff --git a/docs/reference/migration/migrate_5_0/search.asciidoc b/docs/reference/migration/migrate_5_0/search.asciidoc index 7ebd79e66ca..bb42bc20de5 100644 --- a/docs/reference/migration/migrate_5_0/search.asciidoc +++ b/docs/reference/migration/migrate_5_0/search.asciidoc @@ -196,3 +196,7 @@ The <<search-request-preference,search preference>> `_prefer_node` has been superseded by `_prefer_nodes`. By specifying a single node, `_prefer_nodes` provides the same functionality as `_prefer_node` but also supports specifying multiple nodes. + +==== Default similarity + +The default similarity has been changed from `classic` to `BM25`.