Merge pull request #18948 from jimferenczi/bm25
Change default similarity to BM25
This commit is contained in:
commit
cc91014dee
|
@ -36,7 +36,7 @@ import java.util.function.BiFunction;
|
|||
|
||||
public final class SimilarityService extends AbstractIndexComponent {
|
||||
|
||||
public final static String DEFAULT_SIMILARITY = "classic";
|
||||
public final static String DEFAULT_SIMILARITY = "BM25";
|
||||
private final Similarity defaultSimilarity;
|
||||
private final Similarity baseSimilarity;
|
||||
private final Map<String, SimilarityProvider> similarities;
|
||||
|
@ -121,8 +121,8 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
return similarities.get(name);
|
||||
}
|
||||
|
||||
public SimilarityProvider getDefaultSimilarity() {
|
||||
return similarities.get("default");
|
||||
Similarity getDefaultSimilarity() {
|
||||
return defaultSimilarity;
|
||||
}
|
||||
|
||||
static class PerFieldSimilarity extends PerFieldSimilarityWrapper {
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
*/
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -27,7 +29,15 @@ import org.elasticsearch.test.IndexSettingsModule;
|
|||
|
||||
import java.util.Collections;
|
||||
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
public class SimilarityServiceTests extends ESTestCase {
|
||||
public void testDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
|
||||
}
|
||||
|
||||
// Tests #16594
|
||||
public void testOverrideBuiltInSimilarity() {
|
||||
|
@ -53,10 +63,10 @@ public class SimilarityServiceTests extends ESTestCase {
|
|||
}
|
||||
|
||||
// Tests #16594
|
||||
public void testDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().put("index.similarity.default.type", "BM25").build();
|
||||
public void testOverrideDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().put("index.similarity.default.type", "classic").build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
assertTrue(service.getDefaultSimilarity() instanceof BM25SimilarityProvider);
|
||||
assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,25 +47,11 @@ Here we configure the DFRSimilarity so it can be referenced as
|
|||
[float]
|
||||
=== Available similarities
|
||||
|
||||
[float]
|
||||
[[classic-similarity]]
|
||||
==== Classic similarity
|
||||
|
||||
The classic similarity is based on the TF/IDF model. This
|
||||
similarity has the following option:
|
||||
|
||||
`discount_overlaps`::
|
||||
Determines whether overlap tokens (tokens with
|
||||
0 position increment) are ignored when computing norm. By default this
|
||||
is true, meaning overlap tokens do not count when computing norms.
|
||||
|
||||
Type name: `classic`
|
||||
|
||||
[float]
|
||||
[[bm25]]
|
||||
==== BM25 similarity
|
||||
==== BM25 similarity (*default*)
|
||||
|
||||
Another TF/IDF based similarity that has built-in tf normalization and
|
||||
A TF/IDF-based similarity that has built-in tf normalization and
|
||||
is supposed to work better for short fields (like names). See
|
||||
http://en.wikipedia.org/wiki/Okapi_BM25[Okapi_BM25] for more details.
|
||||
This similarity has the following options:
|
||||
|
@ -86,6 +72,20 @@ This similarity has the following options:
|
|||
|
||||
Type name: `BM25`
|
||||
|
||||
[float]
|
||||
[[classic-similarity]]
|
||||
==== Classic similarity
|
||||
|
||||
The classic similarity is based on the TF/IDF model. This
|
||||
similarity has the following option:
|
||||
|
||||
`discount_overlaps`::
|
||||
Determines whether overlap tokens (tokens with
|
||||
0 position increment) are ignored when computing norm. By default this
|
||||
is true, meaning overlap tokens do not count when computing norms.
|
||||
|
||||
Type name: `classic`
|
||||
|
||||
[float]
|
||||
[[drf]]
|
||||
==== DFR similarity
|
||||
|
@ -178,5 +178,5 @@ You can change the default similarity for all fields by putting the following se
|
|||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
index.similarity.default.type: BM25
|
||||
index.similarity.default.type: classic
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -196,3 +196,7 @@ The <<search-request-preference,search preference>> `_prefer_node` has
|
|||
been superseded by `_prefer_nodes`. By specifying a single node,
|
||||
`_prefer_nodes` provides the same functionality as `_prefer_node` but
|
||||
also supports specifying multiple nodes.
|
||||
|
||||
==== Default similarity
|
||||
|
||||
The default similarity has been changed to `BM25`.
|
||||
|
|
Loading…
Reference in New Issue