Merge pull request #18948 from jimferenczi/bm25
Change default similarity to BM25
This commit is contained in:
commit
cc91014dee
|
@ -36,7 +36,7 @@ import java.util.function.BiFunction;
|
||||||
|
|
||||||
public final class SimilarityService extends AbstractIndexComponent {
|
public final class SimilarityService extends AbstractIndexComponent {
|
||||||
|
|
||||||
public final static String DEFAULT_SIMILARITY = "classic";
|
public final static String DEFAULT_SIMILARITY = "BM25";
|
||||||
private final Similarity defaultSimilarity;
|
private final Similarity defaultSimilarity;
|
||||||
private final Similarity baseSimilarity;
|
private final Similarity baseSimilarity;
|
||||||
private final Map<String, SimilarityProvider> similarities;
|
private final Map<String, SimilarityProvider> similarities;
|
||||||
|
@ -121,8 +121,8 @@ public final class SimilarityService extends AbstractIndexComponent {
|
||||||
return similarities.get(name);
|
return similarities.get(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
public SimilarityProvider getDefaultSimilarity() {
|
Similarity getDefaultSimilarity() {
|
||||||
return similarities.get("default");
|
return defaultSimilarity;
|
||||||
}
|
}
|
||||||
|
|
||||||
static class PerFieldSimilarity extends PerFieldSimilarityWrapper {
|
static class PerFieldSimilarity extends PerFieldSimilarityWrapper {
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.index.similarity;
|
package org.elasticsearch.index.similarity;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||||
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -27,7 +29,15 @@ import org.elasticsearch.test.IndexSettingsModule;
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.instanceOf;
|
||||||
|
|
||||||
public class SimilarityServiceTests extends ESTestCase {
|
public class SimilarityServiceTests extends ESTestCase {
|
||||||
|
public void testDefaultSimilarity() {
|
||||||
|
Settings settings = Settings.builder().build();
|
||||||
|
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||||
|
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||||
|
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
|
||||||
|
}
|
||||||
|
|
||||||
// Tests #16594
|
// Tests #16594
|
||||||
public void testOverrideBuiltInSimilarity() {
|
public void testOverrideBuiltInSimilarity() {
|
||||||
|
@ -53,10 +63,10 @@ public class SimilarityServiceTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests #16594
|
// Tests #16594
|
||||||
public void testDefaultSimilarity() {
|
public void testOverrideDefaultSimilarity() {
|
||||||
Settings settings = Settings.builder().put("index.similarity.default.type", "BM25").build();
|
Settings settings = Settings.builder().put("index.similarity.default.type", "classic").build();
|
||||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||||
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||||
assertTrue(service.getDefaultSimilarity() instanceof BM25SimilarityProvider);
|
assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,25 +47,11 @@ Here we configure the DFRSimilarity so it can be referenced as
|
||||||
[float]
|
[float]
|
||||||
=== Available similarities
|
=== Available similarities
|
||||||
|
|
||||||
[float]
|
|
||||||
[[classic-similarity]]
|
|
||||||
==== Classic similarity
|
|
||||||
|
|
||||||
The classic similarity that is based on the TF/IDF model. This
|
|
||||||
similarity has the following option:
|
|
||||||
|
|
||||||
`discount_overlaps`::
|
|
||||||
Determines whether overlap tokens (Tokens with
|
|
||||||
0 position increment) are ignored when computing norm. By default this
|
|
||||||
is true, meaning overlap tokens do not count when computing norms.
|
|
||||||
|
|
||||||
Type name: `classic`
|
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[bm25]]
|
[[bm25]]
|
||||||
==== BM25 similarity
|
==== BM25 similarity (*default*)
|
||||||
|
|
||||||
Another TF/IDF based similarity that has built-in tf normalization and
|
A TF/IDF-based similarity that has built-in tf normalization and
|
||||||
is supposed to work better for short fields (like names). See
|
is supposed to work better for short fields (like names). See
|
||||||
http://en.wikipedia.org/wiki/Okapi_BM25[Okapi_BM25] for more details.
|
http://en.wikipedia.org/wiki/Okapi_BM25[Okapi_BM25] for more details.
|
||||||
This similarity has the following options:
|
This similarity has the following options:
|
||||||
|
@ -86,6 +72,20 @@ This similarity has the following options:
|
||||||
|
|
||||||
Type name: `BM25`
|
Type name: `BM25`
|
||||||
|
|
||||||
|
[float]
|
||||||
|
[[classic-similarity]]
|
||||||
|
==== Classic similarity
|
||||||
|
|
||||||
|
The classic similarity that is based on the TF/IDF model. This
|
||||||
|
similarity has the following option:
|
||||||
|
|
||||||
|
`discount_overlaps`::
|
||||||
|
Determines whether overlap tokens (Tokens with
|
||||||
|
0 position increment) are ignored when computing norm. By default this
|
||||||
|
is true, meaning overlap tokens do not count when computing norms.
|
||||||
|
|
||||||
|
Type name: `classic`
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[drf]]
|
[[drf]]
|
||||||
==== DFR similarity
|
==== DFR similarity
|
||||||
|
@ -178,5 +178,5 @@ You can change the default similarity for all fields by putting the following se
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
index.similarity.default.type: BM25
|
index.similarity.default.type: classic
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
|
@ -196,3 +196,7 @@ The <<search-request-preference,search preference>> `_prefer_node` has
|
||||||
been superseded by `_prefer_nodes`. By specifying a single node,
|
been superseded by `_prefer_nodes`. By specifying a single node,
|
||||||
`_prefer_nodes` provides the same functionality as `_prefer_node` but
|
`_prefer_nodes` provides the same functionality as `_prefer_node` but
|
||||||
also supports specifying multiple nodes.
|
also supports specifying multiple nodes.
|
||||||
|
|
||||||
|
==== Default similarity
|
||||||
|
|
||||||
|
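The default similarity has been changed from `classic` to `BM25`. Set `index.similarity.default.type` to `classic` to keep using the previous default.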
|
||||||
|
|
Loading…
Reference in New Issue