From f0cba4fce5cc41a68e6c2b8fb686ee4e6121ed02 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 8 Aug 2017 08:55:12 +0200 Subject: [PATCH] Add a scripted similarity. (#25831) The goal of this similarity is to help users who would like to keep the functionality of the `tf-idf` similarity that we want to remove, or to allow for specific use-cases (disabling idf, disabling tf, disabling length norm, etc.) to not have to build a custom plugin and familiarize themselves with the low-level Lucene API. --- .../metadata/MetaDataIndexUpgradeService.java | 2 +- .../org/elasticsearch/index/IndexModule.java | 14 +- .../index/similarity/ScriptedSimilarity.java | 284 ++++++++++++++++ .../ScriptedSimilarityProvider.java | 58 ++++ .../index/similarity/SimilarityProvider.java | 9 + .../index/similarity/SimilarityService.java | 65 ++-- .../elasticsearch/indices/IndicesService.java | 2 +- .../java/org/elasticsearch/script/Script.java | 17 + .../elasticsearch/script/ScriptModule.java | 2 + .../script/SimilarityScript.java | 45 +++ .../script/SimilarityWeightScript.java | 43 +++ .../elasticsearch/index/IndexModuleTests.java | 2 +- .../elasticsearch/index/codec/CodecTests.java | 2 +- .../index/engine/InternalEngineTests.java | 2 +- .../index/mapper/ParentFieldMapperTests.java | 2 +- .../similarity/ScriptedSimilarityTests.java | 224 ++++++++++++ .../similarity/SimilarityServiceTests.java | 6 +- .../indices/IndicesServiceTests.java | 3 +- .../index-modules/similarity.asciidoc | 319 ++++++++++++++++++ .../painless/org.elasticsearch.txt | 20 ++ .../painless/SimilarityScriptTests.java | 131 +++++++ .../join/query/HasChildQueryBuilderTests.java | 3 +- .../LegacyHasChildQueryBuilderTests.java | 3 +- .../elasticsearch/index/MapperTestUtils.java | 2 +- .../index/shard/IndexShardTestCase.java | 2 +- .../script/MockScriptEngine.java | 57 ++++ .../test/AbstractQueryTestCase.java | 2 +- 27 files changed, 1272 insertions(+), 49 deletions(-) create mode 100644 
core/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarity.java create mode 100644 core/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java create mode 100644 core/src/main/java/org/elasticsearch/script/SimilarityScript.java create mode 100644 core/src/main/java/org/elasticsearch/script/SimilarityWeightScript.java create mode 100644 core/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java create mode 100644 modules/lang-painless/src/test/java/org/elasticsearch/painless/SimilarityScriptTests.java diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index e1383d7de1c..269657367dc 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -140,7 +140,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent { // We cannot instantiate real analysis server at this point because the node might not have // been started yet. 
However, we don't really need real analyzers at this stage - so we can fake it IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings); - SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap()); + SimilarityService similarityService = new SimilarityService(indexSettings, null, Collections.emptyMap()); final NamedAnalyzer fakeDefault = new NamedAnalyzer("default", AnalyzerScope.INDEX, new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/core/src/main/java/org/elasticsearch/index/IndexModule.java b/core/src/main/java/org/elasticsearch/index/IndexModule.java index 02b1f847768..630fe11e0a8 100644 --- a/core/src/main/java/org/elasticsearch/index/IndexModule.java +++ b/core/src/main/java/org/elasticsearch/index/IndexModule.java @@ -69,7 +69,7 @@ import java.util.function.Function; * IndexModule represents the central extension point for index level custom implementations like: *