Add a scripted similarity. (#25831)
The goal of this similarity is to help users who would like to keep the functionality of the `tf-idf` similarity that we want to remove, or who have specific use cases (disabling idf, disabling tf, disabling length norm, etc.), so that they do not have to build a custom plugin and familiarize themselves with the low-level Lucene API.
Parent: 872526cad3
Commit: f0cba4fce5
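To illustrate the use cases mentioned above, here is a hypothetical sketch, based on the
documentation added later in this commit, of a `scripted` similarity that keeps tf and idf
but drops the length norm. The index name, similarity name and script body are illustrative,
not part of the change:

[source,js]
--------------------------------------------------
PUT index
{
  "settings": {
    "similarity": {
      "tfidf_no_norm": {
        "type": "scripted",
        "script": {
          "source": "double tf = Math.sqrt(doc.freq); double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; return query.boost * tf * idf;"
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "field": {
          "type": "text",
          "similarity": "tfidf_no_norm"
        }
      }
    }
  }
}
--------------------------------------------------

The per-field `similarity` mapping is what binds the field to the scripted similarity,
exactly as in the reference example in the documentation below.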
@ -140,7 +140,7 @@ public class MetaDataIndexUpgradeService extends AbstractComponent {
|
|||
// We cannot instantiate real analysis server at this point because the node might not have
|
||||
// been started yet. However, we don't really need real analyzers at this stage - so we can fake it
|
||||
IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
|
||||
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
SimilarityService similarityService = new SimilarityService(indexSettings, null, Collections.emptyMap());
|
||||
final NamedAnalyzer fakeDefault = new NamedAnalyzer("default", AnalyzerScope.INDEX, new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
|
|
|
@ -69,7 +69,7 @@ import java.util.function.Function;
|
|||
* IndexModule represents the central extension point for index level custom implementations like:
|
||||
* <ul>
|
||||
* <li>{@link SimilarityProvider} - New {@link SimilarityProvider} implementations can be registered through
|
||||
* {@link #addSimilarity(String, TriFunction)}while existing Providers can be referenced through Settings under the
|
||||
* {@link #addSimilarity(String, SimilarityProvider.Factory)} while existing Providers can be referenced through Settings under the
|
||||
* {@link IndexModule#SIMILARITY_SETTINGS_PREFIX} prefix along with the "type" value. For example, to reference the
|
||||
* {@link BM25SimilarityProvider}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
|
||||
* <li>{@link IndexStore} - Custom {@link IndexStore} instances can be registered via {@link #addIndexStore(String, Function)}</li>
|
||||
|
@ -112,7 +112,7 @@ public final class IndexModule {
|
|||
final SetOnce<EngineFactory> engineFactory = new SetOnce<>();
|
||||
private SetOnce<IndexSearcherWrapperFactory> indexSearcherWrapper = new SetOnce<>();
|
||||
private final Set<IndexEventListener> indexEventListeners = new HashSet<>();
|
||||
private final Map<String, TriFunction<String, Settings, Settings, SimilarityProvider>> similarities = new HashMap<>();
|
||||
private final Map<String, SimilarityProvider.Factory> similarities = new HashMap<>();
|
||||
private final Map<String, Function<IndexSettings, IndexStore>> storeTypes = new HashMap<>();
|
||||
private final SetOnce<BiFunction<IndexSettings, IndicesQueryCache, QueryCache>> forceQueryCacheProvider = new SetOnce<>();
|
||||
private final List<SearchOperationListener> searchOperationListeners = new ArrayList<>();
|
||||
|
@ -256,7 +256,7 @@ public final class IndexModule {
|
|||
* @param name Name of the SimilarityProvider
|
||||
* @param similarity SimilarityProvider to register
|
||||
*/
|
||||
public void addSimilarity(String name, TriFunction<String, Settings, Settings, SimilarityProvider> similarity) {
|
||||
public void addSimilarity(String name, SimilarityProvider.Factory similarity) {
|
||||
ensureNotFrozen();
|
||||
if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) {
|
||||
throw new IllegalArgumentException("similarity for name: [" + name + " is already registered");
|
||||
|
@ -361,7 +361,8 @@ public final class IndexModule {
|
|||
} else {
|
||||
queryCache = new DisabledQueryCache(indexSettings);
|
||||
}
|
||||
return new IndexService(indexSettings, environment, xContentRegistry, new SimilarityService(indexSettings, similarities),
|
||||
return new IndexService(indexSettings, environment, xContentRegistry,
|
||||
new SimilarityService(indexSettings, scriptService, similarities),
|
||||
shardStoreDeleter, analysisRegistry, engineFactory.get(), circuitBreakerService, bigArrays, threadPool, scriptService,
|
||||
client, queryCache, store, eventListener, searcherWrapperFactory, mapperRegistry,
|
||||
indicesFieldDataCache, searchOperationListeners, indexOperationListeners, namedWriteableRegistry);
|
||||
|
@ -371,9 +372,10 @@ public final class IndexModule {
|
|||
* creates a new mapper service to do administrative work like mapping updates. This *should not* be used for document parsing.
|
||||
* doing so will result in an exception.
|
||||
*/
|
||||
public MapperService newIndexMapperService(NamedXContentRegistry xContentRegistry, MapperRegistry mapperRegistry) throws IOException {
|
||||
public MapperService newIndexMapperService(NamedXContentRegistry xContentRegistry, MapperRegistry mapperRegistry,
|
||||
ScriptService scriptService) throws IOException {
|
||||
return new MapperService(indexSettings, analysisRegistry.build(indexSettings), xContentRegistry,
|
||||
new SimilarityService(indexSettings, similarities), mapperRegistry,
|
||||
new SimilarityService(indexSettings, scriptService, similarities), mapperRegistry,
|
||||
() -> { throw new UnsupportedOperationException("no index query shard context available"); });
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,284 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
import org.elasticsearch.script.SimilarityScript;
|
||||
import org.elasticsearch.script.SimilarityWeightScript;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A {@link Similarity} implementation that allows scores to be scripted.
|
||||
*/
|
||||
public final class ScriptedSimilarity extends Similarity {
|
||||
|
||||
final String weightScriptSource;
|
||||
final String scriptSource;
|
||||
final SimilarityWeightScript.Factory weightScriptFactory;
|
||||
final SimilarityScript.Factory scriptFactory;
|
||||
final boolean discountOverlaps;
|
||||
|
||||
/** Sole constructor. */
|
||||
public ScriptedSimilarity(String weightScriptString, SimilarityWeightScript.Factory weightScriptFactory,
|
||||
String scriptString, SimilarityScript.Factory scriptFactory, boolean discountOverlaps) {
|
||||
this.weightScriptSource = weightScriptString;
|
||||
this.weightScriptFactory = weightScriptFactory;
|
||||
this.scriptSource = scriptString;
|
||||
this.scriptFactory = scriptFactory;
|
||||
this.discountOverlaps = discountOverlaps;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(weightScript=[" + weightScriptSource + "], script=[" + scriptSource + "])";
|
||||
}
|
||||
|
||||
@Override
|
||||
public long computeNorm(FieldInvertState state) {
|
||||
final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
|
||||
return SmallFloat.intToByte4(numTerms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
Query query = new Query(boost);
|
||||
long docCount = collectionStats.docCount();
|
||||
if (docCount == -1) {
|
||||
docCount = collectionStats.maxDoc();
|
||||
}
|
||||
Field field = new Field(docCount, collectionStats.sumDocFreq(), collectionStats.sumTotalTermFreq());
|
||||
Term[] terms = new Term[termStats.length];
|
||||
for (int i = 0; i < termStats.length; ++i) {
|
||||
terms[i] = new Term(termStats[i].docFreq(), termStats[i].totalTermFreq());
|
||||
}
|
||||
return new Weight(collectionStats.field(), query, field, terms);
|
||||
}
|
||||
|
||||
/** Compute the part of the score that does not depend on the current document using the init_script. */
|
||||
private double computeWeight(Query query, Field field, Term term) throws IOException {
|
||||
if (weightScriptFactory == null) {
|
||||
return 1d;
|
||||
}
|
||||
SimilarityWeightScript weightScript = weightScriptFactory.newInstance();
|
||||
return weightScript.execute(query, field, term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimScorer simScorer(SimWeight w, LeafReaderContext context) throws IOException {
|
||||
Weight weight = (Weight) w;
|
||||
SimScorer[] scorers = new SimScorer[weight.terms.length];
|
||||
for (int i = 0; i < weight.terms.length; ++i) {
|
||||
final Term term = weight.terms[i];
|
||||
final SimilarityScript script = scriptFactory.newInstance();
|
||||
final NumericDocValues norms = context.reader().getNormValues(weight.fieldName);
|
||||
final Doc doc = new Doc(norms);
|
||||
final double scoreWeight = computeWeight(weight.query, weight.field, term);
|
||||
scorers[i] = new SimScorer() {
|
||||
|
||||
@Override
|
||||
public float score(int docID, float freq) throws IOException {
|
||||
doc.docID = docID;
|
||||
doc.freq = freq;
|
||||
return (float) script.execute(scoreWeight, weight.query, weight.field, term, doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computeSlopFactor(int distance) {
|
||||
return 1.0f / (distance + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(int docID, Explanation freq) throws IOException {
|
||||
doc.docID = docID;
|
||||
float score = score(docID, freq.getValue());
|
||||
return Explanation.match(score, "score from " + ScriptedSimilarity.this.toString() +
|
||||
" computed from:",
|
||||
Explanation.match((float) scoreWeight, "weight"),
|
||||
Explanation.match(weight.query.boost, "query.boost"),
|
||||
Explanation.match(weight.field.docCount, "field.docCount"),
|
||||
Explanation.match(weight.field.sumDocFreq, "field.sumDocFreq"),
|
||||
Explanation.match(weight.field.sumTotalTermFreq, "field.sumTotalTermFreq"),
|
||||
Explanation.match(term.docFreq, "term.docFreq"),
|
||||
Explanation.match(term.totalTermFreq, "term.totalTermFreq"),
|
||||
Explanation.match(freq.getValue(), "doc.freq", freq.getDetails()),
|
||||
Explanation.match(doc.getLength(), "doc.length"));
|
||||
}
|
||||
};
|
||||
}
|
||||
if (scorers.length == 1) {
|
||||
return scorers[0];
|
||||
} else {
|
||||
// Sum scores across terms like a BooleanQuery would do
|
||||
return new SimScorer() {
|
||||
|
||||
@Override
|
||||
public float score(int doc, float freq) throws IOException {
|
||||
double sum = 0;
|
||||
for (SimScorer scorer : scorers) {
|
||||
sum += scorer.score(doc, freq);
|
||||
}
|
||||
return (float) sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computeSlopFactor(int distance) {
|
||||
return 1.0f / (distance + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
|
||||
return 1f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc, Explanation freq) throws IOException {
|
||||
Explanation[] subs = new Explanation[scorers.length];
|
||||
for (int i = 0; i < subs.length; ++i) {
|
||||
subs[i] = scorers[i].explain(doc, freq);
|
||||
}
|
||||
return Explanation.match(score(doc, freq.getValue()), "Sum of:", subs);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private static class Weight extends SimWeight {
|
||||
private final String fieldName;
|
||||
private final Query query;
|
||||
private final Field field;
|
||||
private final Term[] terms;
|
||||
|
||||
Weight(String fieldName, Query query, Field field, Term[] terms) {
|
||||
this.fieldName = fieldName;
|
||||
this.query = query;
|
||||
this.field = field;
|
||||
this.terms = terms;
|
||||
}
|
||||
}
|
||||
|
||||
/** Scoring factors that come from the query. */
|
||||
public static class Query {
|
||||
private final float boost;
|
||||
|
||||
private Query(float boost) {
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
/** The boost of the query. It should typically be incorporated into the score as a multiplicative factor. */
|
||||
public float getBoost() {
|
||||
return boost;
|
||||
}
|
||||
}
|
||||
|
||||
/** Statistics that are specific to a given field. */
|
||||
public static class Field {
|
||||
private final long docCount;
|
||||
private final long sumDocFreq;
|
||||
private final long sumTotalTermFreq;
|
||||
|
||||
private Field(long docCount, long sumDocFreq, long sumTotalTermFreq) {
|
||||
this.docCount = docCount;
|
||||
this.sumDocFreq = sumDocFreq;
|
||||
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||
}
|
||||
|
||||
/** Return the number of documents that have a value for this field. */
|
||||
public long getDocCount() {
|
||||
return docCount;
|
||||
}
|
||||
|
||||
/** Return the sum of {@link Term#getDocFreq()} for all terms that exist in this field,
|
||||
* or {@code -1} if this statistic is not available. */
|
||||
public long getSumDocFreq() {
|
||||
return sumDocFreq;
|
||||
}
|
||||
|
||||
/** Return the sum of {@link Term#getTotalTermFreq()} for all terms that exist in this field,
|
||||
* or {@code -1} if this statistic is not available. */
|
||||
public long getSumTotalTermFreq() {
|
||||
return sumTotalTermFreq;
|
||||
}
|
||||
}
|
||||
|
||||
/** Statistics that are specific to a given term. */
|
||||
public static class Term {
|
||||
private final long docFreq;
|
||||
private final long totalTermFreq;
|
||||
|
||||
private Term(long docFreq, long totalTermFreq) {
|
||||
this.docFreq = docFreq;
|
||||
this.totalTermFreq = totalTermFreq;
|
||||
}
|
||||
|
||||
/** Return the number of documents that contain this term in the index. */
|
||||
public long getDocFreq() {
|
||||
return docFreq;
|
||||
}
|
||||
|
||||
/** Return the total number of occurrences of this term in the index, or {@code -1} if this statistic is not available. */
|
||||
public long getTotalTermFreq() {
|
||||
return totalTermFreq;
|
||||
}
|
||||
}
|
||||
|
||||
/** Statistics that are specific to a document. */
|
||||
public static class Doc {
|
||||
private final NumericDocValues norms;
|
||||
private int docID;
|
||||
private float freq;
|
||||
|
||||
private Doc(NumericDocValues norms) {
|
||||
this.norms = norms;
|
||||
}
|
||||
|
||||
/** Return the number of tokens that the current document has in the considered field. */
|
||||
public int getLength() throws IOException {
|
||||
// the length is computed lazily so that similarities that do not use the length are
|
||||
// not penalized
|
||||
if (norms == null) {
|
||||
return 1;
|
||||
} else if (norms.advanceExact(docID)) {
|
||||
return SmallFloat.byte4ToInt((byte) norms.longValue());
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** Return the number of occurrences of the term in the current document for the considered field. */
|
||||
public float getFreq() {
|
||||
return freq;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.script.Script;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
import org.elasticsearch.script.SimilarityScript;
|
||||
import org.elasticsearch.script.SimilarityWeightScript;
|
||||
|
||||
/** Provider of scripted similarities. */
|
||||
public class ScriptedSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final ScriptedSimilarity scriptedSimilarity;
|
||||
|
||||
public ScriptedSimilarityProvider(String name, Settings settings, Settings indexSettings, ScriptService scriptService) {
|
||||
super(name);
|
||||
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||
Settings scriptSettings = settings.getAsSettings("script");
|
||||
Script script = Script.parse(scriptSettings);
|
||||
SimilarityScript.Factory scriptFactory = scriptService.compile(script, SimilarityScript.CONTEXT);
|
||||
Settings weightScriptSettings = settings.getAsSettings("weight_script");
|
||||
Script weightScript = null;
|
||||
SimilarityWeightScript.Factory weightScriptFactory = null;
|
||||
if (weightScriptSettings.isEmpty() == false) {
|
||||
weightScript = Script.parse(weightScriptSettings);
|
||||
weightScriptFactory = scriptService.compile(weightScript, SimilarityWeightScript.CONTEXT);
|
||||
}
|
||||
scriptedSimilarity = new ScriptedSimilarity(
|
||||
weightScript == null ? null : weightScript.toString(),
|
||||
weightScriptFactory == null ? null : weightScriptFactory::newInstance,
|
||||
script.toString(), scriptFactory::newInstance, discountOverlaps);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return scriptedSimilarity;
|
||||
}
|
||||
|
||||
}
|
|
@ -20,6 +20,8 @@
|
|||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
|
||||
/**
|
||||
* Provider for {@link Similarity} instances
|
||||
|
@@ -39,4 +41,11 @@ public interface SimilarityProvider {
     * @return Provided {@link Similarity}
     */
    Similarity get();

    /** Factory of {@link SimilarityProvider} */
    @FunctionalInterface
    interface Factory {
        /** Create a new {@link SimilarityProvider}. */
        SimilarityProvider create(String name, Settings settings, Settings indexSettings, ScriptService scriptService);
    }
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.similarity;
|
|||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.TriFunction;
|
||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -31,6 +30,7 @@ import org.elasticsearch.index.IndexModule;
|
|||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
@ -42,27 +42,38 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
public static final String DEFAULT_SIMILARITY = "BM25";
|
||||
private final Similarity defaultSimilarity;
|
||||
private final Map<String, SimilarityProvider> similarities;
|
||||
private static final Map<String, TriFunction<String, Settings, Settings, SimilarityProvider>> DEFAULTS;
|
||||
public static final Map<String, TriFunction<String, Settings, Settings, SimilarityProvider>> BUILT_IN;
|
||||
private static final Map<String, SimilarityProvider.Factory> DEFAULTS;
|
||||
public static final Map<String, SimilarityProvider.Factory> BUILT_IN;
|
||||
static {
|
||||
Map<String, TriFunction<String, Settings, Settings, SimilarityProvider>> defaults = new HashMap<>();
|
||||
Map<String, TriFunction<String, Settings, Settings, SimilarityProvider>> buildIn = new HashMap<>();
|
||||
defaults.put("classic", ClassicSimilarityProvider::new);
|
||||
defaults.put("BM25", BM25SimilarityProvider::new);
|
||||
defaults.put("boolean", BooleanSimilarityProvider::new);
|
||||
buildIn.put("classic", ClassicSimilarityProvider::new);
|
||||
buildIn.put("BM25", BM25SimilarityProvider::new);
|
||||
buildIn.put("DFR", DFRSimilarityProvider::new);
|
||||
buildIn.put("IB", IBSimilarityProvider::new);
|
||||
buildIn.put("LMDirichlet", LMDirichletSimilarityProvider::new);
|
||||
buildIn.put("LMJelinekMercer", LMJelinekMercerSimilarityProvider::new);
|
||||
buildIn.put("DFI", DFISimilarityProvider::new);
|
||||
Map<String, SimilarityProvider.Factory> defaults = new HashMap<>();
|
||||
Map<String, SimilarityProvider.Factory> buildIn = new HashMap<>();
|
||||
defaults.put("classic",
|
||||
(name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings));
|
||||
defaults.put("BM25",
|
||||
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
|
||||
defaults.put("boolean",
|
||||
(name, settings, indexSettings, scriptService) -> new BooleanSimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("classic",
|
||||
(name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("BM25",
|
||||
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("DFR",
|
||||
(name, settings, indexSettings, scriptService) -> new DFRSimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("IB",
|
||||
(name, settings, indexSettings, scriptService) -> new IBSimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("LMDirichlet",
|
||||
(name, settings, indexSettings, scriptService) -> new LMDirichletSimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("LMJelinekMercer",
|
||||
(name, settings, indexSettings, scriptService) -> new LMJelinekMercerSimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("DFI",
|
||||
(name, settings, indexSettings, scriptService) -> new DFISimilarityProvider(name, settings, indexSettings));
|
||||
buildIn.put("scripted", ScriptedSimilarityProvider::new);
|
||||
DEFAULTS = Collections.unmodifiableMap(defaults);
|
||||
BUILT_IN = Collections.unmodifiableMap(buildIn);
|
||||
}
|
||||
|
||||
public SimilarityService(IndexSettings indexSettings,
|
||||
Map<String, TriFunction<String, Settings, Settings, SimilarityProvider>> similarities) {
|
||||
public SimilarityService(IndexSettings indexSettings, ScriptService scriptService,
|
||||
Map<String, SimilarityProvider.Factory> similarities) {
|
||||
super(indexSettings);
|
||||
Map<String, SimilarityProvider> providers = new HashMap<>(similarities.size());
|
||||
Map<String, Settings> similaritySettings = this.indexSettings.getSettings().getGroups(IndexModule.SIMILARITY_SETTINGS_PREFIX);
|
||||
|
@ -79,14 +90,12 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
} else if ((similarities.containsKey(typeName) || BUILT_IN.containsKey(typeName)) == false) {
|
||||
throw new IllegalArgumentException("Unknown Similarity type [" + typeName + "] for [" + name + "]");
|
||||
}
|
||||
TriFunction<String, Settings, Settings, SimilarityProvider> defaultFactory = BUILT_IN.get(typeName);
|
||||
TriFunction<String, Settings, Settings, SimilarityProvider> factory = similarities.getOrDefault(typeName, defaultFactory);
|
||||
if (providerSettings == null) {
|
||||
providerSettings = Settings.Builder.EMPTY_SETTINGS;
|
||||
}
|
||||
providers.put(name, factory.apply(name, providerSettings, indexSettings.getSettings()));
|
||||
SimilarityProvider.Factory defaultFactory = BUILT_IN.get(typeName);
|
||||
SimilarityProvider.Factory factory = similarities.getOrDefault(typeName, defaultFactory);
|
||||
providers.put(name, factory.create(name, providerSettings, indexSettings.getSettings(), scriptService));
|
||||
}
|
||||
Map<String, SimilarityProvider> providerMapping = addSimilarities(similaritySettings, indexSettings.getSettings(), DEFAULTS);
|
||||
Map<String, SimilarityProvider> providerMapping = addSimilarities(similaritySettings, indexSettings.getSettings(), scriptService,
|
||||
DEFAULTS);
|
||||
for (Map.Entry<String, SimilarityProvider> entry : providerMapping.entrySet()) {
|
||||
// Avoid overwriting custom providers for indices older than v5.0
|
||||
if (providers.containsKey(entry.getKey()) && indexSettings.getIndexVersionCreated().before(Version.V_5_0_0_alpha1)) {
|
||||
|
@ -109,16 +118,16 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
}
|
||||
|
||||
private Map<String, SimilarityProvider> addSimilarities(Map<String, Settings> similaritySettings, Settings indexSettings,
|
||||
Map<String, TriFunction<String, Settings, Settings, SimilarityProvider>> similarities) {
|
||||
ScriptService scriptService, Map<String, SimilarityProvider.Factory> similarities) {
|
||||
Map<String, SimilarityProvider> providers = new HashMap<>(similarities.size());
|
||||
for (Map.Entry<String, TriFunction<String, Settings, Settings, SimilarityProvider>> entry : similarities.entrySet()) {
|
||||
for (Map.Entry<String, SimilarityProvider.Factory> entry : similarities.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
TriFunction<String, Settings, Settings, SimilarityProvider> factory = entry.getValue();
|
||||
SimilarityProvider.Factory factory = entry.getValue();
|
||||
Settings providerSettings = similaritySettings.get(name);
|
||||
if (providerSettings == null) {
|
||||
providerSettings = Settings.Builder.EMPTY_SETTINGS;
|
||||
}
|
||||
providers.put(name, factory.apply(name, providerSettings, indexSettings));
|
||||
providers.put(name, factory.create(name, providerSettings, indexSettings, scriptService));
|
||||
}
|
||||
return providers;
|
||||
}
|
||||
|
|
|
@ -467,7 +467,7 @@ public class IndicesService extends AbstractLifecycleComponent
|
|||
final IndexSettings idxSettings = new IndexSettings(indexMetaData, this.settings, indexScopeSetting);
|
||||
final IndexModule indexModule = new IndexModule(idxSettings, analysisRegistry);
|
||||
pluginsService.onIndexModule(indexModule);
|
||||
return indexModule.newIndexMapperService(xContentRegistry, mapperRegistry);
|
||||
return indexModule.newIndexMapperService(xContentRegistry, mapperRegistry, scriptService);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -25,6 +25,8 @@ import org.elasticsearch.common.bytes.BytesArray;
|
|||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser.ValueType;
|
||||
import org.elasticsearch.common.xcontent.ToXContentObject;
|
||||
|
@ -33,6 +35,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
|
|||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
|
@ -269,6 +272,20 @@ public final class Script implements ToXContentObject, Writeable {
|
|||
return parse(parser, DEFAULT_SCRIPT_LANG);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the script configured in the given settings.
|
||||
*/
|
||||
public static Script parse(Settings settings) {
|
||||
try {
|
||||
XContentBuilder builder = JsonXContent.contentBuilder();
|
||||
builder.map(settings.getAsStructuredMap());
|
||||
return parse(JsonXContent.jsonXContent.createParser(NamedXContentRegistry.EMPTY, builder.bytes()));
|
||||
} catch (IOException e) {
|
||||
// it should not happen since we are not actually reading from a stream but an in-memory byte[]
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This will parse XContent into a {@link Script}. The following formats can be parsed:
|
||||
*
|
||||
|
|
|
@ -45,6 +45,8 @@ public class ScriptModule {
|
|||
ExecutableScript.AGGS_CONTEXT,
|
||||
ExecutableScript.UPDATE_CONTEXT,
|
||||
ExecutableScript.INGEST_CONTEXT,
|
||||
SimilarityScript.CONTEXT,
|
||||
SimilarityWeightScript.CONTEXT,
|
||||
TemplateScript.CONTEXT
|
||||
).collect(Collectors.toMap(c -> c.name, Function.identity()));
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.script;
|
||||
|
||||
import org.elasticsearch.index.similarity.ScriptedSimilarity;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** A script that is used to build {@link ScriptedSimilarity} instances. */
|
||||
public abstract class SimilarityScript {
|
||||
|
||||
/** Compute the score.
|
||||
* @param weight weight computed by the {@link SimilarityWeightScript} if any, or 1.
|
||||
* @param query scoring factors that come from the query
|
||||
* @param field field-level statistics
|
||||
* @param term term-level statistics
|
||||
* @param doc per-document statistics
|
||||
*/
|
||||
public abstract double execute(double weight, ScriptedSimilarity.Query query,
|
||||
ScriptedSimilarity.Field field, ScriptedSimilarity.Term term, ScriptedSimilarity.Doc doc) throws IOException;
|
||||
|
||||
public interface Factory {
|
||||
SimilarityScript newInstance();
|
||||
}
|
||||
|
||||
public static final String[] PARAMETERS = new String[] {"weight", "query", "field", "term", "doc"};
|
||||
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("similarity", Factory.class);
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.script;
|
||||
|
||||
import org.elasticsearch.index.similarity.ScriptedSimilarity;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** A script that is used to compute scoring factors that are the same for all documents. */
|
||||
public abstract class SimilarityWeightScript {
|
||||
|
||||
/** Compute the weight.
|
||||
* @param query scoring factors that come from the query
|
||||
* @param field field-level statistics
|
||||
* @param term term-level statistics
|
||||
*/
|
||||
public abstract double execute(ScriptedSimilarity.Query query, ScriptedSimilarity.Field field,
|
||||
ScriptedSimilarity.Term term) throws IOException;
|
||||
|
||||
public interface Factory {
|
||||
SimilarityWeightScript newInstance();
|
||||
}
|
||||
|
||||
public static final String[] PARAMETERS = new String[] {"query", "field", "term"};
|
||||
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("similarity_weight", Factory.class);
|
||||
}
|
|
@ -284,7 +284,7 @@ public class IndexModuleTests extends ESTestCase {
|
|||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
IndexModule module = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry);
|
||||
module.addSimilarity("test_similarity", (string, providerSettings, indexLevelSettings) -> new SimilarityProvider() {
|
||||
module.addSimilarity("test_similarity", (string, providerSettings, indexLevelSettings, scriptService) -> new SimilarityProvider() {
|
||||
@Override
|
||||
public String name() {
|
||||
return string;
|
||||
|
|
|
@ -90,7 +90,7 @@ public class CodecTests extends ESTestCase {
|
|||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
|
||||
.build();
|
||||
IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings);
|
||||
SimilarityService similarityService = new SimilarityService(settings, Collections.emptyMap());
|
||||
SimilarityService similarityService = new SimilarityService(settings, null, Collections.emptyMap());
|
||||
IndexAnalyzers indexAnalyzers = createTestAnalysis(settings, nodeSettings).indexAnalyzers;
|
||||
MapperRegistry mapperRegistry = new MapperRegistry(Collections.emptyMap(), Collections.emptyMap());
|
||||
MapperService service = new MapperService(settings, indexAnalyzers, xContentRegistry(), similarityService, mapperRegistry,
|
||||
|
|
|
@ -2774,7 +2774,7 @@ public class InternalEngineTests extends ESTestCase {
|
|||
public TranslogHandler(NamedXContentRegistry xContentRegistry, IndexSettings indexSettings) {
|
||||
NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
|
||||
IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, defaultAnalyzer, defaultAnalyzer, defaultAnalyzer, Collections.emptyMap(), Collections.emptyMap());
|
||||
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
SimilarityService similarityService = new SimilarityService(indexSettings, null, Collections.emptyMap());
|
||||
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
|
||||
mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,
|
||||
() -> null);
|
||||
|
|
|
@ -113,7 +113,7 @@ public class ParentFieldMapperTests extends ESSingleNodeTestCase {
|
|||
NamedAnalyzer namedAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer());
|
||||
IndexAnalyzers indexAnalyzers = new IndexAnalyzers(indexSettings, namedAnalyzer, namedAnalyzer, namedAnalyzer,
|
||||
Collections.emptyMap(), Collections.emptyMap());
|
||||
SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
SimilarityService similarityService = new SimilarityService(indexSettings, null, Collections.emptyMap());
|
||||
MapperService mapperService = new MapperService(indexSettings, indexAnalyzers, xContentRegistry(), similarityService,
|
||||
new IndicesModule(emptyList()).getMapperRegistry(), () -> null);
|
||||
XContentBuilder mappingSource = jsonBuilder().startObject().startObject("some_type")
|
||||
|
|
|
@ -0,0 +1,224 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.script.SimilarityScript;
|
||||
import org.elasticsearch.script.SimilarityWeightScript;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
public class ScriptedSimilarityTests extends ESTestCase {
|
||||
|
||||
public void testSameNormsAsBM25CountOverlaps() {
|
||||
doTestSameNormsAsBM25(false);
|
||||
}
|
||||
|
||||
public void testSameNormsAsBM25DiscountOverlaps() {
|
||||
doTestSameNormsAsBM25(true);
|
||||
}
|
||||
|
||||
private void doTestSameNormsAsBM25(boolean discountOverlaps) {
|
||||
ScriptedSimilarity sim1 = new ScriptedSimilarity("foobar", null, "foobaz", null, discountOverlaps);
|
||||
BM25Similarity sim2 = new BM25Similarity();
|
||||
sim2.setDiscountOverlaps(discountOverlaps);
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
final int length = TestUtil.nextInt(random(), 1, 100);
|
||||
final int position = random().nextInt(length);
|
||||
final int numOverlaps = random().nextInt(length);
|
||||
FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", position, length, numOverlaps, 100);
|
||||
assertEquals(
|
||||
sim2.computeNorm(state),
|
||||
sim1.computeNorm(state),
|
||||
0f);
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasics() throws IOException {
|
||||
final AtomicBoolean called = new AtomicBoolean();
|
||||
SimilarityScript.Factory scriptFactory = () -> {
|
||||
return new SimilarityScript() {
|
||||
|
||||
@Override
|
||||
public double execute(double weight, ScriptedSimilarity.Query query,
|
||||
ScriptedSimilarity.Field field, ScriptedSimilarity.Term term,
|
||||
ScriptedSimilarity.Doc doc) throws IOException {
|
||||
assertEquals(1, weight, 0);
|
||||
assertNotNull(doc);
|
||||
assertEquals(2f, doc.getFreq(), 0);
|
||||
assertEquals(3, doc.getLength(), 0);
|
||||
assertNotNull(field);
|
||||
assertEquals(3, field.getDocCount());
|
||||
assertEquals(5, field.getSumDocFreq());
|
||||
assertEquals(6, field.getSumTotalTermFreq());
|
||||
assertNotNull(term);
|
||||
assertEquals(2, term.getDocFreq());
|
||||
assertEquals(3, term.getTotalTermFreq());
|
||||
assertNotNull(query);
|
||||
assertEquals(3.2f, query.getBoost(), 0);
|
||||
called.set(true);
|
||||
return 42f;
|
||||
}
|
||||
|
||||
};
|
||||
};
|
||||
ScriptedSimilarity sim = new ScriptedSimilarity("foobar", null, "foobaz", scriptFactory, true);
|
||||
Directory dir = new RAMDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("f", "foo bar", Store.NO));
|
||||
doc.add(new StringField("match", "no", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new TextField("f", "foo foo bar", Store.NO));
|
||||
doc.add(new StringField("match", "yes", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new TextField("f", "bar", Store.NO));
|
||||
doc.add(new StringField("match", "no", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
w.close();
|
||||
IndexSearcher searcher = new IndexSearcher(r);
|
||||
searcher.setSimilarity(sim);
|
||||
Query query = new BoostQuery(new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("f", "foo")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("match", "yes")), Occur.FILTER)
|
||||
.build(), 3.2f);
|
||||
TopDocs topDocs = searcher.search(query, 1);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertTrue(called.get());
|
||||
assertEquals(42, topDocs.scoreDocs[0].score, 0);
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testInitScript() throws IOException {
|
||||
final AtomicBoolean initCalled = new AtomicBoolean();
|
||||
SimilarityWeightScript.Factory weightScriptFactory = () -> {
|
||||
return new SimilarityWeightScript() {
|
||||
|
||||
@Override
|
||||
public double execute(ScriptedSimilarity.Query query, ScriptedSimilarity.Field field,
|
||||
ScriptedSimilarity.Term term) throws IOException {
|
||||
assertNotNull(field);
|
||||
assertEquals(3, field.getDocCount());
|
||||
assertEquals(5, field.getSumDocFreq());
|
||||
assertEquals(6, field.getSumTotalTermFreq());
|
||||
assertNotNull(term);
|
||||
assertEquals(2, term.getDocFreq());
|
||||
assertEquals(3, term.getTotalTermFreq());
|
||||
assertNotNull(query);
|
||||
assertEquals(3.2f, query.getBoost(), 0);
|
||||
initCalled.set(true);
|
||||
return 28;
|
||||
}
|
||||
|
||||
};
|
||||
};
|
||||
final AtomicBoolean called = new AtomicBoolean();
|
||||
SimilarityScript.Factory scriptFactory = () -> {
|
||||
return new SimilarityScript() {
|
||||
|
||||
@Override
|
||||
public double execute(double weight, ScriptedSimilarity.Query query,
|
||||
ScriptedSimilarity.Field field, ScriptedSimilarity.Term term,
|
||||
ScriptedSimilarity.Doc doc) throws IOException {
|
||||
assertEquals(28, weight, 0d);
|
||||
assertNotNull(doc);
|
||||
assertEquals(2f, doc.getFreq(), 0);
|
||||
assertEquals(3, doc.getLength(), 0);
|
||||
assertNotNull(field);
|
||||
assertEquals(3, field.getDocCount());
|
||||
assertEquals(5, field.getSumDocFreq());
|
||||
assertEquals(6, field.getSumTotalTermFreq());
|
||||
assertNotNull(term);
|
||||
assertEquals(2, term.getDocFreq());
|
||||
assertEquals(3, term.getTotalTermFreq());
|
||||
assertNotNull(query);
|
||||
assertEquals(3.2f, query.getBoost(), 0);
|
||||
called.set(true);
|
||||
return 42;
|
||||
}
|
||||
|
||||
};
|
||||
};
|
||||
ScriptedSimilarity sim = new ScriptedSimilarity("foobar", weightScriptFactory, "foobaz", scriptFactory, true);
|
||||
Directory dir = new RAMDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim));
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("f", "foo bar", Store.NO));
|
||||
doc.add(new StringField("match", "no", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new TextField("f", "foo foo bar", Store.NO));
|
||||
doc.add(new StringField("match", "yes", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new TextField("f", "bar", Store.NO));
|
||||
doc.add(new StringField("match", "no", Store.NO));
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
w.close();
|
||||
IndexSearcher searcher = new IndexSearcher(r);
|
||||
searcher.setSimilarity(sim);
|
||||
Query query = new BoostQuery(new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("f", "foo")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("match", "yes")), Occur.FILTER)
|
||||
.build(), 3.2f);
|
||||
TopDocs topDocs = searcher.search(query, 1);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
assertTrue(initCalled.get());
|
||||
assertTrue(called.get());
|
||||
assertEquals(42, topDocs.scoreDocs[0].score, 0);
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@ -33,7 +33,7 @@ public class SimilarityServiceTests extends ESTestCase {
|
|||
public void testDefaultSimilarity() {
|
||||
Settings settings = Settings.builder().build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
|
||||
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ public class SimilarityServiceTests extends ESTestCase {
|
|||
Settings settings = Settings.builder().put("index.similarity.BM25.type", "classic").build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
try {
|
||||
new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
new SimilarityService(indexSettings, null, Collections.emptyMap());
|
||||
fail("can't override bm25");
|
||||
} catch (IllegalArgumentException ex) {
|
||||
assertEquals(ex.getMessage(), "Cannot redefine built-in Similarity [BM25]");
|
||||
|
@ -53,7 +53,7 @@ public class SimilarityServiceTests extends ESTestCase {
|
|||
Settings settings = Settings.builder().put("index.similarity.default.type", "classic")
|
||||
.build();
|
||||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
|
||||
SimilarityService service = new SimilarityService(indexSettings, Collections.emptyMap());
|
||||
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
|
||||
assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -105,7 +105,8 @@ public class IndicesServiceTests extends ESSingleNodeTestCase {
|
|||
@Override
|
||||
public void onIndexModule(IndexModule indexModule) {
|
||||
super.onIndexModule(indexModule);
|
||||
indexModule.addSimilarity("fake-similarity", BM25SimilarityProvider::new);
|
||||
indexModule.addSimilarity("fake-similarity",
|
||||
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -163,6 +163,325 @@ for title queries and `0.7` for long queries. Default to `0.1`. When value appro
|
|||
|
||||
Type name: `LMJelinekMercer`
|
||||
|
||||
[float]
[[scripted_similarity]]
==== Scripted similarity

A similarity that allows you to use a script in order to specify how scores
should be computed. For instance, the below example shows how to reimplement
TF-IDF:

[source,js]
|
||||
--------------------------------------------------
|
||||
PUT index
|
||||
{
|
||||
"settings": {
|
||||
"number_of_shards": 1,
|
||||
"similarity": {
|
||||
"scripted_tfidf": {
|
||||
"type": "scripted",
|
||||
"script": {
|
||||
"source": "double tf = Math.sqrt(doc.freq); double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; double norm = 1/Math.sqrt(doc.length); return query.boost * tf * idf * norm;"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"doc": {
|
||||
"properties": {
|
||||
"field": {
|
||||
"type": "text",
|
||||
"similarity": "scripted_tfidf"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PUT index/doc/1
|
||||
{
|
||||
"field": "foo bar foo"
|
||||
}
|
||||
|
||||
PUT index/doc/2
|
||||
{
|
||||
"field": "bar baz"
|
||||
}
|
||||
|
||||
POST index/_refresh
|
||||
|
||||
GET index/_search?explain=true
|
||||
{
|
||||
"query": {
|
||||
"query_string": {
|
||||
"query": "foo^1.7",
|
||||
"default_field": "field"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
Which yields:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"took": 12,
|
||||
"timed_out": false,
|
||||
"_shards": {
|
||||
"total": 1,
|
||||
"successful": 1,
|
||||
"skipped": 0,
|
||||
"failed": 0
|
||||
},
|
||||
"hits": {
|
||||
"total": 1,
|
||||
"max_score": 1.9508477,
|
||||
"hits": [
|
||||
{
|
||||
"_shard": "[index][0]",
|
||||
"_node": "OzrdjxNtQGaqs4DmioFw9A",
|
||||
"_index": "index",
|
||||
"_type": "doc",
|
||||
"_id": "1",
|
||||
"_score": 1.9508477,
|
||||
"_source": {
|
||||
"field": "foo bar foo"
|
||||
},
|
||||
"_explanation": {
|
||||
"value": 1.9508477,
|
||||
"description": "weight(field:foo in 0) [PerFieldSimilarity], result of:",
|
||||
"details": [
|
||||
{
|
||||
"value": 1.9508477,
|
||||
"description": "score from ScriptedSimilarity(weightScript=[null], script=[Script{type=inline, lang='painless', idOrCode='double tf = Math.sqrt(doc.freq); double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; double norm = 1/Math.sqrt(doc.length); return query.boost * tf * idf * norm;', options={}, params={}}]) computed from:",
|
||||
"details": [
|
||||
{
|
||||
"value": 1.0,
|
||||
"description": "weight",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 1.7,
|
||||
"description": "query.boost",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 2.0,
|
||||
"description": "field.docCount",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 4.0,
|
||||
"description": "field.sumDocFreq",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 5.0,
|
||||
"description": "field.sumTotalTermFreq",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 1.0,
|
||||
"description": "term.docFreq",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 2.0,
|
||||
"description": "term.totalTermFreq",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 2.0,
|
||||
"description": "doc.freq",
|
||||
"details": []
|
||||
},
|
||||
{
|
||||
"value": 3.0,
|
||||
"description": "doc.length",
|
||||
"details": []
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[s/"took": 12/"took" : $body.took/]
|
||||
// TESTRESPONSE[s/OzrdjxNtQGaqs4DmioFw9A/$body.hits.hits.0._node/]
|
||||
|
||||
You might have noticed that a significant part of the script depends on
statistics that are the same for every document. It is possible to make the
above slightly more efficient by providing a `weight_script`, which computes
the document-independent part of the score and makes it available under the
`weight` variable. When no `weight_script` is provided, `weight` is equal to
`1`. The `weight_script` has access to the same variables as the `script`
except `doc`, since it is supposed to compute a document-independent
contribution to the score.

The below configuration will give the same tf-idf scores but is slightly
more efficient:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT index
|
||||
{
|
||||
"settings": {
|
||||
"number_of_shards": 1,
|
||||
"similarity": {
|
||||
"scripted_tfidf": {
|
||||
"type": "scripted",
|
||||
"weight_script": {
|
||||
"source": "double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; return query.boost * idf;"
|
||||
},
|
||||
"script": {
|
||||
"source": "double tf = Math.sqrt(doc.freq); double norm = 1/Math.sqrt(doc.length); return weight * tf * norm;"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"doc": {
|
||||
"properties": {
|
||||
"field": {
|
||||
"type": "text",
|
||||
"similarity": "scripted_tfidf"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE

////////////////////

[source,js]
--------------------------------------------------
PUT index/doc/1
{
  "field": "foo bar foo"
}

PUT index/doc/2
{
  "field": "bar baz"
}

POST index/_refresh

GET index/_search?explain=true
{
  "query": {
    "query_string": {
      "query": "foo^1.7",
      "default_field": "field"
    }
  }
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

[source,js]
--------------------------------------------------
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1.9508477,
    "hits": [
      {
        "_shard": "[index][0]",
        "_node": "OzrdjxNtQGaqs4DmioFw9A",
        "_index": "index",
        "_type": "doc",
        "_id": "1",
        "_score": 1.9508477,
        "_source": {
          "field": "foo bar foo"
        },
        "_explanation": {
          "value": 1.9508477,
          "description": "weight(field:foo in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 1.9508477,
              "description": "score from ScriptedSimilarity(weightScript=[Script{type=inline, lang='painless', idOrCode='double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; return query.boost * idf;', options={}, params={}}], script=[Script{type=inline, lang='painless', idOrCode='double tf = Math.sqrt(doc.freq); double norm = 1/Math.sqrt(doc.length); return weight * tf * norm;', options={}, params={}}]) computed from:",
              "details": [
                {
                  "value": 2.3892908,
                  "description": "weight",
                  "details": []
                },
                {
                  "value": 1.7,
                  "description": "query.boost",
                  "details": []
                },
                {
                  "value": 2.0,
                  "description": "field.docCount",
                  "details": []
                },
                {
                  "value": 4.0,
                  "description": "field.sumDocFreq",
                  "details": []
                },
                {
                  "value": 5.0,
                  "description": "field.sumTotalTermFreq",
                  "details": []
                },
                {
                  "value": 1.0,
                  "description": "term.docFreq",
                  "details": []
                },
                {
                  "value": 2.0,
                  "description": "term.totalTermFreq",
                  "details": []
                },
                {
                  "value": 2.0,
                  "description": "doc.freq",
                  "details": []
                },
                {
                  "value": 3.0,
                  "description": "doc.length",
                  "details": []
                }
              ]
            }
          ]
        }
      }
    ]
  }
}
--------------------------------------------------
// TESTRESPONSE[s/"took": 1/"took" : $body.took/]
// TESTRESPONSE[s/OzrdjxNtQGaqs4DmioFw9A/$body.hits.hits.0._node/]

////////////////////
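
The numbers reported in the explanation above can be checked by evaluating the two scripts by hand. The following plain-Java sketch (ordinary Java arithmetic rather than Painless; the class name is invented for illustration) plugs in the values listed under `_explanation`:

[source,java]
--------------------------------------------------
public class ScriptedTfIdfCheck {
    public static void main(String[] args) {
        // Values taken from the _explanation details above.
        double boost = 1.7, docCount = 2.0, docFreq = 1.0, freq = 2.0, length = 3.0;

        // weight_script: query.boost * idf, computed once per query term
        double idf = Math.log((docCount + 1.0) / (docFreq + 1.0)) + 1.0;
        double weight = boost * idf;            // ~2.3892908, the reported "weight"

        // script: weight * tf * norm, computed per matching document
        double tf = Math.sqrt(freq);
        double norm = 1 / Math.sqrt(length);
        double score = weight * tf * norm;      // ~1.9508477, the reported "_score"

        System.out.println(weight + " " + score);
    }
}
--------------------------------------------------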

Type name: `scripted`

[float]
[[default-base]]
==== Default Similarity

@ -165,3 +165,23 @@ class org.elasticsearch.search.lookup.FieldLookup -> org.elasticsearch.search.lo
  List getValues()
  boolean isEmpty()
}

class org.elasticsearch.index.similarity.ScriptedSimilarity.Query -> org.elasticsearch.index.similarity.ScriptedSimilarity$Query extends Object {
  float getBoost()
}

class org.elasticsearch.index.similarity.ScriptedSimilarity.Field -> org.elasticsearch.index.similarity.ScriptedSimilarity$Field extends Object {
  long getDocCount()
  long getSumDocFreq()
  long getSumTotalTermFreq()
}

class org.elasticsearch.index.similarity.ScriptedSimilarity.Term -> org.elasticsearch.index.similarity.ScriptedSimilarity$Term extends Object {
  long getDocFreq()
  long getTotalTermFreq()
}

class org.elasticsearch.index.similarity.ScriptedSimilarity.Doc -> org.elasticsearch.index.similarity.ScriptedSimilarity$Doc extends Object {
  int getLength()
  float getFreq()
}

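These whitelist entries are what the `weight_script` and `script` sources can touch: per-query statistics on `query`, `field` and `term`, and per-document statistics on `doc`. For illustration only, the documented tf-idf scripts correspond roughly to the following implementations of the Java script base classes (the package and class names here are invented for the example, not part of this change):

[source,java]
--------------------------------------------------
package org.example.similarity;

import java.io.IOException;

import org.elasticsearch.index.similarity.ScriptedSimilarity.Doc;
import org.elasticsearch.index.similarity.ScriptedSimilarity.Field;
import org.elasticsearch.index.similarity.ScriptedSimilarity.Query;
import org.elasticsearch.index.similarity.ScriptedSimilarity.Term;
import org.elasticsearch.script.SimilarityScript;
import org.elasticsearch.script.SimilarityWeightScript;

/** Per-term part of the score: query.boost * idf, evaluated once per query term. */
class TfIdfWeight extends SimilarityWeightScript {
    @Override
    public double execute(Query query, Field field, Term term) throws IOException {
        double idf = Math.log((field.getDocCount() + 1.0) / (term.getDocFreq() + 1.0)) + 1.0;
        return query.getBoost() * idf;
    }
}

/** Per-document part of the score: weight * tf * norm. */
class TfIdfScore extends SimilarityScript {
    @Override
    public double execute(double weight, Query query, Field field, Term term, Doc doc) throws IOException {
        double tf = Math.sqrt(doc.getFreq());
        double norm = 1 / Math.sqrt(doc.getLength());
        return weight * tf * norm;
    }
}
--------------------------------------------------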
@ -0,0 +1,131 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.painless;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.index.similarity.ScriptedSimilarity;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.SimilarityScript;
import org.elasticsearch.script.SimilarityWeightScript;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;

public class SimilarityScriptTests extends ScriptTestCase {

    @Override
    protected Collection<ScriptContext<?>> scriptContexts() {
        return Arrays.asList(SimilarityScript.CONTEXT, SimilarityWeightScript.CONTEXT);
    }

    public void testBasics() throws IOException {
        SimilarityScript.Factory factory = scriptEngine.compile(
                "foobar", "return query.boost * doc.freq / doc.length", SimilarityScript.CONTEXT, Collections.emptyMap());
        ScriptedSimilarity sim = new ScriptedSimilarity("foobar", null, "foobaz", factory::newInstance, true);
        Directory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim));

        Document doc = new Document();
        doc.add(new TextField("f", "foo bar", Store.NO));
        doc.add(new StringField("match", "no", Store.NO));
        w.addDocument(doc);

        doc = new Document();
        doc.add(new TextField("f", "foo foo bar", Store.NO));
        doc.add(new StringField("match", "yes", Store.NO));
        w.addDocument(doc);

        doc = new Document();
        doc.add(new TextField("f", "bar", Store.NO));
        doc.add(new StringField("match", "no", Store.NO));
        w.addDocument(doc);

        IndexReader r = DirectoryReader.open(w);
        w.close();
        IndexSearcher searcher = new IndexSearcher(r);
        searcher.setSimilarity(sim);
        Query query = new BoostQuery(new BooleanQuery.Builder()
                .add(new TermQuery(new Term("f", "foo")), Occur.SHOULD)
                .add(new TermQuery(new Term("match", "yes")), Occur.FILTER)
                .build(), 3.2f);
        TopDocs topDocs = searcher.search(query, 1);
        assertEquals(1, topDocs.totalHits);
        assertEquals((float) (3.2 * 2 / 3), topDocs.scoreDocs[0].score, 0);
        w.close();
        dir.close();
    }

    public void testWeightScript() throws IOException {
        SimilarityWeightScript.Factory weightFactory = scriptEngine.compile(
                "foobar", "return query.boost", SimilarityWeightScript.CONTEXT, Collections.emptyMap());
        SimilarityScript.Factory factory = scriptEngine.compile(
                "foobar", "return weight * doc.freq / doc.length", SimilarityScript.CONTEXT, Collections.emptyMap());
        ScriptedSimilarity sim = new ScriptedSimilarity("foobar", weightFactory::newInstance, "foobaz", factory::newInstance, true);
        Directory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim));

        Document doc = new Document();
        doc.add(new TextField("f", "foo bar", Store.NO));
        doc.add(new StringField("match", "no", Store.NO));
        w.addDocument(doc);

        doc = new Document();
        doc.add(new TextField("f", "foo foo bar", Store.NO));
        doc.add(new StringField("match", "yes", Store.NO));
        w.addDocument(doc);

        doc = new Document();
        doc.add(new TextField("f", "bar", Store.NO));
        doc.add(new StringField("match", "no", Store.NO));
        w.addDocument(doc);

        IndexReader r = DirectoryReader.open(w);
        w.close();
        IndexSearcher searcher = new IndexSearcher(r);
        searcher.setSimilarity(sim);
        Query query = new BoostQuery(new BooleanQuery.Builder()
                .add(new TermQuery(new Term("f", "foo")), Occur.SHOULD)
                .add(new TermQuery(new Term("match", "yes")), Occur.FILTER)
                .build(), 3.2f);
        TopDocs topDocs = searcher.search(query, 1);
        assertEquals(1, topDocs.totalHits);
        assertEquals((float) (3.2 * 2 / 3), topDocs.scoreDocs[0].score, 0);
        w.close();
        dir.close();
    }
}

@ -336,7 +336,8 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
            hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
        HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
        Similarity expected = SimilarityService.BUILT_IN.get(similarity)
            .apply(similarity, Settings.EMPTY, Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build())
            .create(similarity, Settings.EMPTY,
                Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
            .get();
        assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
    }

@ -322,7 +322,8 @@ public class LegacyHasChildQueryBuilderTests extends AbstractQueryTestCase<HasCh
            hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
        HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
        Similarity expected = SimilarityService.BUILT_IN.get(similarity)
            .apply(similarity, Settings.EMPTY, Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build())
            .create(similarity, Settings.EMPTY,
                Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
            .get();
        assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
    }

@ -60,7 +60,7 @@ public class MapperTestUtils {
        MapperRegistry mapperRegistry = indicesModule.getMapperRegistry();
        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(indexName, finalSettings);
        IndexAnalyzers indexAnalyzers = createTestAnalysis(indexSettings, finalSettings).indexAnalyzers;
        SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
        SimilarityService similarityService = new SimilarityService(indexSettings, null, Collections.emptyMap());
        return new MapperService(indexSettings,
            indexAnalyzers,
            xContentRegistry,

@ -274,7 +274,7 @@ public abstract class IndexShardTestCase extends ESTestCase {
        MapperService mapperService = MapperTestUtils.newMapperService(xContentRegistry(), createTempDir(),
            indexSettings.getSettings(), "index");
        mapperService.merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY, true);
        SimilarityService similarityService = new SimilarityService(indexSettings, Collections.emptyMap());
        SimilarityService similarityService = new SimilarityService(indexSettings, null, Collections.emptyMap());
        final IndexEventListener indexEventListener = new IndexEventListener() {
        };
        final Engine.Warmer warmer = searcher -> {

@ -21,6 +21,11 @@ package org.elasticsearch.script;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.elasticsearch.index.similarity.ScriptedSimilarity.Doc;
import org.elasticsearch.index.similarity.ScriptedSimilarity.Field;
import org.elasticsearch.index.similarity.ScriptedSimilarity.Query;
import org.elasticsearch.index.similarity.ScriptedSimilarity.Term;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.search.lookup.LeafSearchLookup;
import org.elasticsearch.search.lookup.SearchLookup;

@ -94,6 +99,12 @@ public class MockScriptEngine implements ScriptEngine {
                };
            };
            return context.factoryClazz.cast(factory);
        } else if (context.instanceClazz.equals(SimilarityScript.class)) {
            SimilarityScript.Factory factory = mockCompiled::createSimilarityScript;
            return context.factoryClazz.cast(factory);
        } else if (context.instanceClazz.equals(SimilarityWeightScript.class)) {
            SimilarityWeightScript.Factory factory = mockCompiled::createSimilarityWeightScript;
            return context.factoryClazz.cast(factory);
        }
        throw new IllegalArgumentException("mock script engine does not know how to handle context [" + context.name + "]");
    }

@ -141,6 +152,14 @@ public class MockScriptEngine implements ScriptEngine {
            }
            return new MockSearchScript(lookup, context, script != null ? script : ctx -> source);
        }

        public SimilarityScript createSimilarityScript() {
            return new MockSimilarityScript(script != null ? script : ctx -> 42d);
        }

        public SimilarityWeightScript createSimilarityWeightScript() {
            return new MockSimilarityWeightScript(script != null ? script : ctx -> 42d);
        }
    }

    public class MockExecutableScript implements ExecutableScript {

@ -224,6 +243,44 @@ public class MockScriptEngine implements ScriptEngine {
        }
    }

    public class MockSimilarityScript extends SimilarityScript {

        private final Function<Map<String, Object>, Object> script;

        MockSimilarityScript(Function<Map<String, Object>, Object> script) {
            this.script = script;
        }

        @Override
        public double execute(double weight, Query query, Field field, Term term, Doc doc) throws IOException {
            Map<String, Object> map = new HashMap<>();
            map.put("weight", weight);
            map.put("query", query);
            map.put("field", field);
            map.put("term", term);
            map.put("doc", doc);
            return ((Number) script.apply(map)).doubleValue();
        }
    }

    public class MockSimilarityWeightScript extends SimilarityWeightScript {

        private final Function<Map<String, Object>, Object> script;

        MockSimilarityWeightScript(Function<Map<String, Object>, Object> script) {
            this.script = script;
        }

        @Override
        public double execute(Query query, Field field, Term term) throws IOException {
            Map<String, Object> map = new HashMap<>();
            map.put("query", query);
            map.put("field", field);
            map.put("term", term);
            return ((Number) script.apply(map)).doubleValue();
        }
    }

    public static Script mockInlineScript(final String script) {
        return new Script(ScriptType.INLINE, "mock", script, emptyMap());
    }

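With these mock classes, a test can exercise a scripted similarity without compiling Painless by supplying a plain function over the variables map built above (keys `weight`, `query`, `field`, `term`, `doc`). A hypothetical sketch of such a function; how it gets registered with `MockScriptEngine` depends on the individual test setup and is not shown here:

[source,java]
--------------------------------------------------
import java.util.Map;
import java.util.function.Function;

import org.elasticsearch.index.similarity.ScriptedSimilarity;

public class MockSimilarityExample {
    // Mirrors "return weight * doc.freq / doc.length" using the map keys
    // that MockSimilarityScript passes to the function.
    static final Function<Map<String, Object>, Object> SCRIPT = vars -> {
        double weight = ((Number) vars.get("weight")).doubleValue();
        ScriptedSimilarity.Doc doc = (ScriptedSimilarity.Doc) vars.get("doc");
        return weight * doc.getFreq() / doc.getLength();
    };
}
--------------------------------------------------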
@ -1036,7 +1036,7 @@ public abstract class AbstractQueryTestCase<QB extends AbstractQueryBuilder<QB>>
        AnalysisModule analysisModule = new AnalysisModule(new Environment(nodeSettings), emptyList());
        IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
        scriptService = scriptModule.getScriptService();
        similarityService = new SimilarityService(idxSettings, Collections.emptyMap());
        similarityService = new SimilarityService(idxSettings, null, Collections.emptyMap());
        MapperRegistry mapperRegistry = indicesModule.getMapperRegistry();
        mapperService = new MapperService(idxSettings, indexAnalyzers, xContentRegistry, similarityService, mapperRegistry,
            this::createShardContext);