# Rescore Feature
The rescore feature allows to rescore a document returned by a query based on a secondary algorithm. Rescoring is commonly used if a scoring algorithm is too costly to be executed across the entire document set but efficient enough to be executed on the Top-K documents scored by a faster retrieval method. Rescoring can help to improve precision by reordering a larger Top-K window than actually returned to the user. Typically it is executed on a window between 100 and 500 documents while the actual result window requested by the user remains the same.

# Query Rescorer

The `query` rescorer executes a secondary query only on the Top-K results of the actual user query and rescores the documents based on a linear combination of the user query's score and the score of the `rescore_query`. This allows to execute any exposed query as a `rescore_query` and supports a `query_weight` as well as a `rescore_query_weight` to weight the factors of the linear combination.

# Rescore API

The `rescore` request is defined alongside the query part in the json request:

```sh
curl -s -XPOST 'localhost:9200/_search' -d '{ "query" : { "match" : { "field1" : { "query" : "the quick brown", "type" : "boolean", "operator" : "OR" } } }, "rescore" : { "window_size" : 50, "query" : { "rescore_query" : { "match" : { "field1" : { "query" : "the quick brown", "type" : "phrase", "slop" : 2 } } }, "query_weight" : 0.7, "rescore_query_weight" : 1.2 } } }'
```

Each `rescore` request is executed on a per-shard basis within the same roundtrip. Currently the rescore API has only one implementation (the `query` rescorer) which modifies the result set in-place. Future developments could include dedicated rescore results if needed by the implementation, i.e. a pair-wise reranker.

*Note:* Only regular queries are rescored; if the search type is set to `scan` or `count`, rescorers are not executed.

Closes #2640
This commit is contained in:
parent
c65aff7775
commit
a7bbab7e87
|
@ -44,6 +44,9 @@ import org.elasticsearch.indices.IndicesService;
|
||||||
import org.elasticsearch.script.ScriptService;
|
import org.elasticsearch.script.ScriptService;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
import org.elasticsearch.search.internal.ShardSearchRequest;
|
import org.elasticsearch.search.internal.ShardSearchRequest;
|
||||||
|
import org.elasticsearch.search.rescore.RescorePhase;
|
||||||
|
import org.elasticsearch.search.rescore.RescoreSearchContext;
|
||||||
|
import org.elasticsearch.search.rescore.Rescorer;
|
||||||
import org.elasticsearch.threadpool.ThreadPool;
|
import org.elasticsearch.threadpool.ThreadPool;
|
||||||
import org.elasticsearch.transport.TransportService;
|
import org.elasticsearch.transport.TransportService;
|
||||||
|
|
||||||
|
@ -105,8 +108,14 @@ public class TransportExplainAction extends TransportShardSingleOperationAction<
|
||||||
context.parsedQuery(parseQuery(request, indexService));
|
context.parsedQuery(parseQuery(request, indexService));
|
||||||
context.preProcess();
|
context.preProcess();
|
||||||
int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().reader.docBase;
|
int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().reader.docBase;
|
||||||
|
Explanation explanation;
|
||||||
Explanation explanation = context.searcher().explain(context.query(), topLevelDocId);
|
if (context.rescore() != null) {
|
||||||
|
RescoreSearchContext ctx = context.rescore();
|
||||||
|
Rescorer rescorer = ctx.rescorer();
|
||||||
|
explanation = rescorer.explain(topLevelDocId, context, ctx);
|
||||||
|
} else {
|
||||||
|
explanation = context.searcher().explain(context.query(), topLevelDocId);
|
||||||
|
}
|
||||||
if (request.fields() != null) {
|
if (request.fields() != null) {
|
||||||
if (request.fields().length == 1 && "_source".equals(request.fields()[0])) {
|
if (request.fields().length == 1 && "_source".equals(request.fields()[0])) {
|
||||||
request.fields(null); // Load the _source field
|
request.fields(null); // Load the _source field
|
||||||
|
|
|
@ -35,6 +35,7 @@ import org.elasticsearch.search.Scroll;
|
||||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||||
import org.elasticsearch.search.facet.AbstractFacetBuilder;
|
import org.elasticsearch.search.facet.AbstractFacetBuilder;
|
||||||
import org.elasticsearch.search.highlight.HighlightBuilder;
|
import org.elasticsearch.search.highlight.HighlightBuilder;
|
||||||
|
import org.elasticsearch.search.rescore.RescoreBuilder;
|
||||||
import org.elasticsearch.search.sort.SortBuilder;
|
import org.elasticsearch.search.sort.SortBuilder;
|
||||||
import org.elasticsearch.search.sort.SortOrder;
|
import org.elasticsearch.search.sort.SortOrder;
|
||||||
import org.elasticsearch.search.suggest.SuggestBuilder;
|
import org.elasticsearch.search.suggest.SuggestBuilder;
|
||||||
|
@ -662,7 +663,17 @@ public class SearchRequestBuilder extends ActionRequestBuilder<SearchRequest, Se
|
||||||
suggestBuilder().addSuggestion(suggestion);
|
suggestBuilder().addSuggestion(suggestion);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SearchRequestBuilder setRescorer(RescoreBuilder.Rescorer rescorer) {
|
||||||
|
rescoreBuilder().setRescorer(rescorer);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchRequestBuilder setRescoreWindow(int window) {
|
||||||
|
rescoreBuilder().setWindowSize(window);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the source of the request as a json string. Note, settings anything other
|
* Sets the source of the request as a json string. Note, settings anything other
|
||||||
* than the search type will cause this source to be overridden, consider using
|
* than the search type will cause this source to be overridden, consider using
|
||||||
|
@ -839,5 +850,9 @@ public class SearchRequestBuilder extends ActionRequestBuilder<SearchRequest, Se
|
||||||
private SuggestBuilder suggestBuilder() {
|
private SuggestBuilder suggestBuilder() {
|
||||||
return sourceBuilder().suggest();
|
return sourceBuilder().suggest();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private RescoreBuilder rescoreBuilder() {
|
||||||
|
return sourceBuilder().rescore();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,6 +38,7 @@ import org.elasticsearch.index.query.FilterBuilder;
|
||||||
import org.elasticsearch.index.query.QueryBuilder;
|
import org.elasticsearch.index.query.QueryBuilder;
|
||||||
import org.elasticsearch.search.facet.AbstractFacetBuilder;
|
import org.elasticsearch.search.facet.AbstractFacetBuilder;
|
||||||
import org.elasticsearch.search.highlight.HighlightBuilder;
|
import org.elasticsearch.search.highlight.HighlightBuilder;
|
||||||
|
import org.elasticsearch.search.rescore.RescoreBuilder;
|
||||||
import org.elasticsearch.search.sort.SortBuilder;
|
import org.elasticsearch.search.sort.SortBuilder;
|
||||||
import org.elasticsearch.search.sort.SortBuilders;
|
import org.elasticsearch.search.sort.SortBuilders;
|
||||||
import org.elasticsearch.search.sort.SortOrder;
|
import org.elasticsearch.search.sort.SortOrder;
|
||||||
|
@ -105,6 +106,8 @@ public class SearchSourceBuilder implements ToXContent {
|
||||||
private HighlightBuilder highlightBuilder;
|
private HighlightBuilder highlightBuilder;
|
||||||
|
|
||||||
private SuggestBuilder suggestBuilder;
|
private SuggestBuilder suggestBuilder;
|
||||||
|
|
||||||
|
private RescoreBuilder rescoreBuilder;
|
||||||
|
|
||||||
private TObjectFloatHashMap<String> indexBoost = null;
|
private TObjectFloatHashMap<String> indexBoost = null;
|
||||||
|
|
||||||
|
@ -409,6 +412,13 @@ public class SearchSourceBuilder implements ToXContent {
|
||||||
}
|
}
|
||||||
return suggestBuilder;
|
return suggestBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public RescoreBuilder rescore() {
|
||||||
|
if (rescoreBuilder == null) {
|
||||||
|
rescoreBuilder = new RescoreBuilder();
|
||||||
|
}
|
||||||
|
return rescoreBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets no fields to be loaded, resulting in only id and type to be returned per field.
|
* Sets no fields to be loaded, resulting in only id and type to be returned per field.
|
||||||
|
@ -722,6 +732,10 @@ public class SearchSourceBuilder implements ToXContent {
|
||||||
if (suggestBuilder != null) {
|
if (suggestBuilder != null) {
|
||||||
suggestBuilder.toXContent(builder, params);
|
suggestBuilder.toXContent(builder, params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rescoreBuilder != null) {
|
||||||
|
rescoreBuilder.toXContent(builder, params);
|
||||||
|
}
|
||||||
|
|
||||||
if (stats != null) {
|
if (stats != null) {
|
||||||
builder.startArray("stats");
|
builder.startArray("stats");
|
||||||
|
|
|
@ -33,10 +33,7 @@ import org.elasticsearch.search.SearchParseElement;
|
||||||
import org.elasticsearch.search.SearchPhase;
|
import org.elasticsearch.search.SearchPhase;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -68,6 +65,10 @@ public class DfsPhase implements SearchPhase {
|
||||||
THashSet<Term> termsSet = cachedTermsSet.get().get();
|
THashSet<Term> termsSet = cachedTermsSet.get().get();
|
||||||
termsSet.clear();
|
termsSet.clear();
|
||||||
context.query().extractTerms(termsSet);
|
context.query().extractTerms(termsSet);
|
||||||
|
if (context.rescore() != null) {
|
||||||
|
context.rescore().rescorer().extractTerms(context, context.rescore(), termsSet);
|
||||||
|
}
|
||||||
|
|
||||||
Term[] terms = termsSet.toArray(new Term[termsSet.size()]);
|
Term[] terms = termsSet.toArray(new Term[termsSet.size()]);
|
||||||
TermStatistics[] termStatistics = new TermStatistics[terms.length];
|
TermStatistics[] termStatistics = new TermStatistics[terms.length];
|
||||||
IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext();
|
IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext();
|
||||||
|
|
|
@ -20,12 +20,16 @@
|
||||||
package org.elasticsearch.search.fetch.explain;
|
package org.elasticsearch.search.fetch.explain;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
import org.elasticsearch.ElasticSearchException;
|
import org.elasticsearch.ElasticSearchException;
|
||||||
import org.elasticsearch.search.SearchParseElement;
|
import org.elasticsearch.search.SearchParseElement;
|
||||||
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
||||||
import org.elasticsearch.search.fetch.FetchSubPhase;
|
import org.elasticsearch.search.fetch.FetchSubPhase;
|
||||||
import org.elasticsearch.search.internal.InternalSearchHit;
|
import org.elasticsearch.search.internal.InternalSearchHit;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
import org.elasticsearch.search.rescore.RescoreSearchContext;
|
||||||
|
import org.elasticsearch.search.rescore.Rescorer;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -57,8 +61,18 @@ public class ExplainFetchSubPhase implements FetchSubPhase {
|
||||||
@Override
|
@Override
|
||||||
public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException {
|
public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException {
|
||||||
try {
|
try {
|
||||||
|
final int topLevelDocId = hitContext.hit().docId();
|
||||||
|
Explanation explanation;
|
||||||
|
|
||||||
|
if (context.rescore() != null) {
|
||||||
|
RescoreSearchContext ctx = context.rescore();
|
||||||
|
Rescorer rescorer = ctx.rescorer();
|
||||||
|
explanation = rescorer.explain(topLevelDocId, context, ctx);
|
||||||
|
} else {
|
||||||
|
explanation = context.searcher().explain(context.query(), topLevelDocId);
|
||||||
|
}
|
||||||
// we use the top level doc id, since we work with the top level searcher
|
// we use the top level doc id, since we work with the top level searcher
|
||||||
hitContext.hit().explanation(context.searcher().explain(context.query(), hitContext.hit().docId()));
|
hitContext.hit().explanation(explanation);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new FetchPhaseExecutionException(context, "Failed to explain doc [" + hitContext.hit().type() + "#" + hitContext.hit().id() + "]", e);
|
throw new FetchPhaseExecutionException(context, "Failed to explain doc [" + hitContext.hit().type() + "#" + hitContext.hit().id() + "]", e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,7 @@ import org.elasticsearch.search.fetch.script.ScriptFieldsContext;
|
||||||
import org.elasticsearch.search.highlight.SearchContextHighlight;
|
import org.elasticsearch.search.highlight.SearchContextHighlight;
|
||||||
import org.elasticsearch.search.lookup.SearchLookup;
|
import org.elasticsearch.search.lookup.SearchLookup;
|
||||||
import org.elasticsearch.search.query.QuerySearchResult;
|
import org.elasticsearch.search.query.QuerySearchResult;
|
||||||
|
import org.elasticsearch.search.rescore.RescoreSearchContext;
|
||||||
import org.elasticsearch.search.scan.ScanContext;
|
import org.elasticsearch.search.scan.ScanContext;
|
||||||
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
||||||
|
|
||||||
|
@ -166,6 +167,8 @@ public class SearchContext implements Releasable {
|
||||||
private SearchContextHighlight highlight;
|
private SearchContextHighlight highlight;
|
||||||
|
|
||||||
private SuggestionSearchContext suggest;
|
private SuggestionSearchContext suggest;
|
||||||
|
|
||||||
|
private RescoreSearchContext rescore;
|
||||||
|
|
||||||
private SearchLookup searchLookup;
|
private SearchLookup searchLookup;
|
||||||
|
|
||||||
|
@ -177,6 +180,7 @@ public class SearchContext implements Releasable {
|
||||||
|
|
||||||
private List<Rewrite> rewrites = null;
|
private List<Rewrite> rewrites = null;
|
||||||
|
|
||||||
|
|
||||||
public SearchContext(long id, ShardSearchRequest request, SearchShardTarget shardTarget,
|
public SearchContext(long id, ShardSearchRequest request, SearchShardTarget shardTarget,
|
||||||
Engine.Searcher engineSearcher, IndexService indexService, IndexShard indexShard, ScriptService scriptService) {
|
Engine.Searcher engineSearcher, IndexService indexService, IndexShard indexShard, ScriptService scriptService) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
|
@ -313,6 +317,14 @@ public class SearchContext implements Releasable {
|
||||||
public void suggest(SuggestionSearchContext suggest) {
|
public void suggest(SuggestionSearchContext suggest) {
|
||||||
this.suggest = suggest;
|
this.suggest = suggest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public RescoreSearchContext rescore() {
|
||||||
|
return this.rescore;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void rescore(RescoreSearchContext rescore) {
|
||||||
|
this.rescore = rescore;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean hasScriptFields() {
|
public boolean hasScriptFields() {
|
||||||
return scriptFields != null;
|
return scriptFields != null;
|
||||||
|
|
|
@ -31,6 +31,8 @@ import org.elasticsearch.search.SearchPhase;
|
||||||
import org.elasticsearch.search.facet.FacetPhase;
|
import org.elasticsearch.search.facet.FacetPhase;
|
||||||
import org.elasticsearch.search.internal.ContextIndexSearcher;
|
import org.elasticsearch.search.internal.ContextIndexSearcher;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
import org.elasticsearch.search.rescore.RescorePhase;
|
||||||
|
import org.elasticsearch.search.rescore.RescoreSearchContext;
|
||||||
import org.elasticsearch.search.sort.SortParseElement;
|
import org.elasticsearch.search.sort.SortParseElement;
|
||||||
import org.elasticsearch.search.sort.TrackScoresParseElement;
|
import org.elasticsearch.search.sort.TrackScoresParseElement;
|
||||||
import org.elasticsearch.search.suggest.SuggestPhase;
|
import org.elasticsearch.search.suggest.SuggestPhase;
|
||||||
|
@ -45,11 +47,13 @@ public class QueryPhase implements SearchPhase {
|
||||||
|
|
||||||
private final FacetPhase facetPhase;
|
private final FacetPhase facetPhase;
|
||||||
private final SuggestPhase suggestPhase;
|
private final SuggestPhase suggestPhase;
|
||||||
|
private RescorePhase rescorePhase;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase) {
|
public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase, RescorePhase rescorePhase) {
|
||||||
this.facetPhase = facetPhase;
|
this.facetPhase = facetPhase;
|
||||||
this.suggestPhase = suggestPhase;
|
this.suggestPhase = suggestPhase;
|
||||||
|
this.rescorePhase = rescorePhase;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -71,7 +75,8 @@ public class QueryPhase implements SearchPhase {
|
||||||
.put("minScore", new MinScoreParseElement())
|
.put("minScore", new MinScoreParseElement())
|
||||||
.put("timeout", new TimeoutParseElement())
|
.put("timeout", new TimeoutParseElement())
|
||||||
.putAll(facetPhase.parseElements())
|
.putAll(facetPhase.parseElements())
|
||||||
.putAll(suggestPhase.parseElements());
|
.putAll(suggestPhase.parseElements())
|
||||||
|
.putAll(rescorePhase.parseElements());
|
||||||
return parseElements.build();
|
return parseElements.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,6 +104,7 @@ public class QueryPhase implements SearchPhase {
|
||||||
}
|
}
|
||||||
|
|
||||||
searchContext.searcher().inStage(ContextIndexSearcher.Stage.MAIN_QUERY);
|
searchContext.searcher().inStage(ContextIndexSearcher.Stage.MAIN_QUERY);
|
||||||
|
boolean rescore = false;
|
||||||
try {
|
try {
|
||||||
searchContext.queryResult().from(searchContext.from());
|
searchContext.queryResult().from(searchContext.from());
|
||||||
searchContext.queryResult().size(searchContext.size());
|
searchContext.queryResult().size(searchContext.size());
|
||||||
|
@ -106,7 +112,7 @@ public class QueryPhase implements SearchPhase {
|
||||||
Query query = searchContext.query();
|
Query query = searchContext.query();
|
||||||
|
|
||||||
TopDocs topDocs;
|
TopDocs topDocs;
|
||||||
int numDocs = searchContext.from() + searchContext.size();
|
int numDocs = searchContext.from() + searchContext.size() ;
|
||||||
if (numDocs == 0) {
|
if (numDocs == 0) {
|
||||||
// if 0 was asked, change it to 1 since 0 is not allowed
|
// if 0 was asked, change it to 1 since 0 is not allowed
|
||||||
numDocs = 1;
|
numDocs = 1;
|
||||||
|
@ -122,6 +128,10 @@ public class QueryPhase implements SearchPhase {
|
||||||
topDocs = searchContext.searcher().search(query, null, numDocs, searchContext.sort(),
|
topDocs = searchContext.searcher().search(query, null, numDocs, searchContext.sort(),
|
||||||
searchContext.trackScores(), searchContext.trackScores());
|
searchContext.trackScores(), searchContext.trackScores());
|
||||||
} else {
|
} else {
|
||||||
|
if (searchContext.rescore() != null) {
|
||||||
|
rescore = true;
|
||||||
|
numDocs = Math.max(searchContext.rescore().window(), numDocs);
|
||||||
|
}
|
||||||
topDocs = searchContext.searcher().search(query, numDocs);
|
topDocs = searchContext.searcher().search(query, numDocs);
|
||||||
}
|
}
|
||||||
searchContext.queryResult().topDocs(topDocs);
|
searchContext.queryResult().topDocs(topDocs);
|
||||||
|
@ -130,7 +140,9 @@ public class QueryPhase implements SearchPhase {
|
||||||
} finally {
|
} finally {
|
||||||
searchContext.searcher().finishStage(ContextIndexSearcher.Stage.MAIN_QUERY);
|
searchContext.searcher().finishStage(ContextIndexSearcher.Stage.MAIN_QUERY);
|
||||||
}
|
}
|
||||||
|
if (rescore) { // only if we do a regular search
|
||||||
|
rescorePhase.execute(searchContext);
|
||||||
|
}
|
||||||
suggestPhase.execute(searchContext);
|
suggestPhase.execute(searchContext);
|
||||||
facetPhase.execute(searchContext);
|
facetPhase.execute(searchContext);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,324 @@
|
||||||
|
package org.elasticsearch.search.rescore;
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.ComplexExplanation;
|
||||||
|
import org.apache.lucene.search.DocIdSet;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.Filter;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.SorterTemplate;
|
||||||
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
||||||
|
import org.elasticsearch.index.query.ParsedQuery;
|
||||||
|
import org.elasticsearch.search.internal.ContextIndexSearcher;
|
||||||
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
|
||||||
|
final class QueryRescorer implements Rescorer {
|
||||||
|
|
||||||
|
public static final Rescorer INSTANCE = new QueryRescorer();
|
||||||
|
public static final String NAME = "query";
|
||||||
|
/** Returns the identifier of this rescorer, i.e. the {@code NAME} constant ({@code "query"}). */
@Override
|
||||||
|
public String name() {
|
||||||
|
return NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void rescore(TopDocs topDocs, SearchContext context, RescoreSearchContext rescoreContext) throws IOException{
|
||||||
|
assert rescoreContext != null;
|
||||||
|
QueryRescoreContext rescore = ((QueryRescoreContext) rescoreContext);
|
||||||
|
TopDocs queryTopDocs = context.queryResult().topDocs();
|
||||||
|
if (queryTopDocs == null || queryTopDocs.totalHits == 0 || queryTopDocs.scoreDocs.length == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ContextIndexSearcher searcher = context.searcher();
|
||||||
|
topDocs = searcher.search(rescore.query(), new TopDocsFilter(queryTopDocs), queryTopDocs.scoreDocs.length);
|
||||||
|
context.queryResult().topDocs(merge(queryTopDocs, topDocs, rescore));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explain(int topLevelDocId, SearchContext context, RescoreSearchContext rescoreContext) throws IOException {
|
||||||
|
QueryRescoreContext rescore = ((QueryRescoreContext) context.rescore());
|
||||||
|
ContextIndexSearcher searcher = context.searcher();
|
||||||
|
Explanation primaryExplain = searcher.explain(context.query(), topLevelDocId);
|
||||||
|
if (primaryExplain == null) {
|
||||||
|
// this should not happen but just in case
|
||||||
|
return new ComplexExplanation(false, 0.0f, "nothing matched");
|
||||||
|
}
|
||||||
|
Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
|
||||||
|
float primaryWeight = rescore.queryWeight();
|
||||||
|
ComplexExplanation prim = new ComplexExplanation(primaryExplain.isMatch(),
|
||||||
|
primaryExplain.getValue() * primaryWeight,
|
||||||
|
"product of:");
|
||||||
|
prim.addDetail(primaryExplain);
|
||||||
|
prim.addDetail(new Explanation(primaryWeight, "primaryWeight"));
|
||||||
|
if (rescoreExplain != null) {
|
||||||
|
ComplexExplanation sumExpl = new ComplexExplanation();
|
||||||
|
sumExpl.setDescription("sum of:");
|
||||||
|
sumExpl.addDetail(prim);
|
||||||
|
sumExpl.setMatch(prim.isMatch());
|
||||||
|
float secondaryWeight = rescore.rescoreQueryWeight();
|
||||||
|
ComplexExplanation sec = new ComplexExplanation(rescoreExplain.isMatch(),
|
||||||
|
rescoreExplain.getValue() * secondaryWeight,
|
||||||
|
"product of:");
|
||||||
|
sec.addDetail(rescoreExplain);
|
||||||
|
sec.addDetail(new Explanation(secondaryWeight, "secondaryWeight"));
|
||||||
|
sumExpl.addDetail(sec);
|
||||||
|
sumExpl.setValue(prim.getValue() + sec.getValue());
|
||||||
|
return sumExpl;
|
||||||
|
} else {
|
||||||
|
return prim;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public RescoreSearchContext parse(XContentParser parser, SearchContext context) throws IOException {
|
||||||
|
Token token;
|
||||||
|
String fieldName = null;
|
||||||
|
QueryRescoreContext rescoreContext = new QueryRescoreContext(this);
|
||||||
|
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||||
|
if (token == XContentParser.Token.FIELD_NAME) {
|
||||||
|
fieldName = parser.currentName();
|
||||||
|
if ("rescore_query".equals(fieldName)) {
|
||||||
|
ParsedQuery parsedQuery = context.queryParserService().parse(parser);
|
||||||
|
rescoreContext.setParsedQuery(parsedQuery);
|
||||||
|
}
|
||||||
|
} else if (token.isValue()) {
|
||||||
|
if("query_weight".equals(fieldName)) {
|
||||||
|
rescoreContext.setQueryWeight(parser.floatValue());
|
||||||
|
} else if("rescore_query_weight".equals(fieldName)) {
|
||||||
|
rescoreContext.setRescoreQueryWeight(parser.floatValue());
|
||||||
|
} else {
|
||||||
|
throw new ElasticSearchIllegalArgumentException("rescore doesn't support [" + fieldName + "]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rescoreContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
static class QueryRescoreContext extends RescoreSearchContext {
|
||||||
|
|
||||||
|
public QueryRescoreContext(QueryRescorer rescorer) {
|
||||||
|
super(NAME, 10, rescorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ParsedQuery parsedQuery;
|
||||||
|
private float queryWeight = 1.0f;
|
||||||
|
private float rescoreQueryWeight = 1.0f;
|
||||||
|
|
||||||
|
public void setParsedQuery(ParsedQuery parsedQuery) {
|
||||||
|
this.parsedQuery = parsedQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query query() {
|
||||||
|
return parsedQuery.query();
|
||||||
|
}
|
||||||
|
|
||||||
|
public float queryWeight() {
|
||||||
|
return queryWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float rescoreQueryWeight() {
|
||||||
|
return rescoreQueryWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRescoreQueryWeight(float rescoreQueryWeight) {
|
||||||
|
this.rescoreQueryWeight = rescoreQueryWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setQueryWeight(float queryWeight) {
|
||||||
|
this.queryWeight = queryWeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private TopDocs merge(TopDocs primary, TopDocs secondary, QueryRescoreContext context) {
|
||||||
|
DocIdSorter sorter = new DocIdSorter();
|
||||||
|
sorter.array = primary.scoreDocs;
|
||||||
|
sorter.mergeSort(0, sorter.array.length-1);
|
||||||
|
ScoreDoc[] primaryDocs = sorter.array;
|
||||||
|
sorter.array = secondary.scoreDocs;
|
||||||
|
sorter.mergeSort(0, sorter.array.length-1);
|
||||||
|
ScoreDoc[] secondaryDocs = sorter.array;
|
||||||
|
int j = 0;
|
||||||
|
float primaryWeight = context.queryWeight();
|
||||||
|
float secondaryWeight = context.rescoreQueryWeight();
|
||||||
|
for (int i = 0; i < primaryDocs.length && j < secondaryDocs.length; i++) {
|
||||||
|
if (primaryDocs[i].doc == secondaryDocs[j].doc) {
|
||||||
|
primaryDocs[i].score = (primaryDocs[i].score * primaryWeight) + (secondaryDocs[j++].score * secondaryWeight);
|
||||||
|
} else {
|
||||||
|
primaryDocs[i].score *= primaryWeight;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ScoreSorter scoreSorter = new ScoreSorter();
|
||||||
|
scoreSorter.array = primaryDocs;
|
||||||
|
scoreSorter.mergeSort(0, primaryDocs.length-1);
|
||||||
|
primary.setMaxScore(primaryDocs[0].score);
|
||||||
|
return primary;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class DocIdSorter extends SorterTemplate {
|
||||||
|
private ScoreDoc[] array;
|
||||||
|
private ScoreDoc pivot;
|
||||||
|
@Override
|
||||||
|
protected void swap(int i, int j) {
|
||||||
|
ScoreDoc scoreDoc = array[i];
|
||||||
|
array[i] = array[j];
|
||||||
|
array[j] = scoreDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int compare(int i, int j) {
|
||||||
|
return compareDocId(array[i], array[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void setPivot(int i) {
|
||||||
|
pivot = array[i];
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int comparePivot(int j) {
|
||||||
|
return compareDocId(pivot, array[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int compareDocId(ScoreDoc left, ScoreDoc right) {
|
||||||
|
if (left.doc < right.doc) {
|
||||||
|
return 1;
|
||||||
|
} else if (left.doc == right.doc) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class ScoreSorter extends SorterTemplate {
|
||||||
|
private ScoreDoc[] array;
|
||||||
|
private ScoreDoc pivot;
|
||||||
|
@Override
|
||||||
|
protected void swap(int i, int j) {
|
||||||
|
ScoreDoc scoreDoc = array[i];
|
||||||
|
array[i] = array[j];
|
||||||
|
array[j] = scoreDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int compare(int i, int j) {
|
||||||
|
int cmp = Float.compare(array[j].score, array[i].score);
|
||||||
|
return cmp == 0 ? compareDocId(array[i], array[j]) : cmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void setPivot(int i) {
|
||||||
|
pivot = array[i];
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int comparePivot(int j) {
|
||||||
|
int cmp = Float.compare(array[j].score, pivot.score);
|
||||||
|
return cmp == 0 ? compareDocId(pivot, array[j]) : cmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class TopDocsFilter extends Filter {
|
||||||
|
|
||||||
|
private final int[] docIds;
|
||||||
|
public TopDocsFilter(TopDocs topDocs) {
|
||||||
|
this.docIds = new int[topDocs.scoreDocs.length];
|
||||||
|
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
||||||
|
for (int i = 0; i < scoreDocs.length; i++) {
|
||||||
|
docIds[i] = scoreDocs[i].doc;
|
||||||
|
}
|
||||||
|
Arrays.sort(docIds);
|
||||||
|
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||||
|
final int docBase = context.docBase;
|
||||||
|
int limit = docBase + context.reader().maxDoc();
|
||||||
|
int offset = Arrays.binarySearch(docIds, docBase);
|
||||||
|
if (offset < 0 ) {
|
||||||
|
offset = (-offset)-1;
|
||||||
|
}
|
||||||
|
int end = Arrays.binarySearch(docIds, limit);
|
||||||
|
if (end < 0) {
|
||||||
|
end = (-end)-1;
|
||||||
|
}
|
||||||
|
final int start = offset;
|
||||||
|
final int stop = end;
|
||||||
|
|
||||||
|
return new DocIdSet() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSetIterator iterator() throws IOException {
|
||||||
|
return new DocIdSetIterator() {
|
||||||
|
private int current = start;
|
||||||
|
private int docId = NO_MORE_DOCS;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
if (current < stop) {
|
||||||
|
return docId = docIds[current++]-docBase;
|
||||||
|
}
|
||||||
|
return docId = NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return docId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
if (target == NO_MORE_DOCS) {
|
||||||
|
current = stop;
|
||||||
|
return docId = NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
while(nextDoc() < target) {}
|
||||||
|
return docId;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void extractTerms(SearchContext context, RescoreSearchContext rescoreContext, Set<Term> termsSet) {
|
||||||
|
((QueryRescoreContext) context.rescore()).query().extractTerms(termsSet);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,125 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.search.rescore;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.elasticsearch.ElasticSearchException;
|
||||||
|
import org.elasticsearch.common.xcontent.ToXContent;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilder;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilderException;
|
||||||
|
|
||||||
|
public class RescoreBuilder implements ToXContent {
|
||||||
|
|
||||||
|
private Rescorer rescorer;
|
||||||
|
private Integer windowSize;
|
||||||
|
|
||||||
|
public static QueryRescorer queryRescorer(QueryBuilder queryBuilder) {
|
||||||
|
return new QueryRescorer(queryBuilder);
|
||||||
|
}
|
||||||
|
|
||||||
|
public RescoreBuilder setRescorer(Rescorer rescorer) {
|
||||||
|
this.rescorer = rescorer;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public RescoreBuilder setWindowSize(int windowSize) {
|
||||||
|
this.windowSize = windowSize;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
if (rescorer != null) {
|
||||||
|
builder.startObject("rescore");
|
||||||
|
if (windowSize != null) {
|
||||||
|
builder.field("window_size", windowSize);
|
||||||
|
}
|
||||||
|
rescorer.toXContent(builder, params);
|
||||||
|
builder.endObject();
|
||||||
|
}
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static abstract class Rescorer implements ToXContent {
|
||||||
|
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
public Rescorer(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
builder.startObject(name);
|
||||||
|
builder = innerToXContent(builder, params);
|
||||||
|
builder.endObject();
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class QueryRescorer extends Rescorer {
|
||||||
|
private static final String NAME = "query";
|
||||||
|
private QueryBuilder queryBuilder;
|
||||||
|
private Float rescoreQueryWeight;
|
||||||
|
private Float queryWeight;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link QueryRescorer} instance
|
||||||
|
* @param builder the query builder to build the rescore query from
|
||||||
|
*/
|
||||||
|
public QueryRescorer(QueryBuilder builder) {
|
||||||
|
super(NAME);
|
||||||
|
this.queryBuilder = builder;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Sets the original query weight for rescoring. The default is <tt>1.0</tt>
|
||||||
|
*/
|
||||||
|
public QueryRescorer setQueryWeight(float queryWeight) {
|
||||||
|
this.queryWeight = queryWeight;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the original query weight for rescoring. The default is <tt>1.0</tt>
|
||||||
|
*/
|
||||||
|
public QueryRescorer setRescoreQueryWeight(float rescoreQueryWeight) {
|
||||||
|
this.rescoreQueryWeight = rescoreQueryWeight;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
|
builder.field("rescore_query", queryBuilder);
|
||||||
|
if (queryWeight != null) {
|
||||||
|
builder.field("query_weight", queryWeight);
|
||||||
|
}
|
||||||
|
if (rescoreQueryWeight != null) {
|
||||||
|
builder.field("rescore_query_weight", rescoreQueryWeight);
|
||||||
|
}
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,69 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.search.rescore;
|
||||||
|
|
||||||
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
import org.elasticsearch.ElasticSearchParseException;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.index.query.ParsedQuery;
|
||||||
|
import org.elasticsearch.search.SearchParseElement;
|
||||||
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class RescoreParseElement implements SearchParseElement {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parse(XContentParser parser, SearchContext context) throws Exception {
|
||||||
|
String fieldName = null;
|
||||||
|
RescoreSearchContext rescoreContext = null;
|
||||||
|
Integer windowSize = null;
|
||||||
|
XContentParser.Token token;
|
||||||
|
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||||
|
if (token == XContentParser.Token.FIELD_NAME) {
|
||||||
|
fieldName = parser.currentName();
|
||||||
|
if (QueryRescorer.NAME.equals(fieldName)) {
|
||||||
|
// we only have one at this point
|
||||||
|
Rescorer rescorer = QueryRescorer.INSTANCE;
|
||||||
|
token = parser.nextToken();
|
||||||
|
if (token != XContentParser.Token.START_OBJECT) {
|
||||||
|
throw new ElasticSearchParseException("rescore type malformed, must start with start_object");
|
||||||
|
}
|
||||||
|
rescoreContext = rescorer.parse(parser, context);
|
||||||
|
}
|
||||||
|
} else if (token.isValue()) {
|
||||||
|
if ("window_size".equals(fieldName)) {
|
||||||
|
windowSize = parser.intValue();
|
||||||
|
} else {
|
||||||
|
throw new ElasticSearchIllegalArgumentException("rescore doesn't support [" + fieldName + "]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rescoreContext == null) {
|
||||||
|
throw new ElasticSearchIllegalArgumentException("missing rescore type");
|
||||||
|
}
|
||||||
|
if (windowSize != null) {
|
||||||
|
rescoreContext.setWindowSize(windowSize.intValue());
|
||||||
|
}
|
||||||
|
context.rescore(rescoreContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.search.rescore;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.elasticsearch.ElasticSearchException;
|
||||||
|
import org.elasticsearch.common.component.AbstractComponent;
|
||||||
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.search.SearchParseElement;
|
||||||
|
import org.elasticsearch.search.SearchPhase;
|
||||||
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
public class RescorePhase extends AbstractComponent implements SearchPhase {
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public RescorePhase(Settings settings) {
|
||||||
|
super(settings);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, ? extends SearchParseElement> parseElements() {
|
||||||
|
ImmutableMap.Builder<String, SearchParseElement> parseElements = ImmutableMap.builder();
|
||||||
|
parseElements.put("rescore", new RescoreParseElement());
|
||||||
|
return parseElements.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void preProcess(SearchContext context) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void execute(SearchContext context) throws ElasticSearchException {
|
||||||
|
final RescoreSearchContext ctx = context.rescore();
|
||||||
|
final Rescorer rescorer = ctx.rescorer();
|
||||||
|
try {
|
||||||
|
rescorer.rescore(context.queryResult().topDocs(), context, ctx);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ElasticSearchException("Rescore Phase Failed", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.search.rescore;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
public class RescoreSearchContext {
|
||||||
|
|
||||||
|
private int windowSize;
|
||||||
|
|
||||||
|
private final String type;
|
||||||
|
|
||||||
|
private final Rescorer rescorer;
|
||||||
|
|
||||||
|
public RescoreSearchContext(String type, int windowSize, Rescorer rescorer) {
|
||||||
|
super();
|
||||||
|
this.type = type;
|
||||||
|
this.windowSize = windowSize;
|
||||||
|
this.rescorer = rescorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Rescorer rescorer() {
|
||||||
|
return rescorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getType() {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setWindowSize(int windowSize) {
|
||||||
|
this.windowSize = windowSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int window() {
|
||||||
|
return windowSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,90 @@
|
||||||
|
package org.elasticsearch.search.rescore;
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.elasticsearch.action.search.SearchType;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A query rescorer interface used to re-rank the Top-K results of a previously
|
||||||
|
* executed search.
|
||||||
|
*/
|
||||||
|
public interface Rescorer {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the name of this rescorer
|
||||||
|
*/
|
||||||
|
public String name();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Modifies the result of the previously executed search ({@link TopDocs})
|
||||||
|
* in place based on the given {@link RescoreSearchContext}.
|
||||||
|
*
|
||||||
|
* @param topDocs the result of the previously exectued search
|
||||||
|
* @param context the current {@link SearchContext}. This will never be <code>null</code>.
|
||||||
|
* @param rescoreContext the {@link RescoreSearchContext}. This will never be <code>null</code>
|
||||||
|
* @throws IOException if an {@link IOException} occurs during rescoring
|
||||||
|
*/
|
||||||
|
public void rescore(TopDocs topDocs, SearchContext context, RescoreSearchContext rescoreContext) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes an {@link Explanation} phase on the rescorer.
|
||||||
|
* @param topLevelDocId the global / top-level document ID to explain
|
||||||
|
* @param context the current {@link SearchContext}
|
||||||
|
* @param rescoreContext TODO
|
||||||
|
* @return the explain for the given top level document ID.
|
||||||
|
* @throws IOException if an {@link IOException} occurs
|
||||||
|
*/
|
||||||
|
public Explanation explain(int topLevelDocId, SearchContext context, RescoreSearchContext rescoreContext) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the {@link RescoreSearchContext} for this impelementation
|
||||||
|
* @param parser the parser to read the context from
|
||||||
|
* @param context the current search context
|
||||||
|
* @return the parsed {@link RescoreSearchContext}
|
||||||
|
* @throws IOException if an {@link IOException} occurs while parsing the context
|
||||||
|
*/
|
||||||
|
public RescoreSearchContext parse(XContentParser parser, SearchContext context) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts all terms needed to exectue this {@link Rescorer}. This method
|
||||||
|
* is executed in a distributed frequency collection roundtrip for
|
||||||
|
* {@link SearchType#DFS_QUERY_AND_FETCH} and
|
||||||
|
* {@link SearchType#DFS_QUERY_THEN_FETCH}
|
||||||
|
*/
|
||||||
|
public void extractTerms(SearchContext context, RescoreSearchContext rescoreContext, Set<Term> termsSet);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: At this point we only have one implemenation which modifies the
|
||||||
|
* TopDocs given. Future implemenations might return actual resutls that
|
||||||
|
* contain information about the rescore context. For example a pair wise
|
||||||
|
* reranker might return the feature vector for the top N window in order to
|
||||||
|
* merge results on the callers side. For now we don't have a return type at
|
||||||
|
* all since something like this requires a more general refactoring how
|
||||||
|
* documents are merged since in such a case we don't really have a score
|
||||||
|
* per document rather a "X is more relevant than Y" relation
|
||||||
|
*/
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,22 @@
|
||||||
package org.elasticsearch.search.scan;
|
package org.elasticsearch.search.scan;
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
// LUCENE WATCH - if we use the test-framework we can trash this class
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Converts numbers to english strings for testing.
 *
 * Output format: every group is followed by a trailing space (for example
 * {@code "twenty-one "}), scale words are followed by {@code ", "} (for
 * example {@code "one thousand, "}), and zero is rendered as {@code "zero"}
 * without a trailing space.
 *
 * @lucene.internal
 */
public final class English {

    private static final long QUINTILLION = 1000000000000000000L;

    // Scale thresholds from largest to smallest, paired with SCALE_WORDS.
    private static final long[] SCALE_VALUES = {
        QUINTILLION,
        1000000000000000L,
        1000000000000L,
        1000000000L,
        1000000L,
        1000L
    };

    private static final String[] SCALE_WORDS = {
        "quintillion, ", "quadrillion, ", "trillion, ", "billion, ", "million, ", "thousand, "
    };

    // Indexed by the tens digit; slots 0 and 1 are never used.
    private static final String[] TENS = {
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"
    };

    // Indexed by value 0..19; zero contributes nothing.
    private static final String[] BELOW_TWENTY = {
        "", "one ", "two ", "three ", "four ", "five ", "six ", "seven ", "eight ", "nine ",
        "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", "fifteen ", "sixteen ",
        "seventeen ", "eighteen ", "nineteen "
    };

    private English() {} // no instance

    /**
     * Returns the english rendering of {@code i}.
     */
    public static String longToEnglish(long i) {
        StringBuilder result = new StringBuilder();
        longToEnglish(i, result);
        return result.toString();
    }

    /**
     * Appends the english rendering of {@code i} to {@code result}.
     *
     * @param i      the number to render; all {@code long} values including
     *               {@link Long#MIN_VALUE} are supported
     * @param result the buffer to append to
     */
    public static void longToEnglish(long i, StringBuilder result) {
        if (i == 0) {
            result.append("zero");
            return;
        }
        if (i < 0) {
            result.append("minus ");
            if (i == Long.MIN_VALUE) {
                // -Long.MIN_VALUE overflows back to itself, so peel off the
                // quintillion group while the value is still negative and
                // negate only the (representable) remainder.
                longToEnglish(-(i / QUINTILLION), result);
                result.append(SCALE_WORDS[0]);
                i = -(i % QUINTILLION);
            } else {
                i = -i;
            }
        }
        for (int s = 0; s < SCALE_VALUES.length; s++) {
            final long scale = SCALE_VALUES[s];
            if (i >= scale) {
                longToEnglish(i / scale, result);
                result.append(SCALE_WORDS[s]);
                i = i % scale;
            }
        }
        if (i >= 100) { // hundreds
            longToEnglish(i / 100, result);
            result.append("hundred ");
            i = i % 100;
        }
        // i is below 100 from here on, so the int casts are safe
        if (i >= 20) {
            result.append(TENS[(int) (i / 10)]);
            i = i % 10;
            result.append(i == 0 ? " " : "-");
        }
        result.append(BELOW_TWENTY[(int) i]);
    }

    /**
     * Returns the english rendering of {@code i}.
     */
    public static String intToEnglish(int i) {
        StringBuilder result = new StringBuilder();
        longToEnglish(i, result);
        return result.toString();
    }

    /**
     * Appends the english rendering of {@code i} to {@code result}.
     */
    public static void intToEnglish(int i, StringBuilder result) {
        longToEnglish(i, result);
    }
}
|
|
@ -0,0 +1,352 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.test.integration.search.rescore;
|
||||||
|
|
||||||
|
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||||
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.notNullValue;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.English;
|
||||||
|
import org.elasticsearch.ElasticSearchException;
|
||||||
|
import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||||
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.action.search.SearchType;
|
||||||
|
import org.elasticsearch.client.Client;
|
||||||
|
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
|
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
|
import org.elasticsearch.index.query.MatchQueryBuilder;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
|
import org.elasticsearch.search.SearchHit;
|
||||||
|
import org.elasticsearch.search.SearchHits;
|
||||||
|
import org.elasticsearch.search.rescore.RescoreBuilder;
|
||||||
|
import org.elasticsearch.test.integration.AbstractNodesTests;
|
||||||
|
import org.testng.annotations.AfterClass;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class QueryRescorerTests extends AbstractNodesTests {
|
||||||
|
|
||||||
|
private Client client;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void createNodes() throws Exception {
|
||||||
|
startNode("node1");
|
||||||
|
client = getClient();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public void closeNodes() {
|
||||||
|
client.close();
|
||||||
|
closeAllNodes();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Client getClient() {
|
||||||
|
return client("node1");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRescorePhrase() throws Exception {
|
||||||
|
try {
|
||||||
|
client.admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
client.admin()
|
||||||
|
.indices()
|
||||||
|
.prepareCreate("test")
|
||||||
|
.addMapping(
|
||||||
|
"type1",
|
||||||
|
jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1")
|
||||||
|
.field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject())
|
||||||
|
.setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2)).execute().actionGet();
|
||||||
|
|
||||||
|
client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute()
|
||||||
|
.actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "3")
|
||||||
|
.setSource("field1", "quick huge brown", "field2", "the quick lazy huge brown fox jumps over the tree").execute()
|
||||||
|
.actionGet();
|
||||||
|
client.admin().indices().prepareRefresh("test").execute().actionGet();
|
||||||
|
|
||||||
|
SearchResponse searchResponse = client.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setRescorer(RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "quick brown").slop(2).boost(4.0f)))
|
||||||
|
.setRescoreWindow(5).execute().actionGet();
|
||||||
|
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(3l));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("3"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("2"));
|
||||||
|
|
||||||
|
searchResponse = client.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setRescorer(RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "the quick brown").slop(3)))
|
||||||
|
.setRescoreWindow(5).execute().actionGet();
|
||||||
|
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(3l));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
|
||||||
|
|
||||||
|
searchResponse = client.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setRescorer(RescoreBuilder.queryRescorer((QueryBuilders.matchPhraseQuery("field1", "the quick brown"))))
|
||||||
|
.setRescoreWindow(5).execute().actionGet();
|
||||||
|
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(3l));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMoreDocs() throws Exception {
|
||||||
|
try {
|
||||||
|
client.admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
Builder builder = ImmutableSettings.builder();
|
||||||
|
builder.put("index.analysis.analyzer.synonym.tokenizer", "whitespace");
|
||||||
|
builder.putArray("index.analysis.analyzer.synonym.filter", "synonym", "lowercase");
|
||||||
|
builder.put("index.analysis.filter.synonym.type", "synonym");
|
||||||
|
builder.putArray("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street");
|
||||||
|
|
||||||
|
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type2").startObject("properties")
|
||||||
|
.startObject("field1").field("type", "string").field("index_analyzer", "whitespace").field("search_analyzer", "synonym")
|
||||||
|
.endObject().endObject().endObject().endObject();
|
||||||
|
|
||||||
|
client.admin().indices().prepareCreate("test").addMapping("type1", mapping).setSettings(builder.put("index.number_of_shards", 1))
|
||||||
|
.execute().actionGet();
|
||||||
|
|
||||||
|
client.prepareIndex("test", "type1", "1").setSource("field1", "massachusetts avenue boston massachusetts").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "2").setSource("field1", "lexington avenue boston massachusetts").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "3").setSource("field1", "boston avenue lexington massachusetts").execute().actionGet();
|
||||||
|
client.admin().indices().prepareRefresh("test").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "4").setSource("field1", "boston road lexington massachusetts").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "5").setSource("field1", "lexington street lexington massachusetts").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "6").setSource("field1", "massachusetts avenue lexington massachusetts").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "7").setSource("field1", "bosten street san franciso california").execute().actionGet();
|
||||||
|
client.admin().indices().prepareRefresh("test").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "8").setSource("field1", "hollywood boulevard los angeles california").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "9").setSource("field1", "1st street boston massachussetts").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "10").setSource("field1", "1st street boston massachusetts").execute().actionGet();
|
||||||
|
client.admin().indices().prepareRefresh("test").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "11").setSource("field1", "2st street boston massachusetts").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "12").setSource("field1", "3st street boston massachusetts").execute().actionGet();
|
||||||
|
client.admin().indices().prepareRefresh("test").execute().actionGet();
|
||||||
|
SearchResponse searchResponse = client
|
||||||
|
.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setFrom(0)
|
||||||
|
.setSize(5)
|
||||||
|
.setRescorer(
|
||||||
|
RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "lexington avenue massachusetts").slop(3))
|
||||||
|
.setQueryWeight(0.6f).setRescoreQueryWeight(2.0f)).setRescoreWindow(20).execute().actionGet();
|
||||||
|
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(9l));
|
||||||
|
assertThat(searchResponse.hits().hits().length, equalTo(5));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("6"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
|
||||||
|
|
||||||
|
|
||||||
|
searchResponse = client
|
||||||
|
.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setFrom(0)
|
||||||
|
.setSize(5)
|
||||||
|
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
|
||||||
|
.setRescorer(
|
||||||
|
RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "lexington avenue massachusetts").slop(3))
|
||||||
|
.setQueryWeight(0.6f).setRescoreQueryWeight(2.0f)).setRescoreWindow(20).execute().actionGet();
|
||||||
|
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(9l));
|
||||||
|
assertThat(searchResponse.hits().hits().length, equalTo(5));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("6"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final void assertEquivalent(SearchResponse plain, SearchResponse rescored) {
|
||||||
|
SearchHits leftHits = plain.getHits();
|
||||||
|
SearchHits rightHits = rescored.getHits();
|
||||||
|
assertThat(leftHits.getTotalHits(), equalTo(rightHits.getTotalHits()));
|
||||||
|
assertThat(leftHits.getHits().length, equalTo(rightHits.getHits().length));
|
||||||
|
SearchHit[] hits = leftHits.getHits();
|
||||||
|
for (int i = 0; i < hits.length; i++) {
|
||||||
|
assertThat(hits[i].getId(), equalTo(rightHits.getHits()[i].getId()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final void assertEquivalentOrSubstringMatch(String query, SearchResponse plain, SearchResponse rescored) {
|
||||||
|
SearchHits leftHits = plain.getHits();
|
||||||
|
SearchHits rightHits = rescored.getHits();
|
||||||
|
assertThat(leftHits.getTotalHits(), equalTo(rightHits.getTotalHits()));
|
||||||
|
assertThat(leftHits.getHits().length, equalTo(rightHits.getHits().length));
|
||||||
|
SearchHit[] hits = leftHits.getHits();
|
||||||
|
SearchHit[] otherHits = rightHits.getHits();
|
||||||
|
if (!hits[0].getId().equals(otherHits[0].getId())) {
|
||||||
|
assertThat(((String) otherHits[0].sourceAsMap().get("field1")).contains(query), equalTo(true));
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < hits.length; i++) {
|
||||||
|
assertThat(query, hits[i].getId(), equalTo(rightHits.getHits()[i].getId()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEquivalence() throws Exception {
|
||||||
|
try {
|
||||||
|
client.admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
client.admin()
|
||||||
|
.indices()
|
||||||
|
.prepareCreate("test")
|
||||||
|
.addMapping(
|
||||||
|
"type1",
|
||||||
|
jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1")
|
||||||
|
.field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject())
|
||||||
|
.setSettings(ImmutableSettings.settingsBuilder()).execute().actionGet();
|
||||||
|
int numDocs = 1000;
|
||||||
|
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
client.prepareIndex("test", "type1", String.valueOf(i)).setSource("field1", English.intToEnglish(i)).execute().actionGet();
|
||||||
|
}
|
||||||
|
|
||||||
|
client.admin().indices().prepareRefresh("test").execute().actionGet();
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
String intToEnglish = English.intToEnglish(i);
|
||||||
|
String query = intToEnglish.split(" ")[0];
|
||||||
|
SearchResponse rescored = client
|
||||||
|
.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setFrom(0)
|
||||||
|
.setSize(10)
|
||||||
|
.setRescorer(
|
||||||
|
RescoreBuilder
|
||||||
|
.queryRescorer(
|
||||||
|
QueryBuilders
|
||||||
|
.constantScoreQuery(QueryBuilders.matchPhraseQuery("field1", intToEnglish).slop(3)))
|
||||||
|
.setQueryWeight(1.0f)
|
||||||
|
.setRescoreQueryWeight(0.0f)) // no weigth - so we basically use the same score as the actual query
|
||||||
|
.setRescoreWindow(50).execute().actionGet();
|
||||||
|
|
||||||
|
|
||||||
|
SearchResponse plain = client.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR)).setFrom(0).setSize(10)
|
||||||
|
.execute().actionGet();
|
||||||
|
// check equivalence
|
||||||
|
assertEquivalent(plain, rescored);
|
||||||
|
|
||||||
|
rescored = client
|
||||||
|
.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setFrom(0)
|
||||||
|
.setSize(10)
|
||||||
|
.setRescorer(
|
||||||
|
RescoreBuilder
|
||||||
|
.queryRescorer(
|
||||||
|
QueryBuilders
|
||||||
|
.constantScoreQuery(QueryBuilders.matchPhraseQuery("field1", "not in the index").slop(3)))
|
||||||
|
.setQueryWeight(1.0f)
|
||||||
|
.setRescoreQueryWeight(1.0f))
|
||||||
|
.setRescoreWindow(50).execute().actionGet();
|
||||||
|
// check equivalence
|
||||||
|
assertEquivalent(plain, rescored);
|
||||||
|
|
||||||
|
rescored = client
|
||||||
|
.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setFrom(0)
|
||||||
|
.setSize(10)
|
||||||
|
.setRescorer(
|
||||||
|
RescoreBuilder
|
||||||
|
.queryRescorer(
|
||||||
|
QueryBuilders.matchPhraseQuery("field1", intToEnglish).slop(0))
|
||||||
|
.setQueryWeight(1.0f).setRescoreQueryWeight(1.0f)).setRescoreWindow(100).execute().actionGet();
|
||||||
|
// check equivalence or if the first match differs we check if the phrase is a substring of the top doc
|
||||||
|
assertEquivalentOrSubstringMatch(intToEnglish, plain, rescored);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExplain() throws Exception {
|
||||||
|
try {
|
||||||
|
client.admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
client.admin()
|
||||||
|
.indices()
|
||||||
|
.prepareCreate("test")
|
||||||
|
.addMapping(
|
||||||
|
"type1",
|
||||||
|
jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1")
|
||||||
|
.field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject())
|
||||||
|
.setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2)).execute().actionGet();
|
||||||
|
|
||||||
|
client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute()
|
||||||
|
.actionGet();
|
||||||
|
client.prepareIndex("test", "type1", "3")
|
||||||
|
.setSource("field1", "quick huge brown", "field2", "the quick lazy huge brown fox jumps over the tree").execute()
|
||||||
|
.actionGet();
|
||||||
|
client.admin().indices().prepareRefresh("test").execute().actionGet();
|
||||||
|
|
||||||
|
SearchResponse searchResponse = client
|
||||||
|
.prepareSearch()
|
||||||
|
.setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
|
||||||
|
.setRescorer(
|
||||||
|
RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "the quick brown").slop(2).boost(4.0f))
|
||||||
|
.setQueryWeight(0.5f).setRescoreQueryWeight(0.4f)).setRescoreWindow(5).setExplain(true).execute()
|
||||||
|
.actionGet();
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(3l));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation(), notNullValue());
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation().isMatch(), equalTo(true));
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation().getDetails().length, equalTo(2));
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[0].isMatch(), equalTo(true));
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[0].getDetails()[1].getValue(), equalTo(0.5f));
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].getDetails()[1].getValue(), equalTo(0.4f));
|
||||||
|
if (i == 2) {
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].isMatch(), equalTo(false));
|
||||||
|
assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].getDetails()[0].getValue(), equalTo(0.0f));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue