# Rescore Feature

The rescore feature allows rescoring a document returned by a query based
on a secondary algorithm. Rescoring is commonly used if a scoring algorithm
is too costly to be executed across the entire document set but efficient enough
to be executed on the Top-K documents scored by a faster retrieval method. Rescoring
can help to improve precision by reordering a larger Top-K window than actually
returned to the user. Typically it is executed on a window between 100 and 500 documents
while the actual result window requested by the user remains the same.

# Query Rescorer

The `query` rescorer executes a secondary query only on the Top-K results of the actual
user query and rescores the documents based on a linear combination of the user query's score
and the score of the `rescore_query`. This makes it possible to execute any exposed query as a
`rescore_query` and supports a `query_weight` as well as a `rescore_query_weight` to weight the
factors of the linear combination.

# Rescore API

The `rescore` request is defined alongside the query part in the JSON request:

```json
curl -s -XPOST 'localhost:9200/_search' -d {
  "query" : {
    "match" : {
      "field1" : {
        "query" : "the quick brown",
        "type" : "boolean",
        "operator" : "OR"
      }
    }
  },
  "rescore" : {
    "window_size" : 50,
    "query" : {
      "rescore_query" : {
        "match" : {
          "field1" : {
            "query" : "the quick brown",
            "type" : "phrase",
            "slop" : 2
          }
        }
      },
      "query_weight" : 0.7,
      "rescore_query_weight" : 1.2
    }
  }
}
```

Each `rescore` request is executed on a per-shard basis within the same roundtrip. Currently the rescore API
has only one implementation (the `query` rescorer) which modifies the result set in-place. Future developments
could include dedicated rescore results if needed by the implementation, i.e. a pair-wise reranker.
*Note:* Only regular queries are rescored; if the search type is set to `scan` or `count`, rescorers are not executed.

Closes #2640
This commit is contained in:
Simon Willnauer 2013-01-30 17:27:35 +01:00
parent c65aff7775
commit a7bbab7e87
16 changed files with 1381 additions and 12 deletions

View File

@ -44,6 +44,9 @@ import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.script.ScriptService; import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.search.internal.ShardSearchRequest;
import org.elasticsearch.search.rescore.RescorePhase;
import org.elasticsearch.search.rescore.RescoreSearchContext;
import org.elasticsearch.search.rescore.Rescorer;
import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.TransportService;
@ -105,8 +108,14 @@ public class TransportExplainAction extends TransportShardSingleOperationAction<
context.parsedQuery(parseQuery(request, indexService)); context.parsedQuery(parseQuery(request, indexService));
context.preProcess(); context.preProcess();
int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().reader.docBase; int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().reader.docBase;
Explanation explanation;
Explanation explanation = context.searcher().explain(context.query(), topLevelDocId); if (context.rescore() != null) {
RescoreSearchContext ctx = context.rescore();
Rescorer rescorer = ctx.rescorer();
explanation = rescorer.explain(topLevelDocId, context, ctx);
} else {
explanation = context.searcher().explain(context.query(), topLevelDocId);
}
if (request.fields() != null) { if (request.fields() != null) {
if (request.fields().length == 1 && "_source".equals(request.fields()[0])) { if (request.fields().length == 1 && "_source".equals(request.fields()[0])) {
request.fields(null); // Load the _source field request.fields(null); // Load the _source field

View File

@ -35,6 +35,7 @@ import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.facet.AbstractFacetBuilder; import org.elasticsearch.search.facet.AbstractFacetBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder; import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.rescore.RescoreBuilder;
import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.search.suggest.SuggestBuilder; import org.elasticsearch.search.suggest.SuggestBuilder;
@ -663,6 +664,16 @@ public class SearchRequestBuilder extends ActionRequestBuilder<SearchRequest, Se
return this; return this;
} }
public SearchRequestBuilder setRescorer(RescoreBuilder.Rescorer rescorer) {
rescoreBuilder().setRescorer(rescorer);
return this;
}
public SearchRequestBuilder setRescoreWindow(int window) {
rescoreBuilder().setWindowSize(window);
return this;
}
/** /**
* Sets the source of the request as a json string. Note, settings anything other * Sets the source of the request as a json string. Note, settings anything other
* than the search type will cause this source to be overridden, consider using * than the search type will cause this source to be overridden, consider using
@ -840,4 +851,8 @@ public class SearchRequestBuilder extends ActionRequestBuilder<SearchRequest, Se
return sourceBuilder().suggest(); return sourceBuilder().suggest();
} }
private RescoreBuilder rescoreBuilder() {
return sourceBuilder().rescore();
}
} }

View File

@ -38,6 +38,7 @@ import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.facet.AbstractFacetBuilder; import org.elasticsearch.search.facet.AbstractFacetBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder; import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.rescore.RescoreBuilder;
import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.search.sort.SortBuilders; import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.sort.SortOrder;
@ -106,6 +107,8 @@ public class SearchSourceBuilder implements ToXContent {
private SuggestBuilder suggestBuilder; private SuggestBuilder suggestBuilder;
private RescoreBuilder rescoreBuilder;
private TObjectFloatHashMap<String> indexBoost = null; private TObjectFloatHashMap<String> indexBoost = null;
private String[] stats; private String[] stats;
@ -410,6 +413,13 @@ public class SearchSourceBuilder implements ToXContent {
return suggestBuilder; return suggestBuilder;
} }
public RescoreBuilder rescore() {
if (rescoreBuilder == null) {
rescoreBuilder = new RescoreBuilder();
}
return rescoreBuilder;
}
/** /**
* Sets no fields to be loaded, resulting in only id and type to be returned per field. * Sets no fields to be loaded, resulting in only id and type to be returned per field.
*/ */
@ -723,6 +733,10 @@ public class SearchSourceBuilder implements ToXContent {
suggestBuilder.toXContent(builder, params); suggestBuilder.toXContent(builder, params);
} }
if (rescoreBuilder != null) {
rescoreBuilder.toXContent(builder, params);
}
if (stats != null) { if (stats != null) {
builder.startArray("stats"); builder.startArray("stats");
for (String stat : stats) { for (String stat : stats) {

View File

@ -33,10 +33,7 @@ import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchPhase; import org.elasticsearch.search.SearchPhase;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import java.util.HashSet;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
/** /**
* *
@ -68,6 +65,10 @@ public class DfsPhase implements SearchPhase {
THashSet<Term> termsSet = cachedTermsSet.get().get(); THashSet<Term> termsSet = cachedTermsSet.get().get();
termsSet.clear(); termsSet.clear();
context.query().extractTerms(termsSet); context.query().extractTerms(termsSet);
if (context.rescore() != null) {
context.rescore().rescorer().extractTerms(context, context.rescore(), termsSet);
}
Term[] terms = termsSet.toArray(new Term[termsSet.size()]); Term[] terms = termsSet.toArray(new Term[termsSet.size()]);
TermStatistics[] termStatistics = new TermStatistics[terms.length]; TermStatistics[] termStatistics = new TermStatistics[terms.length];
IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext(); IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext();

View File

@ -20,12 +20,16 @@
package org.elasticsearch.search.fetch.explain; package org.elasticsearch.search.fetch.explain;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException; import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.rescore.RescoreSearchContext;
import org.elasticsearch.search.rescore.Rescorer;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
@ -57,8 +61,18 @@ public class ExplainFetchSubPhase implements FetchSubPhase {
@Override @Override
public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException { public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException {
try { try {
final int topLevelDocId = hitContext.hit().docId();
Explanation explanation;
if (context.rescore() != null) {
RescoreSearchContext ctx = context.rescore();
Rescorer rescorer = ctx.rescorer();
explanation = rescorer.explain(topLevelDocId, context, ctx);
} else {
explanation = context.searcher().explain(context.query(), topLevelDocId);
}
// we use the top level doc id, since we work with the top level searcher // we use the top level doc id, since we work with the top level searcher
hitContext.hit().explanation(context.searcher().explain(context.query(), hitContext.hit().docId())); hitContext.hit().explanation(explanation);
} catch (IOException e) { } catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to explain doc [" + hitContext.hit().type() + "#" + hitContext.hit().id() + "]", e); throw new FetchPhaseExecutionException(context, "Failed to explain doc [" + hitContext.hit().type() + "#" + hitContext.hit().id() + "]", e);
} }

View File

@ -58,6 +58,7 @@ import org.elasticsearch.search.fetch.script.ScriptFieldsContext;
import org.elasticsearch.search.highlight.SearchContextHighlight; import org.elasticsearch.search.highlight.SearchContextHighlight;
import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.search.query.QuerySearchResult;
import org.elasticsearch.search.rescore.RescoreSearchContext;
import org.elasticsearch.search.scan.ScanContext; import org.elasticsearch.search.scan.ScanContext;
import org.elasticsearch.search.suggest.SuggestionSearchContext; import org.elasticsearch.search.suggest.SuggestionSearchContext;
@ -167,6 +168,8 @@ public class SearchContext implements Releasable {
private SuggestionSearchContext suggest; private SuggestionSearchContext suggest;
private RescoreSearchContext rescore;
private SearchLookup searchLookup; private SearchLookup searchLookup;
private boolean queryRewritten; private boolean queryRewritten;
@ -177,6 +180,7 @@ public class SearchContext implements Releasable {
private List<Rewrite> rewrites = null; private List<Rewrite> rewrites = null;
public SearchContext(long id, ShardSearchRequest request, SearchShardTarget shardTarget, public SearchContext(long id, ShardSearchRequest request, SearchShardTarget shardTarget,
Engine.Searcher engineSearcher, IndexService indexService, IndexShard indexShard, ScriptService scriptService) { Engine.Searcher engineSearcher, IndexService indexService, IndexShard indexShard, ScriptService scriptService) {
this.id = id; this.id = id;
@ -314,6 +318,14 @@ public class SearchContext implements Releasable {
this.suggest = suggest; this.suggest = suggest;
} }
public RescoreSearchContext rescore() {
return this.rescore;
}
public void rescore(RescoreSearchContext rescore) {
this.rescore = rescore;
}
public boolean hasScriptFields() { public boolean hasScriptFields() {
return scriptFields != null; return scriptFields != null;
} }

View File

@ -31,6 +31,8 @@ import org.elasticsearch.search.SearchPhase;
import org.elasticsearch.search.facet.FacetPhase; import org.elasticsearch.search.facet.FacetPhase;
import org.elasticsearch.search.internal.ContextIndexSearcher; import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.rescore.RescorePhase;
import org.elasticsearch.search.rescore.RescoreSearchContext;
import org.elasticsearch.search.sort.SortParseElement; import org.elasticsearch.search.sort.SortParseElement;
import org.elasticsearch.search.sort.TrackScoresParseElement; import org.elasticsearch.search.sort.TrackScoresParseElement;
import org.elasticsearch.search.suggest.SuggestPhase; import org.elasticsearch.search.suggest.SuggestPhase;
@ -45,11 +47,13 @@ public class QueryPhase implements SearchPhase {
private final FacetPhase facetPhase; private final FacetPhase facetPhase;
private final SuggestPhase suggestPhase; private final SuggestPhase suggestPhase;
private RescorePhase rescorePhase;
@Inject @Inject
public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase) { public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase, RescorePhase rescorePhase) {
this.facetPhase = facetPhase; this.facetPhase = facetPhase;
this.suggestPhase = suggestPhase; this.suggestPhase = suggestPhase;
this.rescorePhase = rescorePhase;
} }
@Override @Override
@ -71,7 +75,8 @@ public class QueryPhase implements SearchPhase {
.put("minScore", new MinScoreParseElement()) .put("minScore", new MinScoreParseElement())
.put("timeout", new TimeoutParseElement()) .put("timeout", new TimeoutParseElement())
.putAll(facetPhase.parseElements()) .putAll(facetPhase.parseElements())
.putAll(suggestPhase.parseElements()); .putAll(suggestPhase.parseElements())
.putAll(rescorePhase.parseElements());
return parseElements.build(); return parseElements.build();
} }
@ -99,6 +104,7 @@ public class QueryPhase implements SearchPhase {
} }
searchContext.searcher().inStage(ContextIndexSearcher.Stage.MAIN_QUERY); searchContext.searcher().inStage(ContextIndexSearcher.Stage.MAIN_QUERY);
boolean rescore = false;
try { try {
searchContext.queryResult().from(searchContext.from()); searchContext.queryResult().from(searchContext.from());
searchContext.queryResult().size(searchContext.size()); searchContext.queryResult().size(searchContext.size());
@ -106,7 +112,7 @@ public class QueryPhase implements SearchPhase {
Query query = searchContext.query(); Query query = searchContext.query();
TopDocs topDocs; TopDocs topDocs;
int numDocs = searchContext.from() + searchContext.size(); int numDocs = searchContext.from() + searchContext.size() ;
if (numDocs == 0) { if (numDocs == 0) {
// if 0 was asked, change it to 1 since 0 is not allowed // if 0 was asked, change it to 1 since 0 is not allowed
numDocs = 1; numDocs = 1;
@ -122,6 +128,10 @@ public class QueryPhase implements SearchPhase {
topDocs = searchContext.searcher().search(query, null, numDocs, searchContext.sort(), topDocs = searchContext.searcher().search(query, null, numDocs, searchContext.sort(),
searchContext.trackScores(), searchContext.trackScores()); searchContext.trackScores(), searchContext.trackScores());
} else { } else {
if (searchContext.rescore() != null) {
rescore = true;
numDocs = Math.max(searchContext.rescore().window(), numDocs);
}
topDocs = searchContext.searcher().search(query, numDocs); topDocs = searchContext.searcher().search(query, numDocs);
} }
searchContext.queryResult().topDocs(topDocs); searchContext.queryResult().topDocs(topDocs);
@ -130,7 +140,9 @@ public class QueryPhase implements SearchPhase {
} finally { } finally {
searchContext.searcher().finishStage(ContextIndexSearcher.Stage.MAIN_QUERY); searchContext.searcher().finishStage(ContextIndexSearcher.Stage.MAIN_QUERY);
} }
if (rescore) { // only if we do a regular search
rescorePhase.execute(searchContext);
}
suggestPhase.execute(searchContext); suggestPhase.execute(searchContext);
facetPhase.execute(searchContext); facetPhase.execute(searchContext);
} }

View File

@ -0,0 +1,324 @@
package org.elasticsearch.search.rescore;
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.SorterTemplate;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.SearchContext;
/**
 * Rescorer that executes a secondary query over the Top-K documents of the original
 * query and scores each document with a weighted linear combination:
 * {@code queryWeight * primaryScore + rescoreQueryWeight * secondaryScore}.
 * Documents inside the rescore window that do not match the rescore query keep only
 * their weighted primary score.
 */
final class QueryRescorer implements Rescorer {

    // Stateless, so a single shared instance is sufficient.
    public static final Rescorer INSTANCE = new QueryRescorer();
    // Key of this rescorer inside the "rescore" element of a search request.
    public static final String NAME = "query";

    @Override
    public String name() {
        return NAME;
    }

    /**
     * Runs the rescore query, restricted via {@link TopDocsFilter} to exactly the documents
     * already collected for the main query, and merges the resulting scores back into the
     * query result. The {@code topDocs} parameter is only used as a local scratch variable;
     * the documents to rescore are read from {@code context.queryResult().topDocs()}.
     */
    @Override
    public void rescore(TopDocs topDocs, SearchContext context, RescoreSearchContext rescoreContext) throws IOException {
        assert rescoreContext != null;
        QueryRescoreContext rescore = ((QueryRescoreContext) rescoreContext);
        TopDocs queryTopDocs = context.queryResult().topDocs();
        if (queryTopDocs == null || queryTopDocs.totalHits == 0 || queryTopDocs.scoreDocs.length == 0) {
            return; // nothing to rescore
        }
        ContextIndexSearcher searcher = context.searcher();
        // Restrict the rescore query to the previously collected documents only.
        topDocs = searcher.search(rescore.query(), new TopDocsFilter(queryTopDocs), queryTopDocs.scoreDocs.length);
        context.queryResult().topDocs(merge(queryTopDocs, topDocs, rescore));
    }

    /**
     * Explains the combined score of {@code topLevelDocId}: a "sum of" the weighted primary
     * explanation and the weighted rescore-query explanation, or just the weighted primary
     * explanation when the document does not match the rescore query.
     */
    @Override
    public Explanation explain(int topLevelDocId, SearchContext context, RescoreSearchContext rescoreContext) throws IOException {
        // Use the passed-in rescore context. All visible call sites pass context.rescore()
        // anyway, but relying on the parameter keeps this method self-contained.
        QueryRescoreContext rescore = ((QueryRescoreContext) rescoreContext);
        ContextIndexSearcher searcher = context.searcher();
        Explanation primaryExplain = searcher.explain(context.query(), topLevelDocId);
        if (primaryExplain == null) {
            // this should not happen but just in case
            return new ComplexExplanation(false, 0.0f, "nothing matched");
        }
        Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
        float primaryWeight = rescore.queryWeight();
        ComplexExplanation prim = new ComplexExplanation(primaryExplain.isMatch(),
                primaryExplain.getValue() * primaryWeight,
                "product of:");
        prim.addDetail(primaryExplain);
        prim.addDetail(new Explanation(primaryWeight, "primaryWeight"));
        if (rescoreExplain != null) {
            ComplexExplanation sumExpl = new ComplexExplanation();
            sumExpl.setDescription("sum of:");
            sumExpl.addDetail(prim);
            sumExpl.setMatch(prim.isMatch());
            float secondaryWeight = rescore.rescoreQueryWeight();
            ComplexExplanation sec = new ComplexExplanation(rescoreExplain.isMatch(),
                    rescoreExplain.getValue() * secondaryWeight,
                    "product of:");
            sec.addDetail(rescoreExplain);
            sec.addDetail(new Explanation(secondaryWeight, "secondaryWeight"));
            sumExpl.addDetail(sec);
            sumExpl.setValue(prim.getValue() + sec.getValue());
            return sumExpl;
        } else {
            return prim;
        }
    }

    /**
     * Parses the body of the {@code query} rescore element: {@code rescore_query} (any
     * exposed query), {@code query_weight} and {@code rescore_query_weight}.
     */
    @Override
    public RescoreSearchContext parse(XContentParser parser, SearchContext context) throws IOException {
        Token token;
        String fieldName = null;
        QueryRescoreContext rescoreContext = new QueryRescoreContext(this);
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                fieldName = parser.currentName();
                if ("rescore_query".equals(fieldName)) {
                    ParsedQuery parsedQuery = context.queryParserService().parse(parser);
                    rescoreContext.setParsedQuery(parsedQuery);
                }
            } else if (token.isValue()) {
                if ("query_weight".equals(fieldName)) {
                    rescoreContext.setQueryWeight(parser.floatValue());
                } else if ("rescore_query_weight".equals(fieldName)) {
                    rescoreContext.setRescoreQueryWeight(parser.floatValue());
                } else {
                    throw new ElasticSearchIllegalArgumentException("rescore doesn't support [" + fieldName + "]");
                }
            }
        }
        return rescoreContext;
    }

    /**
     * Per-request state of the query rescorer: the parsed rescore query and the two
     * weights of the linear combination (both default to 1.0).
     */
    static class QueryRescoreContext extends RescoreSearchContext {

        public QueryRescoreContext(QueryRescorer rescorer) {
            super(NAME, 10, rescorer); // default rescore window size of 10
        }

        private ParsedQuery parsedQuery;
        private float queryWeight = 1.0f;
        private float rescoreQueryWeight = 1.0f;

        public void setParsedQuery(ParsedQuery parsedQuery) {
            this.parsedQuery = parsedQuery;
        }

        public Query query() {
            return parsedQuery.query();
        }

        public float queryWeight() {
            return queryWeight;
        }

        public float rescoreQueryWeight() {
            return rescoreQueryWeight;
        }

        public void setRescoreQueryWeight(float rescoreQueryWeight) {
            this.rescoreQueryWeight = rescoreQueryWeight;
        }

        public void setQueryWeight(float queryWeight) {
            this.queryWeight = queryWeight;
        }
    }

    /**
     * Combines primary and rescored scores in place and re-sorts by the combined score.
     * Both arrays are first sorted with the same doc-id comparator so they can be walked in
     * lock-step; the rescored docs are a subset of the primary docs because the rescore
     * query was filtered to exactly those documents.
     */
    private TopDocs merge(TopDocs primary, TopDocs secondary, QueryRescoreContext context) {
        DocIdSorter sorter = new DocIdSorter();
        sorter.array = primary.scoreDocs;
        sorter.mergeSort(0, sorter.array.length-1);
        ScoreDoc[] primaryDocs = sorter.array;
        sorter.array = secondary.scoreDocs;
        sorter.mergeSort(0, sorter.array.length-1);
        ScoreDoc[] secondaryDocs = sorter.array;
        int j = 0;
        float primaryWeight = context.queryWeight();
        float secondaryWeight = context.rescoreQueryWeight();
        for (int i = 0; i < primaryDocs.length; i++) {
            if (j < secondaryDocs.length && primaryDocs[i].doc == secondaryDocs[j].doc) {
                primaryDocs[i].score = (primaryDocs[i].score * primaryWeight) + (secondaryDocs[j++].score * secondaryWeight);
            } else {
                // Not matched by the rescore query: still apply the primary weight.
                // (Previously the loop stopped once all rescored docs were consumed, leaving
                // trailing docs with their raw, unweighted score — inconsistent with
                // explain(), which always applies the primary weight.)
                primaryDocs[i].score *= primaryWeight;
            }
        }
        ScoreSorter scoreSorter = new ScoreSorter();
        scoreSorter.array = primaryDocs;
        scoreSorter.mergeSort(0, primaryDocs.length-1);
        primary.setMaxScore(primaryDocs[0].score);
        return primary;
    }

    /**
     * In-place sorter ordering {@link ScoreDoc}s by doc id via {@link #compareDocId}.
     * The direction does not matter for {@link #merge} as long as both arrays use the
     * same comparator.
     */
    private static final class DocIdSorter extends SorterTemplate {
        private ScoreDoc[] array;
        private ScoreDoc pivot;

        @Override
        protected void swap(int i, int j) {
            ScoreDoc scoreDoc = array[i];
            array[i] = array[j];
            array[j] = scoreDoc;
        }

        @Override
        protected int compare(int i, int j) {
            return compareDocId(array[i], array[j]);
        }

        @Override
        protected void setPivot(int i) {
            pivot = array[i];
        }

        @Override
        protected int comparePivot(int j) {
            return compareDocId(pivot, array[j]);
        }
    }

    // Returns 1 when left.doc < right.doc, i.e. larger doc ids sort first.
    // NOTE(review): this also serves as the score tie-breaker in ScoreSorter, which thus
    // orders ties by descending doc id — verify this is the intended tie-break order.
    private static final int compareDocId(ScoreDoc left, ScoreDoc right) {
        if (left.doc < right.doc) {
            return 1;
        } else if (left.doc == right.doc) {
            return 0;
        }
        return -1;
    }

    /**
     * In-place sorter ordering {@link ScoreDoc}s by descending score, breaking ties with
     * {@link #compareDocId}.
     */
    private static final class ScoreSorter extends SorterTemplate {
        private ScoreDoc[] array;
        private ScoreDoc pivot;

        @Override
        protected void swap(int i, int j) {
            ScoreDoc scoreDoc = array[i];
            array[i] = array[j];
            array[j] = scoreDoc;
        }

        @Override
        protected int compare(int i, int j) {
            int cmp = Float.compare(array[j].score, array[i].score);
            return cmp == 0 ? compareDocId(array[i], array[j]) : cmp;
        }

        @Override
        protected void setPivot(int i) {
            pivot = array[i];
        }

        @Override
        protected int comparePivot(int j) {
            int cmp = Float.compare(array[j].score, pivot.score);
            return cmp == 0 ? compareDocId(pivot, array[j]) : cmp;
        }
    }

    /**
     * Filter matching exactly the documents of a given {@link TopDocs} instance, used to
     * restrict the rescore query to the hits of the original query. Note that
     * {@code acceptDocs} is not consulted: the docs already matched the main query in the
     * same search, so they are assumed live.
     */
    private static final class TopDocsFilter extends Filter {

        private final int[] docIds;

        public TopDocsFilter(TopDocs topDocs) {
            this.docIds = new int[topDocs.scoreDocs.length];
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (int i = 0; i < scoreDocs.length; i++) {
                docIds[i] = scoreDocs[i].doc;
            }
            // Sorted so that each per-segment slice can be located with binary search.
            Arrays.sort(docIds);
        }

        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
            final int docBase = context.docBase;
            int limit = docBase + context.reader().maxDoc();
            // Find the slice of (top-level) doc ids that falls into this segment.
            int offset = Arrays.binarySearch(docIds, docBase);
            if (offset < 0) {
                offset = (-offset)-1;
            }
            int end = Arrays.binarySearch(docIds, limit);
            if (end < 0) {
                end = (-end)-1;
            }
            final int start = offset;
            final int stop = end;
            return new DocIdSet() {

                @Override
                public DocIdSetIterator iterator() throws IOException {
                    return new DocIdSetIterator() {

                        private int current = start;
                        private int docId = NO_MORE_DOCS;

                        @Override
                        public int nextDoc() throws IOException {
                            if (current < stop) {
                                // Convert top-level ids back to segment-local ids.
                                return docId = docIds[current++]-docBase;
                            }
                            return docId = NO_MORE_DOCS;
                        }

                        @Override
                        public int docID() {
                            return docId;
                        }

                        @Override
                        public int advance(int target) throws IOException {
                            if (target == NO_MORE_DOCS) {
                                current = stop;
                                return docId = NO_MORE_DOCS;
                            }
                            // Linear skip is fine: the id list per segment is tiny.
                            while(nextDoc() < target) {}
                            return docId;
                        }
                    };
                }
            };
        }
    }

    /**
     * Adds the terms of the rescore query to {@code termsSet} so the DFS phase can collect
     * distributed term statistics for them as well.
     */
    @Override
    public void extractTerms(SearchContext context, RescoreSearchContext rescoreContext, Set<Term> termsSet) {
        ((QueryRescoreContext) rescoreContext).query().extractTerms(termsSet);
    }
}

View File

@ -0,0 +1,125 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.rescore;
import java.io.IOException;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilderException;
/**
 * Builder for the top-level {@code rescore} section of a search request. Serializes to
 * {@code "rescore" : { "window_size" : N, <rescorer name> : { ... } }}; nothing is
 * emitted unless a rescorer has been set.
 */
public class RescoreBuilder implements ToXContent {

    private Rescorer rescorer;
    // Boxed so that "not set" can be distinguished from an explicitly chosen window size.
    private Integer windowSize;

    /**
     * Creates a {@link QueryRescorer} that rescores the Top-K hits with the given query.
     */
    public static QueryRescorer queryRescorer(QueryBuilder queryBuilder) {
        return new QueryRescorer(queryBuilder);
    }

    public RescoreBuilder setRescorer(Rescorer rescorer) {
        this.rescorer = rescorer;
        return this;
    }

    public RescoreBuilder setWindowSize(int windowSize) {
        this.windowSize = windowSize;
        return this;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        if (rescorer != null) {
            builder.startObject("rescore");
            if (windowSize != null) {
                builder.field("window_size", windowSize);
            }
            rescorer.toXContent(builder, params);
            builder.endObject();
        }
        return builder;
    }

    /**
     * Base class for rescorer builders. Emits {@code "<name>" : { <rescorer body> }},
     * delegating the body to {@link #innerToXContent(XContentBuilder, Params)}.
     */
    public static abstract class Rescorer implements ToXContent {

        private String name;

        public Rescorer(String name) {
            this.name = name;
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject(name);
            builder = innerToXContent(builder, params);
            builder.endObject();
            return builder;
        }

        // Writes the rescorer-specific fields inside the already opened object.
        protected abstract XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException;
    }

    /**
     * Builder for the {@code query} rescorer: a rescore query plus the optional weights
     * of the linear score combination. Weights are only serialized when explicitly set.
     */
    public static class QueryRescorer extends Rescorer {

        private static final String NAME = "query";
        private QueryBuilder queryBuilder;
        private Float rescoreQueryWeight;
        private Float queryWeight;

        /**
         * Creates a new {@link QueryRescorer} instance
         * @param builder the query builder to build the rescore query from
         */
        public QueryRescorer(QueryBuilder builder) {
            super(NAME);
            this.queryBuilder = builder;
        }

        /**
         * Sets the original query weight for rescoring. The default is <tt>1.0</tt>
         */
        public QueryRescorer setQueryWeight(float queryWeight) {
            this.queryWeight = queryWeight;
            return this;
        }

        /**
         * Sets the rescore query weight for rescoring. The default is <tt>1.0</tt>
         */
        public QueryRescorer setRescoreQueryWeight(float rescoreQueryWeight) {
            this.rescoreQueryWeight = rescoreQueryWeight;
            return this;
        }

        @Override
        protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
            builder.field("rescore_query", queryBuilder);
            if (queryWeight != null) {
                builder.field("query_weight", queryWeight);
            }
            if (rescoreQueryWeight != null) {
                builder.field("rescore_query_weight", rescoreQueryWeight);
            }
            return builder;
        }
    }
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.rescore;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.internal.SearchContext;
/**
 * Parses the top-level {@code rescore} element of a search request and installs the
 * resulting {@link RescoreSearchContext} on the {@link SearchContext}. Supports the
 * generic {@code window_size} option plus exactly one rescorer definition (currently
 * only the {@code query} rescorer exists).
 */
public class RescoreParseElement implements SearchParseElement {

    @Override
    public void parse(XContentParser parser, SearchContext context) throws Exception {
        String fieldName = null;
        RescoreSearchContext rescoreContext = null;
        Integer windowSize = null;
        XContentParser.Token token;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                fieldName = parser.currentName();
                if (QueryRescorer.NAME.equals(fieldName)) {
                    // we only have one rescorer implementation at this point
                    Rescorer rescorer = QueryRescorer.INSTANCE;
                    token = parser.nextToken();
                    if (token != XContentParser.Token.START_OBJECT) {
                        throw new ElasticSearchParseException("rescore type malformed, must start with start_object");
                    }
                    rescoreContext = rescorer.parse(parser, context);
                }
            } else if (token == XContentParser.Token.START_OBJECT) {
                // An object under an unknown field name would otherwise be descended into and
                // its inner fields mis-reported as unsupported top-level rescore options; fail
                // with a clear message naming the unknown rescorer instead.
                throw new ElasticSearchIllegalArgumentException("rescore doesn't support rescorer [" + fieldName + "]");
            } else if (token.isValue()) {
                if ("window_size".equals(fieldName)) {
                    windowSize = parser.intValue();
                } else {
                    throw new ElasticSearchIllegalArgumentException("rescore doesn't support [" + fieldName + "]");
                }
            }
        }
        if (rescoreContext == null) {
            throw new ElasticSearchIllegalArgumentException("missing rescore type");
        }
        if (windowSize != null) {
            // window_size overrides the rescorer's default rescore window
            rescoreContext.setWindowSize(windowSize.intValue());
        }
        context.rescore(rescoreContext);
    }
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.rescore;
import java.io.IOException;
import java.util.Map;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchPhase;
import org.elasticsearch.search.internal.SearchContext;
import com.google.common.collect.ImmutableMap;
/**
*/
/**
 * Search phase that runs the registered {@link Rescorer} over the top docs
 * produced by the query phase of the current search request.
 */
public class RescorePhase extends AbstractComponent implements SearchPhase {

    @Inject
    public RescorePhase(Settings settings) {
        super(settings);
    }

    @Override
    public Map<String, ? extends SearchParseElement> parseElements() {
        // A single parse element handling the top-level "rescore" section.
        return ImmutableMap.<String, SearchParseElement>builder()
                .put("rescore", new RescoreParseElement())
                .build();
    }

    @Override
    public void preProcess(SearchContext context) {
        // nothing to prepare for rescoring
    }

    @Override
    public void execute(SearchContext context) throws ElasticSearchException {
        RescoreSearchContext rescoreCtx = context.rescore();
        try {
            rescoreCtx.rescorer().rescore(context.queryResult().topDocs(), context, rescoreCtx);
        } catch (IOException e) {
            throw new ElasticSearchException("Rescore Phase Failed", e);
        }
    }
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.rescore;
/**
*/
/**
 * Per-request state of a single rescore execution: the {@link Rescorer}
 * implementation, its type name, and the size of the Top-K window to rescore.
 */
public class RescoreSearchContext {

    private final String type;
    private final Rescorer rescorer;
    // mutable: may be overridden by the request's "window_size" after parsing
    private int windowSize;

    public RescoreSearchContext(String type, int windowSize, Rescorer rescorer) {
        this.type = type;
        this.windowSize = windowSize;
        this.rescorer = rescorer;
    }

    public Rescorer rescorer() {
        return rescorer;
    }

    public String getType() {
        return type;
    }

    public void setWindowSize(int windowSize) {
        this.windowSize = windowSize;
    }

    public int window() {
        return windowSize;
    }
}

View File

@ -0,0 +1,90 @@
package org.elasticsearch.search.rescore;
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TopDocs;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.internal.SearchContext;
/**
* A query rescorer interface used to re-rank the Top-K results of a previously
* executed search.
*/
public interface Rescorer {

    /**
     * Returns the name of this rescorer
     */
    public String name();

    /**
     * Modifies the result of the previously executed search ({@link TopDocs})
     * in place based on the given {@link RescoreSearchContext}.
     *
     * @param topDocs the result of the previously executed search
     * @param context the current {@link SearchContext}. This will never be <code>null</code>.
     * @param rescoreContext the {@link RescoreSearchContext}. This will never be <code>null</code>
     * @throws IOException if an {@link IOException} occurs during rescoring
     */
    public void rescore(TopDocs topDocs, SearchContext context, RescoreSearchContext rescoreContext) throws IOException;

    /**
     * Executes an {@link Explanation} phase on the rescorer.
     *
     * @param topLevelDocId the global / top-level document ID to explain
     * @param context the current {@link SearchContext}
     * @param rescoreContext the {@link RescoreSearchContext} holding this rescorer's parsed settings
     * @return the explanation for the given top level document ID.
     * @throws IOException if an {@link IOException} occurs
     */
    public Explanation explain(int topLevelDocId, SearchContext context, RescoreSearchContext rescoreContext) throws IOException;

    /**
     * Parses the {@link RescoreSearchContext} for this implementation
     *
     * @param parser the parser to read the context from
     * @param context the current search context
     * @return the parsed {@link RescoreSearchContext}
     * @throws IOException if an {@link IOException} occurs while parsing the context
     */
    public RescoreSearchContext parse(XContentParser parser, SearchContext context) throws IOException;

    /**
     * Extracts all terms needed to execute this {@link Rescorer}. This method
     * is executed in a distributed frequency collection roundtrip for
     * {@link SearchType#DFS_QUERY_AND_FETCH} and
     * {@link SearchType#DFS_QUERY_THEN_FETCH}
     */
    public void extractTerms(SearchContext context, RescoreSearchContext rescoreContext, Set<Term> termsSet);

    /*
     * TODO: At this point we only have one implementation which modifies the
     * TopDocs given. Future implementations might return actual results that
     * contain information about the rescore context. For example a pair wise
     * reranker might return the feature vector for the top N window in order to
     * merge results on the callers side. For now we don't have a return type at
     * all since something like this requires a more general refactoring of how
     * documents are merged since in such a case we don't really have a score
     * per document rather a "X is more relevant than Y" relation
     */
}

View File

@ -1,5 +1,22 @@
package org.elasticsearch.search.scan;
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import com.google.common.collect.Maps;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;

View File

@ -0,0 +1,188 @@
package org.apache.lucene.util;
// LUCENE WATCH - if we use the test-framework we can trash this class
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/**
* Converts numbers to english strings for testing.
* @lucene.internal
*/
/**
 * Converts numbers to english strings for testing.
 *
 * Fixes over the previous revision: the quintillion branch carried a
 * "quadrillion" comment, and the long literals used the lowercase {@code l}
 * suffix (easily misread as the digit 1) — now {@code L}. The two large
 * duplicated switches are replaced by lookup tables; output is unchanged,
 * including the trailing-space quirks of the original (e.g. "seven ").
 *
 * @lucene.internal
 */
public final class English {

    /** Names for multiples of ten, indexed by the tens digit (only 2-9 used). */
    private static final String[] TENS = {
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"
    };

    /** Names for 0-19; index 0 is empty so even tens ("twenty ") stand alone. */
    private static final String[] ONES = {
        "", "one ", "two ", "three ", "four ", "five ", "six ", "seven ", "eight ", "nine ",
        "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", "fifteen ", "sixteen ",
        "seventeen ", "eighteen ", "nineteen "
    };

    private English() {} // no instance

    /**
     * Returns the english form of the given long, e.g. {@code 21 -> "twenty-one "}.
     */
    public static String longToEnglish(long i) {
        StringBuilder result = new StringBuilder();
        longToEnglish(i, result);
        return result.toString();
    }

    /**
     * Appends the english form of {@code i} to {@code result}.
     */
    public static void longToEnglish(long i, StringBuilder result) {
        if (i == 0) {
            result.append("zero");
            return;
        }
        if (i < 0) {
            result.append("minus ");
            // NOTE: overflows for Long.MIN_VALUE, as in the original implementation
            i = -i;
        }
        // Peel off each named power of ten, largest first.
        i = appendScale(i, 1000000000000000000L, "quintillion, ", result); // quintillions
        i = appendScale(i, 1000000000000000L, "quadrillion, ", result);    // quadrillions
        i = appendScale(i, 1000000000000L, "trillion, ", result);          // trillions
        i = appendScale(i, 1000000000L, "billion, ", result);              // billions
        i = appendScale(i, 1000000L, "million, ", result);                 // millions
        i = appendScale(i, 1000L, "thousand, ", result);                   // thousands
        i = appendScale(i, 100L, "hundred ", result);                      // hundreds
        // i < 100 here, so the int cast is safe.
        if (i >= 20) {
            result.append(TENS[(int) i / 10]);
            i = i % 10;
            // "twenty " for an even ten, "twenty-one " otherwise
            result.append(i == 0 ? " " : "-");
        }
        result.append(ONES[(int) i]);
    }

    /**
     * If {@code i >= scale}, spells out the multiple of {@code scale}, appends
     * {@code name}, and returns the remainder; otherwise returns {@code i} unchanged.
     */
    private static long appendScale(long i, long scale, String name, StringBuilder result) {
        if (i >= scale) {
            longToEnglish(i / scale, result);
            result.append(name);
            i = i % scale;
        }
        return i;
    }

    /**
     * Returns the english form of the given int.
     */
    public static String intToEnglish(int i) {
        StringBuilder result = new StringBuilder();
        longToEnglish(i, result);
        return result.toString();
    }

    /**
     * Appends the english form of {@code i} to {@code result}.
     */
    public static void intToEnglish(int i, StringBuilder result) {
        longToEnglish(i, result);
    }
}

View File

@ -0,0 +1,352 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.rescore;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import org.apache.lucene.util.English;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.rescore.RescoreBuilder;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
*
*/
/**
 * Integration tests for the query rescorer: verifies reordering by a phrase
 * rescore query, weighted score combination over a larger corpus, equivalence
 * to the plain query when the rescorer cannot change the order, and the
 * structure of explain output for rescored hits.
 */
public class QueryRescorerTests extends AbstractNodesTests {

    private Client client;

    @BeforeClass
    public void createNodes() throws Exception {
        startNode("node1");
        client = getClient();
    }

    @AfterClass
    public void closeNodes() {
        client.close();
        closeAllNodes();
    }

    protected Client getClient() {
        return client("node1");
    }

    @Test
    public void testRescorePhrase() throws Exception {
        // Drop any leftover index from a previous run; ignore "missing index".
        try {
            client.admin().indices().prepareDelete("test").execute().actionGet();
        } catch (Exception e) {
            // ignore
        }
        client.admin()
                .indices()
                .prepareCreate("test")
                .addMapping(
                        "type1",
                        jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1")
                                .field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject())
                .setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2)).execute().actionGet();
        client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet();
        client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute()
                .actionGet();
        client.prepareIndex("test", "type1", "3")
                .setSource("field1", "quick huge brown", "field2", "the quick lazy huge brown fox jumps over the tree").execute()
                .actionGet();
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        // Boosted "quick brown" phrase with slop 2 should lift doc 3 above doc 2.
        SearchResponse searchResponse = client.prepareSearch()
                .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
                .setRescorer(RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "quick brown").slop(2).boost(4.0f)))
                .setRescoreWindow(5).execute().actionGet();
        assertThat(searchResponse.hits().totalHits(), equalTo(3l));
        assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
        assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("3"));
        assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("2"));
        // Unboosted "the quick brown" phrase with wider slop keeps the original order.
        searchResponse = client.prepareSearch()
                .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
                .setRescorer(RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "the quick brown").slop(3)))
                .setRescoreWindow(5).execute().actionGet();
        assertThat(searchResponse.hits().totalHits(), equalTo(3l));
        assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
        assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
        assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
        // Exact phrase (default slop 0) also preserves the original order.
        searchResponse = client.prepareSearch()
                .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
                .setRescorer(RescoreBuilder.queryRescorer((QueryBuilders.matchPhraseQuery("field1", "the quick brown"))))
                .setRescoreWindow(5).execute().actionGet();
        assertThat(searchResponse.hits().totalHits(), equalTo(3l));
        assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
        assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
        assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
    }

    @Test
    public void testMoreDocs() throws Exception {
        try {
            client.admin().indices().prepareDelete("test").execute().actionGet();
        } catch (Exception e) {
            // ignore
        }
        // Synonym analyzer so "ave"/"avenue" and "street"/"str" match each other.
        Builder builder = ImmutableSettings.builder();
        builder.put("index.analysis.analyzer.synonym.tokenizer", "whitespace");
        builder.putArray("index.analysis.analyzer.synonym.filter", "synonym", "lowercase");
        builder.put("index.analysis.filter.synonym.type", "synonym");
        builder.putArray("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street");
        // NOTE(review): the mapping body is built for "type2" but registered
        // under "type1" below — looks like a copy-paste slip; confirm whether
        // the synonym search_analyzer is actually applied to the type1 docs.
        XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type2").startObject("properties")
                .startObject("field1").field("type", "string").field("index_analyzer", "whitespace").field("search_analyzer", "synonym")
                .endObject().endObject().endObject().endObject();
        client.admin().indices().prepareCreate("test").addMapping("type1", mapping).setSettings(builder.put("index.number_of_shards", 1))
                .execute().actionGet();
        client.prepareIndex("test", "type1", "1").setSource("field1", "massachusetts avenue boston massachusetts").execute().actionGet();
        client.prepareIndex("test", "type1", "2").setSource("field1", "lexington avenue boston massachusetts").execute().actionGet();
        client.prepareIndex("test", "type1", "3").setSource("field1", "boston avenue lexington massachusetts").execute().actionGet();
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        client.prepareIndex("test", "type1", "4").setSource("field1", "boston road lexington massachusetts").execute().actionGet();
        client.prepareIndex("test", "type1", "5").setSource("field1", "lexington street lexington massachusetts").execute().actionGet();
        client.prepareIndex("test", "type1", "6").setSource("field1", "massachusetts avenue lexington massachusetts").execute().actionGet();
        // NOTE(review): "bosten ... franciso" below is misspelled — presumably a
        // deliberate non-matching doc; confirm.
        client.prepareIndex("test", "type1", "7").setSource("field1", "bosten street san franciso california").execute().actionGet();
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        client.prepareIndex("test", "type1", "8").setSource("field1", "hollywood boulevard los angeles california").execute().actionGet();
        // NOTE(review): doc 9 misspells "massachussetts" while doc 10 is spelled
        // correctly — presumably intentional to differentiate them; confirm.
        client.prepareIndex("test", "type1", "9").setSource("field1", "1st street boston massachussetts").execute().actionGet();
        client.prepareIndex("test", "type1", "10").setSource("field1", "1st street boston massachusetts").execute().actionGet();
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        client.prepareIndex("test", "type1", "11").setSource("field1", "2st street boston massachusetts").execute().actionGet();
        client.prepareIndex("test", "type1", "12").setSource("field1", "3st street boston massachusetts").execute().actionGet();
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        // Phrase rescore (weights 0.6 / 2.0) over a window of 20 should rank the
        // docs containing the phrase "lexington avenue massachusetts" on top.
        SearchResponse searchResponse = client
                .prepareSearch()
                .setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(MatchQueryBuilder.Operator.OR))
                .setFrom(0)
                .setSize(5)
                .setRescorer(
                        RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "lexington avenue massachusetts").slop(3))
                                .setQueryWeight(0.6f).setRescoreQueryWeight(2.0f)).setRescoreWindow(20).execute().actionGet();
        assertThat(searchResponse.hits().totalHits(), equalTo(9l));
        assertThat(searchResponse.hits().hits().length, equalTo(5));
        assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
        assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("6"));
        assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
        // Same request with DFS_QUERY_THEN_FETCH must produce the same ranking.
        searchResponse = client
                .prepareSearch()
                .setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(MatchQueryBuilder.Operator.OR))
                .setFrom(0)
                .setSize(5)
                .setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
                .setRescorer(
                        RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "lexington avenue massachusetts").slop(3))
                                .setQueryWeight(0.6f).setRescoreQueryWeight(2.0f)).setRescoreWindow(20).execute().actionGet();
        assertThat(searchResponse.hits().totalHits(), equalTo(9l));
        assertThat(searchResponse.hits().hits().length, equalTo(5));
        assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2"));
        assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("6"));
        assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
    }

    /**
     * Asserts that both responses contain exactly the same hits in the same order.
     */
    private static final void assertEquivalent(SearchResponse plain, SearchResponse rescored) {
        SearchHits leftHits = plain.getHits();
        SearchHits rightHits = rescored.getHits();
        assertThat(leftHits.getTotalHits(), equalTo(rightHits.getTotalHits()));
        assertThat(leftHits.getHits().length, equalTo(rightHits.getHits().length));
        SearchHit[] hits = leftHits.getHits();
        for (int i = 0; i < hits.length; i++) {
            assertThat(hits[i].getId(), equalTo(rightHits.getHits()[i].getId()));
        }
    }

    /**
     * Like {@link #assertEquivalent}, but if the top hit differs the rescored
     * top hit must at least contain the query phrase as a substring.
     */
    private static final void assertEquivalentOrSubstringMatch(String query, SearchResponse plain, SearchResponse rescored) {
        SearchHits leftHits = plain.getHits();
        SearchHits rightHits = rescored.getHits();
        assertThat(leftHits.getTotalHits(), equalTo(rightHits.getTotalHits()));
        assertThat(leftHits.getHits().length, equalTo(rightHits.getHits().length));
        SearchHit[] hits = leftHits.getHits();
        SearchHit[] otherHits = rightHits.getHits();
        if (!hits[0].getId().equals(otherHits[0].getId())) {
            assertThat(((String) otherHits[0].sourceAsMap().get("field1")).contains(query), equalTo(true));
        } else {
            for (int i = 0; i < hits.length; i++) {
                assertThat(query, hits[i].getId(), equalTo(rightHits.getHits()[i].getId()));
            }
        }
    }

    @Test
    public void testEquivalence() throws Exception {
        try {
            client.admin().indices().prepareDelete("test").execute().actionGet();
        } catch (Exception e) {
            // ignore
        }
        client.admin()
                .indices()
                .prepareCreate("test")
                .addMapping(
                        "type1",
                        jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1")
                                .field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject())
                .setSettings(ImmutableSettings.settingsBuilder()).execute().actionGet();
        // Index docs whose field1 is the english spelling of their id.
        int numDocs = 1000;
        for (int i = 0; i < numDocs; i++) {
            client.prepareIndex("test", "type1", String.valueOf(i)).setSource("field1", English.intToEnglish(i)).execute().actionGet();
        }
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        for (int i = 0; i < numDocs; i++) {
            String intToEnglish = English.intToEnglish(i);
            String query = intToEnglish.split(" ")[0];
            SearchResponse rescored = client
                    .prepareSearch()
                    .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR))
                    .setFrom(0)
                    .setSize(10)
                    .setRescorer(
                            RescoreBuilder
                                    .queryRescorer(
                                            QueryBuilders
                                                    .constantScoreQuery(QueryBuilders.matchPhraseQuery("field1", intToEnglish).slop(3)))
                                    .setQueryWeight(1.0f)
                                    .setRescoreQueryWeight(0.0f)) // no weight - so we basically use the same score as the actual query
                    .setRescoreWindow(50).execute().actionGet();
            SearchResponse plain = client.prepareSearch()
                    .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR)).setFrom(0).setSize(10)
                    .execute().actionGet();
            // check equivalence
            assertEquivalent(plain, rescored);
            // A rescore query that matches nothing must not change the ranking.
            rescored = client
                    .prepareSearch()
                    .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR))
                    .setFrom(0)
                    .setSize(10)
                    .setRescorer(
                            RescoreBuilder
                                    .queryRescorer(
                                            QueryBuilders
                                                    .constantScoreQuery(QueryBuilders.matchPhraseQuery("field1", "not in the index").slop(3)))
                                    .setQueryWeight(1.0f)
                                    .setRescoreQueryWeight(1.0f))
                    .setRescoreWindow(50).execute().actionGet();
            // check equivalence
            assertEquivalent(plain, rescored);
            rescored = client
                    .prepareSearch()
                    .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR))
                    .setFrom(0)
                    .setSize(10)
                    .setRescorer(
                            RescoreBuilder
                                    .queryRescorer(
                                            QueryBuilders.matchPhraseQuery("field1", intToEnglish).slop(0))
                                    .setQueryWeight(1.0f).setRescoreQueryWeight(1.0f)).setRescoreWindow(100).execute().actionGet();
            // check equivalence or if the first match differs we check if the phrase is a substring of the top doc
            assertEquivalentOrSubstringMatch(intToEnglish, plain, rescored);
        }
    }

    @Test
    public void testExplain() throws Exception {
        try {
            client.admin().indices().prepareDelete("test").execute().actionGet();
        } catch (Exception e) {
            // ignore
        }
        client.admin()
                .indices()
                .prepareCreate("test")
                .addMapping(
                        "type1",
                        jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1")
                                .field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject())
                .setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2)).execute().actionGet();
        client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet();
        client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute()
                .actionGet();
        client.prepareIndex("test", "type1", "3")
                .setSource("field1", "quick huge brown", "field2", "the quick lazy huge brown fox jumps over the tree").execute()
                .actionGet();
        client.admin().indices().prepareRefresh("test").execute().actionGet();
        SearchResponse searchResponse = client
                .prepareSearch()
                .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR))
                .setRescorer(
                        RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "the quick brown").slop(2).boost(4.0f))
                                .setQueryWeight(0.5f).setRescoreQueryWeight(0.4f)).setRescoreWindow(5).setExplain(true).execute()
                .actionGet();
        assertThat(searchResponse.hits().totalHits(), equalTo(3l));
        assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
        assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
        assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
        // Each hit's explanation is a two-part sum: detail[0] is the original
        // query contribution (weight 0.5), detail[1] the rescore query
        // contribution (weight 0.4).
        for (int i = 0; i < 3; i++) {
            assertThat(searchResponse.getHits().getAt(i).explanation(), notNullValue());
            assertThat(searchResponse.getHits().getAt(i).explanation().isMatch(), equalTo(true));
            assertThat(searchResponse.getHits().getAt(i).explanation().getDetails().length, equalTo(2));
            assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[0].isMatch(), equalTo(true));
            assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[0].getDetails()[1].getValue(), equalTo(0.5f));
            assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].getDetails()[1].getValue(), equalTo(0.4f));
            if (i == 2) {
                // doc 3 lacks the phrase, so its rescore part is a non-match with score 0
                assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].isMatch(), equalTo(false));
                assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].getDetails()[0].getValue(), equalTo(0.0f));
            }
        }
    }
}