From a7bbab7e878b8eefef66e106203de5177265cf5c Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 30 Jan 2013 17:27:35 +0100 Subject: [PATCH] # Rescore Feature The rescore feature allows to rescore a document returned by a query based on a secondary algorithm. Rescoring is commonly used if a scoring algorithm is too costly to be executed across the entire document set but efficient enough to be executed on the Top-K documents scored by a faster retrieval method. Rescoring can help to improve precision by reordering a larger Top-K window than actually returned to the user. Typically it is executed on a window between 100 and 500 documents while the actual result window requested by the user remains the same. # Query Rescorer The `query` rescorer executes a secondary query only on the Top-K results of the actual user query and rescores the documents based on a linear combination of the user query's score and the score of the `rescore_query`. This allows to execute any exposed query as a `rescore_query` and supports a `query_weight` as well as a `rescore_query_weight` to weight the factors of the linear combination. # Rescore API The `rescore` request is defined alongside the query part in the JSON request: ```json curl -s -XPOST 'localhost:9200/_search' -d { "query" : { "match" : { "field1" : { "query" : "the quick brown", "type" : "boolean", "operator" : "OR" } } }, "rescore" : { "window_size" : 50, "query" : { "rescore_query" : { "match" : { "field1" : { "query" : "the quick brown", "type" : "phrase", "slop" : 2 } } }, "query_weight" : 0.7, "rescore_query_weight" : 1.2 } } } ``` Each `rescore` request is executed on a per-shard basis within the same roundtrip. Currently the rescore API has only one implementation (the `query` rescorer) which modifies the result set in-place. Future developments could include dedicated rescore results if needed by the implementation, i.e. a pair-wise reranker. 
*Note:* Only regular queries are rescored; if the search type is set to `scan` or `count`, rescorers are not executed. Closes #2640 --- .../explain/TransportExplainAction.java | 13 +- .../action/search/SearchRequestBuilder.java | 17 +- .../search/builder/SearchSourceBuilder.java | 14 + .../elasticsearch/search/dfs/DfsPhase.java | 7 +- .../fetch/explain/ExplainFetchSubPhase.java | 16 +- .../search/internal/SearchContext.java | 12 + .../search/query/QueryPhase.java | 20 +- .../search/rescore/QueryRescorer.java | 324 ++++++++++++++++ .../search/rescore/RescoreBuilder.java | 125 +++++++ .../search/rescore/RescoreParseElement.java | 69 ++++ .../search/rescore/RescorePhase.java | 70 ++++ .../search/rescore/RescoreSearchContext.java | 57 +++ .../search/rescore/Rescorer.java | 90 +++++ .../search/scan/ScanContext.java | 19 +- .../java/org/apache/lucene/util/English.java | 188 ++++++++++ .../search/rescore/QueryRescorerTests.java | 352 ++++++++++++++++++ 16 files changed, 1381 insertions(+), 12 deletions(-) create mode 100644 src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java create mode 100644 src/main/java/org/elasticsearch/search/rescore/RescoreBuilder.java create mode 100644 src/main/java/org/elasticsearch/search/rescore/RescoreParseElement.java create mode 100644 src/main/java/org/elasticsearch/search/rescore/RescorePhase.java create mode 100644 src/main/java/org/elasticsearch/search/rescore/RescoreSearchContext.java create mode 100644 src/main/java/org/elasticsearch/search/rescore/Rescorer.java create mode 100644 src/test/java/org/apache/lucene/util/English.java create mode 100644 src/test/java/org/elasticsearch/test/integration/search/rescore/QueryRescorerTests.java diff --git a/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java b/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java index 6df3d126609..372207d7917 100644 --- a/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java +++ 
b/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java @@ -44,6 +44,9 @@ import org.elasticsearch.indices.IndicesService; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.ShardSearchRequest; +import org.elasticsearch.search.rescore.RescorePhase; +import org.elasticsearch.search.rescore.RescoreSearchContext; +import org.elasticsearch.search.rescore.Rescorer; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -105,8 +108,14 @@ public class TransportExplainAction extends TransportShardSingleOperationAction< context.parsedQuery(parseQuery(request, indexService)); context.preProcess(); int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().reader.docBase; - - Explanation explanation = context.searcher().explain(context.query(), topLevelDocId); + Explanation explanation; + if (context.rescore() != null) { + RescoreSearchContext ctx = context.rescore(); + Rescorer rescorer = ctx.rescorer(); + explanation = rescorer.explain(topLevelDocId, context, ctx); + } else { + explanation = context.searcher().explain(context.query(), topLevelDocId); + } if (request.fields() != null) { if (request.fields().length == 1 && "_source".equals(request.fields()[0])) { request.fields(null); // Load the _source field diff --git a/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java b/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java index 0003ae465e1..ca9b8813aeb 100644 --- a/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java +++ b/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java @@ -35,6 +35,7 @@ import org.elasticsearch.search.Scroll; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.facet.AbstractFacetBuilder; import org.elasticsearch.search.highlight.HighlightBuilder; 
+import org.elasticsearch.search.rescore.RescoreBuilder; import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.suggest.SuggestBuilder; @@ -662,7 +663,17 @@ public class SearchRequestBuilder extends ActionRequestBuilder indexBoost = null; @@ -409,6 +412,13 @@ public class SearchSourceBuilder implements ToXContent { } return suggestBuilder; } + + public RescoreBuilder rescore() { + if (rescoreBuilder == null) { + rescoreBuilder = new RescoreBuilder(); + } + return rescoreBuilder; + } /** * Sets no fields to be loaded, resulting in only id and type to be returned per field. @@ -722,6 +732,10 @@ public class SearchSourceBuilder implements ToXContent { if (suggestBuilder != null) { suggestBuilder.toXContent(builder, params); } + + if (rescoreBuilder != null) { + rescoreBuilder.toXContent(builder, params); + } if (stats != null) { builder.startArray("stats"); diff --git a/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java b/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java index 7fd8d20f594..c8861d97824 100644 --- a/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java +++ b/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java @@ -33,10 +33,7 @@ import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchPhase; import org.elasticsearch.search.internal.SearchContext; -import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Set; /** * @@ -68,6 +65,10 @@ public class DfsPhase implements SearchPhase { THashSet termsSet = cachedTermsSet.get().get(); termsSet.clear(); context.query().extractTerms(termsSet); + if (context.rescore() != null) { + context.rescore().rescorer().extractTerms(context, context.rescore(), termsSet); + } + Term[] terms = termsSet.toArray(new Term[termsSet.size()]); TermStatistics[] termStatistics = new TermStatistics[terms.length]; IndexReaderContext indexReaderContext = 
context.searcher().getTopReaderContext(); diff --git a/src/main/java/org/elasticsearch/search/fetch/explain/ExplainFetchSubPhase.java b/src/main/java/org/elasticsearch/search/fetch/explain/ExplainFetchSubPhase.java index 244f05e601e..35cc3f1450c 100644 --- a/src/main/java/org/elasticsearch/search/fetch/explain/ExplainFetchSubPhase.java +++ b/src/main/java/org/elasticsearch/search/fetch/explain/ExplainFetchSubPhase.java @@ -20,12 +20,16 @@ package org.elasticsearch.search.fetch.explain; import com.google.common.collect.ImmutableMap; + +import org.apache.lucene.search.Explanation; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.rescore.RescoreSearchContext; +import org.elasticsearch.search.rescore.Rescorer; import java.io.IOException; import java.util.Map; @@ -57,8 +61,18 @@ public class ExplainFetchSubPhase implements FetchSubPhase { @Override public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException { try { + final int topLevelDocId = hitContext.hit().docId(); + Explanation explanation; + + if (context.rescore() != null) { + RescoreSearchContext ctx = context.rescore(); + Rescorer rescorer = ctx.rescorer(); + explanation = rescorer.explain(topLevelDocId, context, ctx); + } else { + explanation = context.searcher().explain(context.query(), topLevelDocId); + } // we use the top level doc id, since we work with the top level searcher - hitContext.hit().explanation(context.searcher().explain(context.query(), hitContext.hit().docId())); + hitContext.hit().explanation(explanation); } catch (IOException e) { throw new FetchPhaseExecutionException(context, "Failed to explain doc [" + hitContext.hit().type() + "#" + 
hitContext.hit().id() + "]", e); } diff --git a/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/src/main/java/org/elasticsearch/search/internal/SearchContext.java index b89f1734c5c..bd3a7d9d695 100644 --- a/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -58,6 +58,7 @@ import org.elasticsearch.search.fetch.script.ScriptFieldsContext; import org.elasticsearch.search.highlight.SearchContextHighlight; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.query.QuerySearchResult; +import org.elasticsearch.search.rescore.RescoreSearchContext; import org.elasticsearch.search.scan.ScanContext; import org.elasticsearch.search.suggest.SuggestionSearchContext; @@ -166,6 +167,8 @@ public class SearchContext implements Releasable { private SearchContextHighlight highlight; private SuggestionSearchContext suggest; + + private RescoreSearchContext rescore; private SearchLookup searchLookup; @@ -177,6 +180,7 @@ public class SearchContext implements Releasable { private List rewrites = null; + public SearchContext(long id, ShardSearchRequest request, SearchShardTarget shardTarget, Engine.Searcher engineSearcher, IndexService indexService, IndexShard indexShard, ScriptService scriptService) { this.id = id; @@ -313,6 +317,14 @@ public class SearchContext implements Releasable { public void suggest(SuggestionSearchContext suggest) { this.suggest = suggest; } + + public RescoreSearchContext rescore() { + return this.rescore; + } + + public void rescore(RescoreSearchContext rescore) { + this.rescore = rescore; + } public boolean hasScriptFields() { return scriptFields != null; diff --git a/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/src/main/java/org/elasticsearch/search/query/QueryPhase.java index 6c4ae2c4ab0..3d39695ccc1 100644 --- a/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ 
b/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -31,6 +31,8 @@ import org.elasticsearch.search.SearchPhase; import org.elasticsearch.search.facet.FacetPhase; import org.elasticsearch.search.internal.ContextIndexSearcher; import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.rescore.RescorePhase; +import org.elasticsearch.search.rescore.RescoreSearchContext; import org.elasticsearch.search.sort.SortParseElement; import org.elasticsearch.search.sort.TrackScoresParseElement; import org.elasticsearch.search.suggest.SuggestPhase; @@ -45,11 +47,13 @@ public class QueryPhase implements SearchPhase { private final FacetPhase facetPhase; private final SuggestPhase suggestPhase; + private RescorePhase rescorePhase; @Inject - public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase) { + public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase, RescorePhase rescorePhase) { this.facetPhase = facetPhase; this.suggestPhase = suggestPhase; + this.rescorePhase = rescorePhase; } @Override @@ -71,7 +75,8 @@ public class QueryPhase implements SearchPhase { .put("minScore", new MinScoreParseElement()) .put("timeout", new TimeoutParseElement()) .putAll(facetPhase.parseElements()) - .putAll(suggestPhase.parseElements()); + .putAll(suggestPhase.parseElements()) + .putAll(rescorePhase.parseElements()); return parseElements.build(); } @@ -99,6 +104,7 @@ public class QueryPhase implements SearchPhase { } searchContext.searcher().inStage(ContextIndexSearcher.Stage.MAIN_QUERY); + boolean rescore = false; try { searchContext.queryResult().from(searchContext.from()); searchContext.queryResult().size(searchContext.size()); @@ -106,7 +112,7 @@ public class QueryPhase implements SearchPhase { Query query = searchContext.query(); TopDocs topDocs; - int numDocs = searchContext.from() + searchContext.size(); + int numDocs = searchContext.from() + searchContext.size() ; if (numDocs == 0) { // if 0 was asked, change it to 1 since 0 
is not allowed numDocs = 1; @@ -122,6 +128,10 @@ public class QueryPhase implements SearchPhase { topDocs = searchContext.searcher().search(query, null, numDocs, searchContext.sort(), searchContext.trackScores(), searchContext.trackScores()); } else { + if (searchContext.rescore() != null) { + rescore = true; + numDocs = Math.max(searchContext.rescore().window(), numDocs); + } topDocs = searchContext.searcher().search(query, numDocs); } searchContext.queryResult().topDocs(topDocs); @@ -130,7 +140,9 @@ public class QueryPhase implements SearchPhase { } finally { searchContext.searcher().finishStage(ContextIndexSearcher.Stage.MAIN_QUERY); } - + if (rescore) { // only if we do a regular search + rescorePhase.execute(searchContext); + } suggestPhase.execute(searchContext); facetPhase.execute(searchContext); } diff --git a/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java b/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java new file mode 100644 index 00000000000..742f7f6a802 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java @@ -0,0 +1,324 @@ +package org.elasticsearch.search.rescore; +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ComplexExplanation; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.SorterTemplate; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.index.query.ParsedQuery; +import org.elasticsearch.search.internal.ContextIndexSearcher; +import org.elasticsearch.search.internal.SearchContext; + +final class QueryRescorer implements Rescorer { + + public static final Rescorer INSTANCE = new QueryRescorer(); + public static final String NAME = "query"; + @Override + public String name() { + return NAME; + } + + @Override + public void rescore(TopDocs topDocs, SearchContext context, RescoreSearchContext rescoreContext) throws IOException{ + assert rescoreContext != null; + QueryRescoreContext rescore = ((QueryRescoreContext) rescoreContext); + TopDocs queryTopDocs = context.queryResult().topDocs(); + if (queryTopDocs == null || queryTopDocs.totalHits == 0 || queryTopDocs.scoreDocs.length == 0) { + return; + } + + ContextIndexSearcher searcher = context.searcher(); + topDocs = searcher.search(rescore.query(), new TopDocsFilter(queryTopDocs), queryTopDocs.scoreDocs.length); + context.queryResult().topDocs(merge(queryTopDocs, topDocs, rescore)); + } + + @Override + public Explanation explain(int topLevelDocId, SearchContext context, RescoreSearchContext rescoreContext) throws IOException { + 
QueryRescoreContext rescore = ((QueryRescoreContext) context.rescore()); + ContextIndexSearcher searcher = context.searcher(); + Explanation primaryExplain = searcher.explain(context.query(), topLevelDocId); + if (primaryExplain == null) { + // this should not happen but just in case + return new ComplexExplanation(false, 0.0f, "nothing matched"); + } + Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId); + float primaryWeight = rescore.queryWeight(); + ComplexExplanation prim = new ComplexExplanation(primaryExplain.isMatch(), + primaryExplain.getValue() * primaryWeight, + "product of:"); + prim.addDetail(primaryExplain); + prim.addDetail(new Explanation(primaryWeight, "primaryWeight")); + if (rescoreExplain != null) { + ComplexExplanation sumExpl = new ComplexExplanation(); + sumExpl.setDescription("sum of:"); + sumExpl.addDetail(prim); + sumExpl.setMatch(prim.isMatch()); + float secondaryWeight = rescore.rescoreQueryWeight(); + ComplexExplanation sec = new ComplexExplanation(rescoreExplain.isMatch(), + rescoreExplain.getValue() * secondaryWeight, + "product of:"); + sec.addDetail(rescoreExplain); + sec.addDetail(new Explanation(secondaryWeight, "secondaryWeight")); + sumExpl.addDetail(sec); + sumExpl.setValue(prim.getValue() + sec.getValue()); + return sumExpl; + } else { + return prim; + } + } + + @Override + public RescoreSearchContext parse(XContentParser parser, SearchContext context) throws IOException { + Token token; + String fieldName = null; + QueryRescoreContext rescoreContext = new QueryRescoreContext(this); + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + if ("rescore_query".equals(fieldName)) { + ParsedQuery parsedQuery = context.queryParserService().parse(parser); + rescoreContext.setParsedQuery(parsedQuery); + } + } else if (token.isValue()) { + if("query_weight".equals(fieldName)) { + 
rescoreContext.setQueryWeight(parser.floatValue()); + } else if("rescore_query_weight".equals(fieldName)) { + rescoreContext.setRescoreQueryWeight(parser.floatValue()); + } else { + throw new ElasticSearchIllegalArgumentException("rescore doesn't support [" + fieldName + "]"); + } + } + } + return rescoreContext; + } + + static class QueryRescoreContext extends RescoreSearchContext { + + public QueryRescoreContext(QueryRescorer rescorer) { + super(NAME, 10, rescorer); + } + + private ParsedQuery parsedQuery; + private float queryWeight = 1.0f; + private float rescoreQueryWeight = 1.0f; + + public void setParsedQuery(ParsedQuery parsedQuery) { + this.parsedQuery = parsedQuery; + } + + public Query query() { + return parsedQuery.query(); + } + + public float queryWeight() { + return queryWeight; + } + + public float rescoreQueryWeight() { + return rescoreQueryWeight; + } + + public void setRescoreQueryWeight(float rescoreQueryWeight) { + this.rescoreQueryWeight = rescoreQueryWeight; + } + + public void setQueryWeight(float queryWeight) { + this.queryWeight = queryWeight; + } + + } + + + private TopDocs merge(TopDocs primary, TopDocs secondary, QueryRescoreContext context) { + DocIdSorter sorter = new DocIdSorter(); + sorter.array = primary.scoreDocs; + sorter.mergeSort(0, sorter.array.length-1); + ScoreDoc[] primaryDocs = sorter.array; + sorter.array = secondary.scoreDocs; + sorter.mergeSort(0, sorter.array.length-1); + ScoreDoc[] secondaryDocs = sorter.array; + int j = 0; + float primaryWeight = context.queryWeight(); + float secondaryWeight = context.rescoreQueryWeight(); + for (int i = 0; i < primaryDocs.length && j < secondaryDocs.length; i++) { + if (primaryDocs[i].doc == secondaryDocs[j].doc) { + primaryDocs[i].score = (primaryDocs[i].score * primaryWeight) + (secondaryDocs[j++].score * secondaryWeight); + } else { + primaryDocs[i].score *= primaryWeight; + } + } + ScoreSorter scoreSorter = new ScoreSorter(); + scoreSorter.array = primaryDocs; + 
scoreSorter.mergeSort(0, primaryDocs.length-1); + primary.setMaxScore(primaryDocs[0].score); + return primary; + } + + private static final class DocIdSorter extends SorterTemplate { + private ScoreDoc[] array; + private ScoreDoc pivot; + @Override + protected void swap(int i, int j) { + ScoreDoc scoreDoc = array[i]; + array[i] = array[j]; + array[j] = scoreDoc; + } + + @Override + protected int compare(int i, int j) { + return compareDocId(array[i], array[j]); + } + + @Override + protected void setPivot(int i) { + pivot = array[i]; + + } + + @Override + protected int comparePivot(int j) { + return compareDocId(pivot, array[j]); + } + + } + + private static final int compareDocId(ScoreDoc left, ScoreDoc right) { + if (left.doc < right.doc) { + return 1; + } else if (left.doc == right.doc) { + return 0; + } + return -1; + } + + private static final class ScoreSorter extends SorterTemplate { + private ScoreDoc[] array; + private ScoreDoc pivot; + @Override + protected void swap(int i, int j) { + ScoreDoc scoreDoc = array[i]; + array[i] = array[j]; + array[j] = scoreDoc; + } + + @Override + protected int compare(int i, int j) { + int cmp = Float.compare(array[j].score, array[i].score); + return cmp == 0 ? compareDocId(array[i], array[j]) : cmp; + } + + @Override + protected void setPivot(int i) { + pivot = array[i]; + + } + + @Override + protected int comparePivot(int j) { + int cmp = Float.compare(array[j].score, pivot.score); + return cmp == 0 ? 
compareDocId(pivot, array[j]) : cmp; + } + + } + + private static final class TopDocsFilter extends Filter { + + private final int[] docIds; + public TopDocsFilter(TopDocs topDocs) { + this.docIds = new int[topDocs.scoreDocs.length]; + ScoreDoc[] scoreDocs = topDocs.scoreDocs; + for (int i = 0; i < scoreDocs.length; i++) { + docIds[i] = scoreDocs[i].doc; + } + Arrays.sort(docIds); + + } + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + final int docBase = context.docBase; + int limit = docBase + context.reader().maxDoc(); + int offset = Arrays.binarySearch(docIds, docBase); + if (offset < 0 ) { + offset = (-offset)-1; + } + int end = Arrays.binarySearch(docIds, limit); + if (end < 0) { + end = (-end)-1; + } + final int start = offset; + final int stop = end; + + return new DocIdSet() { + + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator() { + private int current = start; + private int docId = NO_MORE_DOCS; + + @Override + public int nextDoc() throws IOException { + if (current < stop) { + return docId = docIds[current++]-docBase; + } + return docId = NO_MORE_DOCS; + } + + @Override + public int docID() { + return docId; + } + + @Override + public int advance(int target) throws IOException { + if (target == NO_MORE_DOCS) { + current = stop; + return docId = NO_MORE_DOCS; + } + while(nextDoc() < target) {} + return docId; + } + }; + } + }; + } + + } + + @Override + public void extractTerms(SearchContext context, RescoreSearchContext rescoreContext, Set termsSet) { + ((QueryRescoreContext) context.rescore()).query().extractTerms(termsSet); + } + +} diff --git a/src/main/java/org/elasticsearch/search/rescore/RescoreBuilder.java b/src/main/java/org/elasticsearch/search/rescore/RescoreBuilder.java new file mode 100644 index 00000000000..2dd81ee2d3a --- /dev/null +++ b/src/main/java/org/elasticsearch/search/rescore/RescoreBuilder.java @@ -0,0 +1,125 @@ +/* + * 
Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.rescore; + +import java.io.IOException; + +import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilderException; + +public class RescoreBuilder implements ToXContent { + + private Rescorer rescorer; + private Integer windowSize; + + public static QueryRescorer queryRescorer(QueryBuilder queryBuilder) { + return new QueryRescorer(queryBuilder); + } + + public RescoreBuilder setRescorer(Rescorer rescorer) { + this.rescorer = rescorer; + return this; + } + + public RescoreBuilder setWindowSize(int windowSize) { + this.windowSize = windowSize; + return this; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (rescorer != null) { + builder.startObject("rescore"); + if (windowSize != null) { + builder.field("window_size", windowSize); + } + rescorer.toXContent(builder, params); + builder.endObject(); + } + return 
builder; + } + + public static abstract class Rescorer implements ToXContent { + + private String name; + + public Rescorer(String name) { + this.name = name; + } + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + builder = innerToXContent(builder, params); + builder.endObject(); + return builder; + } + + protected abstract XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException; + + } + + public static class QueryRescorer extends Rescorer { + private static final String NAME = "query"; + private QueryBuilder queryBuilder; + private Float rescoreQueryWeight; + private Float queryWeight; + + /** + * Creates a new {@link QueryRescorer} instance + * @param builder the query builder to build the rescore query from + */ + public QueryRescorer(QueryBuilder builder) { + super(NAME); + this.queryBuilder = builder; + } + /** + * Sets the original query weight for rescoring. The default is 1.0 + */ + public QueryRescorer setQueryWeight(float queryWeight) { + this.queryWeight = queryWeight; + return this; + } + + /** + * Sets the rescore query weight for rescoring. 
The default is 1.0 + */ + public QueryRescorer setRescoreQueryWeight(float rescoreQueryWeight) { + this.rescoreQueryWeight = rescoreQueryWeight; + return this; + } + + @Override + protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { + builder.field("rescore_query", queryBuilder); + if (queryWeight != null) { + builder.field("query_weight", queryWeight); + } + if (rescoreQueryWeight != null) { + builder.field("rescore_query_weight", rescoreQueryWeight); + } + return builder; + } + } + +} diff --git a/src/main/java/org/elasticsearch/search/rescore/RescoreParseElement.java b/src/main/java/org/elasticsearch/search/rescore/RescoreParseElement.java new file mode 100644 index 00000000000..eec6d6e7d72 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/rescore/RescoreParseElement.java @@ -0,0 +1,69 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.rescore; + +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.ElasticSearchParseException; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.ParsedQuery; +import org.elasticsearch.search.SearchParseElement; +import org.elasticsearch.search.internal.SearchContext; + +/** + * + */ +public class RescoreParseElement implements SearchParseElement { + + @Override + public void parse(XContentParser parser, SearchContext context) throws Exception { + String fieldName = null; + RescoreSearchContext rescoreContext = null; + Integer windowSize = null; + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + if (QueryRescorer.NAME.equals(fieldName)) { + // we only have one at this point + Rescorer rescorer = QueryRescorer.INSTANCE; + token = parser.nextToken(); + if (token != XContentParser.Token.START_OBJECT) { + throw new ElasticSearchParseException("rescore type malformed, must start with start_object"); + } + rescoreContext = rescorer.parse(parser, context); + } + } else if (token.isValue()) { + if ("window_size".equals(fieldName)) { + windowSize = parser.intValue(); + } else { + throw new ElasticSearchIllegalArgumentException("rescore doesn't support [" + fieldName + "]"); + } + } + } + if (rescoreContext == null) { + throw new ElasticSearchIllegalArgumentException("missing rescore type"); + } + if (windowSize != null) { + rescoreContext.setWindowSize(windowSize.intValue()); + } + context.rescore(rescoreContext); + } + +} diff --git a/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java b/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java new file mode 100644 index 00000000000..7f0998584f3 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java @@ -0,0 +1,70 
@@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.rescore; + +import java.io.IOException; +import java.util.Map; + +import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.search.SearchParseElement; +import org.elasticsearch.search.SearchPhase; +import org.elasticsearch.search.internal.SearchContext; + +import com.google.common.collect.ImmutableMap; + +/** + */ +public class RescorePhase extends AbstractComponent implements SearchPhase { + + @Inject + public RescorePhase(Settings settings) { + super(settings); + } + + @Override + public Map parseElements() { + ImmutableMap.Builder parseElements = ImmutableMap.builder(); + parseElements.put("rescore", new RescoreParseElement()); + return parseElements.build(); + } + + @Override + public void preProcess(SearchContext context) { + } + + @Override + public void execute(SearchContext context) throws ElasticSearchException { + final RescoreSearchContext ctx = context.rescore(); + final Rescorer rescorer = ctx.rescorer(); + try { + 
rescorer.rescore(context.queryResult().topDocs(), context, ctx); + } catch (IOException e) { + throw new ElasticSearchException("Rescore Phase Failed", e); + } + } + + + + + +} diff --git a/src/main/java/org/elasticsearch/search/rescore/RescoreSearchContext.java b/src/main/java/org/elasticsearch/search/rescore/RescoreSearchContext.java new file mode 100644 index 00000000000..1d3636b9533 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/rescore/RescoreSearchContext.java @@ -0,0 +1,57 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.rescore; + + + +/** + */ +public class RescoreSearchContext { + + private int windowSize; + + private final String type; + + private final Rescorer rescorer; + + public RescoreSearchContext(String type, int windowSize, Rescorer rescorer) { + super(); + this.type = type; + this.windowSize = windowSize; + this.rescorer = rescorer; + } + + public Rescorer rescorer() { + return rescorer; + } + + public String getType() { + return type; + } + + public void setWindowSize(int windowSize) { + this.windowSize = windowSize; + } + + public int window() { + return windowSize; + } + +} diff --git a/src/main/java/org/elasticsearch/search/rescore/Rescorer.java b/src/main/java/org/elasticsearch/search/rescore/Rescorer.java new file mode 100644 index 00000000000..1a0d4dd16d2 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/rescore/Rescorer.java @@ -0,0 +1,90 @@ +package org.elasticsearch.search.rescore; +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.TopDocs; +import org.elasticsearch.action.search.SearchType; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.internal.SearchContext; + +/** + * A query rescorer interface used to re-rank the Top-K results of a previously + * executed search. + */ +public interface Rescorer { + + /** + * Returns the name of this rescorer + */ + public String name(); + + /** + * Modifies the result of the previously executed search ({@link TopDocs}) + * in place based on the given {@link RescoreSearchContext}. + * + * @param topDocs the result of the previously exectued search + * @param context the current {@link SearchContext}. This will never be null. + * @param rescoreContext the {@link RescoreSearchContext}. This will never be null + * @throws IOException if an {@link IOException} occurs during rescoring + */ + public void rescore(TopDocs topDocs, SearchContext context, RescoreSearchContext rescoreContext) throws IOException; + + /** + * Executes an {@link Explanation} phase on the rescorer. + * @param topLevelDocId the global / top-level document ID to explain + * @param context the current {@link SearchContext} + * @param rescoreContext TODO + * @return the explain for the given top level document ID. 
+ * @throws IOException if an {@link IOException} occurs + */ + public Explanation explain(int topLevelDocId, SearchContext context, RescoreSearchContext rescoreContext) throws IOException; + + /** + * Parses the {@link RescoreSearchContext} for this impelementation + * @param parser the parser to read the context from + * @param context the current search context + * @return the parsed {@link RescoreSearchContext} + * @throws IOException if an {@link IOException} occurs while parsing the context + */ + public RescoreSearchContext parse(XContentParser parser, SearchContext context) throws IOException; + + /** + * Extracts all terms needed to exectue this {@link Rescorer}. This method + * is executed in a distributed frequency collection roundtrip for + * {@link SearchType#DFS_QUERY_AND_FETCH} and + * {@link SearchType#DFS_QUERY_THEN_FETCH} + */ + public void extractTerms(SearchContext context, RescoreSearchContext rescoreContext, Set termsSet); + + /* + * TODO: At this point we only have one implemenation which modifies the + * TopDocs given. Future implemenations might return actual resutls that + * contain information about the rescore context. For example a pair wise + * reranker might return the feature vector for the top N window in order to + * merge results on the callers side. 
For now we don't have a return type at + * all since something like this requires a more general refactoring how + * documents are merged since in such a case we don't really have a score + * per document rather a "X is more relevant than Y" relation + */ + +} diff --git a/src/main/java/org/elasticsearch/search/scan/ScanContext.java b/src/main/java/org/elasticsearch/search/scan/ScanContext.java index 2255005dd52..51e3b007872 100644 --- a/src/main/java/org/elasticsearch/search/scan/ScanContext.java +++ b/src/main/java/org/elasticsearch/search/scan/ScanContext.java @@ -1,5 +1,22 @@ package org.elasticsearch.search.scan; - +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Converts numbers to english strings for testing.
 * <p>
 * Every number word is followed by a single space (for example
 * {@code "twenty-one "}), scale words are followed by {@code ", "} (for example
 * {@code "one thousand, "}) and zero is rendered as {@code "zero"} without a
 * trailing space.
 *
 * @lucene.internal
 */
public final class English {

    private static final long QUINTILLION = 1000000000000000000L;

    /** Scale thresholds, largest first; parallel to {@link #SCALE_NAMES}. */
    private static final long[] SCALE_VALUES = {
        QUINTILLION,         // quintillions
        1000000000000000L,   // quadrillions
        1000000000000L,      // trillions
        1000000000L,         // billions
        1000000L,            // millions
        1000L                // thousands
    };

    private static final String[] SCALE_NAMES = {
        "quintillion, ", "quadrillion, ", "trillion, ", "billion, ", "million, ", "thousand, "
    };

    /** Indexed by the tens digit; entries 0 and 1 are never used. */
    private static final String[] TENS = {
        "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"
    };

    /** Indexed by value 0..19; zero contributes nothing. */
    private static final String[] BELOW_TWENTY = {
        "", "one ", "two ", "three ", "four ", "five ", "six ", "seven ", "eight ", "nine ",
        "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", "fifteen ", "sixteen ",
        "seventeen ", "eighteen ", "nineteen "
    };

    private English() {} // no instance

    /**
     * Returns the english rendering of {@code i}.
     */
    public static String longToEnglish(long i) {
        StringBuilder result = new StringBuilder();
        longToEnglish(i, result);
        return result.toString();
    }

    /**
     * Appends the english rendering of {@code i} to {@code result}.
     *
     * @param i      the number to render; every {@code long} value is supported
     * @param result the builder to append to
     */
    public static void longToEnglish(long i, StringBuilder result) {
        if (i == 0) {
            result.append("zero");
            return;
        }
        if (i < 0) {
            result.append("minus ");
            if (i == Long.MIN_VALUE) {
                // -Long.MIN_VALUE overflows back to Long.MIN_VALUE, so peel off the
                // quintillions while still negative and negate the (safe) parts.
                longToEnglish(-(i / QUINTILLION), result);
                result.append(SCALE_NAMES[0]);
                i = -(i % QUINTILLION);
            } else {
                i = -i;
            }
        }
        for (int s = 0; s < SCALE_VALUES.length; s++) {
            final long scale = SCALE_VALUES[s];
            if (i >= scale) {
                longToEnglish(i / scale, result);
                result.append(SCALE_NAMES[s]);
                i = i % scale;
            }
        }
        if (i >= 100) { // hundreds
            longToEnglish(i / 100, result);
            result.append("hundred ");
            i = i % 100;
        }
        // i is below 100 from here on, so the casts to int are safe
        if (i >= 20) {
            result.append(TENS[(int) (i / 10)]);
            i = i % 10;
            result.append(i == 0 ? " " : "-");
        }
        result.append(BELOW_TWENTY[(int) i]);
    }

    /**
     * Returns the english rendering of {@code i}.
     */
    public static String intToEnglish(int i) {
        StringBuilder result = new StringBuilder();
        longToEnglish(i, result);
        return result.toString();
    }

    /**
     * Appends the english rendering of {@code i} to {@code result}.
     */
    public static void intToEnglish(int i, StringBuilder result) {
        longToEnglish(i, result);
    }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.test.integration.search.rescore; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.notNullValue; + +import org.apache.lucene.util.English; +import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.SearchType; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.ImmutableSettings.Builder; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; +import org.elasticsearch.search.rescore.RescoreBuilder; +import org.elasticsearch.test.integration.AbstractNodesTests; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +/** + * + */ +public class QueryRescorerTests extends AbstractNodesTests { + + private Client client; + + @BeforeClass + public void createNodes() throws Exception { + startNode("node1"); + client = getClient(); + } + + @AfterClass + public void closeNodes() { + client.close(); 
+ closeAllNodes(); + } + + protected Client getClient() { + return client("node1"); + } + + @Test + public void testRescorePhrase() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + + client.admin() + .indices() + .prepareCreate("test") + .addMapping( + "type1", + jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1") + .field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject()) + .setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2)).execute().actionGet(); + + client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet(); + client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute() + .actionGet(); + client.prepareIndex("test", "type1", "3") + .setSource("field1", "quick huge brown", "field2", "the quick lazy huge brown fox jumps over the tree").execute() + .actionGet(); + client.admin().indices().prepareRefresh("test").execute().actionGet(); + + SearchResponse searchResponse = client.prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR)) + .setRescorer(RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "quick brown").slop(2).boost(4.0f))) + .setRescoreWindow(5).execute().actionGet(); + + assertThat(searchResponse.hits().totalHits(), equalTo(3l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1")); + assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("3")); + assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("2")); + + searchResponse = client.prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR)) + 
.setRescorer(RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "the quick brown").slop(3))) + .setRescoreWindow(5).execute().actionGet(); + + assertThat(searchResponse.hits().totalHits(), equalTo(3l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1")); + assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2")); + assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3")); + + searchResponse = client.prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR)) + .setRescorer(RescoreBuilder.queryRescorer((QueryBuilders.matchPhraseQuery("field1", "the quick brown")))) + .setRescoreWindow(5).execute().actionGet(); + + assertThat(searchResponse.hits().totalHits(), equalTo(3l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1")); + assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2")); + assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3")); + } + + @Test + public void testMoreDocs() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + + Builder builder = ImmutableSettings.builder(); + builder.put("index.analysis.analyzer.synonym.tokenizer", "whitespace"); + builder.putArray("index.analysis.analyzer.synonym.filter", "synonym", "lowercase"); + builder.put("index.analysis.filter.synonym.type", "synonym"); + builder.putArray("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street"); + + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type2").startObject("properties") + .startObject("field1").field("type", "string").field("index_analyzer", "whitespace").field("search_analyzer", "synonym") + .endObject().endObject().endObject().endObject(); + + client.admin().indices().prepareCreate("test").addMapping("type1", 
mapping).setSettings(builder.put("index.number_of_shards", 1)) + .execute().actionGet(); + + client.prepareIndex("test", "type1", "1").setSource("field1", "massachusetts avenue boston massachusetts").execute().actionGet(); + client.prepareIndex("test", "type1", "2").setSource("field1", "lexington avenue boston massachusetts").execute().actionGet(); + client.prepareIndex("test", "type1", "3").setSource("field1", "boston avenue lexington massachusetts").execute().actionGet(); + client.admin().indices().prepareRefresh("test").execute().actionGet(); + client.prepareIndex("test", "type1", "4").setSource("field1", "boston road lexington massachusetts").execute().actionGet(); + client.prepareIndex("test", "type1", "5").setSource("field1", "lexington street lexington massachusetts").execute().actionGet(); + client.prepareIndex("test", "type1", "6").setSource("field1", "massachusetts avenue lexington massachusetts").execute().actionGet(); + client.prepareIndex("test", "type1", "7").setSource("field1", "bosten street san franciso california").execute().actionGet(); + client.admin().indices().prepareRefresh("test").execute().actionGet(); + client.prepareIndex("test", "type1", "8").setSource("field1", "hollywood boulevard los angeles california").execute().actionGet(); + client.prepareIndex("test", "type1", "9").setSource("field1", "1st street boston massachussetts").execute().actionGet(); + client.prepareIndex("test", "type1", "10").setSource("field1", "1st street boston massachusetts").execute().actionGet(); + client.admin().indices().prepareRefresh("test").execute().actionGet(); + client.prepareIndex("test", "type1", "11").setSource("field1", "2st street boston massachusetts").execute().actionGet(); + client.prepareIndex("test", "type1", "12").setSource("field1", "3st street boston massachusetts").execute().actionGet(); + client.admin().indices().prepareRefresh("test").execute().actionGet(); + SearchResponse searchResponse = client + .prepareSearch() + 
.setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(MatchQueryBuilder.Operator.OR)) + .setFrom(0) + .setSize(5) + .setRescorer( + RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "lexington avenue massachusetts").slop(3)) + .setQueryWeight(0.6f).setRescoreQueryWeight(2.0f)).setRescoreWindow(20).execute().actionGet(); + + assertThat(searchResponse.hits().totalHits(), equalTo(9l)); + assertThat(searchResponse.hits().hits().length, equalTo(5)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2")); + assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("6")); + assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3")); + + + searchResponse = client + .prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(MatchQueryBuilder.Operator.OR)) + .setFrom(0) + .setSize(5) + .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) + .setRescorer( + RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "lexington avenue massachusetts").slop(3)) + .setQueryWeight(0.6f).setRescoreQueryWeight(2.0f)).setRescoreWindow(20).execute().actionGet(); + + assertThat(searchResponse.hits().totalHits(), equalTo(9l)); + assertThat(searchResponse.hits().hits().length, equalTo(5)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("2")); + assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("6")); + assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3")); + } + + private static final void assertEquivalent(SearchResponse plain, SearchResponse rescored) { + SearchHits leftHits = plain.getHits(); + SearchHits rightHits = rescored.getHits(); + assertThat(leftHits.getTotalHits(), equalTo(rightHits.getTotalHits())); + assertThat(leftHits.getHits().length, equalTo(rightHits.getHits().length)); + SearchHit[] hits = leftHits.getHits(); + for (int i = 0; i < hits.length; i++) { + 
assertThat(hits[i].getId(), equalTo(rightHits.getHits()[i].getId())); + } + } + + private static final void assertEquivalentOrSubstringMatch(String query, SearchResponse plain, SearchResponse rescored) { + SearchHits leftHits = plain.getHits(); + SearchHits rightHits = rescored.getHits(); + assertThat(leftHits.getTotalHits(), equalTo(rightHits.getTotalHits())); + assertThat(leftHits.getHits().length, equalTo(rightHits.getHits().length)); + SearchHit[] hits = leftHits.getHits(); + SearchHit[] otherHits = rightHits.getHits(); + if (!hits[0].getId().equals(otherHits[0].getId())) { + assertThat(((String) otherHits[0].sourceAsMap().get("field1")).contains(query), equalTo(true)); + } else { + for (int i = 0; i < hits.length; i++) { + assertThat(query, hits[i].getId(), equalTo(rightHits.getHits()[i].getId())); + } + } + } + + @Test + public void testEquivalence() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + + client.admin() + .indices() + .prepareCreate("test") + .addMapping( + "type1", + jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1") + .field("analyzer", "whitespace").field("type", "string").endObject().endObject().endObject().endObject()) + .setSettings(ImmutableSettings.settingsBuilder()).execute().actionGet(); + int numDocs = 1000; + + for (int i = 0; i < numDocs; i++) { + client.prepareIndex("test", "type1", String.valueOf(i)).setSource("field1", English.intToEnglish(i)).execute().actionGet(); + } + + client.admin().indices().prepareRefresh("test").execute().actionGet(); + for (int i = 0; i < numDocs; i++) { + String intToEnglish = English.intToEnglish(i); + String query = intToEnglish.split(" ")[0]; + SearchResponse rescored = client + .prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR)) + .setFrom(0) + .setSize(10) + .setRescorer( + RescoreBuilder + 
.queryRescorer( + QueryBuilders + .constantScoreQuery(QueryBuilders.matchPhraseQuery("field1", intToEnglish).slop(3))) + .setQueryWeight(1.0f) + .setRescoreQueryWeight(0.0f)) // no weigth - so we basically use the same score as the actual query + .setRescoreWindow(50).execute().actionGet(); + + + SearchResponse plain = client.prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR)).setFrom(0).setSize(10) + .execute().actionGet(); + // check equivalence + assertEquivalent(plain, rescored); + + rescored = client + .prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR)) + .setFrom(0) + .setSize(10) + .setRescorer( + RescoreBuilder + .queryRescorer( + QueryBuilders + .constantScoreQuery(QueryBuilders.matchPhraseQuery("field1", "not in the index").slop(3))) + .setQueryWeight(1.0f) + .setRescoreQueryWeight(1.0f)) + .setRescoreWindow(50).execute().actionGet(); + // check equivalence + assertEquivalent(plain, rescored); + + rescored = client + .prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", query).operator(MatchQueryBuilder.Operator.OR)) + .setFrom(0) + .setSize(10) + .setRescorer( + RescoreBuilder + .queryRescorer( + QueryBuilders.matchPhraseQuery("field1", intToEnglish).slop(0)) + .setQueryWeight(1.0f).setRescoreQueryWeight(1.0f)).setRescoreWindow(100).execute().actionGet(); + // check equivalence or if the first match differs we check if the phrase is a substring of the top doc + assertEquivalentOrSubstringMatch(intToEnglish, plain, rescored); + } + } + + @Test + public void testExplain() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + + client.admin() + .indices() + .prepareCreate("test") + .addMapping( + "type1", + jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("field1") + .field("analyzer", 
"whitespace").field("type", "string").endObject().endObject().endObject().endObject()) + .setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2)).execute().actionGet(); + + client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet(); + client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute() + .actionGet(); + client.prepareIndex("test", "type1", "3") + .setSource("field1", "quick huge brown", "field2", "the quick lazy huge brown fox jumps over the tree").execute() + .actionGet(); + client.admin().indices().prepareRefresh("test").execute().actionGet(); + + SearchResponse searchResponse = client + .prepareSearch() + .setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(MatchQueryBuilder.Operator.OR)) + .setRescorer( + RescoreBuilder.queryRescorer(QueryBuilders.matchPhraseQuery("field1", "the quick brown").slop(2).boost(4.0f)) + .setQueryWeight(0.5f).setRescoreQueryWeight(0.4f)).setRescoreWindow(5).setExplain(true).execute() + .actionGet(); + assertThat(searchResponse.hits().totalHits(), equalTo(3l)); + assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1")); + assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2")); + assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3")); + + for (int i = 0; i < 3; i++) { + assertThat(searchResponse.getHits().getAt(i).explanation(), notNullValue()); + assertThat(searchResponse.getHits().getAt(i).explanation().isMatch(), equalTo(true)); + assertThat(searchResponse.getHits().getAt(i).explanation().getDetails().length, equalTo(2)); + assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[0].isMatch(), equalTo(true)); + assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[0].getDetails()[1].getValue(), equalTo(0.5f)); + 
assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].getDetails()[1].getValue(), equalTo(0.4f)); + if (i == 2) { + assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].isMatch(), equalTo(false)); + assertThat(searchResponse.getHits().getAt(i).explanation().getDetails()[1].getDetails()[0].getValue(), equalTo(0.0f)); + } + } + } + +}