Rescore collapsed documents (#28521)

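This change adds the ability to rescore collapsed documents. It removes the validation that rejected `collapse` when combined with `rescore`, and the collapsing top-docs collector now takes the rescore window size into account and reports whether rescoring should run.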
This commit is contained in:
Sergey Galkin 2018-03-05 00:39:50 +03:00 committed by Jim Ferenczi
parent c26bd6046b
commit f057fc294a
4 changed files with 78 additions and 31 deletions

View File

@ -237,28 +237,6 @@ setup:
search_after: [6]
sort: [{ sort: desc }]
---
"field collapsing and rescore":
- skip:
version: " - 5.2.99"
reason: this uses a new API that has been added in 5.3
- do:
catch: /cannot use \`collapse\` in conjunction with \`rescore\`/
search:
index: test
type: test
body:
collapse: { field: numeric_group }
rescore:
window_size: 20
query:
rescore_query:
match_all: {}
query_weight: 1
rescore_query_weight: 2
---
"no hits and inner_hits":

View File

@ -225,9 +225,6 @@ public class CollapseBuilder implements Writeable, ToXContentObject {
if (context.searchAfter() != null) {
throw new SearchContextException(context, "cannot use `collapse` in conjunction with `search_after`");
}
if (context.rescore() != null && context.rescore().isEmpty() == false) {
throw new SearchContextException(context, "cannot use `collapse` in conjunction with `rescore`");
}
MappedFieldType fieldType = context.getQueryShardContext().fieldMapper(field);
if (fieldType == null) {

View File

@ -128,6 +128,7 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
static class CollapsingTopDocsCollectorContext extends TopDocsCollectorContext {
private final DocValueFormat[] sortFmt;
private final CollapsingTopDocsCollector<?> topDocsCollector;
private final boolean rescore;
/**
* Ctr
@ -139,13 +140,14 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
// Builds the collector context used when a search collapses hits on a field.
// NOTE(review): the parameter list below is closed twice (`boolean trackMaxScore) {` and then
// `boolean trackMaxScore, boolean rescore) {`). This looks like the before/after lines of a diff
// hunk shown without +/- markers -- confirm that only the second line belongs to the final source.
private CollapsingTopDocsCollectorContext(CollapseContext collapseContext,
@Nullable SortAndFormats sortAndFormats,
int numHits,
boolean trackMaxScore) {
boolean trackMaxScore, boolean rescore) {
// Register this context under the "top hits" reason with the requested number of hits.
super(REASON_SEARCH_TOP_HITS, numHits);
assert numHits > 0;
assert collapseContext != null;
// Without an explicit sort, fall back to sorting by relevance ...
Sort sort = sortAndFormats == null ? Sort.RELEVANCE : sortAndFormats.sort;
// ... and to a single RAW format for the sort values.
this.sortFmt = sortAndFormats == null ? new DocValueFormat[] { DocValueFormat.RAW } : sortAndFormats.formats;
// The collapse context supplies the collapsing collector for the chosen sort and hit count.
this.topDocsCollector = collapseContext.createTopDocs(sort, numHits, trackMaxScore);
// Kept so that shouldRescore() can report it back.
this.rescore = rescore;
}
@Override
@ -158,6 +160,11 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
// Stores the collapsing collector's top docs on the query result, together with the
// formats (sortFmt) chosen in the constructor for the sort values.
void postProcess(QuerySearchResult result) throws IOException {
result.topDocs(topDocsCollector.getTopDocs(), sortFmt);
}
/**
 * Returns the {@code rescore} flag that was handed to the constructor of this context.
 */
@Override
boolean shouldRescore() {
return rescore;
}
}
abstract static class SimpleTopDocsCollectorContext extends TopDocsCollectorContext {
@ -332,11 +339,6 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
return new ScrollingTopDocsCollectorContext(reader, query, searchContext.scrollContext(),
searchContext.sort(), numDocs, searchContext.trackScores(), searchContext.numberOfShards(),
searchContext.trackTotalHits(), hasFilterCollector);
} else if (searchContext.collapse() != null) {
boolean trackScores = searchContext.sort() == null ? true : searchContext.trackScores();
int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
return new CollapsingTopDocsCollectorContext(searchContext.collapse(),
searchContext.sort(), numDocs, trackScores);
} else {
int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
final boolean rescore = searchContext.rescore().isEmpty() == false;
@ -346,6 +348,11 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
numDocs = Math.max(numDocs, rescoreContext.getWindowSize());
}
}
if (searchContext.collapse() != null) {
boolean trackScores = searchContext.sort() == null ? true : searchContext.trackScores();
return new CollapsingTopDocsCollectorContext(searchContext.collapse(),
searchContext.sort(), numDocs, trackScores, rescore);
}
return new SimpleTopDocsCollectorContext(reader, query, searchContext.sort(), searchContext.searchAfter(), numDocs,
searchContext.trackScores(), searchContext.trackTotalHits(), hasFilterCollector) {
@Override

View File

@ -36,13 +36,17 @@ import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.rescore.QueryRescoreMode;
import org.elasticsearch.search.rescore.QueryRescorerBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.test.ESIntegTestCase;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.stream.Collectors;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
@ -67,6 +71,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSeco
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThirdHit;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasId;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasScore;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
@ -748,4 +753,64 @@ public class QueryRescorerIT extends ESIntegTestCase {
assertThat(hit.getScore(), equalTo(101f));
}
}
/**
 * Checks that a query rescorer can be combined with field collapsing.
 *
 * Four documents are indexed into two groups ("a" and "b"). Document 1 has the name "miss",
 * which the term query built by {@code staticScoreQuery} does not look for, so three hits
 * are expected in total. After collapsing on "group" the test expects exactly documents 2
 * and 3 to be returned, with scores 22 and 32. Document 4 carries the largest
 * {@code static_rescore} value but the lowest first-pass score in its group, so it must
 * not appear in the response.
 */
public void testRescoreAfterCollapse() throws Exception {
    // "group" is mapped as a keyword; it is the field the search collapses on below.
    assertAcked(
        prepareCreate("test").addMapping(
            "type1",
            jsonBuilder()
                .startObject()
                    .startObject("properties")
                        .startObject("group")
                            .field("type", "keyword")
                        .endObject()
                    .endObject()
                .endObject()));
    ensureGreen("test");

    indexDocument(1, "miss", "a", 1, 10);
    indexDocument(2, "name", "a", 2, 20);
    indexDocument(3, "name", "b", 2, 30);
    // should be highest on rescore, but filtered out during collapse
    indexDocument(4, "name", "b", 1, 40);
    refresh("test");

    final QueryRescorerBuilder rescorer = new QueryRescorerBuilder(staticScoreQuery("static_rescore"));
    final CollapseBuilder collapseOnGroup = new CollapseBuilder("group");
    final SearchResponse response = client().prepareSearch("test")
        .setTypes("type1")
        .setQuery(staticScoreQuery("static_score"))
        .addRescorer(rescorer)
        .setCollapse(collapseOnGroup)
        .get();

    // Three documents match the query, but only one hit per group is returned.
    assertThat(response.getHits().totalHits, equalTo(3L));
    final SearchHit[] returnedHits = response.getHits().getHits();
    assertThat(returnedHits.length, equalTo(2));

    final Map<String, Float> scoreById = Arrays.stream(returnedHits)
        .collect(Collectors.toMap(SearchHit::getId, SearchHit::getScore));
    assertThat(scoreById.keySet(), containsInAnyOrder("2", "3"));
    assertThat(scoreById.get("2"), equalTo(22F));
    assertThat(scoreById.get("3"), equalTo(32F));
}
/**
 * Builds a function score query over the documents whose "name" term is "name".
 * The score function is a field value factor on {@code scoreField}, combined with
 * {@link CombineFunction#REPLACE}.
 *
 * @param scoreField name of the field passed to the field value factor function
 * @return the function score query
 */
private QueryBuilder staticScoreQuery(String scoreField) {
    final QueryBuilder nameFilter = termQuery("name", "name");
    return functionScoreQuery(nameFilter, ScoreFunctionBuilders.fieldValueFactorFunction(scoreField))
        .boostMode(CombineFunction.REPLACE);
}
/**
 * Indexes one document of type "type1" into the "test" index.
 *
 * @param id      document id, used in its decimal string form
 * @param name    value of the "name" field
 * @param group   value of the "group" field
 * @param score   value of the "static_score" field
 * @param rescore value of the "static_rescore" field
 * @throws IOException if building the document source fails
 */
private void indexDocument(int id, String name, String group, int score, int rescore) throws IOException {
    final String documentId = Integer.toString(id);
    final XContentBuilder source = jsonBuilder()
        .startObject()
            .field("name", name)
            .field("group", group)
            .field("static_score", score)
            .field("static_rescore", rescore)
        .endObject();
    client()
        .prepareIndex("test", "type1", documentId)
        .setSource(source)
        .get();
}
}