From 283b0931ff7de13b1e35ad8646bec1dba63b8112 Mon Sep 17 00:00:00 2001 From: markharwood Date: Tue, 26 May 2015 17:32:48 +0100 Subject: [PATCH] Aggregations fix: queries with size=0 broke aggregations that require scores. Aggregations like Sampler and TopHits that require access to scores did not work if the query has size param set to zero. The assumption was that the Lucene query scoring logic was not required in these cases. Added a Junit test to demonstrate the issue and a fix which relies on earlier creation of Collector wrappers so that Collector.needsScores() calls work for all search operations. Closes #11119 --- .../search/internal/ContextIndexSearcher.java | 12 ++- .../aggregations/bucket/TopHitsTests.java | 73 ++++++++++++++++++- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index 388ed8e8ae9..e4c61209825 100644 --- a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -133,8 +133,11 @@ public class ContextIndexSearcher extends IndexSearcher implements Releasable { } } + @Override - public void search(List leaves, Weight weight, Collector collector) throws IOException { + public void search(Query query, Collector collector) throws IOException { + // Wrap the caller's collector with various wrappers e.g. those used to siphon + // matches off for aggregation or to impose a time-limit on collection. final boolean timeoutSet = searchContext.timeoutInMillis() != -1; final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER; @@ -166,8 +169,13 @@ public class ContextIndexSearcher extends IndexSearcher implements Releasable { collector = new MinimumScoreCollector(collector, searchContext.minimumScore()); } } + super.search(query, collector); + } - // we only compute the doc id set once since within a context, we execute the same query always... + @Override + public void search(List leaves, Weight weight, Collector collector) throws IOException { + final boolean timeoutSet = searchContext.timeoutInMillis() != -1; + final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER; try { if (timeoutSet || terminateAfterSet) { try { diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java index ba927efd641..a592ec18b95 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java @@ -63,7 +63,15 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcke import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; -import static org.hamcrest.Matchers.*; +import static org.hamcrest.Matchers.arrayContaining; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; /** * @@ -228,7 +236,9 @@ public class TopHitsTests extends ElasticsearchIntegrationTest { @Test public void testBasics() throws Exception { - SearchResponse response = client().prepareSearch("idx").setTypes("type") + SearchResponse response = client() + .prepareSearch("idx") + .setTypes("type") .addAggregation(terms("terms") .executionHint(randomExecutionHint()) .field(TERMS_AGGS_FIELD) @@ -264,6 +274,65 @@ public class TopHitsTests extends ElasticsearchIntegrationTest { } } + @Test + public void testIssue11119() throws Exception { + // Test that top_hits aggregation is fed scores if query results size=0 + SearchResponse response = client() + .prepareSearch("idx") + .setTypes("field-collapsing") + .setSize(0) + .setQuery(matchQuery("text", "x y z")) + .addAggregation(terms("terms").executionHint(randomExecutionHint()).field("group").subAggregation(topHits("hits"))) + .get(); + + assertSearchResponse(response); + + assertThat(response.getHits().getTotalHits(), equalTo(8l)); + assertThat(response.getHits().hits().length, equalTo(0)); + assertThat(response.getHits().maxScore(), equalTo(0f)); + Terms terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.getBuckets().size(), equalTo(3)); + + for (Terms.Bucket bucket : terms.getBuckets()) { + assertThat(bucket, notNullValue()); + TopHits topHits = bucket.getAggregations().get("hits"); + SearchHits hits = topHits.getHits(); + float bestScore = Float.MAX_VALUE; + for (int h = 0; h < hits.getHits().length; h++) { + float score=hits.getAt(h).getScore(); + assertThat(score, lessThanOrEqualTo(bestScore)); + assertThat(score, greaterThan(0f)); + bestScore = hits.getAt(h).getScore(); + } + } + + // Also check that min_score setting works when size=0 + // (technically not a test of top_hits but implementation details are + // tied up with the need to feed scores into the agg tree even when + // users don't want ranked set of query results.) + response = client() + .prepareSearch("idx") + .setTypes("field-collapsing") + .setSize(0) + .setMinScore(0.0001f) + .setQuery(matchQuery("text", "x y z")) + .addAggregation(terms("terms").executionHint(randomExecutionHint()).field("group")) + .get(); + + assertSearchResponse(response); + + assertThat(response.getHits().getTotalHits(), equalTo(8l)); + assertThat(response.getHits().hits().length, equalTo(0)); + assertThat(response.getHits().maxScore(), equalTo(0f)); + terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.getBuckets().size(), equalTo(3)); + } + + @Test public void testBreadthFirst() throws Exception { // breadth_first will be ignored since we need scores