From 283b0931ff7de13b1e35ad8646bec1dba63b8112 Mon Sep 17 00:00:00 2001
From: markharwood <markharwood@gmail.com>
Date: Tue, 26 May 2015 17:32:48 +0100
Subject: [PATCH] Aggregations fix: queries with size=0 broke aggregations that
 require scores. Aggregations like Sampler and TopHits that require access to
 scores did not work if the query has size param set to zero. The assumption
 was that the Lucene query scoring logic was not required in these cases.
 Added a Junit test to demonstrate the issue and a fix which relies on earlier
 creation of Collector wrappers so that Collector.needsScores() calls work for
 all search operations.

Closes #11119
---
 .../search/internal/ContextIndexSearcher.java | 12 ++-
 .../aggregations/bucket/TopHitsTests.java     | 73 ++++++++++++++++++-
 2 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java
index 388ed8e8ae9..e4c61209825 100644
--- a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java
+++ b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java
@@ -133,8 +133,11 @@ public class ContextIndexSearcher extends IndexSearcher implements Releasable {
         }
     }
 
+
     @Override
-    public void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+    public void search(Query query, Collector collector) throws IOException {
+        // Wrap the caller's collector with various wrappers e.g. those used to siphon
+        // matches off for aggregation or to impose a time-limit on collection.
         final boolean timeoutSet = searchContext.timeoutInMillis() != -1;
         final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
 
@@ -166,8 +169,13 @@ public class ContextIndexSearcher extends IndexSearcher implements Releasable {
                 collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
             }
         }
+        super.search(query, collector);
+    }
 
-        // we only compute the doc id set once since within a context, we execute the same query always...
+    @Override
+    public void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
+        final boolean timeoutSet = searchContext.timeoutInMillis() != -1;
+        final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
         try {
             if (timeoutSet || terminateAfterSet) {
                 try {
diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java
index ba927efd641..a592ec18b95 100644
--- a/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java
+++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java
@@ -63,7 +63,15 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcke
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
-import static org.hamcrest.Matchers.*;
+import static org.hamcrest.Matchers.arrayContaining;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.Matchers.sameInstance;
 
 /**
  *
@@ -228,7 +236,9 @@ public class TopHitsTests extends ElasticsearchIntegrationTest {
 
     @Test
     public void testBasics() throws Exception {
-        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+        SearchResponse response = client()
+                .prepareSearch("idx")
+                .setTypes("type")
                 .addAggregation(terms("terms")
                         .executionHint(randomExecutionHint())
                         .field(TERMS_AGGS_FIELD)
@@ -264,6 +274,65 @@ public class TopHitsTests extends ElasticsearchIntegrationTest {
         }
     }
 
+    @Test
+    public void testIssue11119() throws Exception {
+        // Test that top_hits aggregation is fed scores if query results size=0
+        SearchResponse response = client()
+                .prepareSearch("idx")
+                .setTypes("field-collapsing")
+                .setSize(0)
+                .setQuery(matchQuery("text", "x y z"))
+                .addAggregation(terms("terms").executionHint(randomExecutionHint()).field("group").subAggregation(topHits("hits")))
+                .get();
+
+        assertSearchResponse(response);
+
+        assertThat(response.getHits().getTotalHits(), equalTo(8l));
+        assertThat(response.getHits().hits().length, equalTo(0));
+        assertThat(response.getHits().maxScore(), equalTo(0f));
+        Terms terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.getBuckets().size(), equalTo(3));
+
+        for (Terms.Bucket bucket : terms.getBuckets()) {
+            assertThat(bucket, notNullValue());
+            TopHits topHits = bucket.getAggregations().get("hits");
+            SearchHits hits = topHits.getHits();
+            float bestScore = Float.MAX_VALUE;
+            for (int h = 0; h < hits.getHits().length; h++) {
+                float score=hits.getAt(h).getScore();
+                assertThat(score, lessThanOrEqualTo(bestScore));
+                assertThat(score, greaterThan(0f));
+                bestScore = hits.getAt(h).getScore();
+            }
+        }
+
+        // Also check that min_score setting works when size=0
+        // (technically not a test of top_hits but implementation details are
+        // tied up with the need to feed scores into the agg tree even when
+        // users don't want ranked set of query results.)
+        response = client()
+                .prepareSearch("idx")
+                .setTypes("field-collapsing")
+                .setSize(0)
+                .setMinScore(0.0001f)
+                .setQuery(matchQuery("text", "x y z"))
+                .addAggregation(terms("terms").executionHint(randomExecutionHint()).field("group"))
+                .get();
+
+        assertSearchResponse(response);
+
+        assertThat(response.getHits().getTotalHits(), equalTo(8l));
+        assertThat(response.getHits().hits().length, equalTo(0));
+        assertThat(response.getHits().maxScore(), equalTo(0f));
+        terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.getBuckets().size(), equalTo(3));
+    }
+
+
     @Test
     public void testBreadthFirst() throws Exception {
         // breadth_first will be ignored since we need scores