Add tests for top_hits aggregation (#22754)

Add unit tests for `TopHitsAggregator` and convert some snippets in docs for `top_hits` aggregation to `// CONSOLE`. Relates to #22278 Relates to #18160
2025-03-09 14:34:43 +00:00 · 2017-01-25 16:15:50 -05:00 · 2017-01-25 16:15:50 -05:00 · d704a880e7
commit d704a880e7
parent f6ed39aa08
5 changed files with 226 additions and 130 deletions
--- a/buildSrc/src/main/resources/checkstyle_suppressions.xml
+++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml
@ -499,7 +499,6 @@
  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]percentiles[/\\]tdigest[/\\]TDigestPercentilesAggregator.java" checks="LineLength" />
  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]scripted[/\\]ScriptedMetricAggregator.java" checks="LineLength" />
  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]stats[/\\]extended[/\\]ExtendedStatsAggregator.java" checks="LineLength" />
-  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]tophits[/\\]TopHitsAggregator.java" checks="LineLength" />
  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]pipeline[/\\]bucketscript[/\\]BucketScriptPipelineAggregator.java" checks="LineLength" />
  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]AggregationPath.java" checks="LineLength" />
  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]ValuesSourceParser.java" checks="LineLength" />
--- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java
@ -122,7 +122,13 @@ public class TopHitsAggregator extends MetricsAggregator {
                    // In the QueryPhase we don't need this protection, because it is build into the IndexSearcher,
                    // but here we create collectors ourselves and we need prevent OOM because of crazy an offset and size.
                    topN = Math.min(topN, subSearchContext.searcher().getIndexReader().maxDoc());
-                    TopDocsCollector<?> topLevelCollector = sort != null ? TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(), subSearchContext.trackScores()) : TopScoreDocCollector.create(topN);
+                    TopDocsCollector<?> topLevelCollector;
+                    if (sort == null) {
+                        topLevelCollector = TopScoreDocCollector.create(topN);
+                    } else {
+                        topLevelCollector = TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(),
+                                subSearchContext.trackScores());
+                    }
                    collectors = new TopDocsAndLeafCollector(topLevelCollector);
                    collectors.leafCollector = collectors.topLevelCollector.getLeafCollector(ctx);
                    collectors.leafCollector.setScorer(scorer);
@ -170,8 +176,8 @@ public class TopHitsAggregator extends MetricsAggregator {
                    searchHitFields.sortValues(fieldDoc.fields, subSearchContext.sort().formats);
                }
            }
-            topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(), pipelineAggregators(),
-                metaData());
+            topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(),
+                    pipelineAggregators(), metaData());
        }
        return topHits;
    }
@ -184,7 +190,8 @@ public class TopHitsAggregator extends MetricsAggregator {
        } else {
            topDocs = Lucene.EMPTY_TOP_DOCS;
        }
-        return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(), pipelineAggregators(), metaData());
+        return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(),
+                pipelineAggregators(), metaData());
    }

    @Override
--- a/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
@ -94,6 +94,7 @@ public abstract class AggregatorTestCase extends ESTestCase {

        CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService();
        SearchContext searchContext = mock(SearchContext.class);
+        when(searchContext.numberOfShards()).thenReturn(1);
        when(searchContext.searcher()).thenReturn(contextIndexSearcher);
        when(searchContext.bigArrays()).thenReturn(new MockBigArrays(Settings.EMPTY, circuitBreakerService));
        when(searchContext.fetchPhase())
@ -163,13 +164,17 @@ public abstract class AggregatorTestCase extends ESTestCase {
        List<InternalAggregation> aggs = new ArrayList<> ();
        Query rewritten = searcher.rewrite(query);
        Weight weight = searcher.createWeight(rewritten, true);
-        try (C root = createAggregator(builder, searcher, fieldTypes)) {
+        C root = createAggregator(builder, searcher, fieldTypes);
+        try {
            for (ShardSearcher subSearcher : subSearchers) {
-                try (C a = createAggregator(builder, subSearcher, fieldTypes)) {
+                C a = createAggregator(builder, subSearcher, fieldTypes);
+                try {
                    a.preCollection();
                    subSearcher.search(weight, a);
                    a.postCollection();
                    aggs.add(a.buildAggregation(0L));
+                } finally {
+                    closeAgg(a);
                }
            }
            if (aggs.isEmpty()) {
@ -179,6 +184,15 @@ public abstract class AggregatorTestCase extends ESTestCase {
                A internalAgg = (A) aggs.get(0).doReduce(aggs, new InternalAggregation.ReduceContext(root.context().bigArrays(), null));
                return internalAgg;
            }
+        } finally {
+            closeAgg(root);
+        }
+    }
+
+    private void closeAgg(Aggregator agg) {
+        agg.close();
+        for (Aggregator sub : ((AggregatorBase) agg).subAggregators) {
+            closeAgg(sub);
        }
    }

--- a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java
@ -18,14 +18,18 @@
 */
 package org.elasticsearch.search.aggregations.metrics.tophits;

+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
@ -33,55 +37,114 @@ import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.mapper.UidFieldMapper;
 import org.elasticsearch.search.SearchHits;
+import org.elasticsearch.search.aggregations.Aggregation;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
 import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
 import org.elasticsearch.search.sort.SortOrder;

-public class TopHitsAggregatorTests extends AggregatorTestCase {
+import java.io.IOException;

-    public void testTermsAggregator() throws Exception {
+import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
+import static org.elasticsearch.search.aggregations.AggregationBuilders.topHits;
+
+public class TopHitsAggregatorTests extends AggregatorTestCase {
+    public void testTopLevel() throws Exception {
+        Aggregation result;
+        if (randomBoolean()) {
+            result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
+        } else {
+            Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
+            result = testCase(query, topHits("_name"));
+        }
+        SearchHits searchHits = ((TopHits) result).getHits();
+        assertEquals(3L, searchHits.getTotalHits());
+        assertEquals("3", searchHits.getAt(0).getId());
+        assertEquals("type", searchHits.getAt(0).getType());
+        assertEquals("2", searchHits.getAt(1).getId());
+        assertEquals("type", searchHits.getAt(1).getType());
+        assertEquals("1", searchHits.getAt(2).getId());
+        assertEquals("type", searchHits.getAt(2).getType());
+    }
+
+    public void testNoResults() throws Exception {
+        TopHits result = (TopHits) testCase(new MatchNoDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
+        SearchHits searchHits = ((TopHits) result).getHits();
+        assertEquals(0L, searchHits.getTotalHits());
+    }
+
+    /**
+     * Tests {@code top_hits} inside of {@code terms}. While not strictly a unit test this is a fairly common way to run {@code top_hits}
+     * and serves as a good example of running {@code top_hits} inside of another aggregation.
+     */
+    public void testInsideTerms() throws Exception {
+        Aggregation result;
+        if (randomBoolean()) {
+            result = testCase(new MatchAllDocsQuery(),
+                    terms("term").field("string")
+                        .subAggregation(topHits("top").sort("string", SortOrder.DESC)));
+        } else {
+            Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
+            result = testCase(query,
+                    terms("term").field("string")
+                        .subAggregation(topHits("top")));
+        }
+        Terms terms = (Terms) result;
+
+        // The "a" bucket
+        TopHits hits = (TopHits) terms.getBucketByKey("a").getAggregations().get("top");
+        SearchHits searchHits = (hits).getHits();
+        assertEquals(2L, searchHits.getTotalHits());
+        assertEquals("2", searchHits.getAt(0).getId());
+        assertEquals("1", searchHits.getAt(1).getId());
+
+        // The "b" bucket
+        searchHits = ((TopHits) terms.getBucketByKey("b").getAggregations().get("top")).getHits();
+        assertEquals(2L, searchHits.getTotalHits());
+        assertEquals("3", searchHits.getAt(0).getId());
+        assertEquals("1", searchHits.getAt(1).getId());
+
+        // The "c" bucket
+        searchHits = ((TopHits) terms.getBucketByKey("c").getAggregations().get("top")).getHits();
+        assertEquals(1L, searchHits.getTotalHits());
+        assertEquals("2", searchHits.getAt(0).getId());
+
+        // The "d" bucket
+        searchHits = ((TopHits) terms.getBucketByKey("d").getAggregations().get("top")).getHits();
+        assertEquals(1L, searchHits.getTotalHits());
+        assertEquals("3", searchHits.getAt(0).getId());
+    }
+
+    private static final MappedFieldType STRING_FIELD_TYPE = new KeywordFieldMapper.KeywordFieldType();
+    static {
+        STRING_FIELD_TYPE.setName("string");
+        STRING_FIELD_TYPE.setHasDocValues(true);
+    }
+
+    private Aggregation testCase(Query query, AggregationBuilder builder) throws IOException {
        Directory directory = newDirectory();
-        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
-        Document document = new Document();
-        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "1"), UidFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
-        indexWriter.addDocument(document);
-        document = new Document();
-        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "2"), UidFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
-        indexWriter.addDocument(document);
-        document = new Document();
-        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "3"), UidFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
-        indexWriter.addDocument(document);
-        indexWriter.close();
+        RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
+        iw.addDocument(document("1", "a", "b"));
+        iw.addDocument(document("2", "c", "a"));
+        iw.addDocument(document("3", "b", "d"));
+        iw.close();

        IndexReader indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);

-        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
-        fieldType.setName("string");
-        fieldType.setHasDocValues(true );
-        TopHitsAggregationBuilder aggregationBuilder = new TopHitsAggregationBuilder("_name");
-        aggregationBuilder.sort("string", SortOrder.DESC);
-        try (TopHitsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)){
-            aggregator.preCollection();
-            indexSearcher.search(new MatchAllDocsQuery(), aggregator);
-            aggregator.postCollection();
-            TopHits topHits = (TopHits) aggregator.buildAggregation(0L);
-            SearchHits searchHits = topHits.getHits();
-            assertEquals(3L, searchHits.getTotalHits());
-            assertEquals("3", searchHits.getAt(0).getId());
-            assertEquals("type", searchHits.getAt(0).getType());
-            assertEquals("2", searchHits.getAt(1).getId());
-            assertEquals("type", searchHits.getAt(1).getType());
-            assertEquals("1", searchHits.getAt(2).getId());
-            assertEquals("type", searchHits.getAt(2).getType());
-        }
+        Aggregation result = searchAndReduce(indexSearcher, query, builder, STRING_FIELD_TYPE);
        indexReader.close();
        directory.close();
+        return result;
    }

+    private Document document(String id, String... stringValues) {
+        Document document = new Document();
+        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", id), UidFieldMapper.Defaults.FIELD_TYPE));
+        for (String stringValue : stringValues) {
+            document.add(new Field("string", stringValue, STRING_FIELD_TYPE));
+            document.add(new SortedSetDocValuesField("string", new BytesRef(stringValue)));
+        }
+        return document;
+    }
 }
--- a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc
+++ b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc
@ -33,27 +33,26 @@ only the title field is being included in the source.

 [source,js]
 --------------------------------------------------
+POST /sales/_search?size=0
 {
    "aggs": {
-        "top-tags": {
+        "top_tags": {
            "terms": {
-                "field": "tags",
+                "field": "type",
                "size": 3
            },
            "aggs": {
-                "top_tag_hits": {
+                "top_sales_hits": {
                    "top_hits": {
                        "sort": [
                            {
-                                "last_activity_date": {
+                                "date": {
                                    "order": "desc"
                                }
                            }
                        ],
                        "_source": {
-                            "includes": [
-                                "title"
-                            ]
+                            "includes": [ "date", "price" ]
                        },
                        "size" : 1
                    }
@ -63,90 +62,105 @@ only the title field is being included in the source.
    }
 }
 --------------------------------------------------
+// CONSOLE
+// TEST[setup:sales]

-Possible response snippet:
+Possible response:

 [source,js]
 --------------------------------------------------
-"aggregations": {
-  "top-tags": {
-     "buckets": [
-        {
-           "key": "windows-7",
-           "doc_count": 25365,
-           "top_tags_hits": {
-              "hits": {
-                 "total": 25365,
-                 "max_score": 1,
-                 "hits": [
-                    {
-                       "_index": "stack",
-                       "_type": "question",
-                       "_id": "602679",
-                       "_score": 1,
-                       "_source": {
-                          "title": "Windows port opening"
-                       },
-                       "sort": [
-                          1370143231177
-                       ]
-                    }
-                 ]
-              }
-           }
-        },
-        {
-           "key": "linux",
-           "doc_count": 18342,
-           "top_tags_hits": {
-              "hits": {
-                 "total": 18342,
-                 "max_score": 1,
-                 "hits": [
-                    {
-                       "_index": "stack",
-                       "_type": "question",
-                       "_id": "602672",
-                       "_score": 1,
-                       "_source": {
-                          "title": "Ubuntu RFID Screensaver lock-unlock"
-                       },
-                       "sort": [
-                          1370143379747
-                       ]
-                    }
-                 ]
-              }
-           }
-        },
-        {
-           "key": "windows",
-           "doc_count": 18119,
-           "top_tags_hits": {
-              "hits": {
-                 "total": 18119,
-                 "max_score": 1,
-                 "hits": [
-                    {
-                       "_index": "stack",
-                       "_type": "question",
-                       "_id": "602678",
-                       "_score": 1,
-                       "_source": {
-                          "title": "If I change my computers date / time, what could be affected?"
-                       },
-                       "sort": [
-                          1370142868283
-                       ]
-                    }
-                 ]
-              }
-           }
-        }
-     ]
+{
+  ...
+  "aggregations": {
+    "top_tags": {
+       "doc_count_error_upper_bound": 0,
+       "sum_other_doc_count": 0,
+       "buckets": [
+          {
+             "key": "hat",
+             "doc_count": 3,
+             "top_sales_hits": {
+                "hits": {
+                   "total": 3,
+                   "max_score": null,
+                   "hits": [
+                      {
+                         "_index": "sales",
+                         "_type": "sale",
+                         "_id": "AVnNBmauCQpcRyxw6ChK",
+                         "_source": {
+                            "date": "2015/03/01 00:00:00",
+                            "price": 200
+                         },
+                         "sort": [
+                            1425168000000
+                         ],
+                         "_score": null
+                      }
+                   ]
+                }
+             }
+          },
+          {
+             "key": "t-shirt",
+             "doc_count": 3,
+             "top_sales_hits": {
+                "hits": {
+                   "total": 3,
+                   "max_score": null,
+                   "hits": [
+                      {
+                         "_index": "sales",
+                         "_type": "sale",
+                         "_id": "AVnNBmauCQpcRyxw6ChL",
+                         "_source": {
+                            "date": "2015/03/01 00:00:00",
+                            "price": 175
+                         },
+                         "sort": [
+                            1425168000000
+                         ],
+                         "_score": null
+                      }
+                   ]
+                }
+             }
+          },
+          {
+             "key": "bag",
+             "doc_count": 1,
+             "top_sales_hits": {
+                "hits": {
+                   "total": 1,
+                   "max_score": null,
+                   "hits": [
+                      {
+                         "_index": "sales",
+                         "_type": "sale",
+                         "_id": "AVnNBmatCQpcRyxw6ChH",
+                         "_source": {
+                            "date": "2015/01/01 00:00:00",
+                            "price": 150
+                         },
+                         "sort": [
+                            1420070400000
+                         ],
+                         "_score": null
+                      }
+                   ]
+                }
+             }
+          }
+       ]
+    }
  }
 }
 --------------------------------------------------
+// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
+// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChK/$body.aggregations.top_tags.buckets.0.top_sales_hits.hits.hits.0._id/]
+// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChL/$body.aggregations.top_tags.buckets.1.top_sales_hits.hits.hits.0._id/]
+// TESTRESPONSE[s/AVnNBmatCQpcRyxw6ChH/$body.aggregations.top_tags.buckets.2.top_sales_hits.hits.hits.0._id/]
+

 ==== Field collapse example

@ -184,7 +198,6 @@ relevancy order of the most relevant document in a bucket.
        "top_hit" : {
          "max": {
            "script": {
-              "lang": "painless",
              "inline": "_score"
            }
          }