Add tests for top_hits aggregation (#22754)

Add unit tests for `TopHitsAggregator` and convert some snippets in
docs for `top_hits` aggregation to `// CONSOLE`.

Relates to #22278
Relates to #18160
This commit is contained in:
Nik Everett 2017-01-25 16:15:50 -05:00 committed by GitHub
parent f6ed39aa08
commit d704a880e7
5 changed files with 226 additions and 130 deletions

View File

@ -499,7 +499,6 @@
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]percentiles[/\\]tdigest[/\\]TDigestPercentilesAggregator.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]scripted[/\\]ScriptedMetricAggregator.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]stats[/\\]extended[/\\]ExtendedStatsAggregator.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]tophits[/\\]TopHitsAggregator.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]pipeline[/\\]bucketscript[/\\]BucketScriptPipelineAggregator.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]AggregationPath.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]ValuesSourceParser.java" checks="LineLength" />

View File

@ -122,7 +122,13 @@ public class TopHitsAggregator extends MetricsAggregator {
// In the QueryPhase we don't need this protection, because it is build into the IndexSearcher,
// but here we create collectors ourselves and we need prevent OOM because of crazy an offset and size.
topN = Math.min(topN, subSearchContext.searcher().getIndexReader().maxDoc());
TopDocsCollector<?> topLevelCollector = sort != null ? TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(), subSearchContext.trackScores()) : TopScoreDocCollector.create(topN);
TopDocsCollector<?> topLevelCollector;
if (sort == null) {
topLevelCollector = TopScoreDocCollector.create(topN);
} else {
topLevelCollector = TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(),
subSearchContext.trackScores());
}
collectors = new TopDocsAndLeafCollector(topLevelCollector);
collectors.leafCollector = collectors.topLevelCollector.getLeafCollector(ctx);
collectors.leafCollector.setScorer(scorer);
@ -170,8 +176,8 @@ public class TopHitsAggregator extends MetricsAggregator {
searchHitFields.sortValues(fieldDoc.fields, subSearchContext.sort().formats);
}
}
topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(), pipelineAggregators(),
metaData());
topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(),
pipelineAggregators(), metaData());
}
return topHits;
}
@ -184,7 +190,8 @@ public class TopHitsAggregator extends MetricsAggregator {
} else {
topDocs = Lucene.EMPTY_TOP_DOCS;
}
return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(), pipelineAggregators(), metaData());
return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(),
pipelineAggregators(), metaData());
}
@Override

View File

@ -94,6 +94,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService();
SearchContext searchContext = mock(SearchContext.class);
when(searchContext.numberOfShards()).thenReturn(1);
when(searchContext.searcher()).thenReturn(contextIndexSearcher);
when(searchContext.bigArrays()).thenReturn(new MockBigArrays(Settings.EMPTY, circuitBreakerService));
when(searchContext.fetchPhase())
@ -163,13 +164,17 @@ public abstract class AggregatorTestCase extends ESTestCase {
List<InternalAggregation> aggs = new ArrayList<> ();
Query rewritten = searcher.rewrite(query);
Weight weight = searcher.createWeight(rewritten, true);
try (C root = createAggregator(builder, searcher, fieldTypes)) {
C root = createAggregator(builder, searcher, fieldTypes);
try {
for (ShardSearcher subSearcher : subSearchers) {
try (C a = createAggregator(builder, subSearcher, fieldTypes)) {
C a = createAggregator(builder, subSearcher, fieldTypes);
try {
a.preCollection();
subSearcher.search(weight, a);
a.postCollection();
aggs.add(a.buildAggregation(0L));
} finally {
closeAgg(a);
}
}
if (aggs.isEmpty()) {
@ -179,6 +184,15 @@ public abstract class AggregatorTestCase extends ESTestCase {
A internalAgg = (A) aggs.get(0).doReduce(aggs, new InternalAggregation.ReduceContext(root.context().bigArrays(), null));
return internalAgg;
}
} finally {
closeAgg(root);
}
}
private void closeAgg(Aggregator agg) {
agg.close();
for (Aggregator sub : ((AggregatorBase) agg).subAggregators) {
closeAgg(sub);
}
}

View File

@ -18,14 +18,18 @@
*/
package org.elasticsearch.search.aggregations.metrics.tophits;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@ -33,55 +37,114 @@ import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.UidFieldMapper;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.sort.SortOrder;
public class TopHitsAggregatorTests extends AggregatorTestCase {
import java.io.IOException;
public void testTermsAggregator() throws Exception {
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
import static org.elasticsearch.search.aggregations.AggregationBuilders.topHits;
public class TopHitsAggregatorTests extends AggregatorTestCase {
public void testTopLevel() throws Exception {
Aggregation result;
if (randomBoolean()) {
result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
} else {
Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
result = testCase(query, topHits("_name"));
}
SearchHits searchHits = ((TopHits) result).getHits();
assertEquals(3L, searchHits.getTotalHits());
assertEquals("3", searchHits.getAt(0).getId());
assertEquals("type", searchHits.getAt(0).getType());
assertEquals("2", searchHits.getAt(1).getId());
assertEquals("type", searchHits.getAt(1).getType());
assertEquals("1", searchHits.getAt(2).getId());
assertEquals("type", searchHits.getAt(2).getType());
}
public void testNoResults() throws Exception {
TopHits result = (TopHits) testCase(new MatchNoDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
SearchHits searchHits = ((TopHits) result).getHits();
assertEquals(0L, searchHits.getTotalHits());
}
/**
* Tests {@code top_hits} inside of {@code terms}. While not strictly a unit test this is a fairly common way to run {@code top_hits}
* and serves as a good example of running {@code top_hits} inside of another aggregation.
*/
public void testInsideTerms() throws Exception {
Aggregation result;
if (randomBoolean()) {
result = testCase(new MatchAllDocsQuery(),
terms("term").field("string")
.subAggregation(topHits("top").sort("string", SortOrder.DESC)));
} else {
Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
result = testCase(query,
terms("term").field("string")
.subAggregation(topHits("top")));
}
Terms terms = (Terms) result;
// The "a" bucket
TopHits hits = (TopHits) terms.getBucketByKey("a").getAggregations().get("top");
SearchHits searchHits = (hits).getHits();
assertEquals(2L, searchHits.getTotalHits());
assertEquals("2", searchHits.getAt(0).getId());
assertEquals("1", searchHits.getAt(1).getId());
// The "b" bucket
searchHits = ((TopHits) terms.getBucketByKey("b").getAggregations().get("top")).getHits();
assertEquals(2L, searchHits.getTotalHits());
assertEquals("3", searchHits.getAt(0).getId());
assertEquals("1", searchHits.getAt(1).getId());
// The "c" bucket
searchHits = ((TopHits) terms.getBucketByKey("c").getAggregations().get("top")).getHits();
assertEquals(1L, searchHits.getTotalHits());
assertEquals("2", searchHits.getAt(0).getId());
// The "d" bucket
searchHits = ((TopHits) terms.getBucketByKey("d").getAggregations().get("top")).getHits();
assertEquals(1L, searchHits.getTotalHits());
assertEquals("3", searchHits.getAt(0).getId());
}
private static final MappedFieldType STRING_FIELD_TYPE = new KeywordFieldMapper.KeywordFieldType();
static {
STRING_FIELD_TYPE.setName("string");
STRING_FIELD_TYPE.setHasDocValues(true);
}
private Aggregation testCase(Query query, AggregationBuilder builder) throws IOException {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
Document document = new Document();
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "1"), UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
indexWriter.addDocument(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "2"), UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
indexWriter.addDocument(document);
document = new Document();
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "3"), UidFieldMapper.Defaults.FIELD_TYPE));
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
indexWriter.addDocument(document);
indexWriter.close();
RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
iw.addDocument(document("1", "a", "b"));
iw.addDocument(document("2", "c", "a"));
iw.addDocument(document("3", "b", "d"));
iw.close();
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
fieldType.setName("string");
fieldType.setHasDocValues(true );
TopHitsAggregationBuilder aggregationBuilder = new TopHitsAggregationBuilder("_name");
aggregationBuilder.sort("string", SortOrder.DESC);
try (TopHitsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)){
aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
TopHits topHits = (TopHits) aggregator.buildAggregation(0L);
SearchHits searchHits = topHits.getHits();
assertEquals(3L, searchHits.getTotalHits());
assertEquals("3", searchHits.getAt(0).getId());
assertEquals("type", searchHits.getAt(0).getType());
assertEquals("2", searchHits.getAt(1).getId());
assertEquals("type", searchHits.getAt(1).getType());
assertEquals("1", searchHits.getAt(2).getId());
assertEquals("type", searchHits.getAt(2).getType());
}
Aggregation result = searchAndReduce(indexSearcher, query, builder, STRING_FIELD_TYPE);
indexReader.close();
directory.close();
return result;
}
private Document document(String id, String... stringValues) {
Document document = new Document();
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", id), UidFieldMapper.Defaults.FIELD_TYPE));
for (String stringValue : stringValues) {
document.add(new Field("string", stringValue, STRING_FIELD_TYPE));
document.add(new SortedSetDocValuesField("string", new BytesRef(stringValue)));
}
return document;
}
}

View File

@ -33,27 +33,26 @@ only the title field is being included in the source.
[source,js]
--------------------------------------------------
POST /sales/_search?size=0
{
"aggs": {
"top-tags": {
"top_tags": {
"terms": {
"field": "tags",
"field": "type",
"size": 3
},
"aggs": {
"top_tag_hits": {
"top_sales_hits": {
"top_hits": {
"sort": [
{
"last_activity_date": {
"date": {
"order": "desc"
}
}
],
"_source": {
"includes": [
"title"
]
"includes": [ "date", "price" ]
},
"size" : 1
}
@ -63,90 +62,105 @@ only the title field is being included in the source.
}
}
--------------------------------------------------
// CONSOLE
// TEST[setup:sales]
Possible response snippet:
Possible response:
[source,js]
--------------------------------------------------
"aggregations": {
"top-tags": {
"buckets": [
{
"key": "windows-7",
"doc_count": 25365,
"top_tags_hits": {
"hits": {
"total": 25365,
"max_score": 1,
"hits": [
{
"_index": "stack",
"_type": "question",
"_id": "602679",
"_score": 1,
"_source": {
"title": "Windows port opening"
},
"sort": [
1370143231177
]
}
]
}
}
},
{
"key": "linux",
"doc_count": 18342,
"top_tags_hits": {
"hits": {
"total": 18342,
"max_score": 1,
"hits": [
{
"_index": "stack",
"_type": "question",
"_id": "602672",
"_score": 1,
"_source": {
"title": "Ubuntu RFID Screensaver lock-unlock"
},
"sort": [
1370143379747
]
}
]
}
}
},
{
"key": "windows",
"doc_count": 18119,
"top_tags_hits": {
"hits": {
"total": 18119,
"max_score": 1,
"hits": [
{
"_index": "stack",
"_type": "question",
"_id": "602678",
"_score": 1,
"_source": {
"title": "If I change my computers date / time, what could be affected?"
},
"sort": [
1370142868283
]
}
]
}
}
}
]
{
...
"aggregations": {
"top_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "hat",
"doc_count": 3,
"top_sales_hits": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "sales",
"_type": "sale",
"_id": "AVnNBmauCQpcRyxw6ChK",
"_source": {
"date": "2015/03/01 00:00:00",
"price": 200
},
"sort": [
1425168000000
],
"_score": null
}
]
}
}
},
{
"key": "t-shirt",
"doc_count": 3,
"top_sales_hits": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "sales",
"_type": "sale",
"_id": "AVnNBmauCQpcRyxw6ChL",
"_source": {
"date": "2015/03/01 00:00:00",
"price": 175
},
"sort": [
1425168000000
],
"_score": null
}
]
}
}
},
{
"key": "bag",
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "sales",
"_type": "sale",
"_id": "AVnNBmatCQpcRyxw6ChH",
"_source": {
"date": "2015/01/01 00:00:00",
"price": 150
},
"sort": [
1420070400000
],
"_score": null
}
]
}
}
}
]
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChK/$body.aggregations.top_tags.buckets.0.top_sales_hits.hits.hits.0._id/]
// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChL/$body.aggregations.top_tags.buckets.1.top_sales_hits.hits.hits.0._id/]
// TESTRESPONSE[s/AVnNBmatCQpcRyxw6ChH/$body.aggregations.top_tags.buckets.2.top_sales_hits.hits.hits.0._id/]
==== Field collapse example
@ -184,7 +198,6 @@ relevancy order of the most relevant document in a bucket.
"top_hit" : {
"max": {
"script": {
"lang": "painless",
"inline": "_score"
}
}