Add tests for top_hits aggregation (#22754)
Add unit tests for `TopHitsAggregator` and convert some snippets in docs for `top_hits` aggregation to `// CONSOLE`. Relates to #22278 Relates to #18160
This commit is contained in:
parent
f6ed39aa08
commit
d704a880e7
|
@ -499,7 +499,6 @@
|
|||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]percentiles[/\\]tdigest[/\\]TDigestPercentilesAggregator.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]scripted[/\\]ScriptedMetricAggregator.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]stats[/\\]extended[/\\]ExtendedStatsAggregator.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]tophits[/\\]TopHitsAggregator.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]pipeline[/\\]bucketscript[/\\]BucketScriptPipelineAggregator.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]AggregationPath.java" checks="LineLength" />
|
||||
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]ValuesSourceParser.java" checks="LineLength" />
|
||||
|
|
|
@ -122,7 +122,13 @@ public class TopHitsAggregator extends MetricsAggregator {
|
|||
// In the QueryPhase we don't need this protection, because it is built into the IndexSearcher,
|
||||
// but here we create collectors ourselves and we need to prevent OOM because of a crazy offset and size.
|
||||
topN = Math.min(topN, subSearchContext.searcher().getIndexReader().maxDoc());
|
||||
TopDocsCollector<?> topLevelCollector = sort != null ? TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(), subSearchContext.trackScores()) : TopScoreDocCollector.create(topN);
|
||||
TopDocsCollector<?> topLevelCollector;
|
||||
if (sort == null) {
|
||||
topLevelCollector = TopScoreDocCollector.create(topN);
|
||||
} else {
|
||||
topLevelCollector = TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(),
|
||||
subSearchContext.trackScores());
|
||||
}
|
||||
collectors = new TopDocsAndLeafCollector(topLevelCollector);
|
||||
collectors.leafCollector = collectors.topLevelCollector.getLeafCollector(ctx);
|
||||
collectors.leafCollector.setScorer(scorer);
|
||||
|
@ -170,8 +176,8 @@ public class TopHitsAggregator extends MetricsAggregator {
|
|||
searchHitFields.sortValues(fieldDoc.fields, subSearchContext.sort().formats);
|
||||
}
|
||||
}
|
||||
topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(), pipelineAggregators(),
|
||||
metaData());
|
||||
topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(),
|
||||
pipelineAggregators(), metaData());
|
||||
}
|
||||
return topHits;
|
||||
}
|
||||
|
@ -184,7 +190,8 @@ public class TopHitsAggregator extends MetricsAggregator {
|
|||
} else {
|
||||
topDocs = Lucene.EMPTY_TOP_DOCS;
|
||||
}
|
||||
return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(), pipelineAggregators(), metaData());
|
||||
return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(),
|
||||
pipelineAggregators(), metaData());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -94,6 +94,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
|
|||
|
||||
CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService();
|
||||
SearchContext searchContext = mock(SearchContext.class);
|
||||
when(searchContext.numberOfShards()).thenReturn(1);
|
||||
when(searchContext.searcher()).thenReturn(contextIndexSearcher);
|
||||
when(searchContext.bigArrays()).thenReturn(new MockBigArrays(Settings.EMPTY, circuitBreakerService));
|
||||
when(searchContext.fetchPhase())
|
||||
|
@ -163,13 +164,17 @@ public abstract class AggregatorTestCase extends ESTestCase {
|
|||
List<InternalAggregation> aggs = new ArrayList<> ();
|
||||
Query rewritten = searcher.rewrite(query);
|
||||
Weight weight = searcher.createWeight(rewritten, true);
|
||||
try (C root = createAggregator(builder, searcher, fieldTypes)) {
|
||||
C root = createAggregator(builder, searcher, fieldTypes);
|
||||
try {
|
||||
for (ShardSearcher subSearcher : subSearchers) {
|
||||
try (C a = createAggregator(builder, subSearcher, fieldTypes)) {
|
||||
C a = createAggregator(builder, subSearcher, fieldTypes);
|
||||
try {
|
||||
a.preCollection();
|
||||
subSearcher.search(weight, a);
|
||||
a.postCollection();
|
||||
aggs.add(a.buildAggregation(0L));
|
||||
} finally {
|
||||
closeAgg(a);
|
||||
}
|
||||
}
|
||||
if (aggs.isEmpty()) {
|
||||
|
@ -179,6 +184,15 @@ public abstract class AggregatorTestCase extends ESTestCase {
|
|||
A internalAgg = (A) aggs.get(0).doReduce(aggs, new InternalAggregation.ReduceContext(root.context().bigArrays(), null));
|
||||
return internalAgg;
|
||||
}
|
||||
} finally {
|
||||
closeAgg(root);
|
||||
}
|
||||
}
|
||||
|
||||
private void closeAgg(Aggregator agg) {
|
||||
agg.close();
|
||||
for (Aggregator sub : ((AggregatorBase) agg).subAggregators) {
|
||||
closeAgg(sub);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,14 +18,18 @@
|
|||
*/
|
||||
package org.elasticsearch.search.aggregations.metrics.tophits;
|
||||
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
|
@ -33,55 +37,114 @@ import org.elasticsearch.index.mapper.MappedFieldType;
|
|||
import org.elasticsearch.index.mapper.Uid;
|
||||
import org.elasticsearch.index.mapper.UidFieldMapper;
|
||||
import org.elasticsearch.search.SearchHits;
|
||||
import org.elasticsearch.search.aggregations.Aggregation;
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
|
||||
import org.elasticsearch.search.sort.SortOrder;
|
||||
|
||||
public class TopHitsAggregatorTests extends AggregatorTestCase {
|
||||
import java.io.IOException;
|
||||
|
||||
public void testTermsAggregator() throws Exception {
|
||||
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
|
||||
import static org.elasticsearch.search.aggregations.AggregationBuilders.topHits;
|
||||
|
||||
public class TopHitsAggregatorTests extends AggregatorTestCase {
|
||||
public void testTopLevel() throws Exception {
|
||||
Aggregation result;
|
||||
if (randomBoolean()) {
|
||||
result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
|
||||
} else {
|
||||
Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
|
||||
result = testCase(query, topHits("_name"));
|
||||
}
|
||||
SearchHits searchHits = ((TopHits) result).getHits();
|
||||
assertEquals(3L, searchHits.getTotalHits());
|
||||
assertEquals("3", searchHits.getAt(0).getId());
|
||||
assertEquals("type", searchHits.getAt(0).getType());
|
||||
assertEquals("2", searchHits.getAt(1).getId());
|
||||
assertEquals("type", searchHits.getAt(1).getType());
|
||||
assertEquals("1", searchHits.getAt(2).getId());
|
||||
assertEquals("type", searchHits.getAt(2).getType());
|
||||
}
|
||||
|
||||
public void testNoResults() throws Exception {
|
||||
TopHits result = (TopHits) testCase(new MatchNoDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
|
||||
SearchHits searchHits = ((TopHits) result).getHits();
|
||||
assertEquals(0L, searchHits.getTotalHits());
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests {@code top_hits} inside of {@code terms}. While not strictly a unit test, this is a fairly common way to run {@code top_hits}
|
||||
* and serves as a good example of running {@code top_hits} inside of another aggregation.
|
||||
*/
|
||||
public void testInsideTerms() throws Exception {
|
||||
Aggregation result;
|
||||
if (randomBoolean()) {
|
||||
result = testCase(new MatchAllDocsQuery(),
|
||||
terms("term").field("string")
|
||||
.subAggregation(topHits("top").sort("string", SortOrder.DESC)));
|
||||
} else {
|
||||
Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
|
||||
result = testCase(query,
|
||||
terms("term").field("string")
|
||||
.subAggregation(topHits("top")));
|
||||
}
|
||||
Terms terms = (Terms) result;
|
||||
|
||||
// The "a" bucket
|
||||
TopHits hits = (TopHits) terms.getBucketByKey("a").getAggregations().get("top");
|
||||
SearchHits searchHits = (hits).getHits();
|
||||
assertEquals(2L, searchHits.getTotalHits());
|
||||
assertEquals("2", searchHits.getAt(0).getId());
|
||||
assertEquals("1", searchHits.getAt(1).getId());
|
||||
|
||||
// The "b" bucket
|
||||
searchHits = ((TopHits) terms.getBucketByKey("b").getAggregations().get("top")).getHits();
|
||||
assertEquals(2L, searchHits.getTotalHits());
|
||||
assertEquals("3", searchHits.getAt(0).getId());
|
||||
assertEquals("1", searchHits.getAt(1).getId());
|
||||
|
||||
// The "c" bucket
|
||||
searchHits = ((TopHits) terms.getBucketByKey("c").getAggregations().get("top")).getHits();
|
||||
assertEquals(1L, searchHits.getTotalHits());
|
||||
assertEquals("2", searchHits.getAt(0).getId());
|
||||
|
||||
// The "d" bucket
|
||||
searchHits = ((TopHits) terms.getBucketByKey("d").getAggregations().get("top")).getHits();
|
||||
assertEquals(1L, searchHits.getTotalHits());
|
||||
assertEquals("3", searchHits.getAt(0).getId());
|
||||
}
|
||||
|
||||
private static final MappedFieldType STRING_FIELD_TYPE = new KeywordFieldMapper.KeywordFieldType();
|
||||
static {
|
||||
STRING_FIELD_TYPE.setName("string");
|
||||
STRING_FIELD_TYPE.setHasDocValues(true);
|
||||
}
|
||||
|
||||
private Aggregation testCase(Query query, AggregationBuilder builder) throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
||||
Document document = new Document();
|
||||
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "1"), UidFieldMapper.Defaults.FIELD_TYPE));
|
||||
document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
|
||||
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
|
||||
indexWriter.addDocument(document);
|
||||
document = new Document();
|
||||
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "2"), UidFieldMapper.Defaults.FIELD_TYPE));
|
||||
document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
|
||||
document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
|
||||
indexWriter.addDocument(document);
|
||||
document = new Document();
|
||||
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "3"), UidFieldMapper.Defaults.FIELD_TYPE));
|
||||
document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
|
||||
document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
|
||||
indexWriter.addDocument(document);
|
||||
indexWriter.close();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
|
||||
iw.addDocument(document("1", "a", "b"));
|
||||
iw.addDocument(document("2", "c", "a"));
|
||||
iw.addDocument(document("3", "b", "d"));
|
||||
iw.close();
|
||||
|
||||
IndexReader indexReader = DirectoryReader.open(directory);
|
||||
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
|
||||
|
||||
MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
|
||||
fieldType.setName("string");
|
||||
fieldType.setHasDocValues(true );
|
||||
TopHitsAggregationBuilder aggregationBuilder = new TopHitsAggregationBuilder("_name");
|
||||
aggregationBuilder.sort("string", SortOrder.DESC);
|
||||
try (TopHitsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)){
|
||||
aggregator.preCollection();
|
||||
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
|
||||
aggregator.postCollection();
|
||||
TopHits topHits = (TopHits) aggregator.buildAggregation(0L);
|
||||
SearchHits searchHits = topHits.getHits();
|
||||
assertEquals(3L, searchHits.getTotalHits());
|
||||
assertEquals("3", searchHits.getAt(0).getId());
|
||||
assertEquals("type", searchHits.getAt(0).getType());
|
||||
assertEquals("2", searchHits.getAt(1).getId());
|
||||
assertEquals("type", searchHits.getAt(1).getType());
|
||||
assertEquals("1", searchHits.getAt(2).getId());
|
||||
assertEquals("type", searchHits.getAt(2).getType());
|
||||
}
|
||||
Aggregation result = searchAndReduce(indexSearcher, query, builder, STRING_FIELD_TYPE);
|
||||
indexReader.close();
|
||||
directory.close();
|
||||
return result;
|
||||
}
|
||||
|
||||
private Document document(String id, String... stringValues) {
|
||||
Document document = new Document();
|
||||
document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", id), UidFieldMapper.Defaults.FIELD_TYPE));
|
||||
for (String stringValue : stringValues) {
|
||||
document.add(new Field("string", stringValue, STRING_FIELD_TYPE));
|
||||
document.add(new SortedSetDocValuesField("string", new BytesRef(stringValue)));
|
||||
}
|
||||
return document;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,27 +33,26 @@ only the title field is being included in the source.
|
|||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
POST /sales/_search?size=0
|
||||
{
|
||||
"aggs": {
|
||||
"top-tags": {
|
||||
"top_tags": {
|
||||
"terms": {
|
||||
"field": "tags",
|
||||
"field": "type",
|
||||
"size": 3
|
||||
},
|
||||
"aggs": {
|
||||
"top_tag_hits": {
|
||||
"top_sales_hits": {
|
||||
"top_hits": {
|
||||
"sort": [
|
||||
{
|
||||
"last_activity_date": {
|
||||
"date": {
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"_source": {
|
||||
"includes": [
|
||||
"title"
|
||||
]
|
||||
"includes": [ "date", "price" ]
|
||||
},
|
||||
"size" : 1
|
||||
}
|
||||
|
@ -63,90 +62,105 @@ only the title field is being included in the source.
|
|||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:sales]
|
||||
|
||||
Possible response snippet:
|
||||
Possible response:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
"aggregations": {
|
||||
"top-tags": {
|
||||
"buckets": [
|
||||
{
|
||||
"key": "windows-7",
|
||||
"doc_count": 25365,
|
||||
"top_tags_hits": {
|
||||
"hits": {
|
||||
"total": 25365,
|
||||
"max_score": 1,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "stack",
|
||||
"_type": "question",
|
||||
"_id": "602679",
|
||||
"_score": 1,
|
||||
"_source": {
|
||||
"title": "Windows port opening"
|
||||
},
|
||||
"sort": [
|
||||
1370143231177
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "linux",
|
||||
"doc_count": 18342,
|
||||
"top_tags_hits": {
|
||||
"hits": {
|
||||
"total": 18342,
|
||||
"max_score": 1,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "stack",
|
||||
"_type": "question",
|
||||
"_id": "602672",
|
||||
"_score": 1,
|
||||
"_source": {
|
||||
"title": "Ubuntu RFID Screensaver lock-unlock"
|
||||
},
|
||||
"sort": [
|
||||
1370143379747
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "windows",
|
||||
"doc_count": 18119,
|
||||
"top_tags_hits": {
|
||||
"hits": {
|
||||
"total": 18119,
|
||||
"max_score": 1,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "stack",
|
||||
"_type": "question",
|
||||
"_id": "602678",
|
||||
"_score": 1,
|
||||
"_source": {
|
||||
"title": "If I change my computers date / time, what could be affected?"
|
||||
},
|
||||
"sort": [
|
||||
1370142868283
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
{
|
||||
...
|
||||
"aggregations": {
|
||||
"top_tags": {
|
||||
"doc_count_error_upper_bound": 0,
|
||||
"sum_other_doc_count": 0,
|
||||
"buckets": [
|
||||
{
|
||||
"key": "hat",
|
||||
"doc_count": 3,
|
||||
"top_sales_hits": {
|
||||
"hits": {
|
||||
"total": 3,
|
||||
"max_score": null,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "sales",
|
||||
"_type": "sale",
|
||||
"_id": "AVnNBmauCQpcRyxw6ChK",
|
||||
"_source": {
|
||||
"date": "2015/03/01 00:00:00",
|
||||
"price": 200
|
||||
},
|
||||
"sort": [
|
||||
1425168000000
|
||||
],
|
||||
"_score": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "t-shirt",
|
||||
"doc_count": 3,
|
||||
"top_sales_hits": {
|
||||
"hits": {
|
||||
"total": 3,
|
||||
"max_score": null,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "sales",
|
||||
"_type": "sale",
|
||||
"_id": "AVnNBmauCQpcRyxw6ChL",
|
||||
"_source": {
|
||||
"date": "2015/03/01 00:00:00",
|
||||
"price": 175
|
||||
},
|
||||
"sort": [
|
||||
1425168000000
|
||||
],
|
||||
"_score": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "bag",
|
||||
"doc_count": 1,
|
||||
"top_sales_hits": {
|
||||
"hits": {
|
||||
"total": 1,
|
||||
"max_score": null,
|
||||
"hits": [
|
||||
{
|
||||
"_index": "sales",
|
||||
"_type": "sale",
|
||||
"_id": "AVnNBmatCQpcRyxw6ChH",
|
||||
"_source": {
|
||||
"date": "2015/01/01 00:00:00",
|
||||
"price": 150
|
||||
},
|
||||
"sort": [
|
||||
1420070400000
|
||||
],
|
||||
"_score": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
|
||||
// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChK/$body.aggregations.top_tags.buckets.0.top_sales_hits.hits.hits.0._id/]
|
||||
// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChL/$body.aggregations.top_tags.buckets.1.top_sales_hits.hits.hits.0._id/]
|
||||
// TESTRESPONSE[s/AVnNBmatCQpcRyxw6ChH/$body.aggregations.top_tags.buckets.2.top_sales_hits.hits.hits.0._id/]
|
||||
|
||||
|
||||
==== Field collapse example
|
||||
|
||||
|
@ -184,7 +198,6 @@ relevancy order of the most relevant document in a bucket.
|
|||
"top_hit" : {
|
||||
"max": {
|
||||
"script": {
|
||||
"lang": "painless",
|
||||
"inline": "_score"
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue