From aab38fb2e6ae2b3d53d4e22dec5a9ea14eff367b Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 26 May 2014 17:56:07 +0200 Subject: [PATCH] Aggregations: added pagination support to `top_hits` aggregation by adding `from` option. Closes #6299 --- .../bucket/tophits-aggregation.asciidoc | 1 + .../bucket/tophits/InternalTopHits.java | 8 +++- .../aggregations/bucket/tophits/TopHits.java | 4 ++ .../bucket/tophits/TopHitsAggregator.java | 6 +-- .../bucket/tophits/TopHitsBuilder.java | 9 ++++ .../bucket/tophits/TopHitsContext.java | 6 ++- .../bucket/tophits/TopHitsParser.java | 3 ++ .../aggregations/bucket/TopHitsTests.java | 47 ++++++++++++++++++- 8 files changed, 76 insertions(+), 8 deletions(-) diff --git a/docs/reference/search/aggregations/bucket/tophits-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/tophits-aggregation.asciidoc index 51e4686fc19..e7db5ab2433 100644 --- a/docs/reference/search/aggregations/bucket/tophits-aggregation.asciidoc +++ b/docs/reference/search/aggregations/bucket/tophits-aggregation.asciidoc @@ -13,6 +13,7 @@ This aggregator can't hold any sub-aggregators and therefor can only be used as ==== Options +* `from` - The offset from the first result you want to fetch. * `size` - The maximum number of top matching hits to return per bucket. By default the top three matching hits are returned. * `sort` - How the top matching hits should be sorted. By default the hits are sorted by the score of the main query. diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/InternalTopHits.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/InternalTopHits.java index 1cb58544fd1..411b7b30cf2 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/InternalTopHits.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/InternalTopHits.java @@ -54,6 +54,7 @@ public class InternalTopHits extends InternalAggregation implements TopHits, ToX AggregationStreams.registerStream(STREAM, TYPE.stream()); } + private int from; private int size; private Sort sort; private TopDocs topDocs; @@ -62,8 +63,9 @@ public class InternalTopHits extends InternalAggregation implements TopHits, ToX InternalTopHits() { } - public InternalTopHits(String name, int size, Sort sort, TopDocs topDocs, InternalSearchHits searchHits) { + public InternalTopHits(String name, int from, int size, Sort sort, TopDocs topDocs, InternalSearchHits searchHits) { this.name = name; + this.from = from; this.size = size; this.sort = sort; this.topDocs = topDocs; @@ -104,7 +106,7 @@ public class InternalTopHits extends InternalAggregation implements TopHits, ToX try { int[] tracker = new int[shardHits.length]; - TopDocs reducedTopDocs = TopDocs.merge(sort, size, shardDocs); + TopDocs reducedTopDocs = TopDocs.merge(sort, from, size, shardDocs); InternalSearchHit[] hits = new InternalSearchHit[reducedTopDocs.scoreDocs.length]; for (int i = 0; i < reducedTopDocs.scoreDocs.length; i++) { ScoreDoc scoreDoc = reducedTopDocs.scoreDocs[i]; @@ -119,6 +121,7 @@ public class InternalTopHits extends InternalAggregation implements TopHits, ToX @Override public void readFrom(StreamInput in) throws IOException { name = in.readString(); + from = in.readVInt(); size = in.readVInt(); topDocs = Lucene.readTopDocs(in); if (topDocs instanceof TopFieldDocs) { @@ -130,6 +133,7 @@ public class InternalTopHits extends InternalAggregation implements TopHits, ToX @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(name); + out.writeVInt(from); out.writeVInt(size); Lucene.writeTopDocs(out, topDocs, 0); searchHits.writeTo(out); diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHits.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHits.java index 853a8a1dad6..4c20e430b9a 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHits.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHits.java @@ -22,9 +22,13 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.Aggregation; /** + * Accumulation of the most relevant hits for a bucket this aggregation falls into. */ public interface TopHits extends Aggregation { + /** + * @return The top matching hits for the bucket + */ SearchHits getHits(); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsAggregator.java index fab02ac4cd2..9953d3f8ccd 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsAggregator.java @@ -90,7 +90,7 @@ public class TopHitsAggregator extends BucketsAggregator implements ScorerAware searchHitFields.sortValues(fieldDoc.fields); } } - return new InternalTopHits(name, topHitsContext.size(), topHitsContext.sort(), topDocs, fetchResult.hits()); + return new InternalTopHits(name, topHitsContext.from(), topHitsContext.size(), topHitsContext.sort(), topDocs, fetchResult.hits()); } } @@ -104,10 +104,10 @@ public class TopHitsAggregator extends BucketsAggregator implements ScorerAware TopDocsCollector topDocsCollector = topDocsCollectors.get(bucketOrdinal); if (topDocsCollector == null) { Sort sort = topHitsContext.sort(); - int size = topHitsContext.size(); + int topN = topHitsContext.from() + topHitsContext.size(); topDocsCollectors.put( bucketOrdinal, - topDocsCollector = sort != null ? TopFieldCollector.create(sort, size, true, topHitsContext.trackScores(), true, false) : TopScoreDocCollector.create(size, false) + topDocsCollector = sort != null ? TopFieldCollector.create(sort, topN, true, topHitsContext.trackScores(), true, false) : TopScoreDocCollector.create(topN, false) ); topDocsCollector.setNextReader(currentContext); topDocsCollector.setScorer(currentScorer); diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsBuilder.java index 285b21b2647..767158be0b0 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsBuilder.java @@ -40,6 +40,15 @@ public class TopHitsBuilder extends AbstractAggregationBuilder { super(name, InternalTopHits.TYPE.name()); } + /** + * The index to start to return hits from. Defaults to 0. + */ + public TopHitsBuilder setFrom(int from) { + sourceBuilder().from(from); + return this; + } + + /** * The number of search hits to return. Defaults to 10. */ diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsContext.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsContext.java index 848c6ca414d..2023b4c9668 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsContext.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsContext.java @@ -71,6 +71,7 @@ public class TopHitsContext extends SearchContext { // the to hits are returned per bucket. private final static int DEFAULT_SIZE = 3; + private int from; private int size = DEFAULT_SIZE; private Sort sort; @@ -440,12 +441,13 @@ public class TopHitsContext extends SearchContext { @Override public int from() { - return context.from(); + return from; } @Override public SearchContext from(int from) { - throw new UnsupportedOperationException("Not supported"); + this.from = from; + return this; } @Override diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsParser.java index eac833f5672..cfd93bb4f2d 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHitsParser.java @@ -72,6 +72,9 @@ public class TopHitsParser implements Aggregator.Parser { currentFieldName = parser.currentName(); } else if (token.isValue()) { switch (currentFieldName) { + case "from": + topHitsContext.from(parser.intValue()); + break; case "size": topHitsContext.size(parser.intValue()); break; diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java index 2c4fb2fee9e..925d44ae70c 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/TopHitsTests.java @@ -23,6 +23,7 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; +import org.elasticsearch.index.query.FilterBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHitField; import org.elasticsearch.search.SearchHits; @@ -76,7 +77,6 @@ public class TopHitsTests extends ElasticsearchIntegrationTest { .endObject())); } - // Use routing to make sure all docs are in the same shard for consistent scoring builders.add(client().prepareIndex("idx", "field-collapsing", "1").setSource(jsonBuilder() .startObject() .field("group", "a") @@ -169,6 +169,51 @@ public class TopHitsTests extends ElasticsearchIntegrationTest { } } + @Test + public void testPagination() throws Exception { + int size = randomIntBetween(0, 10); + int from = randomIntBetween(0, 10); + SearchResponse response = client().prepareSearch("idx").setTypes("type") + .addAggregation(terms("terms") + .executionHint(randomExecutionHint()) + .field(TERMS_AGGS_FIELD) + .subAggregation( + topHits("hits").addSort(SortBuilders.fieldSort(SORT_FIELD).order(SortOrder.DESC)) + .setFrom(from) + .setSize(size) + ) + ) + .get(); + assertSearchResponse(response); + + SearchResponse control = client().prepareSearch("idx") + .setTypes("type") + .setFrom(from) + .setSize(size) + .setPostFilter(FilterBuilders.termFilter(TERMS_AGGS_FIELD, "val0")) + .addSort(SORT_FIELD, SortOrder.DESC) + .get(); + assertSearchResponse(control); + SearchHits controlHits = control.getHits(); + + Terms terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.getBuckets().size(), equalTo(5)); + + Terms.Bucket bucket = terms.getBucketByKey("val0"); + assertThat(bucket, notNullValue()); + assertThat(bucket.getDocCount(), equalTo(10l)); + TopHits topHits = bucket.getAggregations().get("hits"); + SearchHits hits = topHits.getHits(); + assertThat(hits.totalHits(), equalTo(controlHits.totalHits())); + assertThat(hits.getHits().length, equalTo(controlHits.getHits().length)); + for (int i = 0; i < hits.getHits().length; i++) { + assertThat(hits.getAt(i).id(), equalTo(controlHits.getAt(i).id())); + assertThat(hits.getAt(i).sortValues()[0], equalTo(controlHits.getAt(i).sortValues()[0])); + } + } + @Test public void testSortByBucket() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("type")