Added top_hits aggregation that keeps track of the most relevant document being aggregated per bucket.

Closes #6124
Martijn van Groningen 2014-05-08 13:05:50 +02:00
parent 2d417cf5b6
commit 5fafd2451a
12 changed files with 2054 additions and 1 deletion

View File

@@ -26,4 +26,6 @@ include::bucket/datehistogram-aggregation.asciidoc[]
include::bucket/geodistance-aggregation.asciidoc[]
include::bucket/geohashgrid-aggregation.asciidoc[]
include::bucket/tophits-aggregation.asciidoc[]

View File

@@ -0,0 +1,199 @@
[[search-aggregations-bucket-top-hits-aggregation]]
=== Top hits Aggregation
coming[1.3.0]
The `top_hits` aggregator keeps track of the most relevant document being aggregated. This aggregator is intended to be
used as a sub-aggregator, so that the top matching documents can be aggregated per bucket.
The `top_hits` aggregator can effectively be used to group result sets by certain fields via a bucket aggregator.
One or more bucket aggregators determine the properties by which the result set is sliced.
This aggregator can't hold any sub-aggregators and can therefore only be used as a leaf aggregator.
==== Options
* `size` - The maximum number of top matching hits to return per bucket. By default the top three matching hits are returned.
* `sort` - How the top matching hits should be sorted. By default the hits are sorted by the score of the main query.
==== Supported per hit features
The `top_hits` aggregation returns regular search hits, so many per-hit features are supported:
* <<search-request-highlighting,Highlighting>>
* <<search-request-explain,Explain>>
* <<search-request-named-queries-and-filters,Named filters and queries>>
* <<search-request-source-filtering,Source filtering>>
* <<search-request-script-fields,Script fields>>
* <<search-request-fielddata-fields,Fielddata fields>>
* <<search-request-version,Include versions>>
==== Example
In the following example we group the questions by tag, and for each tag we show the last active question. For each
question only the title field is included in the source.
[source,js]
--------------------------------------------------
{
    "aggs": {
        "top-tags": {
            "terms": {
                "field": "tags",
                "size": 3
            },
            "aggs": {
                "top_tags_hits": {
                    "top_hits": {
                        "sort": [
                            {
                                "last_activity_date": {
                                    "order": "desc"
                                }
                            }
                        ],
                        "_source": {
                            "include": [
                                "title"
                            ]
                        },
                        "size": 1
                    }
                }
            }
        }
    }
}
--------------------------------------------------
Possible response snippet:
[source,js]
--------------------------------------------------
"aggregations": {
"top-tags": {
"buckets": [
{
"key": "windows-7",
"doc_count": 25365,
"top_tags_hits": {
"hits": {
"total": 25365,
"max_score": 1,
"hits": [
{
"_index": "stack",
"_type": "question",
"_id": "602679",
"_score": 1,
"_source": {
"title": "Windows port opening"
},
"sort": [
1370143231177
]
}
]
}
}
},
{
"key": "linux",
"doc_count": 18342,
"top_tags_hits": {
"hits": {
"total": 18342,
"max_score": 1,
"hits": [
{
"_index": "stack",
"_type": "question",
"_id": "602672",
"_score": 1,
"_source": {
"title": "Ubuntu RFID Screensaver lock-unlock"
},
"sort": [
1370143379747
]
}
]
}
}
},
{
"key": "windows",
"doc_count": 18119,
"top_tags_hits": {
"hits": {
"total": 18119,
"max_score": 1,
"hits": [
{
"_index": "stack",
"_type": "question",
"_id": "602678",
"_score": 1,
"_source": {
"title": "If I change my computers date / time, what could be affected?"
},
"sort": [
1370142868283
]
}
]
}
}
}
]
}
}
--------------------------------------------------
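The same request can be expressed through the Java API via the `TopHitsBuilder` added in this commit. The sketch below
is illustrative only: the `client` instance and the `stack` index name (taken from the response snippet above) are
assumptions.
[source,java]
--------------------------------------------------
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.tophits.TopHits;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;

public class TopTagsExample {

    // `client` is assumed to be an already-connected Client; the "stack" index and the
    // "tags", "last_activity_date" and "title" fields mirror the JSON examples above.
    public static void printLastActiveQuestionPerTag(Client client) {
        SearchResponse response = client.prepareSearch("stack")
                .addAggregation(AggregationBuilders.terms("top-tags")
                        .field("tags")
                        .size(3)
                        .subAggregation(AggregationBuilders.topHits("top_tags_hits")
                                .addSort(SortBuilders.fieldSort("last_activity_date").order(SortOrder.DESC))
                                .setFetchSource("title", null) // same effect as the "_source" include filter
                                .setSize(1)))
                .get();

        Terms topTags = response.getAggregations().get("top-tags");
        for (Terms.Bucket bucket : topTags.getBuckets()) {
            TopHits topHits = bucket.getAggregations().get("top_tags_hits");
            // The single hit per bucket is the last active question for this tag.
            System.out.println(bucket.getKey() + ": "
                    + topHits.getHits().getAt(0).sourceAsMap().get("title"));
        }
    }
}
--------------------------------------------------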
==== Field collapse example
Field collapsing or result grouping is a feature that logically groups a result set into groups and returns the
top documents per group. The ordering of the groups is determined by the relevancy of the first document in a group. In
Elasticsearch this can be implemented via a bucket aggregator that wraps a `top_hits` aggregator as sub-aggregator.
In the example below we search across crawled webpages. For each webpage we store the body and the domain the webpage
belongs to. By defining a `terms` aggregator on the `domain` field we group the result set of webpages by domain. The
`top_hits` aggregator is then defined as sub-aggregator, so that the top matching hits are collected per bucket.
Also a `max` aggregator is defined, which is used by the `terms` aggregator's order feature to return the buckets by
the relevancy of the most relevant document in each bucket.
[source,js]
--------------------------------------------------
{
    "query": {
        "match": {
            "body": "elections"
        }
    },
    "aggs": {
        "top-sites": {
            "terms": {
                "field": "domain",
                "order": {
                    "top_hit": "desc"
                }
            },
            "aggs": {
                "top_tags_hits": {
                    "top_hits": {}
                },
                "top_hit": {
                    "max": {
                        "script": "_doc.score"
                    }
                }
            }
        }
    }
}
--------------------------------------------------
At the moment the `max` (or `min`) aggregator is needed to make sure the buckets from the `terms` aggregator are
ordered according to the score of the most relevant webpage per domain. The `top_hits` aggregator isn't a metric aggregator
and therefore can't be used in the `order` option of the `terms` aggregator.
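For reference, a minimal sketch of the same field-collapse request through the Java API; the `client` instance and the
`webpages` index name are assumptions, while the field names mirror the JSON example above:
[source,java]
--------------------------------------------------
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;

public class FieldCollapseExample {

    // `client` and the "webpages" index are assumed; "body" and "domain" mirror the
    // JSON example above.
    public static SearchResponse groupByDomain(Client client) {
        return client.prepareSearch("webpages")
                .setQuery(QueryBuilders.matchQuery("body", "elections"))
                .addAggregation(AggregationBuilders.terms("top-sites")
                        .field("domain")
                        // Order the buckets by the score of their best hit, computed by
                        // the sibling max aggregation below.
                        .order(Terms.Order.aggregation("top_hit", false))
                        .subAggregation(AggregationBuilders.topHits("top_tags_hits"))
                        .subAggregation(AggregationBuilders.max("top_hit").script("_doc.score")))
                .get();
    }
}
--------------------------------------------------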

View File

@@ -32,6 +32,7 @@ import org.elasticsearch.search.aggregations.bucket.range.ipv4.IPv4RangeBuilder;
import org.elasticsearch.search.aggregations.bucket.nested.ReverseNestedBuilder;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.search.aggregations.bucket.tophits.TopHitsBuilder;
import org.elasticsearch.search.aggregations.metrics.avg.AvgBuilder;
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityBuilder;
import org.elasticsearch.search.aggregations.metrics.max.MaxBuilder;
@@ -141,4 +142,8 @@ public class AggregationBuilders {
public static CardinalityBuilder cardinality(String name) {
return new CardinalityBuilder(name);
}
public static TopHitsBuilder topHits(String name) {
return new TopHitsBuilder(name);
}
}

View File

@@ -35,6 +35,7 @@ import org.elasticsearch.search.aggregations.bucket.range.ipv4.IpRangeParser;
import org.elasticsearch.search.aggregations.bucket.nested.ReverseNestedParser;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsParser;
import org.elasticsearch.search.aggregations.bucket.terms.TermsParser;
import org.elasticsearch.search.aggregations.bucket.tophits.TopHitsParser;
import org.elasticsearch.search.aggregations.metrics.avg.AvgParser;
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityParser;
import org.elasticsearch.search.aggregations.metrics.max.MaxParser;
@@ -79,6 +80,7 @@ public class AggregationModule extends AbstractModule {
parsers.add(GeoHashGridParser.class);
parsers.add(NestedParser.class);
parsers.add(ReverseNestedParser.class);
parsers.add(TopHitsParser.class);
}
/**

View File

@@ -38,6 +38,7 @@ import org.elasticsearch.search.aggregations.bucket.terms.DoubleTerms;
import org.elasticsearch.search.aggregations.bucket.terms.LongTerms;
import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
import org.elasticsearch.search.aggregations.bucket.terms.UnmappedTerms;
import org.elasticsearch.search.aggregations.bucket.tophits.InternalTopHits;
import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg;
import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality;
import org.elasticsearch.search.aggregations.metrics.max.InternalMax;
@@ -87,5 +88,6 @@ public class TransportAggregationModule extends AbstractModule {
InternalGeoDistance.registerStream();
InternalNested.registerStream();
InternalReverseNested.registerStream();
InternalTopHits.registerStreams();
}
}

View File

@@ -0,0 +1,149 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.tophits;
import org.apache.lucene.search.*;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.InternalSearchHits;
import java.io.IOException;
import java.util.List;
/**
*/
public class InternalTopHits extends InternalAggregation implements TopHits, ToXContent, Streamable {
public static final InternalAggregation.Type TYPE = new Type("top_hits");
public static final AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
@Override
public InternalTopHits readResult(StreamInput in) throws IOException {
InternalTopHits buckets = new InternalTopHits();
buckets.readFrom(in);
return buckets;
}
};
public static void registerStreams() {
AggregationStreams.registerStream(STREAM, TYPE.stream());
}
private int size;
private Sort sort;
private TopDocs topDocs;
private InternalSearchHits searchHits;
InternalTopHits() {
}
public InternalTopHits(String name, int size, Sort sort, TopDocs topDocs, InternalSearchHits searchHits) {
this.name = name;
this.size = size;
this.sort = sort;
this.topDocs = topDocs;
this.searchHits = searchHits;
}
public InternalTopHits(String name, InternalSearchHits searchHits) {
this.name = name;
this.searchHits = searchHits;
this.topDocs = new TopDocs(0, Lucene.EMPTY_SCORE_DOCS, 0);
}
@Override
public Type type() {
return TYPE;
}
@Override
public SearchHits getHits() {
return searchHits;
}
@Override
public InternalAggregation reduce(ReduceContext reduceContext) {
List<InternalAggregation> aggregations = reduceContext.aggregations();
if (aggregations.size() == 1) {
return aggregations.get(0);
}
TopDocs[] shardDocs = new TopDocs[aggregations.size()];
InternalSearchHits[] shardHits = new InternalSearchHits[aggregations.size()];
for (int i = 0; i < shardDocs.length; i++) {
InternalTopHits topHitsAgg = (InternalTopHits) aggregations.get(i);
shardDocs[i] = topHitsAgg.topDocs;
shardHits[i] = topHitsAgg.searchHits;
}
try {
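// tracker[i] counts how many hits have already been consumed from shard result i.
// TopDocs.merge stamps every ScoreDoc with the index of its source TopDocs (shardIndex),
// which lets the merged order be mapped back to the per-shard InternalSearchHits below.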
int[] tracker = new int[shardHits.length];
TopDocs reducedTopDocs = TopDocs.merge(sort, size, shardDocs);
InternalSearchHit[] hits = new InternalSearchHit[reducedTopDocs.scoreDocs.length];
for (int i = 0; i < reducedTopDocs.scoreDocs.length; i++) {
ScoreDoc scoreDoc = reducedTopDocs.scoreDocs[i];
hits[i] = (InternalSearchHit) shardHits[scoreDoc.shardIndex].getAt(tracker[scoreDoc.shardIndex]++);
if (scoreDoc instanceof FieldDoc) {
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
hits[i].sortValues(fieldDoc.fields);
}
}
return new InternalTopHits(name, new InternalSearchHits(hits, reducedTopDocs.totalHits, reducedTopDocs.getMaxScore()));
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
}
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
size = in.readVInt();
topDocs = Lucene.readTopDocs(in);
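// Lucene.readTopDocs returns a TopFieldDocs when the hits were serialized with sort fields;
// rebuild the Sort from those fields so the reduce phase can merge shard results with it.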
if (topDocs instanceof TopFieldDocs) {
sort = new Sort(((TopFieldDocs) topDocs).fields);
}
searchHits = InternalSearchHits.readSearchHits(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeVInt(size);
Lucene.writeTopDocs(out, topDocs, 0);
searchHits.writeTo(out);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
searchHits.toXContent(builder, params);
builder.endObject();
return builder;
}
}

View File

@@ -0,0 +1,30 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.tophits;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
/**
*/
public interface TopHits extends Aggregation {
SearchHits getHits();
}

View File

@@ -0,0 +1,162 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.tophits;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.*;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.lucene.ScorerAware;
import org.elasticsearch.common.util.LongObjectPagedHashMap;
import org.elasticsearch.search.aggregations.*;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.fetch.FetchPhase;
import org.elasticsearch.search.fetch.FetchSearchResult;
import org.elasticsearch.search.internal.InternalSearchHit;
import java.io.IOException;
/**
*/
public class TopHitsAggregator extends BucketsAggregator implements ScorerAware {
private final FetchPhase fetchPhase;
private final TopHitsContext topHitsContext;
private final LongObjectPagedHashMap<TopDocsCollector> topDocsCollectors;
private Scorer currentScorer;
private AtomicReaderContext currentContext;
public TopHitsAggregator(FetchPhase fetchPhase, TopHitsContext topHitsContext, String name, long estimatedBucketsCount, AggregationContext context, Aggregator parent) {
super(name, BucketAggregationMode.MULTI_BUCKETS, AggregatorFactories.EMPTY, estimatedBucketsCount, context, parent);
this.fetchPhase = fetchPhase;
topDocsCollectors = new LongObjectPagedHashMap<>(estimatedBucketsCount, context.bigArrays());
this.topHitsContext = topHitsContext;
context.registerScorerAware(this);
}
@Override
public boolean shouldCollect() {
return true;
}
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) {
TopDocsCollector topDocsCollector = topDocsCollectors.get(owningBucketOrdinal);
if (topDocsCollector == null) {
return buildEmptyAggregation();
} else {
TopDocs topDocs = topDocsCollector.topDocs();
if (topDocs.totalHits == 0) {
return buildEmptyAggregation();
}
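// Hand this bucket's top docs to the aggregation-private search context and run the
// fetch phase on just those doc ids to load the hits (source, fields, highlighting).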
topHitsContext.queryResult().topDocs(topDocs);
int[] docIdsToLoad = new int[topDocs.scoreDocs.length];
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
docIdsToLoad[i] = topDocs.scoreDocs[i].doc;
}
topHitsContext.docIdsToLoad(docIdsToLoad, 0, docIdsToLoad.length);
fetchPhase.execute(topHitsContext);
FetchSearchResult fetchResult = topHitsContext.fetchResult();
InternalSearchHit[] internalHits = fetchResult.fetchResult().hits().internalHits();
for (int i = 0; i < internalHits.length; i++) {
InternalSearchHit searchHitFields = internalHits[i];
searchHitFields.shard(topHitsContext.shardTarget());
searchHitFields.score(topDocs.scoreDocs[i].score);
}
return new InternalTopHits(name, topHitsContext.size(), topHitsContext.sort(), topDocs, fetchResult.hits());
}
}
@Override
public InternalAggregation buildEmptyAggregation() {
return new InternalTopHits();
}
@Override
public void collect(int docId, long bucketOrdinal) throws IOException {
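// One top docs collector is created lazily per bucket ordinal; a newly created collector
// must be brought up to date with the current reader segment and scorer before collecting.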
TopDocsCollector topDocsCollector = topDocsCollectors.get(bucketOrdinal);
if (topDocsCollector == null) {
Sort sort = topHitsContext.sort();
int size = topHitsContext.size();
topDocsCollectors.put(
bucketOrdinal,
topDocsCollector = sort != null ? TopFieldCollector.create(sort, size, true, topHitsContext.trackScores(), true, false) : TopScoreDocCollector.create(size, false)
);
topDocsCollector.setNextReader(currentContext);
topDocsCollector.setScorer(currentScorer);
}
topDocsCollector.collect(docId);
}
@Override
public void setNextReader(AtomicReaderContext context) {
this.currentContext = context;
for (LongObjectPagedHashMap.Cursor<TopDocsCollector> cursor : topDocsCollectors) {
try {
cursor.value.setNextReader(context);
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
}
}
}
@Override
public void setScorer(Scorer scorer) {
this.currentScorer = scorer;
for (LongObjectPagedHashMap.Cursor<TopDocsCollector> cursor : topDocsCollectors) {
try {
cursor.value.setScorer(scorer);
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
}
}
}
@Override
protected void doClose() {
Releasables.close(topDocsCollectors);
}
public static class Factory extends AggregatorFactory {
private final FetchPhase fetchPhase;
private final TopHitsContext topHitsContext;
public Factory(String name, FetchPhase fetchPhase, TopHitsContext topHitsContext) {
super(name, InternalTopHits.TYPE.name());
this.fetchPhase = fetchPhase;
this.topHitsContext = topHitsContext;
}
@Override
public Aggregator create(AggregationContext aggregationContext, Aggregator parent, long expectedBucketsCount) {
return new TopHitsAggregator(fetchPhase, topHitsContext, name, expectedBucketsCount, aggregationContext, parent);
}
@Override
public AggregatorFactory subFactories(AggregatorFactories subFactories) {
throw new AggregationInitializationException("Aggregator [" + name + "] of type [" + type + "] cannot accept sub-aggregations");
}
}
}

View File

@@ -0,0 +1,385 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.tophits;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import java.io.IOException;
import java.util.Map;
/**
*/
public class TopHitsBuilder extends AbstractAggregationBuilder {
private SearchSourceBuilder sourceBuilder;
public TopHitsBuilder(String name) {
super(name, InternalTopHits.TYPE.name());
}
/**
* The number of search hits to return. Defaults to <tt>3</tt>.
*/
public TopHitsBuilder setSize(int size) {
sourceBuilder().size(size);
return this;
}
/**
* Applies when sorting, and controls if scores will be tracked as well. Defaults to
* <tt>false</tt>.
*/
public TopHitsBuilder setTrackScores(boolean trackScores) {
sourceBuilder().trackScores(trackScores);
return this;
}
/**
* Should each {@link org.elasticsearch.search.SearchHit} be returned with an
* explanation of the hit (ranking).
*/
public TopHitsBuilder setExplain(boolean explain) {
sourceBuilder().explain(explain);
return this;
}
/**
* Should each {@link org.elasticsearch.search.SearchHit} be returned with its
* version.
*/
public TopHitsBuilder setVersion(boolean version) {
sourceBuilder().version(version);
return this;
}
/**
* Sets no fields to be loaded, resulting in only id and type to be returned per hit.
*/
public TopHitsBuilder setNoFields() {
sourceBuilder().noFields();
return this;
}
/**
* Indicates whether the response should contain the stored _source for every hit
*/
public TopHitsBuilder setFetchSource(boolean fetch) {
sourceBuilder().fetchSource(fetch);
return this;
}
/**
* Indicate that _source should be returned with every hit, with an "include" and/or "exclude" set which can include simple wildcard
* elements.
*
* @param include An optional include (optionally wildcarded) pattern to filter the returned _source
* @param exclude An optional exclude (optionally wildcarded) pattern to filter the returned _source
*/
public TopHitsBuilder setFetchSource(@Nullable String include, @Nullable String exclude) {
sourceBuilder().fetchSource(include, exclude);
return this;
}
/**
* Indicate that _source should be returned with every hit, with an "include" and/or "exclude" set which can include simple wildcard
* elements.
*
* @param includes An optional list of include (optionally wildcarded) patterns to filter the returned _source
* @param excludes An optional list of exclude (optionally wildcarded) patterns to filter the returned _source
*/
public TopHitsBuilder setFetchSource(@Nullable String[] includes, @Nullable String[] excludes) {
sourceBuilder().fetchSource(includes, excludes);
return this;
}
/**
* Adds a field data based field to load and return. The field does not have to be stored,
* but it's recommended to use non-analyzed or numeric fields.
*
* @param name The field to get from the field data cache
*/
public TopHitsBuilder addFieldDataField(String name) {
sourceBuilder().fieldDataField(name);
return this;
}
/**
* Adds a script based field to load and return. The field does not have to be stored,
* but it's recommended to use non-analyzed or numeric fields.
*
* @param name The name that will represent this value in the return hit
* @param script The script to use
*/
public TopHitsBuilder addScriptField(String name, String script) {
sourceBuilder().scriptField(name, script);
return this;
}
/**
* Adds a script based field to load and return. The field does not have to be stored,
* but it's recommended to use non-analyzed or numeric fields.
*
* @param name The name that will represent this value in the return hit
* @param script The script to use
* @param params Parameters that the script can use.
*/
public TopHitsBuilder addScriptField(String name, String script, Map<String, Object> params) {
sourceBuilder().scriptField(name, script, params);
return this;
}
/**
* Adds a script based field to load and return. The field does not have to be stored,
* but it's recommended to use non-analyzed or numeric fields.
*
* @param name The name that will represent this value in the return hit
* @param lang The language of the script
* @param script The script to use
* @param params Parameters that the script can use (can be <tt>null</tt>).
*/
public TopHitsBuilder addScriptField(String name, String lang, String script, Map<String, Object> params) {
sourceBuilder().scriptField(name, lang, script, params);
return this;
}
/**
* Adds a sort against the given field name and the sort ordering.
*
* @param field The name of the field
* @param order The sort ordering
*/
public TopHitsBuilder addSort(String field, SortOrder order) {
sourceBuilder().sort(field, order);
return this;
}
/**
* Adds a generic sort builder.
*
* @see org.elasticsearch.search.sort.SortBuilders
*/
public TopHitsBuilder addSort(SortBuilder sort) {
sourceBuilder().sort(sort);
return this;
}
/**
* Adds a field to be highlighted with default fragment size of 100 characters, and
* default number of fragments of 5.
*
* @param name The field to highlight
*/
public TopHitsBuilder addHighlightedField(String name) {
highlightBuilder().field(name);
return this;
}
/**
* Adds a field to be highlighted with a provided fragment size (in characters), and
* default number of fragments of 5.
*
* @param name The field to highlight
* @param fragmentSize The size of a fragment in characters
*/
public TopHitsBuilder addHighlightedField(String name, int fragmentSize) {
highlightBuilder().field(name, fragmentSize);
return this;
}
/**
* Adds a field to be highlighted with a provided fragment size (in characters), and
* a provided (maximum) number of fragments.
*
* @param name The field to highlight
* @param fragmentSize The size of a fragment in characters
* @param numberOfFragments The (maximum) number of fragments
*/
public TopHitsBuilder addHighlightedField(String name, int fragmentSize, int numberOfFragments) {
highlightBuilder().field(name, fragmentSize, numberOfFragments);
return this;
}
/**
* Adds a field to be highlighted with a provided fragment size (in characters),
* a provided (maximum) number of fragments and an offset for the highlight.
*
* @param name The field to highlight
* @param fragmentSize The size of a fragment in characters
* @param numberOfFragments The (maximum) number of fragments
*/
public TopHitsBuilder addHighlightedField(String name, int fragmentSize, int numberOfFragments,
int fragmentOffset) {
highlightBuilder().field(name, fragmentSize, numberOfFragments, fragmentOffset);
return this;
}
/**
* Adds a highlighted field.
*/
public TopHitsBuilder addHighlightedField(HighlightBuilder.Field field) {
highlightBuilder().field(field);
return this;
}
/**
* Set a tag scheme that encapsulates built-in pre and post tags. The allowed schemes
* are <tt>styled</tt> and <tt>default</tt>.
*
* @param schemaName The tag scheme name
*/
public TopHitsBuilder setHighlighterTagsSchema(String schemaName) {
highlightBuilder().tagsSchema(schemaName);
return this;
}
public TopHitsBuilder setHighlighterFragmentSize(Integer fragmentSize) {
highlightBuilder().fragmentSize(fragmentSize);
return this;
}
public TopHitsBuilder setHighlighterNumOfFragments(Integer numOfFragments) {
highlightBuilder().numOfFragments(numOfFragments);
return this;
}
public TopHitsBuilder setHighlighterFilter(Boolean highlightFilter) {
highlightBuilder().highlightFilter(highlightFilter);
return this;
}
/**
* The encoder to set for highlighting
*/
public TopHitsBuilder setHighlighterEncoder(String encoder) {
highlightBuilder().encoder(encoder);
return this;
}
/**
* Explicitly set the pre tags that will be used for highlighting.
*/
public TopHitsBuilder setHighlighterPreTags(String... preTags) {
highlightBuilder().preTags(preTags);
return this;
}
/**
* Explicitly set the post tags that will be used for highlighting.
*/
public TopHitsBuilder setHighlighterPostTags(String... postTags) {
highlightBuilder().postTags(postTags);
return this;
}
/**
* The order of fragments per field. By default, ordered by the order in the
* highlighted text. Can be <tt>score</tt>, in which case the fragments are ordered
* by score.
*/
public TopHitsBuilder setHighlighterOrder(String order) {
highlightBuilder().order(order);
return this;
}
public TopHitsBuilder setHighlighterRequireFieldMatch(boolean requireFieldMatch) {
highlightBuilder().requireFieldMatch(requireFieldMatch);
return this;
}
public TopHitsBuilder setHighlighterBoundaryMaxScan(Integer boundaryMaxScan) {
highlightBuilder().boundaryMaxScan(boundaryMaxScan);
return this;
}
public TopHitsBuilder setHighlighterBoundaryChars(char[] boundaryChars) {
highlightBuilder().boundaryChars(boundaryChars);
return this;
}
/**
* The highlighter type to use.
*/
public TopHitsBuilder setHighlighterType(String type) {
highlightBuilder().highlighterType(type);
return this;
}
public TopHitsBuilder setHighlighterFragmenter(String fragmenter) {
highlightBuilder().fragmenter(fragmenter);
return this;
}
/**
* Sets a query to be used for highlighting all fields instead of the search query.
*/
public TopHitsBuilder setHighlighterQuery(QueryBuilder highlightQuery) {
highlightBuilder().highlightQuery(highlightQuery);
return this;
}
/**
* Sets the size of the fragment to return from the beginning of the field if there are no matches to
* highlight and the field doesn't also define noMatchSize.
* @param noMatchSize integer to set or null to leave out of request. default is null.
* @return this builder for chaining
*/
public TopHitsBuilder setHighlighterNoMatchSize(Integer noMatchSize) {
highlightBuilder().noMatchSize(noMatchSize);
return this;
}
/**
* Sets the maximum number of phrases the fvh will consider if the field doesn't also define phraseLimit.
*/
public TopHitsBuilder setHighlighterPhraseLimit(Integer phraseLimit) {
highlightBuilder().phraseLimit(phraseLimit);
return this;
}
public TopHitsBuilder setHighlighterOptions(Map<String, Object> options) {
highlightBuilder().options(options);
return this;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name).field(type);
sourceBuilder().toXContent(builder, params);
return builder.endObject();
}
private SearchSourceBuilder sourceBuilder() {
if (sourceBuilder == null) {
sourceBuilder = new SearchSourceBuilder();
}
return sourceBuilder;
}
public HighlightBuilder highlightBuilder() {
return sourceBuilder().highlighter();
}
}

View File

@@ -0,0 +1,617 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.tophits;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.cache.recycler.PageCacheRecycler;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.cache.docset.DocSetCache;
import org.elasticsearch.index.cache.filter.FilterCache;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldMappers;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.IndexQueryParserService;
import org.elasticsearch.index.query.ParsedFilter;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.shard.service.IndexShard;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.aggregations.SearchContextAggregations;
import org.elasticsearch.search.dfs.DfsSearchResult;
import org.elasticsearch.search.facet.SearchContextFacets;
import org.elasticsearch.search.fetch.FetchSearchResult;
import org.elasticsearch.search.fetch.fielddata.FieldDataFieldsContext;
import org.elasticsearch.search.fetch.partial.PartialFieldsContext;
import org.elasticsearch.search.fetch.script.ScriptFieldsContext;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.elasticsearch.search.highlight.SearchContextHighlight;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchRequest;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.query.QuerySearchResult;
import org.elasticsearch.search.rescore.RescoreSearchContext;
import org.elasticsearch.search.scan.ScanContext;
import org.elasticsearch.search.suggest.SuggestionSearchContext;
import java.util.List;
/**
*/
public class TopHitsContext extends SearchContext {
// By default return 3 hits per bucket. A higher default would make the response really large by default, since
// the top hits are returned per bucket.
private final static int DEFAULT_SIZE = 3;
private int size = DEFAULT_SIZE;
private Sort sort;
private final FetchSearchResult fetchSearchResult;
private final QuerySearchResult querySearchResult;
private int[] docIdsToLoad;
private int docsIdsToLoadFrom;
private int docsIdsToLoadSize;
private final SearchContext context;
private List<String> fieldNames;
private FieldDataFieldsContext fieldDataFields;
private ScriptFieldsContext scriptFields;
private PartialFieldsContext partialFields;
private FetchSourceContext fetchSourceContext;
private SearchContextHighlight highlight;
private boolean explain;
private boolean trackScores;
private boolean version;
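// Read access delegates to the search context of the encapsulating search request, while
// state owned by the top_hits aggregation (size, sort, doc ids to load, fetch and query
// results) is kept locally so each top_hits aggregation fetches its own hits.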
public TopHitsContext(SearchContext context) {
this.fetchSearchResult = new FetchSearchResult();
this.querySearchResult = new QuerySearchResult();
this.context = context;
}
@Override
protected void doClose() {
}
@Override
public void preProcess() {
}
@Override
public Filter searchFilter(String[] types) {
throw new UnsupportedOperationException("this context should be read only");
}
@Override
public long id() {
return context.id();
}
@Override
public String source() {
return context.source();
}
@Override
public ShardSearchRequest request() {
return context.request();
}
@Override
public SearchType searchType() {
return context.searchType();
}
@Override
public SearchContext searchType(SearchType searchType) {
throw new UnsupportedOperationException("this context should be read only");
}
@Override
public SearchShardTarget shardTarget() {
return context.shardTarget();
}
@Override
public int numberOfShards() {
return context.numberOfShards();
}
@Override
public boolean hasTypes() {
return context.hasTypes();
}
@Override
public String[] types() {
return context.types();
}
@Override
public float queryBoost() {
return context.queryBoost();
}
@Override
public SearchContext queryBoost(float queryBoost) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public long nowInMillis() {
return context.nowInMillis();
}
@Override
public Scroll scroll() {
return context.scroll();
}
@Override
public SearchContext scroll(Scroll scroll) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public SearchContextAggregations aggregations() {
return context.aggregations();
}
@Override
public SearchContext aggregations(SearchContextAggregations aggregations) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public SearchContextFacets facets() {
return context.facets();
}
@Override
public SearchContext facets(SearchContextFacets facets) {
throw new UnsupportedOperationException("Not supported");
}
public SearchContextHighlight highlight() {
return highlight;
}
public void highlight(SearchContextHighlight highlight) {
this.highlight = highlight;
}
@Override
public SuggestionSearchContext suggest() {
return context.suggest();
}
@Override
public void suggest(SuggestionSearchContext suggest) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public List<RescoreSearchContext> rescore() {
return context.rescore();
}
@Override
public void addRescore(RescoreSearchContext rescore) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public boolean hasFieldDataFields() {
return fieldDataFields != null;
}
@Override
public FieldDataFieldsContext fieldDataFields() {
if (fieldDataFields == null) {
fieldDataFields = new FieldDataFieldsContext();
}
return this.fieldDataFields;
}
@Override
public boolean hasScriptFields() {
return scriptFields != null;
}
@Override
public ScriptFieldsContext scriptFields() {
if (scriptFields == null) {
scriptFields = new ScriptFieldsContext();
}
return this.scriptFields;
}
@Override
public boolean hasPartialFields() {
return partialFields != null;
}
@Override
public PartialFieldsContext partialFields() {
if (partialFields == null) {
partialFields = new PartialFieldsContext();
}
return this.partialFields;
}
@Override
public boolean sourceRequested() {
return fetchSourceContext != null && fetchSourceContext.fetchSource();
}
@Override
public boolean hasFetchSourceContext() {
return fetchSourceContext != null;
}
@Override
public FetchSourceContext fetchSourceContext() {
return fetchSourceContext;
}
@Override
public SearchContext fetchSourceContext(FetchSourceContext fetchSourceContext) {
this.fetchSourceContext = fetchSourceContext;
return this;
}
@Override
public ContextIndexSearcher searcher() {
return context.searcher();
}
@Override
public IndexShard indexShard() {
return context.indexShard();
}
@Override
public MapperService mapperService() {
return context.mapperService();
}
@Override
public AnalysisService analysisService() {
return context.analysisService();
}
@Override
public IndexQueryParserService queryParserService() {
return context.queryParserService();
}
@Override
public SimilarityService similarityService() {
return context.similarityService();
}
@Override
public ScriptService scriptService() {
return context.scriptService();
}
@Override
public CacheRecycler cacheRecycler() {
return context.cacheRecycler();
}
@Override
public PageCacheRecycler pageCacheRecycler() {
return context.pageCacheRecycler();
}
@Override
public BigArrays bigArrays() {
return context.bigArrays();
}
@Override
public FilterCache filterCache() {
return context.filterCache();
}
@Override
public DocSetCache docSetCache() {
return context.docSetCache();
}
@Override
public IndexFieldDataService fieldData() {
return context.fieldData();
}
@Override
public long timeoutInMillis() {
return context.timeoutInMillis();
}
@Override
public void timeoutInMillis(long timeoutInMillis) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public SearchContext minimumScore(float minimumScore) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public Float minimumScore() {
return context.minimumScore();
}
@Override
public SearchContext sort(Sort sort) {
this.sort = sort;
return this;
}
@Override
public Sort sort() {
return sort;
}
@Override
public SearchContext trackScores(boolean trackScores) {
this.trackScores = trackScores;
return this;
}
@Override
public boolean trackScores() {
return trackScores;
}
@Override
public SearchContext parsedPostFilter(ParsedFilter postFilter) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public ParsedFilter parsedPostFilter() {
return context.parsedPostFilter();
}
@Override
public Filter aliasFilter() {
return context.aliasFilter();
}
@Override
public SearchContext parsedQuery(ParsedQuery query) {
return context.parsedQuery(query);
}
@Override
public ParsedQuery parsedQuery() {
return context.parsedQuery();
}
@Override
public Query query() {
return context.query();
}
@Override
public boolean queryRewritten() {
return context.queryRewritten();
}
@Override
public SearchContext updateRewriteQuery(Query rewriteQuery) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public int from() {
return context.from();
}
@Override
public SearchContext from(int from) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public int size() {
return size;
}
@Override
public SearchContext size(int size) {
this.size = size;
return this;
}
@Override
public boolean hasFieldNames() {
return fieldNames != null;
}
@Override
public List<String> fieldNames() {
if (fieldNames == null) {
fieldNames = Lists.newArrayList();
}
return fieldNames;
}
@Override
public void emptyFieldNames() {
this.fieldNames = ImmutableList.of();
}
@Override
public boolean explain() {
return explain;
}
@Override
public void explain(boolean explain) {
this.explain = explain;
}
@Override
public List<String> groupStats() {
return context.groupStats();
}
@Override
public void groupStats(List<String> groupStats) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public boolean version() {
return version;
}
@Override
public void version(boolean version) {
this.version = version;
}
@Override
public int[] docIdsToLoad() {
return docIdsToLoad;
}
@Override
public int docIdsToLoadFrom() {
return docsIdsToLoadFrom;
}
@Override
public int docIdsToLoadSize() {
return docsIdsToLoadSize;
}
@Override
public SearchContext docIdsToLoad(int[] docIdsToLoad, int docsIdsToLoadFrom, int docsIdsToLoadSize) {
this.docIdsToLoad = docIdsToLoad;
this.docsIdsToLoadFrom = docsIdsToLoadFrom;
this.docsIdsToLoadSize = docsIdsToLoadSize;
return this;
}
@Override
public void accessed(long accessTime) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public long lastAccessTime() {
return context.lastAccessTime();
}
@Override
public long keepAlive() {
return context.keepAlive();
}
@Override
public void keepAlive(long keepAlive) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public void lastEmittedDoc(ScoreDoc doc) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public ScoreDoc lastEmittedDoc() {
return context.lastEmittedDoc();
}
@Override
public SearchLookup lookup() {
return context.lookup();
}
@Override
public DfsSearchResult dfsResult() {
return context.dfsResult();
}
@Override
public QuerySearchResult queryResult() {
return querySearchResult;
}
@Override
public FetchSearchResult fetchResult() {
return fetchSearchResult;
}
@Override
public ScanContext scanContext() {
return context.scanContext();
}
@Override
public MapperService.SmartNameFieldMappers smartFieldMappers(String name) {
return context.smartFieldMappers(name);
}
@Override
public FieldMappers smartNameFieldMappers(String name) {
return context.smartNameFieldMappers(name);
}
@Override
public FieldMapper smartNameFieldMapper(String name) {
return context.smartNameFieldMapper(name);
}
@Override
public MapperService.SmartNameObjectMapper smartNameObjectMapper(String name) {
return context.smartNameObjectMapper(name);
}
@Override
public boolean useSlowScroll() {
return context.useSlowScroll();
}
@Override
public SearchContext useSlowScroll(boolean useSlowScroll) {
throw new UnsupportedOperationException("Not supported");
}
}

View File

@@ -0,0 +1,134 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.tophits;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.fetch.FetchPhase;
import org.elasticsearch.search.fetch.fielddata.FieldDataFieldsParseElement;
import org.elasticsearch.search.fetch.script.ScriptFieldsParseElement;
import org.elasticsearch.search.fetch.source.FetchSourceParseElement;
import org.elasticsearch.search.highlight.HighlighterParseElement;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.sort.SortParseElement;
import java.io.IOException;
/**
*
*/
public class TopHitsParser implements Aggregator.Parser {
private final FetchPhase fetchPhase;
private final SortParseElement sortParseElement;
private final FetchSourceParseElement sourceParseElement;
private final HighlighterParseElement highlighterParseElement;
private final FieldDataFieldsParseElement fieldDataFieldsParseElement;
private final ScriptFieldsParseElement scriptFieldsParseElement;
@Inject
public TopHitsParser(FetchPhase fetchPhase, SortParseElement sortParseElement, FetchSourceParseElement sourceParseElement, HighlighterParseElement highlighterParseElement, FieldDataFieldsParseElement fieldDataFieldsParseElement, ScriptFieldsParseElement scriptFieldsParseElement) {
this.fetchPhase = fetchPhase;
this.sortParseElement = sortParseElement;
this.sourceParseElement = sourceParseElement;
this.highlighterParseElement = highlighterParseElement;
this.fieldDataFieldsParseElement = fieldDataFieldsParseElement;
this.scriptFieldsParseElement = scriptFieldsParseElement;
}
@Override
public String type() {
return InternalTopHits.TYPE.name();
}
@Override
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
TopHitsContext topHitsContext = new TopHitsContext(context);
XContentParser.Token token;
String currentFieldName = null;
try {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token.isValue()) {
switch (currentFieldName) {
case "size":
topHitsContext.size(parser.intValue());
break;
case "sort":
sortParseElement.parse(parser, topHitsContext);
break;
case "track_scores":
case "trackScores":
topHitsContext.trackScores(parser.booleanValue());
break;
case "version":
topHitsContext.version(parser.booleanValue());
break;
case "explain":
topHitsContext.explain(parser.booleanValue());
break;
default:
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
}
} else if (token == XContentParser.Token.START_OBJECT) {
switch (currentFieldName) {
case "sort":
sortParseElement.parse(parser, topHitsContext);
break;
case "_source":
sourceParseElement.parse(parser, topHitsContext);
break;
case "highlight":
highlighterParseElement.parse(parser, topHitsContext);
break;
case "scriptFields":
case "script_fields":
scriptFieldsParseElement.parse(parser, topHitsContext);
break;
default:
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
}
} else if (token == XContentParser.Token.START_ARRAY) {
switch (currentFieldName) {
case "sort":
sortParseElement.parse(parser, topHitsContext);
break;
case "fielddataFields":
case "fielddata_fields":
fieldDataFieldsParseElement.parse(parser, topHitsContext);
break;
default:
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
}
} else {
throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "].");
}
}
} catch (Exception e) {
throw ExceptionsHelper.convertToElastic(e);
}
return new TopHitsAggregator.Factory(aggregationName, fetchPhase, topHitsContext);
}
}

View File

@@ -0,0 +1,366 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory.ExecutionMode;
import org.elasticsearch.search.aggregations.bucket.tophits.TopHits;
import org.elasticsearch.search.aggregations.metrics.max.Max;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.IsNull.notNullValue;
/**
*
*/
@ElasticsearchIntegrationTest.SuiteScopeTest()
public class TopHitsTests extends ElasticsearchIntegrationTest {
private static final String TERMS_AGGS_FIELD = "terms";
private static final String SORT_FIELD = "sort";
public static String randomExecutionHint() {
return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
}
@Override
public void setupSuiteScopeCluster() throws Exception {
createIndex("idx");
List<IndexRequestBuilder> builders = new ArrayList<>();
for (int i = 0; i < 50; i++) {
builders.add(client().prepareIndex("idx", "type", Integer.toString(i)).setSource(jsonBuilder()
.startObject()
.field(TERMS_AGGS_FIELD, "val" + (i / 10))
.field(SORT_FIELD, i + 1)
.field("text", "some text to entertain")
.field("field1", 5)
.endObject()));
}
// Use routing to make sure all docs are in the same shard for consistent scoring
builders.add(client().prepareIndex("idx", "field-collapsing", "1").setSource(jsonBuilder()
.startObject()
.field("group", "a")
.field("text", "term x y z b")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "2").setSource(jsonBuilder()
.startObject()
.field("group", "a")
.field("text", "term x y z n rare")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "3").setSource(jsonBuilder()
.startObject()
.field("group", "b")
.field("text", "x y z term")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "4").setSource(jsonBuilder()
.startObject()
.field("group", "b")
.field("text", "x y term")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "5").setSource(jsonBuilder()
.startObject()
.field("group", "b")
.field("text", "x term")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "6").setSource(jsonBuilder()
.startObject()
.field("group", "b")
.field("text", "term rare")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "7").setSource(jsonBuilder()
.startObject()
.field("group", "c")
.field("text", "x y z term")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "8").setSource(jsonBuilder()
.startObject()
.field("group", "c")
.field("text", "x y term b")
.endObject()));
builders.add(client().prepareIndex("idx", "field-collapsing", "9").setSource(jsonBuilder()
.startObject()
.field("group", "c")
.field("text", "rare x term")
.endObject()));
indexRandom(true, builders);
ensureSearchable();
}
private String key(Terms.Bucket bucket) {
return randomBoolean() ? bucket.getKey() : bucket.getKeyAsText().string();
}
@Test
public void testBasics() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field(TERMS_AGGS_FIELD)
.subAggregation(
topHits("hits").addSort(SortBuilders.fieldSort(SORT_FIELD).order(SortOrder.DESC))
)
)
.get();
assertSearchResponse(response);
Terms terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.getBuckets().size(), equalTo(5));
long highestSortValue = 0;
for (int i = 0; i < 5; i++) {
Terms.Bucket bucket = terms.getBucketByKey("val" + i);
assertThat(bucket, notNullValue());
assertThat(key(bucket), equalTo("val" + i));
assertThat(bucket.getDocCount(), equalTo(10l));
TopHits topHits = bucket.getAggregations().get("hits");
SearchHits hits = topHits.getHits();
assertThat(hits.totalHits(), equalTo(10l));
assertThat(hits.getHits().length, equalTo(3));
highestSortValue += 10;
assertThat((Long) hits.getAt(0).sortValues()[0], equalTo(highestSortValue));
assertThat((Long) hits.getAt(1).sortValues()[0], equalTo(highestSortValue - 1));
assertThat((Long) hits.getAt(2).sortValues()[0], equalTo(highestSortValue - 2));
assertThat(hits.getAt(0).sourceAsMap().size(), equalTo(4));
}
}
@Test
public void testSortByBucket() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field(TERMS_AGGS_FIELD)
.order(Terms.Order.aggregation("max_sort", false))
.subAggregation(
topHits("hits").addSort(SortBuilders.fieldSort(SORT_FIELD).order(SortOrder.DESC)).setTrackScores(true)
)
.subAggregation(
max("max_sort").field(SORT_FIELD)
)
)
.get();
assertSearchResponse(response);
Terms terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.getBuckets().size(), equalTo(5));
long highestSortValue = 50;
int currentBucket = 4;
for (Terms.Bucket bucket : terms.getBuckets()) {
assertThat(key(bucket), equalTo("val" + currentBucket--));
assertThat(bucket.getDocCount(), equalTo(10l));
TopHits topHits = bucket.getAggregations().get("hits");
SearchHits hits = topHits.getHits();
assertThat(hits.totalHits(), equalTo(10l));
assertThat(hits.getHits().length, equalTo(3));
assertThat((Long) hits.getAt(0).sortValues()[0], equalTo(highestSortValue));
assertThat((Long) hits.getAt(1).sortValues()[0], equalTo(highestSortValue - 1));
assertThat((Long) hits.getAt(2).sortValues()[0], equalTo(highestSortValue - 2));
Max max = bucket.getAggregations().get("max_sort");
assertThat(max.getValue(), equalTo(((Long) highestSortValue).doubleValue()));
highestSortValue -= 10;
}
}
@Test
public void testFieldCollapsing() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("field-collapsing")
.setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
.setQuery(matchQuery("text", "term rare"))
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field("group")
.order(Terms.Order.aggregation("max_score", false))
.subAggregation(
topHits("hits").setSize(1)
)
.subAggregation(
max("max_score").script("_doc.score")
)
)
.get();
assertSearchResponse(response);
Terms terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.getBuckets().size(), equalTo(3));
Iterator<Terms.Bucket> bucketIterator = terms.getBuckets().iterator();
Terms.Bucket bucket = bucketIterator.next();
assertThat(key(bucket), equalTo("b"));
TopHits topHits = bucket.getAggregations().get("hits");
SearchHits hits = topHits.getHits();
assertThat(hits.totalHits(), equalTo(4l));
assertThat(hits.getHits().length, equalTo(1));
assertThat(hits.getAt(0).id(), equalTo("6"));
bucket = bucketIterator.next();
assertThat(key(bucket), equalTo("c"));
topHits = bucket.getAggregations().get("hits");
hits = topHits.getHits();
assertThat(hits.totalHits(), equalTo(3l));
assertThat(hits.getHits().length, equalTo(1));
assertThat(hits.getAt(0).id(), equalTo("9"));
bucket = bucketIterator.next();
assertThat(key(bucket), equalTo("a"));
topHits = bucket.getAggregations().get("hits");
hits = topHits.getHits();
assertThat(hits.totalHits(), equalTo(2l));
assertThat(hits.getHits().length, equalTo(1));
assertThat(hits.getAt(0).id(), equalTo("2"));
}
@Test
public void testFetchFeatures() {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.setQuery(matchQuery("text", "text").queryName("test"))
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field(TERMS_AGGS_FIELD)
.subAggregation(
topHits("hits").setSize(1)
.addHighlightedField("text")
.setExplain(true)
.addFieldDataField("field1")
.addScriptField("script", "doc['field1'].value")
.setFetchSource("text", null)
.setVersion(true)
)
)
.get();
assertSearchResponse(response);
Terms terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.getBuckets().size(), equalTo(5));
for (Terms.Bucket bucket : terms.getBuckets()) {
TopHits topHits = bucket.getAggregations().get("hits");
SearchHits hits = topHits.getHits();
assertThat(hits.totalHits(), equalTo(10l));
assertThat(hits.getHits().length, equalTo(1));
SearchHit hit = hits.getAt(0);
HighlightField highlightField = hit.getHighlightFields().get("text");
assertThat(highlightField.getFragments().length, equalTo(1));
assertThat(highlightField.getFragments()[0].string(), equalTo("some <em>text</em> to entertain"));
Explanation explanation = hit.explanation();
assertThat(explanation.toString(), containsString("text:text"));
long version = hit.version();
assertThat(version, equalTo(1l));
assertThat(hit.matchedQueries()[0], equalTo("test"));
SearchHitField field = hit.field("field1");
assertThat(field.getValue().toString(), equalTo("5"));
field = hit.field("script");
assertThat(field.getValue().toString(), equalTo("5"));
assertThat(hit.sourceAsMap().size(), equalTo(1));
assertThat(hit.sourceAsMap().get("text").toString(), equalTo("some text to entertain"));
}
}
@Test
public void testInvalidSortField() throws Exception {
try {
client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field(TERMS_AGGS_FIELD)
.subAggregation(
topHits("hits").addSort(SortBuilders.fieldSort("xyz").order(SortOrder.DESC))
)
).get();
fail();
} catch (SearchPhaseExecutionException e) {
assertThat(e.getMessage(), containsString("No mapping found for [xyz] in order to sort on"));
}
}
@Test
public void testFailWithSubAgg() throws Exception {
String source = "{\n" +
" \"aggs\": {\n" +
" \"top-tags\": {\n" +
" \"terms\": {\n" +
" \"field\": \"tags\"\n" +
" },\n" +
" \"aggs\": {\n" +
" \"top_tags_hits\": {\n" +
" \"top_hits\": {},\n" +
" \"aggs\": {\n" +
" \"max\": {\n" +
" \"max\": {\n" +
" \"field\": \"age\"\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
"}";
try {
client().prepareSearch("idx").setTypes("type")
.setSource(source)
.get();
fail();
} catch (SearchPhaseExecutionException e) {
assertThat(e.getMessage(), containsString("Aggregator [top_tags_hits] of type [top_hits] cannot accept sub-aggregations"));
}
}
}