From 013b3194158e18441f2b3ea27f7d459aa6cc4f60 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 24 Mar 2014 14:24:32 +0700 Subject: [PATCH] Added `reverse_nested` aggregation. The `reverse_nested` aggregation allows to aggregate on properties outside of the nested scope of a `nested` aggregation. Closes #5507 --- .../search/aggregations/bucket.asciidoc | 2 + .../reverse-nested-aggregation.asciidoc | 115 ++++++ .../aggregations/AggregationBuilders.java | 5 + .../aggregations/AggregationModule.java | 2 + .../search/aggregations/AggregationPhase.java | 2 +- .../TransportAggregationModule.java | 2 + .../bucket/nested/InternalReverseNested.java | 59 ++++ .../bucket/nested/NestedAggregator.java | 30 +- .../bucket/nested/ReverseNested.java | 27 ++ .../nested/ReverseNestedAggregator.java | 155 ++++++++ .../bucket/nested/ReverseNestedBuilder.java | 50 +++ .../bucket/nested/ReverseNestedParser.java | 60 ++++ .../support/AggregationContext.java | 9 + .../bucket/ReverseNestedTests.java | 332 ++++++++++++++++++ 14 files changed, 837 insertions(+), 13 deletions(-) create mode 100644 docs/reference/search/aggregations/bucket/reverse-nested-aggregation.asciidoc create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/nested/InternalReverseNested.java create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNested.java create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedBuilder.java create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedParser.java create mode 100644 src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java diff --git a/docs/reference/search/aggregations/bucket.asciidoc b/docs/reference/search/aggregations/bucket.asciidoc index 1146adeeede..4b35e65fbc4 100644 --- 
a/docs/reference/search/aggregations/bucket.asciidoc +++ b/docs/reference/search/aggregations/bucket.asciidoc @@ -8,6 +8,8 @@ include::bucket/missing-aggregation.asciidoc[] include::bucket/nested-aggregation.asciidoc[] +include::bucket/reverse-nested-aggregation.asciidoc[] + include::bucket/terms-aggregation.asciidoc[] include::bucket/significantterms-aggregation.asciidoc[] diff --git a/docs/reference/search/aggregations/bucket/reverse-nested-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/reverse-nested-aggregation.asciidoc new file mode 100644 index 00000000000..ea6e2e04115 --- /dev/null +++ b/docs/reference/search/aggregations/bucket/reverse-nested-aggregation.asciidoc @@ -0,0 +1,115 @@ +[[search-aggregations-bucket-reverse-nested-aggregation]] +=== Reverse nested + +A special single bucket aggregation that enables aggregating on parent docs from nested documents. Effectively this +aggregation can break out of the nested block structure and link to other nested structures or the root document, +which allows nesting other aggregations that aren't part of the nested object in a nested aggregation. + +The `reverse_nested` aggregation must be defined inside a `nested` aggregation. + +.Options: +* `path` - Defines which nested object field should be joined back to. The default is empty, +which means that it joins back to the root / main document level. The path cannot contain a reference to +a nested object field that falls outside the nested structure of the `nested` aggregation that the `reverse_nested` is in. + +For example, let's say we have an index for a ticket system with issues and comments. The comments are inlined into +the issue documents as nested documents. The mapping could look like: + +[source,js] +-------------------------------------------------- +{ + ... 
+ + "issue" : { + "properties" : { + "tags" : { "type" : "string" }, + "comments" : { <1> + "type" : "nested", + "properties" : { + "username" : { "type" : "string", "index" : "not_analyzed" }, + "comment" : { "type" : "string" } + } + } + } + } +} +-------------------------------------------------- + +<1> The `comments` field is an array that holds nested documents under the `issue` object. + +The following aggregations will return the usernames of the top commenters and, per top commenter, the top +tags of the issues that the commenter has commented on: + +[source,js] +-------------------------------------------------- +{ + "query" : { + "match" : { "name" : "led tv" } + }, + "aggs" : { + "comments" : { + "nested" : { + "path" : "comments" + }, + "aggs" : { + "top_usernames" : { + "terms" : { + "field" : "comments.username" + }, + "aggs" : { + "comment_to_issue" : { + "reverse_nested" : { <1> + }, + "aggs" : { + "top_tags_per_comment" : { + "terms" : { "field" : "tags" } + } + } + } + } + } + } + } + } +} +-------------------------------------------------- + +As you can see above, the `reverse_nested` aggregation is put into a `nested` aggregation, as this is the only place +in the DSL where the `reverse_nested` aggregation can be used. Its sole purpose is to join back to a parent doc higher +up in the nested structure. + +<1> A `reverse_nested` aggregation that joins back to the root / main document level, because no `path` has been defined. +Via the `path` option the `reverse_nested` aggregation can join back to a different level, if multiple layered nested +object types have been defined in the mapping. + +Possible response snippet: + +[source,js] +-------------------------------------------------- +{ + "aggregations": { + "comments": { + "top_usernames": { + "buckets" : [ + { + "key" : "username_1", + "doc_count" : 12, + "comment_to_issue" : { + "top_tags_per_comment" : { + "buckets" : [ + { + "key" : "tag1", + "doc_count" : 9 + }, + ... 
+ ] + } + }, + ... + } + ] + } + } + } +} +-------------------------------------------------- diff --git a/src/main/java/org/elasticsearch/search/aggregations/AggregationBuilders.java b/src/main/java/org/elasticsearch/search/aggregations/AggregationBuilders.java index 312fdd0e3df..3ad7ac7f4e1 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/AggregationBuilders.java +++ b/src/main/java/org/elasticsearch/search/aggregations/AggregationBuilders.java @@ -29,6 +29,7 @@ import org.elasticsearch.search.aggregations.bucket.range.RangeBuilder; import org.elasticsearch.search.aggregations.bucket.range.date.DateRangeBuilder; import org.elasticsearch.search.aggregations.bucket.range.geodistance.GeoDistanceBuilder; import org.elasticsearch.search.aggregations.bucket.range.ipv4.IPv4RangeBuilder; +import org.elasticsearch.search.aggregations.bucket.nested.ReverseNestedBuilder; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder; import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder; import org.elasticsearch.search.aggregations.metrics.avg.AvgBuilder; @@ -93,6 +94,10 @@ public class AggregationBuilders { return new NestedBuilder(name); } + public static ReverseNestedBuilder reverseNested(String name) { + return new ReverseNestedBuilder(name); + } + public static GeoDistanceBuilder geoDistance(String name) { return new GeoDistanceBuilder(name); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/AggregationModule.java b/src/main/java/org/elasticsearch/search/aggregations/AggregationModule.java index edd31038d26..0d9bbf4da7b 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/AggregationModule.java +++ b/src/main/java/org/elasticsearch/search/aggregations/AggregationModule.java @@ -32,6 +32,7 @@ import org.elasticsearch.search.aggregations.bucket.range.RangeParser; import org.elasticsearch.search.aggregations.bucket.range.date.DateRangeParser; import 
org.elasticsearch.search.aggregations.bucket.range.geodistance.GeoDistanceParser; import org.elasticsearch.search.aggregations.bucket.range.ipv4.IpRangeParser; +import org.elasticsearch.search.aggregations.bucket.nested.ReverseNestedParser; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsParser; import org.elasticsearch.search.aggregations.bucket.terms.TermsParser; import org.elasticsearch.search.aggregations.metrics.avg.AvgParser; @@ -77,6 +78,7 @@ public class AggregationModule extends AbstractModule { parsers.add(GeoDistanceParser.class); parsers.add(GeoHashGridParser.class); parsers.add(NestedParser.class); + parsers.add(ReverseNestedParser.class); } /** diff --git a/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java b/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java index 060847a75aa..19eecdc01f8 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java +++ b/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java @@ -165,7 +165,7 @@ public class AggregationPhase implements SearchPhase { @Override public boolean acceptsDocsOutOfOrder() { - return true; + return !aggregationContext.scoreDocsInOrder(); } @Override diff --git a/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java b/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java index bb6ddf4f187..6c2266ac668 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java +++ b/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java @@ -30,6 +30,7 @@ import org.elasticsearch.search.aggregations.bucket.range.InternalRange; import org.elasticsearch.search.aggregations.bucket.range.date.InternalDateRange; import org.elasticsearch.search.aggregations.bucket.range.geodistance.InternalGeoDistance; import 
org.elasticsearch.search.aggregations.bucket.range.ipv4.InternalIPv4Range; +import org.elasticsearch.search.aggregations.bucket.nested.InternalReverseNested; import org.elasticsearch.search.aggregations.bucket.significant.SignificantLongTerms; import org.elasticsearch.search.aggregations.bucket.significant.SignificantStringTerms; import org.elasticsearch.search.aggregations.bucket.significant.UnmappedSignificantTerms; @@ -85,5 +86,6 @@ public class TransportAggregationModule extends AbstractModule { InternalDateHistogram.registerStream(); InternalGeoDistance.registerStream(); InternalNested.registerStream(); + InternalReverseNested.registerStream(); } } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/InternalReverseNested.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/InternalReverseNested.java new file mode 100644 index 00000000000..38721fbaf57 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/InternalReverseNested.java @@ -0,0 +1,59 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.elasticsearch.search.aggregations.bucket.nested; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.search.aggregations.AggregationStreams; +import org.elasticsearch.search.aggregations.InternalAggregations; +import org.elasticsearch.search.aggregations.bucket.InternalSingleBucketAggregation; + +import java.io.IOException; + +/** + * + */ +public class InternalReverseNested extends InternalSingleBucketAggregation implements ReverseNested { + + public static final Type TYPE = new Type("reverse_nested"); + + public final static AggregationStreams.Stream STREAM = new AggregationStreams.Stream() { + @Override + public InternalReverseNested readResult(StreamInput in) throws IOException { + InternalReverseNested result = new InternalReverseNested(); + result.readFrom(in); + return result; + } + }; + + public static void registerStream() { + AggregationStreams.registerStream(STREAM, TYPE.stream()); + } + + public InternalReverseNested() { + } + + public InternalReverseNested(String name, long docCount, InternalAggregations aggregations) { + super(name, docCount, aggregations); + } + + @Override + public Type type() { + return TYPE; + } + +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index a1f793ac919..f42421fd720 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -40,6 +40,7 @@ import java.io.IOException; */ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware { + private final String nestedPath; private final Aggregator parentAggregator; private Filter parentFilter; private final Filter childFilter; @@ -49,31 +50,23 @@ public class NestedAggregator extends SingleBucketAggregator implements 
ReaderCo public NestedAggregator(String name, AggregatorFactories factories, String nestedPath, AggregationContext aggregationContext, Aggregator parentAggregator) { super(name, factories, aggregationContext, parentAggregator); + this.nestedPath = nestedPath; this.parentAggregator = parentAggregator; MapperService.SmartNameObjectMapper mapper = aggregationContext.searchContext().smartNameObjectMapper(nestedPath); if (mapper == null) { - throw new AggregationExecutionException("facet nested path [" + nestedPath + "] not found"); + throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] not found"); } ObjectMapper objectMapper = mapper.mapper(); if (objectMapper == null) { - throw new AggregationExecutionException("facet nested path [" + nestedPath + "] not found"); + throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] not found"); } if (!objectMapper.nested().isNested()) { - throw new AggregationExecutionException("facet nested path [" + nestedPath + "] is not nested"); + throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] is not nested"); } childFilter = aggregationContext.searchContext().filterCache().cache(objectMapper.nestedTypeFilter()); } - private NestedAggregator findClosestNestedAggregator(Aggregator parent) { - for (; parent != null; parent = parent.parent()) { - if (parent instanceof NestedAggregator) { - return (NestedAggregator) parent; - } - } - return null; - } - @Override public void setNextReader(AtomicReaderContext reader) { if (parentFilter == null) { @@ -135,6 +128,19 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo return new InternalNested(name, 0, buildEmptySubAggregations()); } + public String getNestedPath() { + return nestedPath; + } + + static NestedAggregator findClosestNestedAggregator(Aggregator parent) { + for (; parent != null; parent = parent.parent()) { + if (parent instanceof NestedAggregator) { + return 
(NestedAggregator) parent; + } + } + return null; + } + public static class Factory extends AggregatorFactory { private final String path; diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNested.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNested.java new file mode 100644 index 00000000000..19e698ff22d --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNested.java @@ -0,0 +1,27 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.aggregations.bucket.nested; + +import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregation; + +/** + */ +public interface ReverseNested extends SingleBucketAggregation { +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java new file mode 100644 index 00000000000..6c90c490268 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java @@ -0,0 +1,155 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.elasticsearch.search.aggregations.bucket.nested; + +import com.carrotsearch.hppc.LongIntOpenHashMap; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Filter; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.lucene.ReaderContextAware; +import org.elasticsearch.common.lucene.docset.DocIdSets; +import org.elasticsearch.common.recycler.Recycler; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.object.ObjectMapper; +import org.elasticsearch.index.search.nested.NonNestedDocsFilter; +import org.elasticsearch.search.SearchParseException; +import org.elasticsearch.search.aggregations.*; +import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +import static org.elasticsearch.search.aggregations.bucket.nested.NestedAggregator.findClosestNestedAggregator; + +/** + * + */ +public class ReverseNestedAggregator extends SingleBucketAggregator implements ReaderContextAware { + + private final Filter parentFilter; + private DocIdSetIterator parentDocs; + + // TODO: Add LongIntPagedHashMap? 
+ private final Recycler.V<LongIntOpenHashMap> bucketOrdToLastCollectedParentDocRecycler; + private final LongIntOpenHashMap bucketOrdToLastCollectedParentDoc; + + public ReverseNestedAggregator(String name, AggregatorFactories factories, String nestedPath, AggregationContext aggregationContext, Aggregator parent) { + super(name, factories, aggregationContext, parent); + + // Early validation: fail before any recycled resource is acquired + NestedAggregator closestNestedAggregator = findClosestNestedAggregator(parent); + if (closestNestedAggregator == null) { + throw new SearchParseException(aggregationContext.searchContext(), "Reverse nested aggregation [" + name + "] can only be used inside a [nested] aggregation"); + } + if (nestedPath == null) { + parentFilter = aggregationContext.searchContext().filterCache().cache(NonNestedDocsFilter.INSTANCE); + } else { + MapperService.SmartNameObjectMapper mapper = aggregationContext.searchContext().smartNameObjectMapper(nestedPath); + if (mapper == null) { + throw new AggregationExecutionException("[reverse_nested] nested path [" + nestedPath + "] not found"); + } + ObjectMapper objectMapper = mapper.mapper(); + if (objectMapper == null) { + throw new AggregationExecutionException("[reverse_nested] nested path [" + nestedPath + "] not found"); + } + if (!objectMapper.nested().isNested()) { + throw new AggregationExecutionException("[reverse_nested] nested path [" + nestedPath + "] is not nested"); + } + parentFilter = aggregationContext.searchContext().filterCache().cache(objectMapper.nestedTypeFilter()); + } + bucketOrdToLastCollectedParentDocRecycler = aggregationContext.searchContext().cacheRecycler().longIntMap(32); + bucketOrdToLastCollectedParentDoc = bucketOrdToLastCollectedParentDocRecycler.v(); + aggregationContext.ensureScoreDocsInOrder(); + } + + @Override + public void setNextReader(AtomicReaderContext reader) { + bucketOrdToLastCollectedParentDoc.clear(); + try { + // In ES if parent is deleted, then also the children are deleted, so the child docs this agg receives + // must belong to parent docs that are live. 
For this reason acceptedDocs can also be null here. + DocIdSet docIdSet = parentFilter.getDocIdSet(reader, null); + if (DocIdSets.isEmpty(docIdSet)) { + parentDocs = null; + } else { + parentDocs = docIdSet.iterator(); + } + } catch (IOException ioe) { + throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe); + } + } + + @Override + public void collect(int childDoc, long bucketOrd) throws IOException { + if (parentDocs == null) { + return; + } + + // Find the parentDoc this childDoc belongs to; advance() is undefined for targets <= the current doc, so only move when the iterator is still behind childDoc + int parentDoc = parentDocs.docID() < childDoc ? parentDocs.advance(childDoc) : parentDocs.docID(); + assert childDoc <= parentDoc && parentDoc != DocIdSetIterator.NO_MORE_DOCS; + if (bucketOrdToLastCollectedParentDoc.containsKey(bucketOrd)) { + int lastCollectedParentDoc = bucketOrdToLastCollectedParentDoc.lget(); + if (parentDoc > lastCollectedParentDoc) { + innerCollect(parentDoc, bucketOrd); + bucketOrdToLastCollectedParentDoc.lset(parentDoc); + } + } else { + innerCollect(parentDoc, bucketOrd); + bucketOrdToLastCollectedParentDoc.put(bucketOrd, parentDoc); + } + } + + private void innerCollect(int parentDoc, long bucketOrd) throws IOException { + collectBucket(parentDoc, bucketOrd); + } + + + @Override + public InternalAggregation buildAggregation(long owningBucketOrdinal) { + return new InternalReverseNested(name, bucketDocCount(owningBucketOrdinal), bucketAggregations(owningBucketOrdinal)); + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return new InternalReverseNested(name, 0, buildEmptySubAggregations()); + } + + @Override + protected void doClose() { + Releasables.close(bucketOrdToLastCollectedParentDocRecycler); + } + + public static class Factory extends AggregatorFactory { + + private final String path; + + public Factory(String name, String path) { + super(name, InternalReverseNested.TYPE.name()); + this.path = path; + } + + @Override + public Aggregator create(AggregationContext context, Aggregator parent, long expectedBucketsCount) { + 
return new ReverseNestedAggregator(name, factories, path, context, parent); + } + } +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedBuilder.java new file mode 100644 index 00000000000..59d7aa4a8dc --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedBuilder.java @@ -0,0 +1,50 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package org.elasticsearch.search.aggregations.bucket.nested; + +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.aggregations.AggregationBuilder; + +import java.io.IOException; + +/** + * + */ +public class ReverseNestedBuilder extends AggregationBuilder { + + private String path; + + public ReverseNestedBuilder(String name) { + super(name, InternalReverseNested.TYPE.name()); + } + + public ReverseNestedBuilder path(String path) { + this.path = path; + return this; + } + + @Override + protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (path != null) { + builder.field("path", path); + } + return builder.endObject(); + } +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedParser.java new file mode 100644 index 00000000000..3803316e220 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedParser.java @@ -0,0 +1,60 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.elasticsearch.search.aggregations.bucket.nested; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.SearchParseException; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactory; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +/** + * + */ +public class ReverseNestedParser implements Aggregator.Parser { + + @Override + public String type() { + return InternalReverseNested.TYPE.name(); + } + + @Override + public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { + String path = null; + + XContentParser.Token token; + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.VALUE_STRING) { + if ("path".equals(currentFieldName)) { + path = parser.text(); + } else { + throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "]."); + } + } else { + throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "]."); + } + } + + return new ReverseNestedAggregator.Factory(aggregationName, path); + } +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java b/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java index 8f36d370a29..8617d388bc8 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java +++ b/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java @@ -54,6 +54,7 @@ public class AggregationContext implements ReaderContextAware, ScorerAware { private AtomicReaderContext reader; private Scorer scorer; + private 
boolean scoreDocsInOrder = false; public AggregationContext(SearchContext searchContext) { this.searchContext = searchContext; @@ -99,6 +100,14 @@ public class AggregationContext implements ReaderContextAware, ScorerAware { } } + public boolean scoreDocsInOrder() { + return scoreDocsInOrder; + } + + public void ensureScoreDocsInOrder() { + this.scoreDocsInOrder = true; + } + /** Get a value source given its configuration and the depth of the aggregator in the aggregation tree. */ public VS valuesSource(ValuesSourceConfig config, int depth) { assert config.valid() : "value source config is invalid - must have either a field context or a script or marked as unmapped"; diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java new file mode 100644 index 00000000000..5e7601a3e98 --- /dev/null +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java @@ -0,0 +1,332 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.elasticsearch.search.aggregations.bucket; + +import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.aggregations.bucket.nested.Nested; +import org.elasticsearch.search.aggregations.bucket.nested.ReverseNested; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.search.aggregations.AggregationBuilders.*; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.core.IsNull.notNullValue; + +/** + * + */ +@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE, numDataNodes = 1) +public class ReverseNestedTests extends ElasticsearchIntegrationTest { + + @Before + public void init() throws Exception { + assertAcked(prepareCreate("idx") + .addMapping( + "type1", + jsonBuilder().startObject().startObject("properties") + .startObject("field1").field("type", "string").endObject() + .startObject("nested1").field("type", "nested").startObject("properties") + .startObject("field2").field("type", "string").endObject() + .endObject().endObject() + .endObject().endObject() + ) + .addMapping( + "type2", + jsonBuilder().startObject().startObject("properties") + .startObject("nested1").field("type", "nested").startObject("properties") + .startObject("field1").field("type", "string").endObject() + 
.startObject("nested2").field("type", "nested").startObject("properties") + .startObject("field2").field("type", "string").endObject() + .endObject().endObject() + .endObject().endObject() + .endObject().endObject() + ) + ); + + insertDocs(Arrays.asList("a", "b", "c"), Arrays.asList("1", "2", "3", "4")); + insertDocs(Arrays.asList("b", "c", "d"), Arrays.asList("4", "5", "6", "7")); + insertDocs(Arrays.asList("c", "d", "e"), Arrays.asList("7", "8", "9", "1")); + refresh(); + insertDocs(Arrays.asList("a", "e"), Arrays.asList("7", "4", "1", "1")); + insertDocs(Arrays.asList("a", "c"), Arrays.asList("2", "1")); + insertDocs(Arrays.asList("a"), Arrays.asList("3", "4")); + refresh(); + insertDocs(Arrays.asList("x", "c"), Arrays.asList("1", "8")); + insertDocs(Arrays.asList("y", "c"), Arrays.asList("6")); + insertDocs(Arrays.asList("z"), Arrays.asList("5", "9")); + refresh(); + + ensureSearchable(); + } + + private void insertDocs(List values1, List values2) throws Exception { + XContentBuilder source = jsonBuilder() + .startObject() + .array("field1", values1.toArray()) + .startArray("nested1"); + for (String value1 : values2) { + source.startObject().field("field2", value1).endObject(); + } + source.endArray().endObject(); + indexRandom(false, client().prepareIndex("idx", "type1").setRouting("1").setSource(source)); + + source = jsonBuilder() + .startObject() + .field("x", "y") + .startArray("nested1").startObject() + .array("field1", values1.toArray()) + .startArray("nested2"); + for (String value1 : values2) { + source.startObject().field("field2", value1).endObject(); + } + source.endArray().endObject().endArray().endObject(); + indexRandom(false, client().prepareIndex("idx", "type2").setRouting("1").setSource(source)); + } + + @Test + public void simple_reverseNestedToRoot() throws Exception { + SearchResponse response = client().prepareSearch("idx").setTypes("type1") + .addAggregation(nested("nested1").path("nested1") + .subAggregation( + 
terms("field2").field("nested1.field2") + .subAggregation( + reverseNested("nested1_to_field1") + .subAggregation( + terms("field1").field("field1") + ) + ) + ) + ).get(); + + verifyResults(response); + } + + @Test + public void simple_reverseNestedToNested1() throws Exception { + SearchResponse response = client().prepareSearch("idx") + .addAggregation(nested("nested1").path("nested1.nested2") + .subAggregation( + terms("field2").field("nested1.nested2.field2") + .subAggregation( + reverseNested("nested1_to_field1").path("nested1") + .subAggregation( + terms("field1").field("nested1.field1") + ) + ) + ) + ).get(); + verifyResults(response); + } + + @Test(expected = SearchPhaseExecutionException.class) + public void testReverseNestedAggWithoutNestedAgg() throws Exception { + client().prepareSearch("idx") + .addAggregation(terms("field2").field("nested1.nested2.field2") + .subAggregation( + reverseNested("nested1_to_field1") + .subAggregation( + terms("field1").field("nested1.field1") + ) + ) + ).get(); + } + + private void verifyResults(SearchResponse response) { + assertSearchResponse(response); + + Nested nested = response.getAggregations().get("nested1"); + assertThat(nested, notNullValue()); + assertThat(nested.getName(), equalTo("nested1")); + assertThat(nested.getDocCount(), equalTo(25l)); + assertThat(nested.getAggregations().asList().isEmpty(), is(false)); + + Terms usernames = nested.getAggregations().get("field2"); + assertThat(usernames, notNullValue()); + assertThat(usernames.getBuckets().size(), equalTo(9)); + List usernameBuckets = new ArrayList<>(usernames.getBuckets()); + + // nested.field2: 1 + Terms.Bucket bucket = usernameBuckets.get(0); + assertThat(bucket.getKey(), equalTo("1")); + assertThat(bucket.getDocCount(), equalTo(6l)); + ReverseNested reverseNested = bucket.getAggregations().get("nested1_to_field1"); + Terms tags = reverseNested.getAggregations().get("field1"); + List tagsBuckets = new ArrayList<>(tags.getBuckets()); + 
assertThat(tagsBuckets.size(), equalTo(6)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(4l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("a")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(3l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("e")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(3).getKey(), equalTo("b")); + assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(4).getKey(), equalTo("d")); + assertThat(tagsBuckets.get(4).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(5).getKey(), equalTo("x")); + assertThat(tagsBuckets.get(5).getDocCount(), equalTo(1l)); + + // nested.field2: 4 + bucket = usernameBuckets.get(1); + assertThat(bucket.getKey(), equalTo("4")); + assertThat(bucket.getDocCount(), equalTo(4l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(5)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("a")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(3l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("b")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(3).getKey(), equalTo("d")); + assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(4).getKey(), equalTo("e")); + assertThat(tagsBuckets.get(4).getDocCount(), equalTo(1l)); + + // nested.field2: 7 + bucket = usernameBuckets.get(2); + assertThat(bucket.getKey(), equalTo("7")); + assertThat(bucket.getDocCount(), equalTo(3l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + 
tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(5)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("d")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("e")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(3).getKey(), equalTo("a")); + assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(4).getKey(), equalTo("b")); + assertThat(tagsBuckets.get(4).getDocCount(), equalTo(1l)); + + // nested.field2: 2 + bucket = usernameBuckets.get(3); + assertThat(bucket.getKey(), equalTo("2")); + assertThat(bucket.getDocCount(), equalTo(2l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(3)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("a")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("b")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l)); + + // nested.field2: 3 + bucket = usernameBuckets.get(4); + assertThat(bucket.getKey(), equalTo("3")); + assertThat(bucket.getDocCount(), equalTo(2l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(3)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("a")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("b")); + 
assertThat(tagsBuckets.get(1).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l)); + + // nested.field2: 5 + bucket = usernameBuckets.get(5); + assertThat(bucket.getKey(), equalTo("5")); + assertThat(bucket.getDocCount(), equalTo(2l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(4)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("b")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("d")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(3).getKey(), equalTo("z")); + assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l)); + + // nested.field2: 6 + bucket = usernameBuckets.get(6); + assertThat(bucket.getKey(), equalTo("6")); + assertThat(bucket.getDocCount(), equalTo(2l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(4)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("b")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("d")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(3).getKey(), equalTo("y")); + assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l)); + + // nested.field2: 8 + bucket = usernameBuckets.get(7); + assertThat(bucket.getKey(), equalTo("8")); + 
assertThat(bucket.getDocCount(), equalTo(2l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(4)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(2l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("d")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("e")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(3).getKey(), equalTo("x")); + assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l)); + + // nested.field2: 9 + bucket = usernameBuckets.get(8); + assertThat(bucket.getKey(), equalTo("9")); + assertThat(bucket.getDocCount(), equalTo(2l)); + reverseNested = bucket.getAggregations().get("nested1_to_field1"); + tags = reverseNested.getAggregations().get("field1"); + tagsBuckets = new ArrayList<>(tags.getBuckets()); + assertThat(tagsBuckets.size(), equalTo(4)); + assertThat(tagsBuckets.get(0).getKey(), equalTo("c")); + assertThat(tagsBuckets.get(0).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(1).getKey(), equalTo("d")); + assertThat(tagsBuckets.get(1).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(2).getKey(), equalTo("e")); + assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l)); + assertThat(tagsBuckets.get(3).getKey(), equalTo("z")); + assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l)); + } +}