From b01070a3903fd9c1db01e2b15763df5c0761e935 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 6 Mar 2017 20:12:51 +0100 Subject: [PATCH] [TEST] Added unit tests for diversified sampler aggregator. --- .../BestDocsDeferringCollector.java | 34 ++-- .../sampler/DiversifiedAggregatorFactory.java | 2 +- ...DiversifiedBytesHashSamplerAggregator.java | 3 +- .../DiversifiedMapSamplerAggregator.java | 3 +- .../DiversifiedNumericSamplerAggregator.java | 3 +- .../DiversifiedOrdinalsSamplerAggregator.java | 3 +- .../bucket/sampler/SamplerAggregator.java | 7 +- .../sampler/SamplerAggregatorFactory.java | 2 +- .../bucket/sampler/UnmappedSampler.java | 2 +- .../aggregations/AggregatorTestCase.java | 66 +++---- .../BestBucketsDeferringCollectorTests.java | 110 ++++++++++++ .../BestDocsDeferringCollectorTests.java | 113 ++++++++++++ .../DiversifiedAggregationBuilderTests.java} | 5 +- .../sampler/DiversifiedSamplerTests.java | 165 ++++++++++++++++++ 14 files changed, 446 insertions(+), 72 deletions(-) rename core/src/main/java/org/elasticsearch/search/aggregations/bucket/{ => sampler}/BestDocsDeferringCollector.java (94%) create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java rename core/src/test/java/org/elasticsearch/search/aggregations/bucket/{DiversifiedSamplerTests.java => sampler/DiversifiedAggregationBuilderTests.java} (89%) create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/BestDocsDeferringCollector.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java similarity index 94% rename from core/src/main/java/org/elasticsearch/search/aggregations/bucket/BestDocsDeferringCollector.java rename to core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index 8d699b7c438..acdb3f18e70 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/BestDocsDeferringCollector.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -16,8 +16,7 @@ * specific language governing permissions and limitations * under the License. */ - -package org.elasticsearch.search.aggregations.bucket; +package org.elasticsearch.search.aggregations.bucket.sampler; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.DocIdSetIterator; @@ -34,11 +33,11 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.search.aggregations.BucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Comparator; import java.util.List; /** @@ -48,13 +47,11 @@ import java.util.List; * {@link BestDocsDeferringCollector#createTopDocsCollector(int)} is designed to * be overridden and allows subclasses to choose a custom collector * implementation for determining the top N matches. - * */ - public class BestDocsDeferringCollector extends DeferringBucketCollector implements Releasable { - final List entries = new ArrayList<>(); - BucketCollector deferred; - ObjectArray perBucketSamples; + private final List entries = new ArrayList<>(); + private BucketCollector deferred; + private ObjectArray perBucketSamples; private int shardSize; private PerSegmentCollects perSegCollector; private final BigArrays bigArrays; @@ -65,14 +62,12 @@ public class BestDocsDeferringCollector extends DeferringBucketCollector impleme * @param shardSize * The number of top-scoring docs to collect for each bucket */ - public BestDocsDeferringCollector(int shardSize, BigArrays bigArrays) { + BestDocsDeferringCollector(int shardSize, BigArrays bigArrays) { this.shardSize = shardSize; this.bigArrays = bigArrays; perBucketSamples = bigArrays.newObjectArray(1); } - - @Override public boolean needsScores() { return true; @@ -126,7 +121,6 @@ public class BestDocsDeferringCollector extends DeferringBucketCollector impleme } private void runDeferredAggs() throws IOException { - List allDocs = new ArrayList<>(shardSize); for (int i = 0; i < perBucketSamples.size(); i++) { PerParentBucketSamples perBucketSample = perBucketSamples.get(i); @@ -138,15 +132,12 @@ public class BestDocsDeferringCollector extends DeferringBucketCollector impleme // Sort the top matches by docID for the benefit of deferred collector ScoreDoc[] docsArr = allDocs.toArray(new ScoreDoc[allDocs.size()]); - Arrays.sort(docsArr, new Comparator() { - @Override - public int compare(ScoreDoc o1, ScoreDoc o2) { - if(o1.doc == o2.doc){ - return o1.shardIndex - o2.shardIndex; - } - return o1.doc - o2.doc; - } - }); + Arrays.sort(docsArr, (o1, o2) -> { + if(o1.doc == o2.doc){ + return o1.shardIndex - o2.shardIndex; + } + return o1.doc - o2.doc; + }); try { for (PerSegmentCollects perSegDocs : entries) { perSegDocs.replayRelatedMatches(docsArr); @@ -295,7 +286,6 @@ public class BestDocsDeferringCollector extends DeferringBucketCollector impleme } } - public int getDocCount(long parentBucket) { PerParentBucketSamples sampler = perBucketSamples.get((int) parentBucket); if (sampler == null) { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregatorFactory.java index 97a68649ca2..2bd68296d7b 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregatorFactory.java @@ -43,7 +43,7 @@ public class DiversifiedAggregatorFactory extends ValuesSourceAggregatorFactory< private final int maxDocsPerValue; private final String executionHint; - public DiversifiedAggregatorFactory(String name, ValuesSourceConfig config, int shardSize, int maxDocsPerValue, + DiversifiedAggregatorFactory(String name, ValuesSourceConfig config, int shardSize, int maxDocsPerValue, String executionHint, SearchContext context, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { super(name, config, context, parent, subFactoriesBuilder, metaData); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedBytesHashSamplerAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedBytesHashSamplerAggregator.java index f4d45a7d471..33532a9f438 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedBytesHashSamplerAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedBytesHashSamplerAggregator.java @@ -29,7 +29,6 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; -import org.elasticsearch.search.aggregations.bucket.BestDocsDeferringCollector; import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -48,7 +47,7 @@ public class DiversifiedBytesHashSamplerAggregator extends SamplerAggregator { private ValuesSource valuesSource; private int maxDocsPerValue; - public DiversifiedBytesHashSamplerAggregator(String name, int shardSize, AggregatorFactories factories, + DiversifiedBytesHashSamplerAggregator(String name, int shardSize, AggregatorFactories factories, SearchContext context, Aggregator parent, List pipelineAggregators, Map metaData, ValuesSource valuesSource, int maxDocsPerValue) throws IOException { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedMapSamplerAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedMapSamplerAggregator.java index 08fa1bcb7fa..d16e798b961 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedMapSamplerAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedMapSamplerAggregator.java @@ -31,7 +31,6 @@ import org.elasticsearch.common.util.BytesRefHash; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; -import org.elasticsearch.search.aggregations.bucket.BestDocsDeferringCollector; import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -47,7 +46,7 @@ public class DiversifiedMapSamplerAggregator extends SamplerAggregator { private int maxDocsPerValue; private BytesRefHash bucketOrds; - public DiversifiedMapSamplerAggregator(String name, int shardSize, AggregatorFactories factories, + DiversifiedMapSamplerAggregator(String name, int shardSize, AggregatorFactories factories, SearchContext context, Aggregator parent, List pipelineAggregators, Map metaData, ValuesSource valuesSource, int maxDocsPerValue) throws IOException { super(name, shardSize, factories, context, parent, pipelineAggregators, metaData); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedNumericSamplerAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedNumericSamplerAggregator.java index c595fdb5c25..c33a1a55532 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedNumericSamplerAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedNumericSamplerAggregator.java @@ -28,7 +28,6 @@ import org.apache.lucene.search.TopDocsCollector; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; -import org.elasticsearch.search.aggregations.bucket.BestDocsDeferringCollector; import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -43,7 +42,7 @@ public class DiversifiedNumericSamplerAggregator extends SamplerAggregator { private ValuesSource.Numeric valuesSource; private int maxDocsPerValue; - public DiversifiedNumericSamplerAggregator(String name, int shardSize, AggregatorFactories factories, + DiversifiedNumericSamplerAggregator(String name, int shardSize, AggregatorFactories factories, SearchContext context, Aggregator parent, List pipelineAggregators, Map metaData, ValuesSource.Numeric valuesSource, int maxDocsPerValue) throws IOException { super(name, shardSize, factories, context, parent, pipelineAggregators, metaData); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedOrdinalsSamplerAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedOrdinalsSamplerAggregator.java index 5eb37f310ad..a5d027debe1 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedOrdinalsSamplerAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedOrdinalsSamplerAggregator.java @@ -29,7 +29,6 @@ import org.apache.lucene.search.DiversifiedTopDocsCollector.ScoreDocKey; import org.apache.lucene.search.TopDocsCollector; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; -import org.elasticsearch.search.aggregations.bucket.BestDocsDeferringCollector; import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -44,7 +43,7 @@ public class DiversifiedOrdinalsSamplerAggregator extends SamplerAggregator { private ValuesSource.Bytes.WithOrdinals.FieldData valuesSource; private int maxDocsPerValue; - public DiversifiedOrdinalsSamplerAggregator(String name, int shardSize, AggregatorFactories factories, + DiversifiedOrdinalsSamplerAggregator(String name, int shardSize, AggregatorFactories factories, SearchContext context, Aggregator parent, List pipelineAggregators, Map metaData, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, int maxDocsPerValue) throws IOException { super(name, shardSize, factories, context, parent, pipelineAggregators, metaData); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java index ee01260acb3..0592ccf7cfc 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java @@ -26,7 +26,6 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.LeafBucketCollector; -import org.elasticsearch.search.aggregations.bucket.BestDocsDeferringCollector; import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector; import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; @@ -53,7 +52,6 @@ public class SamplerAggregator extends SingleBucketAggregator { public static final ParseField MAX_DOCS_PER_VALUE_FIELD = new ParseField("max_docs_per_value"); public static final ParseField EXECUTION_HINT_FIELD = new ParseField("execution_hint"); - public enum ExecutionMode { MAP(new ParseField("map")) { @@ -141,7 +139,7 @@ public class SamplerAggregator extends SingleBucketAggregator { protected final int shardSize; protected BestDocsDeferringCollector bdd; - public SamplerAggregator(String name, int shardSize, AggregatorFactories factories, SearchContext context, + SamplerAggregator(String name, int shardSize, AggregatorFactories factories, SearchContext context, Aggregator parent, List pipelineAggregators, Map metaData) throws IOException { super(name, factories, context, parent, pipelineAggregators, metaData); this.shardSize = shardSize; @@ -156,10 +154,8 @@ public class SamplerAggregator extends SingleBucketAggregator { public DeferringBucketCollector getDeferringCollector() { bdd = new BestDocsDeferringCollector(shardSize, context.bigArrays()); return bdd; - } - @Override protected boolean shouldDefer(Aggregator aggregator) { return true; @@ -193,4 +189,3 @@ public class SamplerAggregator extends SingleBucketAggregator { } } - diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregatorFactory.java index 4fb7e28f3d6..c122cc95fa1 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregatorFactory.java @@ -33,7 +33,7 @@ public class SamplerAggregatorFactory extends AggregatorFactory parent, + SamplerAggregatorFactory(String name, int shardSize, SearchContext context, AggregatorFactory parent, AggregatorFactories.Builder subFactories, Map metaData) throws IOException { super(name, context, parent, subFactories, metaData); this.shardSize = shardSize; diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/UnmappedSampler.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/UnmappedSampler.java index 1cf77baa1f0..6eb78c31c5c 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/UnmappedSampler.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/UnmappedSampler.java @@ -31,7 +31,7 @@ import java.util.Map; public class UnmappedSampler extends InternalSampler { public static final String NAME = "unmapped_sampler"; - public UnmappedSampler(String name, List pipelineAggregators, Map metaData) { + UnmappedSampler(String name, List pipelineAggregators, Map metaData) { super(name, 0, InternalAggregations.EMPTY, pipelineAggregators, metaData); } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index f3ba22e87f6..65f2965df97 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -51,7 +51,6 @@ import org.elasticsearch.search.internal.ContextIndexSearcher; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.IndexSettingsModule; import java.io.IOException; import java.util.ArrayList; @@ -75,15 +74,31 @@ public abstract class AggregatorTestCase extends ESTestCase { protected A createAggregator(B aggregationBuilder, IndexSearcher indexSearcher, MappedFieldType... fieldTypes) throws IOException { - IndexSettings indexSettings = new IndexSettings( - IndexMetaData.builder("_index").settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)) - .numberOfShards(1) - .numberOfReplicas(0) - .creationDate(System.currentTimeMillis()) - .build(), - Settings.EMPTY - ); + IndexSettings indexSettings = createIndexSettings(); + SearchContext searchContext = createSearchContext(indexSearcher, indexSettings); + CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService(); + when(searchContext.bigArrays()).thenReturn(new MockBigArrays(Settings.EMPTY, circuitBreakerService)); + // TODO: now just needed for top_hits, this will need to be revised for other agg unit tests: + MapperService mapperService = mapperServiceMock(); + when(mapperService.hasNested()).thenReturn(false); + when(searchContext.mapperService()).thenReturn(mapperService); + IndexFieldDataService ifds = new IndexFieldDataService(indexSettings, + new IndicesFieldDataCache(Settings.EMPTY, new IndexFieldDataCache.Listener() { + }), circuitBreakerService, mapperService); + when(searchContext.fieldData()).thenReturn(ifds); + SearchLookup searchLookup = new SearchLookup(mapperService, ifds, new String[]{"type"}); + when(searchContext.lookup()).thenReturn(searchLookup); + + QueryShardContext queryShardContext = queryShardContextMock(fieldTypes, indexSettings, circuitBreakerService); + when(searchContext.getQueryShardContext()).thenReturn(queryShardContext); + + @SuppressWarnings("unchecked") + A aggregator = (A) aggregationBuilder.build(searchContext, null).create(null, true); + return aggregator; + } + + protected SearchContext createSearchContext(IndexSearcher indexSearcher, IndexSettings indexSettings) { Engine.Searcher searcher = new Engine.Searcher("aggregator_test", indexSearcher); QueryCache queryCache = new DisabledQueryCache(indexSettings); QueryCachingPolicy queryCachingPolicy = new QueryCachingPolicy() { @@ -99,38 +114,29 @@ public abstract class AggregatorTestCase extends ESTestCase { }; ContextIndexSearcher contextIndexSearcher = new ContextIndexSearcher(searcher, queryCache, queryCachingPolicy); - CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService(); SearchContext searchContext = mock(SearchContext.class); when(searchContext.numberOfShards()).thenReturn(1); when(searchContext.searcher()).thenReturn(contextIndexSearcher); - when(searchContext.bigArrays()).thenReturn(new MockBigArrays(Settings.EMPTY, circuitBreakerService)); when(searchContext.fetchPhase()) - .thenReturn(new FetchPhase(Arrays.asList(new FetchSourceSubPhase(), new DocValueFieldsFetchSubPhase()))); + .thenReturn(new FetchPhase(Arrays.asList(new FetchSourceSubPhase(), new DocValueFieldsFetchSubPhase()))); doAnswer(invocation -> { /* Store the releasables so we can release them at the end of the test case. This is important because aggregations don't * close their sub-aggregations. This is fairly similar to what the production code does. */ releasables.add((Releasable) invocation.getArguments()[0]); return null; }).when(searchContext).addReleasable(anyObject(), anyObject()); + return searchContext; + } - // TODO: now just needed for top_hits, this will need to be revised for other agg unit tests: - MapperService mapperService = mapperServiceMock(); - when(mapperService.hasNested()).thenReturn(false); - when(searchContext.mapperService()).thenReturn(mapperService); - IndexFieldDataService ifds = new IndexFieldDataService(IndexSettingsModule.newIndexSettings("test", Settings.EMPTY), - new IndicesFieldDataCache(Settings.EMPTY, new IndexFieldDataCache.Listener() { - }), circuitBreakerService, mapperService); - when(searchContext.fieldData()).thenReturn(ifds); - - SearchLookup searchLookup = new SearchLookup(mapperService, ifds, new String[]{"type"}); - when(searchContext.lookup()).thenReturn(searchLookup); - - QueryShardContext queryShardContext = queryShardContextMock(fieldTypes, indexSettings, circuitBreakerService); - when(searchContext.getQueryShardContext()).thenReturn(queryShardContext); - - @SuppressWarnings("unchecked") - A aggregator = (A) aggregationBuilder.build(searchContext, null).create(null, true); - return aggregator; + protected IndexSettings createIndexSettings() { + return new IndexSettings( + IndexMetaData.builder("_index").settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)) + .numberOfShards(1) + .numberOfReplicas(0) + .creationDate(System.currentTimeMillis()) + .build(), + Settings.EMPTY + ); } /** diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java new file mode 100644 index 00000000000..02cd88f16fa --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java @@ -0,0 +1,110 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.BucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +public class BestBucketsDeferringCollectorTests extends AggregatorTestCase { + + public void testReplay() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + int numDocs = randomInt(128); + int maxNumValues = randomInt(16); + for (int i = 0; i < numDocs; i++) { + Document document = new Document(); + document.add(new StringField("field", String.valueOf(randomInt(maxNumValues)), Field.Store.NO)); + indexWriter.addDocument(document); + } + + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + TermQuery termQuery = new TermQuery(new Term("field", String.valueOf(randomInt(maxNumValues)))); + TopDocs topDocs = indexSearcher.search(termQuery, numDocs); + + SearchContext searchContext = createSearchContext(indexSearcher, createIndexSettings()); + BestBucketsDeferringCollector collector = new BestBucketsDeferringCollector(searchContext); + Set deferredCollectedDocIds = new HashSet<>(); + collector.setDeferredCollector(Collections.singleton(bla(deferredCollectedDocIds))); + collector.preCollection(); + indexSearcher.search(termQuery, collector); + collector.postCollection(); + collector.replay(0); + + assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size()); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertTrue("expected docid [" + scoreDoc.doc + "] is missing", deferredCollectedDocIds.contains(scoreDoc.doc)); + } + indexReader.close(); + directory.close(); + } + + private BucketCollector bla(Set docIds) { + return new BucketCollector() { + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { + return new LeafBucketCollector() { + @Override + public void collect(int doc, long bucket) throws IOException { + docIds.add(ctx.docBase + doc); + } + }; + } + + @Override + public void preCollection() throws IOException { + + } + + @Override + public void postCollection() throws IOException { + + } + + @Override + public boolean needsScores() { + return false; + } + }; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java new file mode 100644 index 00000000000..d99f7e9fa73 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java @@ -0,0 +1,113 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.sampler; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.BucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollector; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +public class BestDocsDeferringCollectorTests extends AggregatorTestCase { + + public void testReplay() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + int numDocs = randomIntBetween(1, 128); + int maxNumValues = randomInt(16); + for (int i = 0; i < numDocs; i++) { + Document document = new Document(); + document.add(new StringField("field", String.valueOf(randomInt(maxNumValues)), Field.Store.NO)); + indexWriter.addDocument(document); + } + + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + TermQuery termQuery = new TermQuery(new Term("field", String.valueOf(randomInt(maxNumValues)))); + TopDocs topDocs = indexSearcher.search(termQuery, numDocs); + + BestDocsDeferringCollector collector = + new BestDocsDeferringCollector(numDocs, new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService())); + Set deferredCollectedDocIds = new HashSet<>(); + collector.setDeferredCollector(Collections.singleton(testCollector(deferredCollectedDocIds))); + collector.preCollection(); + indexSearcher.search(termQuery, collector); + collector.postCollection(); + collector.replay(0); + + assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size()); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertTrue("expected docid [" + scoreDoc.doc + "] is missing", deferredCollectedDocIds.contains(scoreDoc.doc)); + } + collector.close(); + indexReader.close(); + directory.close(); + } + + private BucketCollector testCollector(Set docIds) { + return new BucketCollector() { + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { + return new LeafBucketCollector() { + @Override + public void collect(int doc, long bucket) throws IOException { + docIds.add(ctx.docBase + doc); + } + }; + } + + @Override + public void preCollection() throws IOException { + + } + + @Override + public void postCollection() throws IOException { + + } + + @Override + public boolean needsScores() { + return false; + } + }; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/DiversifiedSamplerTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregationBuilderTests.java similarity index 89% rename from core/src/test/java/org/elasticsearch/search/aggregations/bucket/DiversifiedSamplerTests.java rename to core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregationBuilderTests.java index b68caad0ea7..eed258bb788 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/DiversifiedSamplerTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedAggregationBuilderTests.java @@ -17,14 +17,13 @@ * under the License. */ -package org.elasticsearch.search.aggregations.bucket; +package org.elasticsearch.search.aggregations.bucket.sampler; import org.elasticsearch.script.Script; import org.elasticsearch.search.aggregations.BaseAggregationTestCase; -import org.elasticsearch.search.aggregations.bucket.sampler.DiversifiedAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregator.ExecutionMode; -public class DiversifiedSamplerTests extends BaseAggregationTestCase { +public class DiversifiedAggregationBuilderTests extends BaseAggregationTestCase { @Override protected final DiversifiedAggregationBuilder createTestAggregatorBuilder() { diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java new file mode 100644 index 00000000000..4e3e95fd5eb --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/DiversifiedSamplerTests.java @@ -0,0 +1,165 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.sampler; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction; +import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; + +import java.io.IOException; +import java.util.function.Consumer; + +public class DiversifiedSamplerTests extends AggregatorTestCase { + + public void testDiversifiedSampler() throws Exception { + String data[] = { + // "id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s,genre_id", + "0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,A Song of Ice and Fire,1,fantasy,0", + "0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,A Song of Ice and Fire,2,fantasy,0", + "055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,A Song of Ice and Fire,3,fantasy,0", + "0553293354,book,Foundation,17.99,true,Isaac Asimov,Foundation Novels,1,scifi,1", + "0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy,0", + "0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi,1", + "0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy,0", + "0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy,0", + "0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy,0", + "080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy,0" + }; + + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + for (String entry : data) { + String[] parts = entry.split(","); + Document document = new Document(); + document.add(new SortedDocValuesField("id", new BytesRef(parts[0]))); + document.add(new StringField("cat", parts[1], Field.Store.NO)); + document.add(new TextField("name", parts[2], Field.Store.NO)); + document.add(new DoubleDocValuesField("price", Double.valueOf(parts[3]))); + document.add(new StringField("inStock", parts[4], Field.Store.NO)); + document.add(new StringField("author", parts[5], Field.Store.NO)); + document.add(new StringField("series", parts[6], Field.Store.NO)); + document.add(new StringField("sequence", parts[7], Field.Store.NO)); + document.add(new SortedDocValuesField("genre", new BytesRef(parts[8]))); + document.add(new NumericDocValuesField("genre_id", Long.valueOf(parts[9]))); + indexWriter.addDocument(document); + } + + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType(); + genreFieldType.setName("genre"); + genreFieldType.setHasDocValues(true); + Consumer verify = result -> { + Terms terms = result.getAggregations().get("terms"); + assertEquals(2, terms.getBuckets().size()); + assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString()); + assertEquals("0812550706", terms.getBuckets().get(1).getKeyAsString()); + }; + testCase(indexSearcher, genreFieldType, "map", verify); + testCase(indexSearcher, genreFieldType, "global_ordinals", verify); + testCase(indexSearcher, genreFieldType, "bytes_hash", verify); + + genreFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG); + genreFieldType.setName("genre_id"); + testCase(indexSearcher, genreFieldType, null, verify); + + // wrong field: + genreFieldType = new KeywordFieldMapper.KeywordFieldType(); + genreFieldType.setName("wrong_field"); + genreFieldType.setHasDocValues(true); + testCase(indexSearcher, genreFieldType, null, result -> { + Terms terms = result.getAggregations().get("terms"); + assertEquals(1, terms.getBuckets().size()); + assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString()); + }); + + indexReader.close(); + directory.close(); + } + + private void testCase(IndexSearcher indexSearcher, MappedFieldType genreFieldType, String executionHint, + Consumer verify) throws IOException { + MappedFieldType idFieldType = new KeywordFieldMapper.KeywordFieldType(); + idFieldType.setName("id"); + idFieldType.setHasDocValues(true); + + SortedNumericDVIndexFieldData fieldData = new SortedNumericDVIndexFieldData(new Index("index", "index"), "price", + IndexNumericFieldData.NumericType.DOUBLE); + FunctionScoreQuery query = new FunctionScoreQuery(new MatchAllDocsQuery(), + new FieldValueFactorFunction("price", 1, FieldValueFactorFunction.Modifier.RECIPROCAL, null, fieldData)); + + DiversifiedAggregationBuilder builder = new DiversifiedAggregationBuilder("_name") + .field(genreFieldType.name()) + .executionHint(executionHint) + .subAggregation(new TermsAggregationBuilder("terms", null).field("id")); + + InternalSampler result = search(indexSearcher, query, builder, genreFieldType, idFieldType); + verify.accept(result); + } + + public void testDiversifiedSampler_noDocs() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + MappedFieldType idFieldType = new KeywordFieldMapper.KeywordFieldType(); + idFieldType.setName("id"); + idFieldType.setHasDocValues(true); + + MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType(); + genreFieldType.setName("genre"); + genreFieldType.setHasDocValues(true); + + DiversifiedAggregationBuilder builder = new DiversifiedAggregationBuilder("_name") + .field(genreFieldType.name()) + .subAggregation(new TermsAggregationBuilder("terms", null).field("id")); + + InternalSampler result = search(indexSearcher, new MatchAllDocsQuery(), builder, genreFieldType, idFieldType); + Terms terms = result.getAggregations().get("terms"); + assertEquals(0, terms.getBuckets().size()); + indexReader.close(); + directory.close(); + } +}