From 058869ed549deb080522d873e3891425d377cc48 Mon Sep 17 00:00:00 2001
From: Colin Goodheart-Smithe
Date: Mon, 3 Apr 2017 09:39:03 +0100
Subject: [PATCH] Adds tests for cardinality and filter aggregations (#23826)

* Adds tests for cardinality and filter aggregations

Relates to #22278

* addresses review comments
---
 .../cardinality/HyperLogLogPlusPlus.java       |  29 ++++
 .../cardinality/InternalCardinality.java       |  14 ++
 .../bucket/FilterAggregatorTests.java          | 104 ++++++++++++++
 .../metrics/CardinalityAggregatorTests.java    | 131 ++++++++++++++++++
 .../cardinality/InternalCardinalityTests.java  |  82 +++++++++++
 5 files changed, 360 insertions(+)
 create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java
 create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java
 create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java

diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java
index 42b4561e07b..6425cc3b68a 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java
@@ -34,6 +34,9 @@ import org.elasticsearch.common.util.IntArray;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
 
 /**
  * Hyperloglog++ counter, implemented based on pseudo code from
@@ -420,6 +423,32 @@ public final class HyperLogLogPlusPlus implements Releasable {
         Releasables.close(runLens, hashSet.sizes);
     }
 
+    private Set<Object> getComparableData(long bucket) {
+        Set<Object> values = new HashSet<>();
+        if (algorithm.get(bucket) == LINEAR_COUNTING) {
+            try (IntArray hashSetValues = hashSet.values(bucket)) {
+                for (long i = 0; i < hashSetValues.size(); i++) {
+                    values.add(hashSetValues.get(i));
+                }
+            }
+        } else {
+            for (long i = 0; i < runLens.size(); i++) {
+                values.add(runLens.get((bucket << p) + i));
+            }
+        }
+        return values;
+    }
+
+    public int hashCode(long bucket) {
+        return Objects.hash(p, algorithm.get(bucket), getComparableData(bucket));
+    }
+
+    public boolean equals(long bucket, HyperLogLogPlusPlus other) {
+        return Objects.equals(p, other.p) &&
+                Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) &&
+                Objects.equals(getComparableData(bucket), other.getComparableData(bucket));
+    }
+
     /**
      * We are actually using HyperLogLog's runLens array but interpreting it as a hash set
     * for linear counting.
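For reference, a hypothetical test sketch (not part of this patch) of the contract the new bucket-scoped hashCode(long bucket) / equals(long bucket, other) pair is meant to satisfy: two counters created with the same precision and fed the same hashes into a bucket should compare equal for that bucket, and their per-bucket hash codes should agree. The class and test names below are made up; the MockBigArrays construction simply mirrors InternalCardinalityTests further down in this patch.

// Hypothetical sketch, not part of the patch; it would sit next to InternalCardinalityTests
// in the same package so it can reach the HyperLogLogPlusPlus constructor and collect().
package org.elasticsearch.search.aggregations.metrics.cardinality;

import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.test.ESTestCase;

public class HyperLogLogPlusPlusEqualitySketchTests extends ESTestCase {

    public void testCountersWithSameDataAreEqualPerBucket() {
        MockBigArrays bigArrays = new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService());
        int precision = 14; // anything between HyperLogLogPlusPlus.MIN_PRECISION and MAX_PRECISION
        HyperLogLogPlusPlus left = new HyperLogLogPlusPlus(precision, bigArrays, 1);
        HyperLogLogPlusPlus right = new HyperLogLogPlusPlus(precision, bigArrays, 1);
        for (long hash : new long[] {3, 17, 17, 42}) {
            left.collect(0, hash);  // bucket 0 is the only bucket InternalCardinality uses
            right.collect(0, hash);
        }
        assertTrue(left.equals(0, right));                // same algorithm and per-bucket data
        assertEquals(left.hashCode(0), right.hashCode(0));
        Releasables.close(left, right);
    }
}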
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java
index 68e8935616f..028e97a69ff 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java
@@ -113,4 +113,18 @@ public final class InternalCardinality extends InternalNumericMetricsAggregation
         return builder;
     }
 
+    @Override
+    protected int doHashCode() {
+        return counts.hashCode(0);
+    }
+
+    @Override
+    protected boolean doEquals(Object obj) {
+        InternalCardinality other = (InternalCardinality) obj;
+        return counts.equals(0, other.counts);
+    }
+
+    HyperLogLogPlusPlus getState() {
+        return counts;
+    }
 }
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java
new file mode 100644
index 00000000000..491f445fdfa
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.aggregations.bucket;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
+import org.junit.Before;
+
+public class FilterAggregatorTests extends AggregatorTestCase {
+    private MappedFieldType fieldType;
+
+    @Before
+    public void setUpTest() throws Exception {
+        super.setUp();
+        fieldType = new KeywordFieldMapper.KeywordFieldType();
+        fieldType.setHasDocValues(true);
+        fieldType.setIndexOptions(IndexOptions.DOCS);
+        fieldType.setName("field");
+    }
+
+    public void testEmpty() throws Exception {
+        Directory directory = newDirectory();
+        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
+        indexWriter.close();
+        IndexReader indexReader = DirectoryReader.open(directory);
+        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+        QueryBuilder filter = QueryBuilders.termQuery("field", randomAsciiOfLength(5));
+        FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
+        InternalFilter response = search(indexSearcher, new MatchAllDocsQuery(), builder,
+                fieldType);
+        assertEquals(response.getDocCount(), 0);
+        indexReader.close();
+        directory.close();
+    }
+
+    public void testRandom() throws Exception {
+        Directory directory = newDirectory();
+        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
+        int numDocs = randomIntBetween(100, 200);
+        int maxTerm = randomIntBetween(10, 50);
+        int[] expectedBucketCount = new int[maxTerm];
+        Document document = new Document();
+        for (int i = 0; i < numDocs; i++) {
+            if (frequently()) {
+                // make sure we have more than one segment to test the merge
+                indexWriter.getReader().close();
+            }
+            int value = randomInt(maxTerm-1);
+            expectedBucketCount[value] += 1;
+            document.add(new Field("field", Integer.toString(value), fieldType));
+            indexWriter.addDocument(document);
+            document.clear();
+        }
+        indexWriter.close();
+
+        IndexReader indexReader = DirectoryReader.open(directory);
+        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+        int value = randomInt(maxTerm - 1);
+        QueryBuilder filter = QueryBuilders.termQuery("field", Integer.toString(value));
+        FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
+
+        for (boolean doReduce : new boolean[] {true, false}) {
+            final InternalFilter response;
+            if (doReduce) {
+                response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
+            } else {
+                response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
+            }
+            assertEquals(response.getDocCount(), (long) expectedBucketCount[value]);
+        }
+        indexReader.close();
+        directory.close();
+    }
+}
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java
new file mode 100644
index 00000000000..b80dd163fc9
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.metrics;
+
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.FieldValueQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.CheckedConsumer;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.NumberFieldMapper;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregationBuilder;
+import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregator;
+import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality;
+import org.elasticsearch.search.aggregations.support.ValueType;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.function.Consumer;
+
+import static java.util.Collections.singleton;
+
+public class CardinalityAggregatorTests extends AggregatorTestCase {
+    public void testNoDocs() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            // Intentionally not writing any docs
+        }, card -> {
+            assertEquals(0.0, card.getValue(), 0);
+        });
+    }
+
+    public void testNoMatchingField() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 7)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 1)));
+        }, card -> {
+            assertEquals(0.0, card.getValue(), 0);
+        });
+    }
+
+    public void testSomeMatchesSortedNumericDocValues() throws IOException {
+        testCase(new FieldValueQuery("number"), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 7)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(2, card.getValue(), 0);
+        });
+    }
+
+    public void testSomeMatchesNumericDocValues() throws IOException {
+        testCase(new FieldValueQuery("number"), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 7)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(2, card.getValue(), 0);
+        });
+    }
+
+    public void testQueryFiltering() throws IOException {
+        testCase(IntPoint.newRangeQuery("number", 0, 5), iw -> {
+            iw.addDocument(Arrays.asList(new IntPoint("number", 7),
+                    new SortedNumericDocValuesField("number", 7)));
+            iw.addDocument(Arrays.asList(new IntPoint("number", 1),
+                    new SortedNumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(1, card.getValue(), 0);
+        });
+    }
+
+    public void testQueryFiltersAll() throws IOException {
+        testCase(IntPoint.newRangeQuery("number", -1, 0), iw -> {
+            iw.addDocument(Arrays.asList(new IntPoint("number", 7),
+                    new SortedNumericDocValuesField("number", 7)));
+            iw.addDocument(Arrays.asList(new IntPoint("number", 1),
+                    new SortedNumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(0.0, card.getValue(), 0);
+        });
+    }
+
+    private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
+            Consumer<InternalCardinality> verify) throws IOException {
+        Directory directory = newDirectory();
+        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
+        buildIndex.accept(indexWriter);
+        indexWriter.close();
+
+        IndexReader indexReader = DirectoryReader.open(directory);
+        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+
+        CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder(
+                "_name", ValueType.NUMERIC).field("number");
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(
+                NumberFieldMapper.NumberType.LONG);
+        fieldType.setName("number");
+        try (CardinalityAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher,
+                fieldType)) {
+            aggregator.preCollection();
+            indexSearcher.search(query, aggregator);
+            aggregator.postCollection();
+            verify.accept((InternalCardinality) aggregator.buildAggregation(0L));
+        }
+        indexReader.close();
+        directory.close();
+    }
+}
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java
new file mode 100644
index 00000000000..7c5809f323b
--- /dev/null
+++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.metrics.cardinality;
+
+import org.elasticsearch.common.io.stream.Writeable.Reader;
+import org.elasticsearch.common.lease.Releasables;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.MockBigArrays;
+import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
+import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.junit.After;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public class InternalCardinalityTests extends InternalAggregationTestCase<InternalCardinality> {
+    private static List<HyperLogLogPlusPlus> algos;
+    private static int p;
+
+    @Before
+    public void setup() {
+        algos = new ArrayList<>();
+        p = randomIntBetween(HyperLogLogPlusPlus.MIN_PRECISION, HyperLogLogPlusPlus.MAX_PRECISION);
+    }
+
+    @Override
+    protected InternalCardinality createTestInstance(String name,
+            List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
+        HyperLogLogPlusPlus hllpp = new HyperLogLogPlusPlus(p,
+                new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()), 1);
+        algos.add(hllpp);
+        for (int i = 0; i < 100; i++) {
+            hllpp.collect(0, randomIntBetween(1, 100));
+        }
+        return new InternalCardinality(name, hllpp, pipelineAggregators, metaData);
+    }
+
+    @Override
+    protected Reader<InternalCardinality> instanceReader() {
+        return InternalCardinality::new;
+    }
+
+    @Override
+    protected void assertReduced(InternalCardinality reduced, List<InternalCardinality> inputs) {
+        HyperLogLogPlusPlus[] algos = inputs.stream().map(InternalCardinality::getState)
+                .toArray(size -> new HyperLogLogPlusPlus[size]);
+        if (algos.length > 0) {
+            HyperLogLogPlusPlus result = algos[0];
+            for (int i = 1; i < algos.length; i++) {
+                result.merge(0, algos[i], 0);
+            }
+            assertEquals(result.cardinality(0), reduced.value(), 0);
+        }
+    }
+
+    @After
+    public void cleanup() {
+        Releasables.close(algos);
+        algos.clear();
+        algos = null;
+    }
+}