Adds tests for cardinality and filter aggregations (#23826)

* Adds tests for cardinality and filter aggregations

Relates to #22278

* addresses review comments
This commit is contained in:
Colin Goodheart-Smithe 2017-04-03 09:39:03 +01:00 committed by GitHub
parent 5cf1d4ae90
commit 058869ed54
5 changed files with 360 additions and 0 deletions

View File

@ -34,6 +34,9 @@ import org.elasticsearch.common.util.IntArray;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
/**
* Hyperloglog++ counter, implemented based on pseudo code from
@ -420,6 +423,32 @@ public final class HyperLogLogPlusPlus implements Releasable {
Releasables.close(runLens, hashSet.sizes);
}
private Set<Object> getComparableData(long bucket) {
Set<Object> values = new HashSet<>();
if (algorithm.get(bucket) == LINEAR_COUNTING) {
try (IntArray hashSetValues = hashSet.values(bucket)) {
for (long i = 0; i < hashSetValues.size(); i++) {
values.add(hashSetValues.get(i));
}
}
} else {
for (long i = 0; i < runLens.size(); i++) {
values.add(runLens.get((bucket << p) + i));
}
}
return values;
}
public int hashCode(long bucket) {
return Objects.hash(p, algorithm.get(bucket), getComparableData(bucket));
}
public boolean equals(long bucket, HyperLogLogPlusPlus other) {
return Objects.equals(p, other.p) &&
Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) &&
Objects.equals(getComparableData(bucket), getComparableData(bucket));
}
/**
* We are actually using HyperLogLog's runLens array but interpreting it as a hash set
* for linear counting.

View File

@ -113,4 +113,18 @@ public final class InternalCardinality extends InternalNumericMetricsAggregation
return builder;
}
@Override
protected int doHashCode() {
return counts.hashCode(0);
}
@Override
protected boolean doEquals(Object obj) {
InternalCardinality other = (InternalCardinality) obj;
return counts.equals(0, other.counts);
}
HyperLogLogPlusPlus getState() {
return counts;
}
}

View File

@ -0,0 +1,104 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
import org.junit.Before;
public class FilterAggregatorTests extends AggregatorTestCase {
private MappedFieldType fieldType;
@Before
public void setUpTest() throws Exception {
super.setUp();
fieldType = new KeywordFieldMapper.KeywordFieldType();
fieldType.setHasDocValues(true);
fieldType.setIndexOptions(IndexOptions.DOCS);
fieldType.setName("field");
}
public void testEmpty() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
indexWriter.close();
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
QueryBuilder filter = QueryBuilders.termQuery("field", randomAsciiOfLength(5));
FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
InternalFilter response = search(indexSearcher, new MatchAllDocsQuery(), builder,
fieldType);
assertEquals(response.getDocCount(), 0);
indexReader.close();
directory.close();
}
public void testRandom() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
int numDocs = randomIntBetween(100, 200);
int maxTerm = randomIntBetween(10, 50);
int[] expectedBucketCount = new int[maxTerm];
Document document = new Document();
for (int i = 0; i < numDocs; i++) {
if (frequently()) {
// make sure we have more than one segment to test the merge
indexWriter.getReader().close();
}
int value = randomInt(maxTerm-1);
expectedBucketCount[value] += 1;
document.add(new Field("field", Integer.toString(value), fieldType));
indexWriter.addDocument(document);
document.clear();
}
indexWriter.close();
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
int value = randomInt(maxTerm - 1);
QueryBuilder filter = QueryBuilders.termQuery("field", Integer.toString(value));
FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
for (boolean doReduce : new boolean[] {true, false}) {
final InternalFilter response;
if (doReduce) {
response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
} else {
response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
}
assertEquals(response.getDocCount(), (long) expectedBucketCount[value]);
}
indexReader.close();
directory.close();
}
}

View File

@ -0,0 +1,131 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.metrics;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.FieldValueQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregator;
import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality;
import org.elasticsearch.search.aggregations.support.ValueType;
import java.io.IOException;
import java.util.Arrays;
import java.util.function.Consumer;
import static java.util.Collections.singleton;
public class CardinalityAggregatorTests extends AggregatorTestCase {
public void testNoDocs() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
// Intentionally not writing any docs
}, card -> {
assertEquals(0.0, card.getValue(), 0);
});
}
public void testNoMatchingField() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 7)));
iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 1)));
}, card -> {
assertEquals(0.0, card.getValue(), 0);
});
}
public void testSomeMatchesSortedNumericDocValues() throws IOException {
testCase(new FieldValueQuery("number"), iw -> {
iw.addDocument(singleton(new SortedNumericDocValuesField("number", 7)));
iw.addDocument(singleton(new SortedNumericDocValuesField("number", 1)));
}, card -> {
assertEquals(2, card.getValue(), 0);
});
}
public void testSomeMatchesNumericDocValues() throws IOException {
testCase(new FieldValueQuery("number"), iw -> {
iw.addDocument(singleton(new NumericDocValuesField("number", 7)));
iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
}, card -> {
assertEquals(2, card.getValue(), 0);
});
}
public void testQueryFiltering() throws IOException {
testCase(IntPoint.newRangeQuery("number", 0, 5), iw -> {
iw.addDocument(Arrays.asList(new IntPoint("number", 7),
new SortedNumericDocValuesField("number", 7)));
iw.addDocument(Arrays.asList(new IntPoint("number", 1),
new SortedNumericDocValuesField("number", 1)));
}, card -> {
assertEquals(1, card.getValue(), 0);
});
}
public void testQueryFiltersAll() throws IOException {
testCase(IntPoint.newRangeQuery("number", -1, 0), iw -> {
iw.addDocument(Arrays.asList(new IntPoint("number", 7),
new SortedNumericDocValuesField("number", 7)));
iw.addDocument(Arrays.asList(new IntPoint("number", 1),
new SortedNumericDocValuesField("number", 1)));
}, card -> {
assertEquals(0.0, card.getValue(), 0);
});
}
private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
Consumer<InternalCardinality> verify) throws IOException {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
buildIndex.accept(indexWriter);
indexWriter.close();
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder(
"_name", ValueType.NUMERIC).field("number");
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(
NumberFieldMapper.NumberType.LONG);
fieldType.setName("number");
try (CardinalityAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher,
fieldType)) {
aggregator.preCollection();
indexSearcher.search(query, aggregator);
aggregator.postCollection();
verify.accept((InternalCardinality) aggregator.buildAggregation(0L));
}
indexReader.close();
directory.close();
}
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.metrics.cardinality;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.junit.After;
import org.junit.Before;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class InternalCardinalityTests extends InternalAggregationTestCase<InternalCardinality> {
private static List<HyperLogLogPlusPlus> algos;
private static int p;
@Before
public void setup() {
algos = new ArrayList<>();
p = randomIntBetween(HyperLogLogPlusPlus.MIN_PRECISION, HyperLogLogPlusPlus.MAX_PRECISION);
}
@Override
protected InternalCardinality createTestInstance(String name,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
HyperLogLogPlusPlus hllpp = new HyperLogLogPlusPlus(p,
new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()), 1);
algos.add(hllpp);
for (int i = 0; i < 100; i++) {
hllpp.collect(0, randomIntBetween(1, 100));
}
return new InternalCardinality(name, hllpp, pipelineAggregators, metaData);
}
@Override
protected Reader<InternalCardinality> instanceReader() {
return InternalCardinality::new;
}
@Override
protected void assertReduced(InternalCardinality reduced, List<InternalCardinality> inputs) {
HyperLogLogPlusPlus[] algos = inputs.stream().map(InternalCardinality::getState)
.toArray(size -> new HyperLogLogPlusPlus[size]);
if (algos.length > 0) {
HyperLogLogPlusPlus result = algos[0];
for (int i = 1; i < algos.length; i++) {
result.merge(0, algos[i], 0);
}
assertEquals(result.cardinality(0), reduced.value(), 0);
}
}
@After
public void cleanup() {
Releasables.close(algos);
algos.clear();
algos = null;
}
}