Adds tests for cardinality and filter aggregations (#23826)
* Adds tests for cardinality and filter aggregations. Relates to #22278. * Addresses review comments.
This commit is contained in:
parent
5cf1d4ae90
commit
058869ed54
|
@ -34,6 +34,9 @@ import org.elasticsearch.common.util.IntArray;
|
|||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Hyperloglog++ counter, implemented based on pseudo code from
|
||||
|
@ -420,6 +423,32 @@ public final class HyperLogLogPlusPlus implements Releasable {
|
|||
Releasables.close(runLens, hashSet.sizes);
|
||||
}
|
||||
|
||||
private Set<Object> getComparableData(long bucket) {
|
||||
Set<Object> values = new HashSet<>();
|
||||
if (algorithm.get(bucket) == LINEAR_COUNTING) {
|
||||
try (IntArray hashSetValues = hashSet.values(bucket)) {
|
||||
for (long i = 0; i < hashSetValues.size(); i++) {
|
||||
values.add(hashSetValues.get(i));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (long i = 0; i < runLens.size(); i++) {
|
||||
values.add(runLens.get((bucket << p) + i));
|
||||
}
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
/**
 * Hash of the state stored for {@code bucket}: the precision {@code p}, the
 * algorithm flag (linear counting vs. HLL) and the comparable per-bucket data.
 * Consistent with {@code equals(long, HyperLogLogPlusPlus)}.
 */
public int hashCode(long bucket) {
    return Objects.hash(p, algorithm.get(bucket), getComparableData(bucket));
}
|
||||
|
||||
public boolean equals(long bucket, HyperLogLogPlusPlus other) {
|
||||
return Objects.equals(p, other.p) &&
|
||||
Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) &&
|
||||
Objects.equals(getComparableData(bucket), getComparableData(bucket));
|
||||
}
|
||||
|
||||
/**
|
||||
* We are actually using HyperLogLog's runLens array but interpreting it as a hash set
|
||||
* for linear counting.
|
||||
|
|
|
@ -113,4 +113,18 @@ public final class InternalCardinality extends InternalNumericMetricsAggregation
|
|||
return builder;
|
||||
}
|
||||
|
||||
@Override
protected int doHashCode() {
    // Delegate to the HLL++ sketch's hash for the single bucket (0) this
    // aggregation uses; consistent with doEquals below.
    return counts.hashCode(0);
}
|
||||
|
||||
@Override
|
||||
protected boolean doEquals(Object obj) {
|
||||
InternalCardinality other = (InternalCardinality) obj;
|
||||
return counts.equals(0, other.counts);
|
||||
}
|
||||
|
||||
/**
 * Returns the backing HyperLogLog++ sketch. Package-private so tests
 * (e.g. InternalCardinalityTests) can inspect and merge the raw state.
 */
HyperLogLogPlusPlus getState() {
    return counts;
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.query.QueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
|
||||
import org.junit.Before;
|
||||
|
||||
public class FilterAggregatorTests extends AggregatorTestCase {
|
||||
private MappedFieldType fieldType;
|
||||
|
||||
@Before
|
||||
public void setUpTest() throws Exception {
|
||||
super.setUp();
|
||||
fieldType = new KeywordFieldMapper.KeywordFieldType();
|
||||
fieldType.setHasDocValues(true);
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS);
|
||||
fieldType.setName("field");
|
||||
}
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
||||
indexWriter.close();
|
||||
IndexReader indexReader = DirectoryReader.open(directory);
|
||||
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
|
||||
QueryBuilder filter = QueryBuilders.termQuery("field", randomAsciiOfLength(5));
|
||||
FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
|
||||
InternalFilter response = search(indexSearcher, new MatchAllDocsQuery(), builder,
|
||||
fieldType);
|
||||
assertEquals(response.getDocCount(), 0);
|
||||
indexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
||||
int numDocs = randomIntBetween(100, 200);
|
||||
int maxTerm = randomIntBetween(10, 50);
|
||||
int[] expectedBucketCount = new int[maxTerm];
|
||||
Document document = new Document();
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
if (frequently()) {
|
||||
// make sure we have more than one segment to test the merge
|
||||
indexWriter.getReader().close();
|
||||
}
|
||||
int value = randomInt(maxTerm-1);
|
||||
expectedBucketCount[value] += 1;
|
||||
document.add(new Field("field", Integer.toString(value), fieldType));
|
||||
indexWriter.addDocument(document);
|
||||
document.clear();
|
||||
}
|
||||
indexWriter.close();
|
||||
|
||||
IndexReader indexReader = DirectoryReader.open(directory);
|
||||
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
|
||||
int value = randomInt(maxTerm - 1);
|
||||
QueryBuilder filter = QueryBuilders.termQuery("field", Integer.toString(value));
|
||||
FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
|
||||
|
||||
for (boolean doReduce : new boolean[] {true, false}) {
|
||||
final InternalFilter response;
|
||||
if (doReduce) {
|
||||
response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
|
||||
} else {
|
||||
response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
|
||||
}
|
||||
assertEquals(response.getDocCount(), (long) expectedBucketCount[value]);
|
||||
}
|
||||
indexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.aggregations.metrics;
|
||||
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.FieldValueQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.elasticsearch.common.CheckedConsumer;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.NumberFieldMapper;
|
||||
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregator;
|
||||
import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality;
|
||||
import org.elasticsearch.search.aggregations.support.ValueType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static java.util.Collections.singleton;
|
||||
|
||||
public class CardinalityAggregatorTests extends AggregatorTestCase {
|
||||
public void testNoDocs() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
// Intentionally not writing any docs
|
||||
}, card -> {
|
||||
assertEquals(0.0, card.getValue(), 0);
|
||||
});
|
||||
}
|
||||
|
||||
public void testNoMatchingField() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 7)));
|
||||
iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 1)));
|
||||
}, card -> {
|
||||
assertEquals(0.0, card.getValue(), 0);
|
||||
});
|
||||
}
|
||||
|
||||
public void testSomeMatchesSortedNumericDocValues() throws IOException {
|
||||
testCase(new FieldValueQuery("number"), iw -> {
|
||||
iw.addDocument(singleton(new SortedNumericDocValuesField("number", 7)));
|
||||
iw.addDocument(singleton(new SortedNumericDocValuesField("number", 1)));
|
||||
}, card -> {
|
||||
assertEquals(2, card.getValue(), 0);
|
||||
});
|
||||
}
|
||||
|
||||
public void testSomeMatchesNumericDocValues() throws IOException {
|
||||
testCase(new FieldValueQuery("number"), iw -> {
|
||||
iw.addDocument(singleton(new NumericDocValuesField("number", 7)));
|
||||
iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
|
||||
}, card -> {
|
||||
assertEquals(2, card.getValue(), 0);
|
||||
});
|
||||
}
|
||||
|
||||
public void testQueryFiltering() throws IOException {
|
||||
testCase(IntPoint.newRangeQuery("number", 0, 5), iw -> {
|
||||
iw.addDocument(Arrays.asList(new IntPoint("number", 7),
|
||||
new SortedNumericDocValuesField("number", 7)));
|
||||
iw.addDocument(Arrays.asList(new IntPoint("number", 1),
|
||||
new SortedNumericDocValuesField("number", 1)));
|
||||
}, card -> {
|
||||
assertEquals(1, card.getValue(), 0);
|
||||
});
|
||||
}
|
||||
|
||||
public void testQueryFiltersAll() throws IOException {
|
||||
testCase(IntPoint.newRangeQuery("number", -1, 0), iw -> {
|
||||
iw.addDocument(Arrays.asList(new IntPoint("number", 7),
|
||||
new SortedNumericDocValuesField("number", 7)));
|
||||
iw.addDocument(Arrays.asList(new IntPoint("number", 1),
|
||||
new SortedNumericDocValuesField("number", 1)));
|
||||
}, card -> {
|
||||
assertEquals(0.0, card.getValue(), 0);
|
||||
});
|
||||
}
|
||||
|
||||
private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
|
||||
Consumer<InternalCardinality> verify) throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
||||
buildIndex.accept(indexWriter);
|
||||
indexWriter.close();
|
||||
|
||||
IndexReader indexReader = DirectoryReader.open(directory);
|
||||
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
|
||||
|
||||
CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder(
|
||||
"_name", ValueType.NUMERIC).field("number");
|
||||
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(
|
||||
NumberFieldMapper.NumberType.LONG);
|
||||
fieldType.setName("number");
|
||||
try (CardinalityAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher,
|
||||
fieldType)) {
|
||||
aggregator.preCollection();
|
||||
indexSearcher.search(query, aggregator);
|
||||
aggregator.postCollection();
|
||||
verify.accept((InternalCardinality) aggregator.buildAggregation(0L));
|
||||
}
|
||||
indexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.aggregations.metrics.cardinality;
|
||||
|
||||
import org.elasticsearch.common.io.stream.Writeable.Reader;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.util.MockBigArrays;
|
||||
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
|
||||
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class InternalCardinalityTests extends InternalAggregationTestCase<InternalCardinality> {
|
||||
private static List<HyperLogLogPlusPlus> algos;
|
||||
private static int p;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
algos = new ArrayList<>();
|
||||
p = randomIntBetween(HyperLogLogPlusPlus.MIN_PRECISION, HyperLogLogPlusPlus.MAX_PRECISION);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected InternalCardinality createTestInstance(String name,
|
||||
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
|
||||
HyperLogLogPlusPlus hllpp = new HyperLogLogPlusPlus(p,
|
||||
new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()), 1);
|
||||
algos.add(hllpp);
|
||||
for (int i = 0; i < 100; i++) {
|
||||
hllpp.collect(0, randomIntBetween(1, 100));
|
||||
}
|
||||
return new InternalCardinality(name, hllpp, pipelineAggregators, metaData);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Reader<InternalCardinality> instanceReader() {
|
||||
return InternalCardinality::new;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertReduced(InternalCardinality reduced, List<InternalCardinality> inputs) {
|
||||
HyperLogLogPlusPlus[] algos = inputs.stream().map(InternalCardinality::getState)
|
||||
.toArray(size -> new HyperLogLogPlusPlus[size]);
|
||||
if (algos.length > 0) {
|
||||
HyperLogLogPlusPlus result = algos[0];
|
||||
for (int i = 1; i < algos.length; i++) {
|
||||
result.merge(0, algos[i], 0);
|
||||
}
|
||||
assertEquals(result.cardinality(0), reduced.value(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
@After
|
||||
public void cleanup() {
|
||||
Releasables.close(algos);
|
||||
algos.clear();
|
||||
algos = null;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue