Tests: Add unit test for SignificantLongTerms and SignificantStringTerms (#23428)
Relates to #22278
This commit is contained in:
parent
1228084c1c
commit
5a668c4add
|
@ -28,6 +28,7 @@ import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
@ -99,4 +100,21 @@ public abstract class InternalMappedSignificantTerms<
|
|||
protected SignificanceHeuristic getSignificanceHeuristic() {
|
||||
return significanceHeuristic;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doEquals(Object obj) {
|
||||
InternalMappedSignificantTerms<?, ?> that = (InternalMappedSignificantTerms<?, ?>) obj;
|
||||
return super.doEquals(obj)
|
||||
&& Objects.equals(format, that.format)
|
||||
&& subsetSize == that.subsetSize
|
||||
&& supersetSize == that.supersetSize
|
||||
&& Objects.equals(significanceHeuristic, that.significanceHeuristic)
|
||||
&& Objects.equals(buckets, that.buckets)
|
||||
&& Objects.equals(bucketMap, that.bucketMap);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int doHashCode() {
|
||||
return Objects.hash(super.doHashCode(), format, subsetSize, supersetSize, significanceHeuristic, buckets, bucketMap);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ import java.util.HashMap;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import static java.util.Collections.unmodifiableList;
|
||||
|
||||
|
@ -127,6 +128,27 @@ public abstract class InternalSignificantTerms<A extends InternalSignificantTerm
|
|||
public double getSignificanceScore() {
|
||||
return score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Bucket<?> that = (Bucket<?>) o;
|
||||
return bucketOrd == that.bucketOrd &&
|
||||
Double.compare(that.score, score) == 0 &&
|
||||
Objects.equals(aggregations, that.aggregations) &&
|
||||
Objects.equals(format, that.format);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(getClass(), bucketOrd, aggregations, score, format);
|
||||
}
|
||||
}
|
||||
|
||||
protected final int requiredSize;
|
||||
|
@ -226,4 +248,16 @@ public abstract class InternalSignificantTerms<A extends InternalSignificantTerm
|
|||
protected abstract long getSupersetSize();
|
||||
|
||||
protected abstract SignificanceHeuristic getSignificanceHeuristic();
|
||||
|
||||
@Override
|
||||
protected int doHashCode() {
|
||||
return Objects.hash(minDocCount, requiredSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doEquals(Object obj) {
|
||||
InternalSignificantTerms<?, ?> that = (InternalSignificantTerms<?, ?>) obj;
|
||||
return Objects.equals(minDocCount, that.minDocCount)
|
||||
&& Objects.equals(requiredSize, that.requiredSize);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Result of running the significant terms aggregation on a numeric field.
|
||||
|
@ -109,6 +110,16 @@ public class SignificantLongTerms extends InternalMappedSignificantTerms<Signifi
|
|||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return super.equals(obj) && Objects.equals(term, ((Bucket) obj).term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(super.hashCode(), term);
|
||||
}
|
||||
}
|
||||
|
||||
public SignificantLongTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Result of running the significant terms aggregation on a String field.
|
||||
|
@ -111,6 +112,16 @@ public class SignificantStringTerms extends InternalMappedSignificantTerms<Signi
|
|||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return super.equals(obj) && Objects.equals(termBytes, ((SignificantStringTerms.Bucket) obj).termBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(super.hashCode(), termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
public SignificantStringTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.aggregations.bucket.significant;
|
||||
|
||||
import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
|
||||
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public abstract class InternalSignificantTermsTestCase extends InternalAggregationTestCase<InternalSignificantTerms<?, ?>> {
|
||||
|
||||
@Override
|
||||
protected InternalSignificantTerms createUnmappedInstance(String name,
|
||||
List<PipelineAggregator> pipelineAggregators,
|
||||
Map<String, Object> metaData) {
|
||||
InternalSignificantTerms<?, ?> testInstance = createTestInstance(name, pipelineAggregators, metaData);
|
||||
return new UnmappedSignificantTerms(name, testInstance.requiredSize, testInstance.minDocCount, pipelineAggregators, metaData);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertReduced(InternalSignificantTerms<?, ?> reduced, List<InternalSignificantTerms<?, ?>> inputs) {
|
||||
assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSubsetSize).sum(), reduced.getSubsetSize());
|
||||
assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSupersetSize).sum(), reduced.getSupersetSize());
|
||||
|
||||
List<Function<SignificantTerms.Bucket, Long>> counts = Arrays.asList(
|
||||
SignificantTerms.Bucket::getSubsetDf,
|
||||
SignificantTerms.Bucket::getSupersetDf,
|
||||
SignificantTerms.Bucket::getDocCount
|
||||
);
|
||||
|
||||
for (Function<SignificantTerms.Bucket, Long> count : counts) {
|
||||
Map<Object, Long> reducedCounts = toCounts(reduced.getBuckets().stream(), count);
|
||||
Map<Object, Long> totalCounts = toCounts(inputs.stream().map(SignificantTerms::getBuckets).flatMap(List::stream), count);
|
||||
|
||||
Map<Object, Long> expectedReducedCounts = new HashMap<>(totalCounts);
|
||||
expectedReducedCounts.keySet().retainAll(reducedCounts.keySet());
|
||||
assertEquals(expectedReducedCounts, reducedCounts);
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
|
||||
Function<SignificantTerms.Bucket, Long> fn) {
|
||||
return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.aggregations.bucket.significant;
|
||||
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
|
||||
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
||||
import org.junit.Before;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
|
||||
|
||||
public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
|
||||
|
||||
private SignificanceHeuristic significanceHeuristic;
|
||||
|
||||
@Before
|
||||
public void setUpSignificanceHeuristic() {
|
||||
significanceHeuristic = randomSignificanceHeuristic();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected InternalSignificantTerms createTestInstance(String name,
|
||||
List<PipelineAggregator> pipelineAggregators,
|
||||
Map<String, Object> metaData) {
|
||||
DocValueFormat format = DocValueFormat.RAW;
|
||||
int requiredSize = randomIntBetween(1, 5);
|
||||
int shardSize = requiredSize + 2;
|
||||
final int numBuckets = randomInt(shardSize);
|
||||
|
||||
long globalSubsetSize = 0;
|
||||
long globalSupersetSize = 0;
|
||||
|
||||
List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
|
||||
Set<Long> terms = new HashSet<>();
|
||||
for (int i = 0; i < numBuckets; ++i) {
|
||||
long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
|
||||
|
||||
int subsetDf = randomIntBetween(1, 10);
|
||||
int supersetDf = randomIntBetween(subsetDf, 20);
|
||||
int supersetSize = randomIntBetween(supersetDf, 30);
|
||||
|
||||
globalSubsetSize += subsetDf;
|
||||
globalSupersetSize += supersetSize;
|
||||
|
||||
buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, EMPTY, format));
|
||||
}
|
||||
return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
|
||||
globalSupersetSize, significanceHeuristic, buckets);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
|
||||
return SignificantLongTerms::new;
|
||||
}
|
||||
|
||||
private static SignificanceHeuristic randomSignificanceHeuristic() {
|
||||
return randomFrom(
|
||||
new JLHScore(),
|
||||
new MutualInformation(randomBoolean(), randomBoolean()),
|
||||
new GND(randomBoolean()),
|
||||
new ChiSquare(randomBoolean(), randomBoolean()));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.aggregations.bucket.significant;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
|
||||
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
||||
import org.junit.Before;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
|
||||
|
||||
public class SignificantStringTermsTests extends InternalSignificantTermsTestCase {
|
||||
|
||||
private SignificanceHeuristic significanceHeuristic;
|
||||
|
||||
@Before
|
||||
public void setUpSignificanceHeuristic() {
|
||||
significanceHeuristic = randomSignificanceHeuristic();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected InternalSignificantTerms createTestInstance(String name,
|
||||
List<PipelineAggregator> pipelineAggregators,
|
||||
Map<String, Object> metaData) {
|
||||
DocValueFormat format = DocValueFormat.RAW;
|
||||
int requiredSize = randomIntBetween(1, 5);
|
||||
int shardSize = requiredSize + 2;
|
||||
final int numBuckets = randomInt(shardSize);
|
||||
|
||||
long globalSubsetSize = 0;
|
||||
long globalSupersetSize = 0;
|
||||
|
||||
List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets);
|
||||
Set<BytesRef> terms = new HashSet<>();
|
||||
for (int i = 0; i < numBuckets; ++i) {
|
||||
BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAsciiOfLength(10)));
|
||||
|
||||
int subsetDf = randomIntBetween(1, 10);
|
||||
int supersetDf = randomIntBetween(subsetDf, 20);
|
||||
int supersetSize = randomIntBetween(supersetDf, 30);
|
||||
|
||||
globalSubsetSize += subsetDf;
|
||||
globalSupersetSize += supersetSize;
|
||||
|
||||
buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, EMPTY, format));
|
||||
}
|
||||
return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
|
||||
globalSupersetSize, significanceHeuristic, buckets);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
|
||||
return SignificantStringTerms::new;
|
||||
}
|
||||
|
||||
private static SignificanceHeuristic randomSignificanceHeuristic() {
|
||||
return randomFrom(
|
||||
new JLHScore(),
|
||||
new MutualInformation(randomBoolean(), randomBoolean()),
|
||||
new GND(randomBoolean()),
|
||||
new ChiSquare(randomBoolean(), randomBoolean()));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue