Make some agg tests easier to read (#54954) (#55079)

We added a fancy method to provide random realistic test data to the
reduction tests in #54910. This uses that to remove some of the more
esoteric machinations in the agg tests. This will marginally increase
the coverage of the serialiation tests and, more importantly, remove
some mysterious value generation code that only really made sense for
random reduction tests but was used all over the place. It doesn't, on
the other hand, make the tests shorter. Just *hopefully* more clear.

I only cleaned up a few tests this way. If we like this it'd probably be
worth grabbing others.
This commit is contained in:
Nik Everett 2020-04-10 14:15:30 -04:00 committed by GitHub
parent b99a50bcb9
commit c00811f3a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 137 additions and 93 deletions

View File

@ -35,32 +35,33 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
import static org.hamcrest.Matchers.equalTo;
public abstract class AbstractPercentilesTestCase<T extends InternalAggregation & Iterable<Percentile>>
extends InternalAggregationTestCase<T> {
private double[] percents;
private boolean keyed;
private DocValueFormat docValueFormat;
@Override
public void setUp() throws Exception {
super.setUp();
percents = randomPercents(false);
keyed = randomBoolean();
docValueFormat = randomNumericDocValueFormat();
protected T createTestInstance(String name, Map<String, Object> metadata) {
return createTestInstance(name, metadata, randomBoolean(), randomNumericDocValueFormat(), randomPercents(false));
}
@Override
protected T createTestInstance(String name, Map<String, Object> metadata) {
protected List<T> randomResultsToReduce(String name, int size) {
boolean keyed = randomBoolean();
DocValueFormat format = randomNumericDocValueFormat();
double[] percents = randomPercents(false);
return Stream.generate(() -> createTestInstance(name, null, keyed, format, percents)).limit(size).collect(toList());
}
private T createTestInstance(String name, Map<String, Object> metadata, boolean keyed, DocValueFormat format, double[] percents) {
int numValues = frequently() ? randomInt(100) : 0;
double[] values = new double[numValues];
for (int i = 0; i < numValues; ++i) {
values[i] = randomDouble();
}
return createTestInstance(name, metadata, keyed, docValueFormat, percents, values);
return createTestInstance(name, metadata, keyed, format, percents, values);
}
protected abstract T createTestInstance(String name, Map<String, Object> metadata,

View File

@ -56,37 +56,76 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Stream;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.toList;
public class InternalTopHitsTests extends InternalAggregationTestCase<InternalTopHits> {
/**
* Should the test instances look like they are sorted by some fields (true) or sorted by score (false). Set here because these need
* to be the same across the entirety of {@link #testReduceRandom()}.
*/
private boolean testInstancesLookSortedByField;
/**
* Fields shared by all instances created by {@link #createTestInstance(String, Map)}.
*/
private SortField[] testInstancesSortFields;
/**
* Collects all generated scores and fields to ensure that all scores are unique. That is necessary for deterministic results
*/
private Set<Float> usedScores = new HashSet<>();
private Set<Object> usedFields = new HashSet<>();
@Override
public void setUp() throws Exception {
super.setUp();
testInstancesLookSortedByField = randomBoolean();
testInstancesSortFields = testInstancesLookSortedByField ? randomSortFields() : new SortField[0];
protected InternalTopHits createTestInstance(String name, Map<String, Object> metadata) {
if (randomBoolean()) {
return createTestInstanceSortedByFields(name, metadata, ESTestCase::randomFloat,
randomSortFields(), InternalTopHitsTests::randomOfType);
}
return createTestInstanceSortedScore(name, metadata, ESTestCase::randomFloat);
}
@Override
protected InternalTopHits createTestInstance(String name, Map<String, Object> metadata) {
protected List<InternalTopHits> randomResultsToReduce(String name, int size) {
/*
* Make sure all scores are unique so we can get
* deterministic test results.
*/
Set<Float> usedScores = new HashSet<>();
Supplier<Float> scoreSupplier = () -> {
float score = randomValueOtherThanMany(usedScores::contains, ESTestCase::randomFloat);
usedScores.add(score);
return score;
};
Supplier<InternalTopHits> supplier;
if (randomBoolean()) {
SortField[] sortFields = randomSortFields();
Set<Object> usedSortFieldValues = new HashSet<>();
Function<SortField.Type, Object> sortFieldValueSuppier = t -> {
Object value = randomValueOtherThanMany(usedSortFieldValues::contains, () -> randomOfType(t));
usedSortFieldValues.add(value);
return value;
};
supplier = () -> createTestInstanceSortedByFields(name, null, scoreSupplier, sortFields, sortFieldValueSuppier);
} else {
supplier = () -> createTestInstanceSortedScore(name, null, scoreSupplier);
}
return Stream.generate(supplier).limit(size).collect(toList());
}
private InternalTopHits createTestInstanceSortedByFields(String name, Map<String, Object> metadata,
Supplier<Float> scoreSupplier, SortField[] sortFields, Function<SortField.Type, Object> sortFieldValueSupplier) {
return createTestInstance(name, metadata, scoreSupplier,
(docId, score) -> {
Object[] fields = new Object[sortFields.length];
for (int f = 0; f < sortFields.length; f++) {
final int ff = f;
fields[f] = sortFieldValueSupplier.apply(sortFields[ff].getType());
}
return new FieldDoc(docId, score, fields);
},
(totalHits, scoreDocs) -> new TopFieldDocs(totalHits, scoreDocs, sortFields),
sortFieldsComparator(sortFields));
}
private InternalTopHits createTestInstanceSortedScore(String name, Map<String, Object> metadata, Supplier<Float> scoreSupplier) {
return createTestInstance(name, metadata, scoreSupplier, ScoreDoc::new, TopDocs::new, scoreComparator());
}
private InternalTopHits createTestInstance(String name, Map<String, Object> metadata, Supplier<Float> scoreSupplier,
BiFunction<Integer, Float, ScoreDoc> docBuilder,
BiFunction<TotalHits, ScoreDoc[], TopDocs> topDocsBuilder, Comparator<ScoreDoc> comparator) {
int from = 0;
int requestedSize = between(1, 40);
int actualSize = between(0, requestedSize);
@ -96,37 +135,21 @@ public class InternalTopHitsTests extends InternalAggregationTestCase<InternalTo
SearchHit[] hits = new SearchHit[actualSize];
Set<Integer> usedDocIds = new HashSet<>();
for (int i = 0; i < actualSize; i++) {
float score = randomValueOtherThanMany(usedScores::contains, ESTestCase::randomFloat);
usedScores.add(score);
float score = scoreSupplier.get();
maxScore = max(maxScore, score);
int docId = randomValueOtherThanMany(usedDocIds::contains, () -> between(0, IndexWriter.MAX_DOCS));
usedDocIds.add(docId);
Map<String, DocumentField> searchHitFields = new HashMap<>();
if (testInstancesLookSortedByField) {
Object[] fields = new Object[testInstancesSortFields.length];
for (int f = 0; f < testInstancesSortFields.length; f++) {
final int ff = f;
fields[f] = randomValueOtherThanMany(usedFields::contains, () -> randomOfType(testInstancesSortFields[ff].getType()));
usedFields.add(fields[f]);
}
scoreDocs[i] = new FieldDoc(docId, score, fields);
} else {
scoreDocs[i] = new ScoreDoc(docId, score);
}
hits[i] = new SearchHit(docId, Integer.toString(i), new Text("test"), searchHitFields, Collections.emptyMap());
scoreDocs[i] = docBuilder.apply(docId, score);
hits[i] = new SearchHit(docId, Integer.toString(i), new Text("_docs"), searchHitFields, Collections.emptyMap());
hits[i].score(score);
}
int totalHits = between(actualSize, 500000);
sort(hits, scoreDocs, scoreDocComparator());
sort(hits, scoreDocs, comparator);
SearchHits searchHits = new SearchHits(hits, new TotalHits(totalHits, TotalHits.Relation.EQUAL_TO), maxScore);
TopDocs topDocs;
if (testInstancesLookSortedByField) {
topDocs = new TopFieldDocs(new TotalHits(totalHits, TotalHits.Relation.EQUAL_TO), scoreDocs, testInstancesSortFields);
} else {
topDocs = new TopDocs(new TotalHits(totalHits, TotalHits.Relation.EQUAL_TO), scoreDocs);
}
TopDocs topDocs = topDocsBuilder.apply(new TotalHits(totalHits, TotalHits.Relation.EQUAL_TO), scoreDocs);
// Lucene's TopDocs initializes the maxScore to Float.NaN, if there is no maxScore
TopDocsAndMaxScore topDocsAndMaxScore = new TopDocsAndMaxScore(topDocs, maxScore == Float.NEGATIVE_INFINITY ? Float.NaN : maxScore);
@ -178,7 +201,7 @@ public class InternalTopHitsTests extends InternalAggregationTestCase<InternalTo
}
}
private Object randomOfType(SortField.Type type) {
private static Object randomOfType(SortField.Type type) {
switch (type) {
case CUSTOM:
throw new UnsupportedOperationException();
@ -207,6 +230,14 @@ public class InternalTopHitsTests extends InternalAggregationTestCase<InternalTo
@Override
protected void assertReduced(InternalTopHits reduced, List<InternalTopHits> inputs) {
boolean sortedByFields = inputs.get(0).getTopDocs().topDocs instanceof TopFieldDocs;
Comparator<ScoreDoc> dataNodeComparator;
if (sortedByFields) {
dataNodeComparator = sortFieldsComparator(((TopFieldDocs) inputs.get(0).getTopDocs().topDocs).fields);
} else {
dataNodeComparator = scoreComparator();
}
Comparator<ScoreDoc> reducedComparator = dataNodeComparator.thenComparing(s -> s.shardIndex);
SearchHits actualHits = reduced.getHits();
List<Tuple<ScoreDoc, SearchHit>> allHits = new ArrayList<>();
float maxScore = Float.NEGATIVE_INFINITY;
@ -221,7 +252,7 @@ public class InternalTopHitsTests extends InternalAggregationTestCase<InternalTo
maxScore = max(maxScore, internalHits.getMaxScore());
for (int i = 0; i < internalHits.getHits().length; i++) {
ScoreDoc doc = inputs.get(input).getTopDocs().topDocs.scoreDocs[i];
if (testInstancesLookSortedByField) {
if (sortedByFields) {
doc = new FieldDoc(doc.doc, doc.score, ((FieldDoc) doc).fields, input);
} else {
doc = new ScoreDoc(doc.doc, doc.score, input);
@ -229,7 +260,7 @@ public class InternalTopHitsTests extends InternalAggregationTestCase<InternalTo
allHits.add(new Tuple<>(doc, internalHits.getHits()[i]));
}
}
allHits.sort(comparing(Tuple::v1, scoreDocComparator()));
allHits.sort(comparing(Tuple::v1, reducedComparator));
SearchHit[] expectedHitsHits = new SearchHit[min(inputs.get(0).getSize(), allHits.size())];
for (int i = 0; i < expectedHitsHits.length; i++) {
expectedHitsHits[i] = allHits.get(i).v2();
@ -291,36 +322,32 @@ public class InternalTopHitsTests extends InternalAggregationTestCase<InternalTo
return sortFields;
}
private Comparator<ScoreDoc> scoreDocComparator() {
return innerScoreDocComparator().thenComparing(s -> s.shardIndex);
private Comparator<ScoreDoc> sortFieldsComparator(SortField[] sortFields) {
@SuppressWarnings("rawtypes")
FieldComparator[] comparators = new FieldComparator[sortFields.length];
for (int i = 0; i < sortFields.length; i++) {
// Values passed to getComparator shouldn't matter
comparators[i] = sortFields[i].getComparator(0, 0);
}
return (lhs, rhs) -> {
FieldDoc l = (FieldDoc) lhs;
FieldDoc r = (FieldDoc) rhs;
int i = 0;
while (i < l.fields.length) {
@SuppressWarnings("unchecked")
int c = comparators[i].compareValues(l.fields[i], r.fields[i]);
if (c != 0) {
return c;
}
i++;
}
return 0;
};
}
private Comparator<ScoreDoc> innerScoreDocComparator() {
if (testInstancesLookSortedByField) {
// Values passed to getComparator shouldn't matter
@SuppressWarnings("rawtypes")
FieldComparator[] comparators = new FieldComparator[testInstancesSortFields.length];
for (int i = 0; i < testInstancesSortFields.length; i++) {
comparators[i] = testInstancesSortFields[i].getComparator(0, 0);
}
return (lhs, rhs) -> {
FieldDoc l = (FieldDoc) lhs;
FieldDoc r = (FieldDoc) rhs;
int i = 0;
while (i < l.fields.length) {
@SuppressWarnings("unchecked")
int c = comparators[i].compareValues(l.fields[i], r.fields[i]);
if (c != 0) {
return c;
}
i++;
}
return 0;
};
} else {
Comparator<ScoreDoc> comparator = comparing(d -> d.score);
return comparator.reversed();
}
private Comparator<ScoreDoc> scoreComparator() {
Comparator<ScoreDoc> comparator = comparing(d -> d.score);
return comparator.reversed();
}
@Override

View File

@ -163,6 +163,7 @@ import static org.elasticsearch.search.aggregations.InternalMultiBucketAggregati
import static org.elasticsearch.test.XContentTestUtils.insertRandomFields;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.lessThanOrEqualTo;
public abstract class InternalAggregationTestCase<T extends InternalAggregation> extends AbstractWireSerializingTestCase<T> {
@ -298,7 +299,9 @@ public abstract class InternalAggregationTestCase<T extends InternalAggregation>
public void testReduceRandom() throws IOException {
String name = randomAlphaOfLength(5);
List<T> inputs = randomResultsToReduce(name, between(1, 200));
int size = between(1, 200);
List<T> inputs = randomResultsToReduce(name, size);
assertThat(inputs, hasSize(size));
List<InternalAggregation> toReduce = new ArrayList<>();
toReduce.addAll(inputs);
// Sort aggs so that unmapped come last. This mimicks the behavior of InternalAggregations.reduce()

View File

@ -22,8 +22,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Stream;
import static java.util.Collections.emptyMap;
import static java.util.stream.Collectors.toList;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.nullValue;
@ -38,21 +40,32 @@ public class InternalStringStatsTests extends InternalAggregationTestCase<Intern
@Override
protected InternalStringStats createTestInstance(String name, Map<String, Object> metadata) {
if (randomBoolean()) {
return new InternalStringStats(name, 0, 0, 0, 0, emptyMap(), randomBoolean(), DocValueFormat.RAW, metadata);
}
return createTestInstance(name, metadata, Long.MAX_VALUE, Long.MAX_VALUE);
}
@Override
protected List<InternalStringStats> randomResultsToReduce(String name, int size) {
/*
* Pick random count and length that are *much* less than
* Pick random count and length that are less than
* Long.MAX_VALUE because reduction adds them together and sometimes
* serializes them and that serialization would fail if the sum has
* wrapped to a negative number.
*/
long count = randomLongBetween(1, Integer.MAX_VALUE);
long totalLength = randomLongBetween(0, count * 10);
return Stream.generate(() -> createTestInstance(name, null, Long.MAX_VALUE / size, Long.MAX_VALUE / size))
.limit(size)
.collect(toList());
}
private InternalStringStats createTestInstance(String name, Map<String, Object> metadata, long maxCount, long maxTotalLength) {
if (randomBoolean()) {
return new InternalStringStats(name, 0, 0, 0, 0, emptyMap(), randomBoolean(), DocValueFormat.RAW, metadata);
}
long count = randomLongBetween(1, maxCount);
long totalLength = randomLongBetween(0, maxTotalLength);
return new InternalStringStats(name, count, totalLength,
between(0, Integer.MAX_VALUE), between(0, Integer.MAX_VALUE), randomCharOccurrences(),
randomBoolean(), DocValueFormat.RAW, metadata);
};
}
@Override
protected InternalStringStats mutateInstance(InternalStringStats instance) throws IOException {