aggregator and yaml tests for missing agg (#53214)

Adds tests for unmapped fields, the missing parameter, scripting, and the
supported ValuesSource types to MissingAggregatorTests. Also adds basic YAML
REST tests for the missing aggregation.

For #42949
This commit is contained in:
Andy Bristol 2020-04-01 15:23:08 -07:00 committed by GitHub
parent 949636944c
commit 62a52465fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 502 additions and 106 deletions

View File

@ -0,0 +1,98 @@
# Creates index `test` with three long fields and bulk-indexes three documents:
# field1 is set on every document, field2 only on the third, field3 on none.
setup:
- do:
indices.create:
index: test
body:
settings:
number_of_replicas: 0
mappings:
properties:
field1:
type: long
field2:
type: long
field3:
type: long
- do:
bulk:
refresh: true
body:
- index:
_index: test
- field1: 100
- index:
_index: test
- field1: 200
- index:
_index: test
- field1: 300
field2: 300
---
# field3 is mapped but never indexed, so all three documents fall into the missing bucket.
"match all":
- do:
search:
rest_total_hits_as_int: true
body:
aggs:
missing_agg:
missing:
field: field3
- match: { hits.total: 3 }
- length: { hits.hits: 3 }
- match: { aggregations.missing_agg.doc_count: 3 }
---
# field2 is only present on one of the three documents, so two documents are counted as missing.
"match some":
- do:
search:
rest_total_hits_as_int: true
body:
aggs:
missing_agg:
missing:
field: field2
- match: { hits.total: 3 }
- length: { hits.hits: 3 }
- match: { aggregations.missing_agg.doc_count: 2 }
---
# field1 is present on every document, so the missing bucket is empty.
"match none":
- do:
search:
rest_total_hits_as_int: true
body:
aggs:
missing_agg:
missing:
field: field1
- match: { hits.total: 3 }
- length: { hits.hits: 3 }
- match: { aggregations.missing_agg.doc_count: 0 }
---
# Same field as "match all" (field3, never indexed), but with the `missing`
# parameter set; the expected missing-bucket doc count drops to 0.
"missing param":
- do:
search:
rest_total_hits_as_int: true
body:
aggs:
missing_agg:
missing:
field: field3
missing: 1
- match: { hits.total: 3 }
- length: { hits.hits: 3 }
- match: { aggregations.missing_agg.doc_count: 0 }

View File

@ -20,174 +20,472 @@
package org.elasticsearch.search.aggregations.bucket.missing;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.lucene.search.Queries;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.index.mapper.RangeType;
import org.elasticsearch.script.MockScriptEngine;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptEngine;
import org.elasticsearch.script.ScriptModule;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.search.lookup.LeafDocLookup;
import java.io.IOException;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Function;
import static java.util.Collections.emptyMap;
import static java.util.Collections.singleton;
import static java.util.Collections.singletonMap;
import static java.util.stream.Collectors.toList;
import static org.elasticsearch.common.lucene.search.Queries.newMatchAllQuery;
public class MissingAggregatorTests extends AggregatorTestCase {
// Names under which the mock scripts are registered in getMockScriptService().
private static final String VALUE_SCRIPT_PARAMS = "value_script_params";
private static final String VALUE_SCRIPT = "value_script";
private static final String FIELD_SCRIPT_PARAMS = "field_script_params";
private static final String FIELD_SCRIPT = "field_script";
// Increment added to "_value" by VALUE_SCRIPT, which reads no "inc" param.
private static final long DEFAULT_INC_PARAM = 1;
// Threshold applied by FIELD_SCRIPT, which reads no "threshold" param.
private static final long DEFAULT_THRESHOLD_PARAM = 50;
/**
 * Every indexed document carries a value for the aggregated field, so the
 * missing aggregation is expected to report a doc count of 0 and no value.
 */
public void testMatchNoDocs() throws IOException {
int numDocs = randomIntBetween(10, 200);
testBothCases(numDocs,
"field",
Queries.newMatchAllQuery(),
doc -> doc.add(new SortedNumericDocValuesField("field", randomLong())),
final int numDocs = randomIntBetween(10, 200);
final MappedFieldType fieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
fieldType.setName("field");
final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
.field(fieldType.name());
testCase(
newMatchAllQuery(),
builder,
writer -> {
for (int i = 0; i < numDocs; i++) {
writer.addDocument(singleton(new SortedNumericDocValuesField(fieldType.name(), randomLong())));
}
},
internalMissing -> {
assertEquals(internalMissing.getDocCount(), 0);
assertEquals(0, internalMissing.getDocCount());
assertFalse(AggregationInspectionHelper.hasValue(internalMissing));
});
},
singleton(fieldType)
);
}
/**
 * Documents only carry "another_field" while the aggregation targets a
 * different field, so every document is expected in the missing bucket.
 */
public void testMatchAllDocs() throws IOException {
int numDocs = randomIntBetween(10, 200);
testBothCases(numDocs,
"field",
Queries.newMatchAllQuery(),
doc -> doc.add(new SortedNumericDocValuesField("another_field", randomLong())),
final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
aggFieldType.setName("agg_field");
final MappedFieldType anotherFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
anotherFieldType.setName("another_field");
final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
.field(aggFieldType.name());
testCase(
newMatchAllQuery(),
builder,
writer -> {
for (int i = 0; i < numDocs; i++) {
writer.addDocument(singleton(new SortedNumericDocValuesField(anotherFieldType.name(), randomLong())));
}
},
internalMissing -> {
assertEquals(internalMissing.getDocCount(), numDocs);
assertEquals(numDocs, internalMissing.getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(internalMissing));
});
},
org.elasticsearch.common.collect.List.of(aggFieldType, anotherFieldType)
);
}
/**
 * Each document randomly carries either the aggregated field or another
 * field; the expected doc count is the number of documents that were
 * indexed without the aggregated field.
 */
public void testMatchSparse() throws IOException {
int numDocs = randomIntBetween(100, 200);
final AtomicInteger count = new AtomicInteger();
testBothCases(numDocs,
"field",
Queries.newMatchAllQuery(),
doc -> {
if (randomBoolean()) {
doc.add(new SortedNumericDocValuesField("another_field", randomLong()));
count.incrementAndGet();
} else {
doc.add(new SortedNumericDocValuesField("field", randomLong()));
}
},
final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
aggFieldType.setName("agg_field");
final MappedFieldType anotherFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
anotherFieldType.setName("another_field");
final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
.field(aggFieldType.name());
final int numDocs = randomIntBetween(100, 200);
int docsMissingAggField = 0;
final List<Set<IndexableField>> docs = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
if (randomBoolean()) {
docs.add(singleton(new SortedNumericDocValuesField(aggFieldType.name(), randomLong())));
} else {
docs.add(singleton(new SortedNumericDocValuesField(anotherFieldType.name(), randomLong())));
docsMissingAggField++;
}
}
// Lambdas can only capture effectively final locals, hence the copy.
final int finalDocsMissingAggField = docsMissingAggField;
testCase(
newMatchAllQuery(),
builder,
writer -> writer.addDocuments(docs),
internalMissing -> {
assertEquals(internalMissing.getDocCount(), count.get());
count.set(0);
assertEquals(finalDocsMissingAggField, internalMissing.getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(internalMissing));
});
},
org.elasticsearch.common.collect.List.of(aggFieldType, anotherFieldType)
);
}
/**
 * Sparse case for a range-typed aggregated field: documents randomly carry
 * either one encoded double range in the aggregated field or a numeric value
 * in another field; the expected doc count is the number of documents
 * without the aggregated field.
 *
 * NOTE(review): "another_field" is declared with a range field type but is
 * indexed as SortedNumericDocValuesField; it is never aggregated on here, so
 * this looks harmless - confirm.
 */
public void testMatchSparseRangeField() throws IOException {
int numDocs = randomIntBetween(100, 200);
final AtomicInteger count = new AtomicInteger();
final String fieldName = "field";
RangeType rangeType = RangeType.DOUBLE;
final BinaryDocValuesField field = new BinaryDocValuesField(fieldName, rangeType.encodeRanges(Collections.singleton(
new RangeFieldMapper.Range(rangeType, 1.0D, 5.0D, true, true))));
MappedFieldType fieldType = new RangeFieldMapper.Builder(fieldName, rangeType).fieldType();
fieldType.setName(fieldName);
testBothCases(numDocs,
fieldName,
Queries.newMatchAllQuery(),
doc -> {
if (randomBoolean()) {
doc.add(new SortedNumericDocValuesField("another_field", randomLong()));
count.incrementAndGet();
} else {
doc.add(field);
final RangeType rangeType = RangeType.DOUBLE;
MappedFieldType aggFieldType = new RangeFieldMapper.Builder("_name", rangeType).fieldType();
aggFieldType.setName("agg_field");
final MappedFieldType anotherFieldType = new RangeFieldMapper.Builder("_name", rangeType).fieldType();
anotherFieldType.setName("another_field");
final RangeFieldMapper.Range range = new RangeFieldMapper.Range(rangeType, 1.0D, 5.0D, true, true);
final BytesRef encodedRange = rangeType.encodeRanges(singleton(range));
final BinaryDocValuesField encodedRangeField = new BinaryDocValuesField(aggFieldType.name(), encodedRange);
final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
.field(aggFieldType.name());
final int numDocs = randomIntBetween(100, 200);
int docsMissingAggField = 0;
final List<Set<IndexableField>> docs = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
if (randomBoolean()) {
docs.add(singleton(encodedRangeField));
} else {
docs.add(singleton(new SortedNumericDocValuesField(anotherFieldType.name(), randomLong())));
docsMissingAggField++;
}
}
final int finalDocsMissingAggField = docsMissingAggField;
testCase(
newMatchAllQuery(),
builder,
writer -> writer.addDocuments(docs),
internalMissing -> {
assertEquals(finalDocsMissingAggField, internalMissing.getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(internalMissing));
},
org.elasticsearch.common.collect.List.of(aggFieldType, anotherFieldType)
);
}
/**
 * Aggregates on "unknown_field", for which no field type is supplied, with
 * no {@code missing} value configured: every document is expected in the
 * missing bucket.
 */
public void testUnmappedWithoutMissingParam() throws IOException {
final int numDocs = randomIntBetween(10, 20);
final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
aggFieldType.setName("agg_field");
final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
.field("unknown_field");
testCase(
newMatchAllQuery(),
builder,
writer -> {
for (int i = 0; i < numDocs; i++) {
writer.addDocument(singleton(new SortedNumericDocValuesField(aggFieldType.name(), randomLong())));
}
},
internalMissing -> {
assertEquals(internalMissing.getDocCount(), count.get());
count.set(0);
assertEquals(numDocs, internalMissing.getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(internalMissing));
}, fieldType);
},
singleton(aggFieldType)
);
}
/**
 * Aggregates on "unknown_field", for which no field type is supplied, with a
 * random {@code missing} value configured: the expected doc count is 0 and
 * the result is expected to have no value.
 */
public void testUnmappedWithMissingParam() throws IOException {
final int numDocs = randomIntBetween(10, 20);
final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
aggFieldType.setName("agg_field");
public void testMissingField() throws IOException {
int numDocs = randomIntBetween(10, 20);
testBothCases(numDocs,
"unknown_field",
Queries.newMatchAllQuery(),
doc -> {
doc.add(new SortedNumericDocValuesField("field", randomLong()));
final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
.field("unknown_field")
.missing(randomLong());
testCase(
newMatchAllQuery(),
builder,
writer -> {
for (int i = 0; i < numDocs; i++) {
writer.addDocument(singleton(new SortedNumericDocValuesField(aggFieldType.name(), randomLong())));
}
},
internalMissing -> {
assertEquals(internalMissing.getDocCount(), numDocs);
assertEquals(0, internalMissing.getDocCount());
assertFalse(AggregationInspectionHelper.hasValue(internalMissing));
},
singleton(aggFieldType)
);
}
/**
 * The aggregated field is mapped but absent from every indexed document.
 * With a random {@code missing} value configured on the builder, the
 * expected doc count is 0 and the result is expected to have no value.
 */
public void testMissingParam() throws IOException {
    final int docCount = randomIntBetween(10, 20);

    final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
    aggFieldType.setName("agg_field");
    final MappedFieldType anotherFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
    anotherFieldType.setName("another_field");

    final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
        .field(aggFieldType.name())
        .missing(randomLong());

    // Only "another_field" is ever written, so "agg_field" is absent everywhere.
    final CheckedConsumer<RandomIndexWriter, IOException> indexDocs = writer -> {
        int written = 0;
        while (written < docCount) {
            writer.addDocument(singleton(new SortedNumericDocValuesField(anotherFieldType.name(), randomLong())));
            written++;
        }
    };

    final Consumer<InternalMissing> assertions = result -> {
        assertEquals(0, result.getDocCount());
        assertFalse(AggregationInspectionHelper.hasValue(result));
    };

    testCase(
        newMatchAllQuery(),
        builder,
        indexDocs,
        assertions,
        org.elasticsearch.common.collect.List.of(aggFieldType, anotherFieldType)
    );
}
/**
 * Sparse case with a multi-valued aggregated field: documents randomly carry
 * either two values for the aggregated field or a single value for another
 * field; the expected doc count is the number of documents without the
 * aggregated field.
 */
public void testMultiValuedField() throws IOException {
final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
aggFieldType.setName("agg_field");
final MappedFieldType anotherFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
anotherFieldType.setName("another_field");
final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
.field(aggFieldType.name());
final int numDocs = randomIntBetween(100, 200);
int docsMissingAggField = 0;
final List<Set<IndexableField>> docs = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
if (randomBoolean()) {
final long randomLong = randomLong();
docs.add(org.elasticsearch.common.collect.Set.of(
new SortedNumericDocValuesField(aggFieldType.name(), randomLong),
new SortedNumericDocValuesField(aggFieldType.name(), randomLong + 1)
));
} else {
docs.add(singleton(new SortedNumericDocValuesField(anotherFieldType.name(), randomLong())));
docsMissingAggField++;
}
}
// Lambdas can only capture effectively final locals, hence the copy.
final int finalDocsMissingAggField = docsMissingAggField;
testCase(
newMatchAllQuery(),
builder,
writer -> writer.addDocuments(docs),
internalMissing -> {
assertEquals(finalDocsMissingAggField, internalMissing.getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(internalMissing));
});
},
org.elasticsearch.common.collect.List.of(aggFieldType, anotherFieldType)
);
}
private void testBothCases(int numDocs,
String fieldName,
Query query,
Consumer<Document> consumer,
Consumer<InternalMissing> verify) throws IOException {
NumberFieldMapper.Builder mapperBuilder = new NumberFieldMapper.Builder("_name",
NumberFieldMapper.NumberType.LONG);
final MappedFieldType fieldType = mapperBuilder.fieldType();
fieldType.setHasDocValues(true);
fieldType.setName(fieldName);
testBothCases(numDocs, fieldName, query, consumer, verify, fieldType);
/** Runs the value-script scenario with the parameterless {@code VALUE_SCRIPT} mock script. */
public void testSingleValuedFieldWithValueScript() throws IOException {
    final Script valueScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, emptyMap());
    valueScriptTestCase(valueScript);
}
private void testBothCases(int numDocs,
String fieldName,
Query query,
Consumer<Document> consumer,
Consumer<InternalMissing> verify,
MappedFieldType fieldType) throws IOException {
executeTestCase(numDocs, fieldName, query, consumer, verify, false, fieldType);
executeTestCase(numDocs, fieldName, query, consumer, verify, true, fieldType);
/** Runs the value-script scenario with {@code VALUE_SCRIPT_PARAMS}, passing {@code inc = 10}. */
public void testSingleValuedFieldWithValueScriptWithParams() throws IOException {
    final Map<String, Object> scriptParams = singletonMap("inc", 10);
    final Script valueScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT_PARAMS, scriptParams);
    valueScriptTestCase(valueScript);
}
private void executeTestCase(int numDocs,
String fieldName,
Query query,
Consumer<Document> consumer,
Consumer<InternalMissing> verify,
boolean reduced,
MappedFieldType fieldType) throws IOException {
/**
 * Shared body for the value-script tests. The aggregation targets
 * "agg_field" with the given script applied; each document randomly carries
 * either "agg_field" or "another_field", and the expected doc count is the
 * number of documents indexed without "agg_field".
 *
 * @param script the value script to attach to the aggregation builder
 */
private void valueScriptTestCase(Script script) throws IOException {
    final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
    aggFieldType.setName("agg_field");
    final MappedFieldType anotherFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
    anotherFieldType.setName("another_field");

    final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
        .field(aggFieldType.name())
        .script(script);

    final int numDocs = randomIntBetween(100, 200);
    final List<Set<IndexableField>> docs = new ArrayList<>();
    int withoutAggField = 0;
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        // Draw the coin first, then the value, to keep the random sequence unchanged.
        final boolean hasAggField = randomBoolean();
        final String targetField = hasAggField ? aggFieldType.name() : anotherFieldType.name();
        docs.add(singleton(new SortedNumericDocValuesField(targetField, randomLong())));
        if (hasAggField == false) {
            withoutAggField++;
        }
    }

    // Lambdas can only capture effectively final locals.
    final int expectedDocCount = withoutAggField;
    testCase(
        newMatchAllQuery(),
        builder,
        writer -> writer.addDocuments(docs),
        result -> {
            assertEquals(expectedDocCount, result.getDocCount());
            assertTrue(AggregationInspectionHelper.hasValue(result));
        },
        org.elasticsearch.common.collect.List.of(aggFieldType, anotherFieldType)
    );
}
/** Runs the field-script scenario with {@code FIELD_SCRIPT_PARAMS} and an explicit threshold of 10. */
public void testMultiValuedFieldWithFieldScriptWithParams() throws IOException {
    final long threshold = 10;
    final Map<String, Object> scriptParams =
        org.elasticsearch.common.collect.Map.of("field", "agg_field", "threshold", threshold);
    final Script fieldScript = new Script(ScriptType.INLINE, MockScriptEngine.NAME, FIELD_SCRIPT_PARAMS, scriptParams);
    fieldScriptTestCase(fieldScript, threshold);
}
/** Runs the field-script scenario with {@code FIELD_SCRIPT}, which applies {@code DEFAULT_THRESHOLD_PARAM}. */
public void testMultiValuedFieldWithFieldScript() throws IOException {
    final Script fieldScript =
        new Script(ScriptType.INLINE, MockScriptEngine.NAME, FIELD_SCRIPT, singletonMap("field", "agg_field"));
    fieldScriptTestCase(fieldScript, DEFAULT_THRESHOLD_PARAM);
}
/**
 * Shared body for the field-script tests. The aggregation has no field, only
 * the given script. Every document gets two consecutive values in
 * "agg_field"; the expected doc count is the number of documents whose
 * values are all strictly below {@code threshold}.
 *
 * @param script    the field script to attach to the aggregation builder
 * @param threshold the threshold the script is expected to apply
 */
private void fieldScriptTestCase(Script script, long threshold) throws IOException {
    final MappedFieldType aggFieldType = new NumberFieldMapper.Builder("_name", NumberType.LONG).fieldType();
    aggFieldType.setName("agg_field");

    final MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null)
        .script(script);

    final int numDocs = randomIntBetween(100, 200);
    final List<Set<IndexableField>> docs = new ArrayList<>();
    int allValuesBelow = 0;
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        final long lower = randomLongBetween(0, 100);
        final long upper = lower + 1;
        docs.add(org.elasticsearch.common.collect.Set.of(
            new SortedNumericDocValuesField(aggFieldType.name(), lower),
            new SortedNumericDocValuesField(aggFieldType.name(), upper)
        ));
        // Both values are below the threshold exactly when the larger one is.
        if (Math.max(lower, upper) < threshold) {
            allValuesBelow++;
        }
    }

    // Lambdas can only capture effectively final locals.
    final int expectedDocCount = allValuesBelow;
    testCase(
        newMatchAllQuery(),
        builder,
        writer -> writer.addDocuments(docs),
        result -> {
            assertEquals(expectedDocCount, result.getDocCount());
            assertTrue(AggregationInspectionHelper.hasValue(result));
        },
        singleton(aggFieldType)
    );
}
/**
 * Runs the same scenario twice through {@code testCaseWithReduce}: first
 * with {@code reduced = false}, then with {@code reduced = true}.
 */
private void testCase(Query query,
                      MissingAggregationBuilder builder,
                      CheckedConsumer<RandomIndexWriter, IOException> writeIndex,
                      Consumer<InternalMissing> verify,
                      Collection<MappedFieldType> fieldTypes) throws IOException {
    for (final boolean reduced : new boolean[] { false, true }) {
        testCaseWithReduce(query, builder, writeIndex, verify, fieldTypes, reduced);
    }
}
/**
 * Builds an index in a fresh directory via {@code writeIndex}, opens a reader
 * on it, runs {@code builder} against {@code query} through either
 * {@code searchAndReduce} (when {@code reduced} is true) or {@code search},
 * and passes the resulting {@link InternalMissing} to {@code verify}.
 */
private void testCaseWithReduce(Query query,
MissingAggregationBuilder builder,
CheckedConsumer<RandomIndexWriter, IOException> writeIndex,
Consumer<InternalMissing> verify,
Collection<MappedFieldType> fieldTypes,
boolean reduced) throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
Document document = new Document();
for (int i = 0; i < numDocs; i++) {
if (frequently()) {
indexWriter.commit();
}
consumer.accept(document);
indexWriter.addDocument(document);
document.clear();
}
writeIndex.accept(indexWriter);
}
try (IndexReader indexReader = DirectoryReader.open(directory)) {
IndexSearcher indexSearcher =
newSearcher(indexReader, true, true);
MissingAggregationBuilder builder = new MissingAggregationBuilder("_name", null);
builder.field(fieldName);
InternalMissing missing;
final IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
// search/searchAndReduce are called with an array, so convert the collection.
final MappedFieldType[] fieldTypesArray = fieldTypes.toArray(new MappedFieldType[0]);
final InternalMissing missing;
if (reduced) {
missing = searchAndReduce(indexSearcher, query, builder, fieldType);
missing = searchAndReduce(indexSearcher, query, builder, fieldTypesArray);
} else {
missing = search(indexSearcher, query, builder, fieldType);
missing = search(indexSearcher, query, builder, fieldTypesArray);
}
verify.accept(missing);
}
}
}
/**
 * Supplies a missing aggregation on {@code fieldName}; {@code fieldType} is
 * not consulted.
 */
@Override
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
    final MissingAggregationBuilder missingBuilder = new MissingAggregationBuilder("_name", null);
    return missingBuilder.field(fieldName);
}
/** Lists the values-source types this test declares as supported for the missing aggregation. */
@Override
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
    final List<ValuesSourceType> supportedTypes = org.elasticsearch.common.collect.List.of(
        CoreValuesSourceType.NUMERIC,
        CoreValuesSourceType.BYTES,
        CoreValuesSourceType.GEOPOINT,
        CoreValuesSourceType.RANGE,
        CoreValuesSourceType.HISTOGRAM
    );
    return supportedTypes;
}
/**
 * Builds a {@link ScriptService} backed by a {@link MockScriptEngine} that
 * knows the four scripts used by these tests:
 * <ul>
 *   <li>{@code VALUE_SCRIPT_PARAMS}: {@code _value} plus the {@code inc} param</li>
 *   <li>{@code VALUE_SCRIPT}: {@code _value} plus {@code DEFAULT_INC_PARAM}</li>
 *   <li>{@code FIELD_SCRIPT_PARAMS}: values of the {@code field} param that are
 *       at or above the {@code threshold} param</li>
 *   <li>{@code FIELD_SCRIPT}: same, using {@code DEFAULT_THRESHOLD_PARAM}</li>
 * </ul>
 */
@Override
protected ScriptService getMockScriptService() {
    final Map<String, Function<Map<String, Object>, Object>> scriptsByName = new HashMap<>();

    scriptsByName.put(
        VALUE_SCRIPT_PARAMS,
        vars -> ((Number) vars.get("_value")).doubleValue() + ((Number) vars.get("inc")).longValue()
    );
    scriptsByName.put(
        VALUE_SCRIPT,
        vars -> ((Number) vars.get("_value")).doubleValue() + DEFAULT_INC_PARAM
    );
    scriptsByName.put(
        FIELD_SCRIPT_PARAMS,
        vars -> threshold((String) vars.get("field"), ((Number) vars.get("threshold")).longValue(), vars)
    );
    scriptsByName.put(
        FIELD_SCRIPT,
        vars -> threshold((String) vars.get("field"), DEFAULT_THRESHOLD_PARAM, vars)
    );

    final MockScriptEngine mockEngine = new MockScriptEngine(MockScriptEngine.NAME, scriptsByName, emptyMap(), emptyMap());
    final Map<String, ScriptEngine> enginesByType = singletonMap(mockEngine.getType(), mockEngine);
    return new ScriptService(Settings.EMPTY, enginesByType, ScriptModule.CORE_CONTEXTS);
}
/**
 * Reads the doc values of {@code fieldName} through the {@code "doc"} entry
 * of {@code vars} and keeps only those that are at or above {@code threshold}.
 *
 * @param fieldName name of the field to look up
 * @param threshold inclusive lower bound for a value to be kept
 * @param vars      script variables; must contain a {@link LeafDocLookup} under {@code "doc"}
 * @return the kept values as longs, in lookup order
 */
private static List<Long> threshold(String fieldName, long threshold, Map<String, Object> vars) {
    final LeafDocLookup docLookup = (LeafDocLookup) vars.get("doc");
    final List<Long> kept = new ArrayList<>();
    for (final Object rawValue : docLookup.get(fieldName)) {
        final long asLong = ((Number) rawValue).longValue();
        if (asLong >= threshold) {
            kept.add(asLong);
        }
    }
    return kept;
}
}