Enable validating user-supplied missing values on unmapped fields (#43718) (#43940)

Provides a hook for aggregations to introspect the `ValuesSourceType` for a user-supplied missing value on an unmapped field, when the type would otherwise be `ANY`. Mapped field behavior is unchanged, and still applies the `ValuesSourceType` of the field. This PR only provides the hook; no existing aggregations have their behavior changed.
This commit is contained in:
Mark Tozzi 2019-07-08 10:46:23 -04:00 committed by GitHub
parent 4390d4a8af
commit 299a52c17d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 319 additions and 17 deletions

View File

@ -189,7 +189,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
}
throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field()
+ "]. It can only be applied to numeric or string fields.");
+ "]. It can only be applied to numeric or string fields.");
}
// return the SubAggCollectionMode that this aggregation should use based on the expected size

View File

@ -43,13 +43,28 @@ public abstract class ValuesSourceAggregatorFactory<VS extends ValuesSource, AF
@Override
public Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBucket,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
VS vs = config.toValuesSource(context.getQueryShardContext());
VS vs = config.toValuesSource(context.getQueryShardContext(), this::resolveMissingAny);
if (vs == null) {
return createUnmapped(parent, pipelineAggregators, metaData);
}
return doCreateInternal(vs, parent, collectsFromSingleBucket, pipelineAggregators, metaData);
}
/**
 * Extension point for picking a {@link ValuesSource} when the only type information available is the
 * user-supplied missing value, i.e. the field is unmapped and the aggregation declared
 * ValuesSourceType.ANY in its constructor (on the builder class). It is not consulted in any other case.
 *
 * Subclasses may inspect the runtime type of {@code missing} to decide which kind of values source fits.
 * In general only the type of the returned instance matters, so returning the EMPTY instance of the
 * chosen ValuesSource type is the recommended implementation.
 *
 * This default implementation ignores the argument and always answers with the bytes-with-ordinals
 * EMPTY instance.
 *
 * @param missing the missing value exactly as supplied by the user
 * @return a ValuesSource instance compatible with {@code missing}
 */
protected ValuesSource resolveMissingAny(Object missing) {
    return ValuesSource.Bytes.WithOrdinals.EMPTY;
}
/**
 * Builds the aggregator used when no values source is available. {@code createInternal} calls this
 * when the configuration's {@code toValuesSource} returns {@code null}, forwarding its own arguments.
 *
 * @param parent              the parent aggregator, as passed to {@code createInternal}
 * @param pipelineAggregators the pipeline aggregators, as passed to {@code createInternal}
 * @param metaData            the aggregation metadata, as passed to {@code createInternal}
 * @return the aggregator to use in place of one backed by a values source
 * @throws IOException declared for implementations that perform I/O while building the aggregator
 */
protected abstract Aggregator createUnmapped(Aggregator parent,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException;

View File

@ -36,6 +36,7 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.function.Function;
/**
* A configuration that tells aggregations how to retrieve data from the index
@ -223,10 +224,15 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
return format;
}
/**
 * Convenience overload of {@link #toValuesSource(QueryShardContext, Function)} for callers that do not
 * need to inspect the user-supplied missing value: the resolver passed along ignores its argument and
 * always yields the bytes-with-ordinals EMPTY values source.
 *
 * @param context the shard context used to build the values source
 * @return the values source, or {@code null} if none could be built
 */
@Nullable
public VS toValuesSource(QueryShardContext context) {
    return toValuesSource(context, ignoredMissing -> ValuesSource.Bytes.WithOrdinals.EMPTY);
}
/** Get a value source given its configuration. A return value of null indicates that
* no value source could be built. */
@Nullable
public VS toValuesSource(QueryShardContext context) {
public VS toValuesSource(QueryShardContext context, Function<Object, ValuesSource> resolveMissingAny) {
if (!valid()) {
throw new IllegalStateException(
"value source config is invalid; must have either a field context or a script or marked as unwrapped");
@ -241,8 +247,10 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
vs = (VS) ValuesSource.Numeric.EMPTY;
} else if (valueSourceType() == ValuesSourceType.GEOPOINT) {
vs = (VS) ValuesSource.GeoPoint.EMPTY;
} else if (valueSourceType() == ValuesSourceType.ANY || valueSourceType() == ValuesSourceType.BYTES) {
} else if (valueSourceType() == ValuesSourceType.BYTES) {
vs = (VS) ValuesSource.Bytes.WithOrdinals.EMPTY;
} else if (valueSourceType() == ValuesSourceType.ANY) {
vs = (VS) resolveMissingAny.apply(missing());
} else {
throw new IllegalArgumentException("Can't deal with unmapped ValuesSource type " + valueSourceType());
}

View File

@ -21,17 +21,22 @@ package org.elasticsearch.search.aggregations.bucket.histogram;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import static org.hamcrest.Matchers.containsString;
public class HistogramAggregatorTests extends AggregatorTestCase {
public void testLongs() throws Exception {
@ -188,6 +193,83 @@ public class HistogramAggregatorTests extends AggregatorTestCase {
}
}
/**
 * With no mapping for the field and a numeric missing value of 2 (interval 5), all seven empty
 * documents are expected to land in a single bucket keyed 0.
 */
public void testMissingUnmappedField() throws Exception {
    try (Directory directory = newDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
        // Index seven documents that carry no fields at all.
        for (int docsLeft = 7; docsLeft > 0; docsLeft--) {
            writer.addDocument(new Document());
        }

        HistogramAggregationBuilder histogramBuilder = new HistogramAggregationBuilder("my_agg")
            .field("field")
            .interval(5)
            .missing(2d);
        // Deliberately no field type: the aggregated field is unmapped.
        MappedFieldType unmapped = null;
        try (IndexReader indexReader = writer.getReader()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            InternalHistogram result = search(indexSearcher, new MatchAllDocsQuery(), histogramBuilder, unmapped);

            assertEquals(1, result.getBuckets().size());
            assertEquals(0d, result.getBuckets().get(0).getKey());
            assertEquals(7, result.getBuckets().get(0).getDocCount());
            assertTrue(AggregationInspectionHelper.hasValue(result));
        }
    }
}
/**
 * A non-numeric missing value on an unmapped field must be rejected with an
 * {@link IllegalArgumentException} whose message mentions the offending value.
 */
public void testMissingUnmappedFieldBadType() throws Exception {
    try (Directory directory = newDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
        // Index seven documents that carry no fields at all.
        for (int docsLeft = 7; docsLeft > 0; docsLeft--) {
            writer.addDocument(new Document());
        }

        String badMissing = "🍌🍌🍌";
        HistogramAggregationBuilder histogramBuilder = new HistogramAggregationBuilder("my_agg")
            .field("field")
            .interval(5)
            .missing(badMissing);
        // Deliberately no field type: the aggregated field is unmapped.
        MappedFieldType unmapped = null;
        try (IndexReader indexReader = writer.getReader()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // Open question from the original author: the failure is a NumberFormatException (a subclass
            // of IllegalArgumentException), which might be acceptable as-is.
            Throwable failure = expectThrows(IllegalArgumentException.class,
                () -> search(indexSearcher, new MatchAllDocsQuery(), histogramBuilder, unmapped));
            assertThat(failure.getMessage(), containsString(badMissing));
        }
    }
}
/**
 * Running a histogram over a keyword-mapped field holding string doc values is expected to fail
 * with an {@link IllegalArgumentException}.
 */
public void testIncorrectFieldType() throws Exception {
    try (Directory directory = newDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
        // One document per term, each stored as a sorted-set doc value.
        String[] terms = {"foo", "bar", "baz", "quux"};
        for (int idx = 0; idx < terms.length; idx++) {
            Document document = new Document();
            document.add(new SortedSetDocValuesField("field", new BytesRef(terms[idx])));
            writer.addDocument(document);
        }

        HistogramAggregationBuilder histogramBuilder = new HistogramAggregationBuilder("my_agg")
            .field("field")
            .interval(5);

        MappedFieldType keywordType = new KeywordFieldMapper.KeywordFieldType();
        keywordType.setName("field");
        keywordType.setHasDocValues(true);
        try (IndexReader indexReader = writer.getReader()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            expectThrows(IllegalArgumentException.class,
                () -> search(indexSearcher, new MatchAllDocsQuery(), histogramBuilder, keywordType));
        }
    }
}
public void testOffset() throws Exception {
try (Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {

View File

@ -152,4 +152,45 @@ public class IpRangeAggregatorTests extends AggregatorTestCase {
}
}
}
/**
 * An IP range aggregation over an unmapped field accepts a string missing value and still reports
 * its single configured range bucket.
 */
public void testMissingUnmapped() throws Exception {
    try (Directory directory = newDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
        // Index seven documents that carry no fields at all.
        for (int docsLeft = 7; docsLeft > 0; docsLeft--) {
            writer.addDocument(new Document());
        }

        IpRangeAggregationBuilder rangeBuilder = new IpRangeAggregationBuilder("test_agg")
            .field("field")
            .addRange(new IpRangeAggregationBuilder.Range("foo", "192.168.100.0", "192.168.100.255"))
            .missing("192.168.100.42"); // the missing value is apparently expected to be a string here
        try (IndexReader indexReader = writer.getReader()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            InternalBinaryRange result =
                search(indexSearcher, new MatchAllDocsQuery(), rangeBuilder, (MappedFieldType) null);
            assertEquals(1, result.getBuckets().size());
        }
    }
}
/**
 * An IP range aggregation over an unmapped field must reject a numeric missing value with an
 * {@link IllegalArgumentException}.
 */
public void testMissingUnmappedBadType() throws Exception {
    try (Directory directory = newDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
        // Index seven documents that carry no fields at all.
        for (int docsLeft = 7; docsLeft > 0; docsLeft--) {
            writer.addDocument(new Document());
        }

        IpRangeAggregationBuilder rangeBuilder = new IpRangeAggregationBuilder("test_agg")
            .field("field")
            .addRange(new IpRangeAggregationBuilder.Range("foo", "192.168.100.0", "192.168.100.255"))
            .missing(1234);
        try (IndexReader indexReader = writer.getReader()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            expectThrows(IllegalArgumentException.class,
                () -> search(indexSearcher, new MatchAllDocsQuery(), rangeBuilder, (MappedFieldType) null));
        }
    }
}
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
@ -37,10 +38,12 @@ import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.common.util.MockPageCacheRecycler;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.IpFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@ -77,6 +80,7 @@ import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.ScoreSortBuilder;
import org.elasticsearch.test.geo.RandomGeoGenerator;
import java.io.IOException;
import java.net.InetAddress;
@ -884,6 +888,60 @@ public class TermsAggregatorTests extends AggregatorTestCase {
}
}
/**
 * Creating a terms aggregator over a geo_point-mapped field is expected to fail with an
 * {@link AggregationExecutionException}.
 */
public void testGeoPointField() throws Exception {
    try (Directory directory = newDirectory()) {
        GeoPoint location = RandomGeoGenerator.randomPoint(random());
        final String field = "field";
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
            // A single document holding one random point.
            Document pointDoc = new Document();
            pointDoc.add(new LatLonDocValuesField(field, location.getLat(), location.getLon()));
            writer.addDocument(pointDoc);
            try (IndexReader reader = maybeWrapReaderEs(writer.getReader())) {
                MappedFieldType geoType = new GeoPointFieldMapper.GeoPointFieldType();
                geoType.setHasDocValues(true);
                geoType.setName("field");

                IndexSearcher searcher = newIndexSearcher(reader);
                TermsAggregationBuilder termsBuilder = new TermsAggregationBuilder("_name", null).field(field);
                // Note - other places we throw IllegalArgumentException
                expectThrows(AggregationExecutionException.class,
                    () -> createAggregator(termsBuilder, searcher, geoType));
            }
        }
    }
}
/**
 * A terms aggregation over an ip-mapped field returns one bucket per address, keyed by the
 * address in its textual form.
 */
public void testIpField() throws Exception {
    try (Directory directory = newDirectory()) {
        final String field = "field";
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
            // A single document holding one encoded IP address as a sorted-set doc value.
            Document ipDoc = new Document();
            ipDoc.add(new SortedSetDocValuesField("field",
                new BytesRef(InetAddressPoint.encode(InetAddresses.forString("192.168.100.42")))));
            writer.addDocument(ipDoc);
            try (IndexReader reader = maybeWrapReaderEs(writer.getReader())) {
                MappedFieldType ipType = new IpFieldMapper.IpFieldType();
                ipType.setHasDocValues(true);
                ipType.setName("field");

                IndexSearcher searcher = newIndexSearcher(reader);
                TermsAggregationBuilder termsBuilder = new TermsAggregationBuilder("_name", null).field(field);

                // Drive the aggregator through its full collection lifecycle by hand.
                Aggregator termsAggregator = createAggregator(termsBuilder, searcher, ipType);
                termsAggregator.preCollection();
                searcher.search(new MatchAllDocsQuery(), termsAggregator);
                termsAggregator.postCollection();

                Terms terms = (Terms) termsAggregator.buildAggregation(0L);
                assertEquals("_name", terms.getName());
                assertEquals(1, terms.getBuckets().size());
                assertEquals("192.168.100.42", terms.getBuckets().get(0).getKey());
                assertEquals(1, terms.getBuckets().get(0).getDocCount());
            }
        }
    }
}
public void testNestedTermsAgg() throws Exception {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
@ -107,8 +108,61 @@ public class CardinalityAggregatorTests extends AggregatorTestCase {
});
}
/**
 * Cardinality over an unmapped field with a string missing value: every document falls back to
 * the same missing value, so exactly one distinct value is expected.
 */
public void testUnmappedMissingString() throws IOException {
    CardinalityAggregationBuilder cardinalityBuilder = new CardinalityAggregationBuilder("name", null)
        .field("number").missing("🍌🍌🍌");

    testCase(cardinalityBuilder, new MatchAllDocsQuery(), writer -> {
        // Three documents that only populate a field the aggregation does not look at.
        for (int value = 7; value <= 9; value++) {
            writer.addDocument(singleton(new NumericDocValuesField("unrelatedField", value)));
        }
    }, cardinality -> {
        assertEquals(1, cardinality.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(cardinality));
    }, null);
}
/**
 * Cardinality over an unmapped field with a numeric missing value: every document falls back to
 * the same missing value, so exactly one distinct value is expected.
 */
public void testUnmappedMissingNumber() throws IOException {
    CardinalityAggregationBuilder cardinalityBuilder = new CardinalityAggregationBuilder("name", null)
        .field("number").missing(1234);

    testCase(cardinalityBuilder, new MatchAllDocsQuery(), writer -> {
        // Three documents that only populate a field the aggregation does not look at.
        for (int value = 7; value <= 9; value++) {
            writer.addDocument(singleton(new NumericDocValuesField("unrelatedField", value)));
        }
    }, cardinality -> {
        assertEquals(1, cardinality.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(cardinality));
    }, null);
}
/**
 * Cardinality over an unmapped field with a {@link GeoPoint} missing value: every document falls
 * back to the same missing value, so exactly one distinct value is expected.
 */
public void testUnmappedMissingGeoPoint() throws IOException {
    CardinalityAggregationBuilder cardinalityBuilder = new CardinalityAggregationBuilder("name", null)
        .field("number").missing(new GeoPoint(42.39561, -71.13051));

    testCase(cardinalityBuilder, new MatchAllDocsQuery(), writer -> {
        // Three documents that only populate a field the aggregation does not look at.
        for (int value = 7; value <= 9; value++) {
            writer.addDocument(singleton(new NumericDocValuesField("unrelatedField", value)));
        }
    }, cardinality -> {
        assertEquals(1, cardinality.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(cardinality));
    }, null);
}
private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
Consumer<InternalCardinality> verify) throws IOException {
Consumer<InternalCardinality> verify) throws IOException {
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(
NumberFieldMapper.NumberType.LONG);
fieldType.setName("number");
final CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder(
"_name", ValueType.NUMERIC).field("number");
testCase(aggregationBuilder, query, buildIndex, verify, fieldType);
}
private void testCase(CardinalityAggregationBuilder aggregationBuilder, Query query,
CheckedConsumer<RandomIndexWriter, IOException> buildIndex, Consumer<InternalCardinality> verify,
MappedFieldType fieldType) throws IOException {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
buildIndex.accept(indexWriter);
@ -117,11 +171,6 @@ public class CardinalityAggregatorTests extends AggregatorTestCase {
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder(
"_name", ValueType.NUMERIC).field("number");
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(
NumberFieldMapper.NumberType.LONG);
fieldType.setName("number");
CardinalityAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher,
fieldType);
aggregator.preCollection();

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.index.mapper.BooleanFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
@ -119,11 +120,66 @@ public class ValueCountAggregatorTests extends AggregatorTestCase {
});
}
/**
 * Value count over an unmapped field with a string missing value: each of the three documents
 * contributes the missing value, so a count of three is expected.
 */
public void testUnmappedMissingString() throws IOException {
    ValueCountAggregationBuilder countBuilder = new ValueCountAggregationBuilder("name", null)
        .field("number").missing("🍌🍌🍌");

    testCase(countBuilder, new MatchAllDocsQuery(), writer -> {
        // Three documents that only populate a field the aggregation does not look at.
        for (int value = 7; value <= 9; value++) {
            writer.addDocument(singleton(new NumericDocValuesField("unrelatedField", value)));
        }
    }, valueCount -> {
        assertEquals(3, valueCount.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(valueCount));
    }, null);
}
/**
 * Value count over an unmapped field with a numeric missing value: each of the three documents
 * contributes the missing value, so a count of three is expected.
 */
public void testUnmappedMissingNumber() throws IOException {
    ValueCountAggregationBuilder countBuilder = new ValueCountAggregationBuilder("name", null)
        .field("number").missing(1234);

    testCase(countBuilder, new MatchAllDocsQuery(), writer -> {
        // Three documents that only populate a field the aggregation does not look at.
        for (int value = 7; value <= 9; value++) {
            writer.addDocument(singleton(new NumericDocValuesField("unrelatedField", value)));
        }
    }, valueCount -> {
        assertEquals(3, valueCount.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(valueCount));
    }, null);
}
/**
 * Value count over an unmapped field with a {@link GeoPoint} missing value: each of the three
 * documents contributes the missing value, so a count of three is expected.
 */
public void testUnmappedMissingGeoPoint() throws IOException {
    ValueCountAggregationBuilder countBuilder = new ValueCountAggregationBuilder("name", null)
        .field("number").missing(new GeoPoint(42.39561, -71.13051));

    testCase(countBuilder, new MatchAllDocsQuery(), writer -> {
        // Three documents that only populate a field the aggregation does not look at.
        for (int value = 7; value <= 9; value++) {
            writer.addDocument(singleton(new NumericDocValuesField("unrelatedField", value)));
        }
    }, valueCount -> {
        assertEquals(3, valueCount.getValue(), 0);
        assertTrue(AggregationInspectionHelper.hasValue(valueCount));
    }, null);
}
/**
 * Runs a value_count test against a mapped field: builds a doc-values-enabled field type for
 * {@code valueType} named {@code FIELD_NAME}, builds the matching aggregation on that field, and
 * delegates to the builder-based {@code testCase} overload.
 *
 * @param query     the query selecting the documents to aggregate
 * @param valueType the value type used for both the field mapping and the aggregation builder
 * @param indexer   callback that writes the test documents
 * @param verify    callback that checks the resulting aggregation
 */
private void testCase(Query query,
                      ValueType valueType,
                      CheckedConsumer<RandomIndexWriter, IOException> indexer,
                      Consumer<InternalValueCount> verify) throws IOException {
    MappedFieldType mappedType = createMappedFieldType(valueType);
    mappedType.setName(FIELD_NAME);
    mappedType.setHasDocValues(true);

    ValueCountAggregationBuilder countBuilder = new ValueCountAggregationBuilder("_name", valueType).field(FIELD_NAME);

    testCase(countBuilder, query, indexer, verify, mappedType);
}
private void testCase(ValueCountAggregationBuilder aggregationBuilder, Query query,
CheckedConsumer<RandomIndexWriter, IOException> indexer,
Consumer<InternalValueCount> verify, MappedFieldType fieldType) throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
indexer.accept(indexWriter);
@ -132,13 +188,6 @@ public class ValueCountAggregatorTests extends AggregatorTestCase {
try (IndexReader indexReader = DirectoryReader.open(directory)) {
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
MappedFieldType fieldType = createMappedFieldType(valueType);
fieldType.setName(FIELD_NAME);
fieldType.setHasDocValues(true);
ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("_name", valueType);
aggregationBuilder.field(FIELD_NAME);
ValueCountAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
aggregator.preCollection();
indexSearcher.search(query, aggregator);