SQL: Perform lazy evaluation of mismatched mappings (#35676)

Move away from performing eager, fail-fast validation of mismatched
 mapping to a lazy evaluation based on the fields actually used in the
 query. This allows queries to run on the parts of the indices that
 "work" which is not just convenient but also a necessity for large
 mappings (like logging) where alignment is hard/impossible to achieve.

Fix #35659
This commit is contained in:
Costin Leau 2018-11-21 17:35:15 +02:00 committed by GitHub
parent 26cc8ff8c3
commit 03f003733d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 200 additions and 58 deletions

View File

@ -49,6 +49,7 @@ import org.elasticsearch.xpack.sql.tree.Node;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypeConversion;
import org.elasticsearch.xpack.sql.type.DataTypes;
import org.elasticsearch.xpack.sql.type.InvalidMappedField;
import org.elasticsearch.xpack.sql.type.UnsupportedEsField;
import java.util.ArrayList;
@ -200,8 +201,14 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
// if it's a object/compound type, keep it unresolved with a nice error message
if (named instanceof FieldAttribute) {
FieldAttribute fa = (FieldAttribute) named;
// incompatible mappings
if (fa.field() instanceof InvalidMappedField) {
named = u.withUnresolvedMessage("Cannot use field [" + fa.name() + "] due to ambiguities being "
+ ((InvalidMappedField) fa.field()).errorMessage());
}
// unsupported types
if (DataTypes.isUnsupported(fa.dataType())) {
else if (DataTypes.isUnsupported(fa.dataType())) {
UnsupportedEsField unsupportedField = (UnsupportedEsField) fa.field();
named = u.withUnresolvedMessage(
"Cannot use field [" + fa.name() + "] type [" + unsupportedField.getOriginalType() + "] as is unsupported");

View File

@ -30,6 +30,7 @@ import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DateEsField;
import org.elasticsearch.xpack.sql.type.EsField;
import org.elasticsearch.xpack.sql.type.InvalidMappedField;
import org.elasticsearch.xpack.sql.type.KeywordEsField;
import org.elasticsearch.xpack.sql.type.TextEsField;
import org.elasticsearch.xpack.sql.type.Types;
@ -51,6 +52,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.regex.Pattern;
import static java.util.Collections.emptyMap;
@ -263,13 +265,21 @@ public class IndexResolver {
// without sorting, they can still be detected however without the emptyMap optimization
// (fields without multi-fields have no children)
for (Entry<String, Map<String, FieldCapabilities>> entry : sortedFields) {
InvalidMappedField invalidField = null;
FieldCapabilities fieldCap = null;
errorMessage.setLength(0);
String name = entry.getKey();
// skip internal fields
if (!name.startsWith("_")) {
Map<String, FieldCapabilities> types = entry.getValue();
// field is mapped differently across indices
if (types.size() > 1) {
// build error message
// build the error message
// and create a MultiTypeField
for (Entry<String, FieldCapabilities> type : types.entrySet()) {
if (errorMessage.length() > 0) {
errorMessage.append(", ");
@ -280,37 +290,39 @@ public class IndexResolver {
errorMessage.append(Arrays.toString(type.getValue().indices()));
}
errorMessage.insert(0,
"[" + indexPattern + "] points to indices with incompatible mappings; " +
"field [" + name + "] is mapped in [" + types.size() + "] different ways: ");
errorMessage.insert(0, "mapped as [" + types.size() + "] incompatible types: ");
invalidField = new InvalidMappedField(name, errorMessage.toString());
}
if (errorMessage.length() > 0) {
return IndexResolution.invalid(errorMessage.toString());
}
FieldCapabilities fieldCap = types.values().iterator().next();
// validate search/agg-able
if (fieldCap.isAggregatable() && fieldCap.nonAggregatableIndices() != null) {
errorMessage.append("[" + indexPattern + "] points to indices with incompatible mappings: ");
errorMessage.append("field [" + name + "] is aggregateable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonAggregatableIndices()));
}
if (fieldCap.isSearchable() && fieldCap.nonSearchableIndices() != null) {
if (errorMessage.length() > 0) {
errorMessage.append(",");
// type is okay, check aggregation
else {
fieldCap = types.values().iterator().next();
// validate search/agg-able
if (fieldCap.isAggregatable() && fieldCap.nonAggregatableIndices() != null) {
errorMessage.append("mapped as aggregatable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonAggregatableIndices()));
}
if (fieldCap.isSearchable() && fieldCap.nonSearchableIndices() != null) {
if (errorMessage.length() > 0) {
errorMessage.append(",");
}
errorMessage.append("mapped as searchable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonSearchableIndices()));
}
if (errorMessage.length() > 0) {
invalidField = new InvalidMappedField(name, errorMessage.toString());
}
errorMessage.append("[" + indexPattern + "] points to indices with incompatible mappings: ");
errorMessage.append("field [" + name + "] is searchable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonSearchableIndices()));
}
if (errorMessage.length() > 0) {
return IndexResolution.invalid(errorMessage.toString());
}
// validation passes - create the field
// and name wasn't added before
// if the name wasn't added before
final InvalidMappedField invalidF = invalidField;
final FieldCapabilities fieldCapab = fieldCap;
if (!flattedMapping.containsKey(name)) {
createField(name, fieldCap, fieldCaps, hierarchicalMapping, flattedMapping, false);
createField(name, fieldCaps, hierarchicalMapping, flattedMapping, s -> {
return invalidF != null ? invalidF : createField(s, fieldCapab.getType(), emptyMap(), fieldCapab.isAggregatable());
});
}
}
}
@ -318,8 +330,9 @@ public class IndexResolver {
return IndexResolution.valid(new EsIndex(indexPattern, hierarchicalMapping));
}
private static EsField createField(String fieldName, FieldCapabilities caps, Map<String, Map<String, FieldCapabilities>> globalCaps,
Map<String, EsField> hierarchicalMapping, Map<String, EsField> flattedMapping, boolean hasChildren) {
private static EsField createField(String fieldName, Map<String, Map<String, FieldCapabilities>> globalCaps,
Map<String, EsField> hierarchicalMapping, Map<String, EsField> flattedMapping,
Function<String, EsField> field) {
Map<String, EsField> parentProps = hierarchicalMapping;
@ -336,39 +349,37 @@ public class IndexResolver {
throw new SqlIllegalArgumentException("Cannot find field {}; this is likely a bug", parentName);
}
FieldCapabilities parentCap = map.values().iterator().next();
parent = createField(parentName, parentCap, globalCaps, hierarchicalMapping, flattedMapping, true);
parent = createField(parentName, globalCaps, hierarchicalMapping, flattedMapping,
s -> createField(s, parentCap.getType(), new TreeMap<>(), parentCap.isAggregatable()));
}
parentProps = parent.getProperties();
}
EsField field = null;
Map<String, EsField> props = hasChildren ? new TreeMap<>() : emptyMap();
EsField esField = field.apply(fieldName);
parentProps.put(fieldName, esField);
flattedMapping.put(fullFieldName, esField);
DataType esType = DataType.fromTypeName(caps.getType());
return esField;
}
private static EsField createField(String fieldName, String typeName, Map<String, EsField> props, boolean isAggregateable) {
DataType esType = DataType.fromTypeName(typeName);
switch (esType) {
case TEXT:
field = new TextEsField(fieldName, props, false);
break;
return new TextEsField(fieldName, props, false);
case KEYWORD:
int length = DataType.KEYWORD.defaultPrecision;
// TODO: to check whether isSearchable/isAggregateable takes into account the presence of the normalizer
boolean normalized = false;
field = new KeywordEsField(fieldName, props, caps.isAggregatable(), length, normalized);
break;
return new KeywordEsField(fieldName, props, isAggregateable, length, normalized);
case DATE:
field = new DateEsField(fieldName, props, caps.isAggregatable());
break;
return new DateEsField(fieldName, props, isAggregateable);
case UNSUPPORTED:
field = new UnsupportedEsField(fieldName, caps.getType());
break;
return new UnsupportedEsField(fieldName, typeName);
default:
field = new EsField(fieldName, esType, props, caps.isAggregatable());
return new EsField(fieldName, esType, props, isAggregateable);
}
parentProps.put(fieldName, field);
flattedMapping.put(fullFieldName, field);
return field;
}
private static FieldCapabilitiesRequest createFieldCapsRequest(String index) {

View File

@ -0,0 +1,57 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.type;
import org.elasticsearch.xpack.sql.analysis.index.MappingException;
import java.util.Objects;
import static java.util.Collections.emptyMap;
/**
* Representation of field mapped differently across indices.
* Used during mapping discovery only.
*/
public class InvalidMappedField extends EsField {
private final String errorMessage;
public InvalidMappedField(String name, String errorMessage) {
super(name, DataType.UNSUPPORTED, emptyMap(), false);
this.errorMessage = errorMessage;
}
public String errorMessage() {
return errorMessage;
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), errorMessage);
}
@Override
public boolean equals(Object obj) {
if (super.equals(obj)) {
InvalidMappedField other = (InvalidMappedField) obj;
return Objects.equals(errorMessage, other.errorMessage);
}
return false;
}
@Override
public EsField getExactField() {
throw new MappingException("Field [" + getName() + "] is invalid, cannot access it");
}
@Override
public boolean isExact() {
return false;
}
}

View File

@ -9,6 +9,7 @@ import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.sql.analysis.AnalysisException;
import org.elasticsearch.xpack.sql.analysis.index.EsIndex;
import org.elasticsearch.xpack.sql.analysis.index.IndexResolution;
import org.elasticsearch.xpack.sql.analysis.index.IndexResolverTests;
import org.elasticsearch.xpack.sql.expression.function.FunctionRegistry;
import org.elasticsearch.xpack.sql.parser.SqlParser;
import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan;
@ -46,6 +47,25 @@ public class VerifierErrorMessagesTests extends ESTestCase {
return analyzer.analyze(parser.createStatement(sql), true);
}
private IndexResolution incompatible() {
Map<String, EsField> basicMapping = TypesTests.loadMapping("mapping-basic.json", true);
Map<String, EsField> incompatible = TypesTests.loadMapping("mapping-basic-incompatible.json");
assertNotEquals(basicMapping, incompatible);
IndexResolution resolution = IndexResolverTests.merge(new EsIndex("basic", basicMapping),
new EsIndex("incompatible", incompatible));
assertTrue(resolution.isValid());
return resolution;
}
private String incompatibleError(String sql) {
return error(incompatible(), sql);
}
private LogicalPlan incompatibleAccept(String sql) {
return accept(incompatible(), sql);
}
public void testMissingIndex() {
assertEquals("1:17: Unknown index [missing]", error(IndexResolution.notFound("missing"), "SELECT foo FROM missing"));
}
@ -366,4 +386,40 @@ public class VerifierErrorMessagesTests extends ESTestCase {
assertEquals("1:8: [INSERT] fourth argument must be [string], found value [3] type [integer]",
error("SELECT INSERT('text', 1, 2, 3)"));
}
public void testAllowCorrectFieldsInIncompatibleMappings() {
assertNotNull(incompatibleAccept("SELECT languages FROM \"*\""));
}
public void testWildcardInIncompatibleMappings() {
assertNotNull(incompatibleAccept("SELECT * FROM \"*\""));
}
public void testMismatchedFieldInIncompatibleMappings() {
assertEquals(
"1:8: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT emp_no FROM \"*\""));
}
public void testMismatchedFieldStarInIncompatibleMappings() {
assertEquals(
"1:8: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT emp_no.* FROM \"*\""));
}
public void testMismatchedFieldFilterInIncompatibleMappings() {
assertEquals(
"1:33: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT languages FROM \"*\" WHERE emp_no > 1"));
}
public void testMismatchedFieldScalarInIncompatibleMappings() {
assertEquals(
"1:45: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT languages FROM \"*\" ORDER BY SIGN(ABS(emp_no))"));
}
}

View File

@ -9,6 +9,7 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.EsField;
import org.elasticsearch.xpack.sql.type.InvalidMappedField;
import org.elasticsearch.xpack.sql.type.TypesTests;
import java.util.ArrayList;
@ -61,12 +62,16 @@ public class IndexResolverTests extends ESTestCase {
IndexResolution resolution = IndexResolver.mergedMapping(wildcard,
fromMappings(new EsIndex("basic", basicMapping), new EsIndex("incompatible", incompatible)));
assertFalse(resolution.isValid());
MappingException me = expectThrows(MappingException.class, () -> resolution.get());
assertTrue(resolution.isValid());
EsIndex esIndex = resolution.get();
assertEquals(wildcard, esIndex.name());
EsField esField = esIndex.mapping().get("gender");
assertEquals(InvalidMappedField.class, esField.getClass());
assertEquals(
"[*] points to indices with incompatible mappings;"
+ " field [gender] is mapped in [2] different ways: [text] in [incompatible], [keyword] in [basic]",
me.getMessage());
"mapped as [2] incompatible types: [text] in [incompatible], [keyword] in [basic]",
((InvalidMappedField) esField).errorMessage());
}
public void testMergeIncompatibleCapabilities() throws Exception {
@ -79,11 +84,13 @@ public class IndexResolverTests extends ESTestCase {
IndexResolution resolution = IndexResolver.mergedMapping(wildcard,
fromMappings(new EsIndex("basic", basicMapping), new EsIndex("incompatible", incompatible)));
assertFalse(resolution.isValid());
MappingException me = expectThrows(MappingException.class, () -> resolution.get());
assertEquals(
"[*] points to indices with incompatible mappings: field [emp_no] is aggregateable except in [incompatible]",
me.getMessage());
assertTrue(resolution.isValid());
EsIndex esIndex = resolution.get();
assertEquals(wildcard, esIndex.name());
EsField esField = esIndex.mapping().get("emp_no");
assertEquals(InvalidMappedField.class, esField.getClass());
assertEquals("mapped as aggregatable except in [incompatible]", ((InvalidMappedField) esField).errorMessage());
}
public void testMultiLevelObjectMappings() throws Exception {
@ -106,7 +113,11 @@ public class IndexResolverTests extends ESTestCase {
assertEqualsMaps(nestedMapping, resolution.get().mapping());
}
private static Map<String, Map<String, FieldCapabilities>> fromMappings(EsIndex... indices) {
public static IndexResolution merge(EsIndex... indices) {
return IndexResolver.mergedMapping("*", fromMappings(indices));
}
public static Map<String, Map<String, FieldCapabilities>> fromMappings(EsIndex... indices) {
Map<String, Map<String, FieldCapabilities>> merged = new HashMap<>();
// first pass: create the field caps