From b2631f9ac894d7e36bb328a606d20bcab42d6d70 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Sun, 25 Feb 2018 04:39:48 +0200 Subject: [PATCH] SQL: Extend field resolution to deal with unquoted qualified columns (elastic/x-pack-elasticsearch#3894) * foo.bar can mean either: no table field foo.bar or table foo, field bar The resolution rule has been extended to include the latter case as fallback. * Always check field ambiguity * Since field with dots can create confusion (when not qualified), the analyzer always now always checks for both qualified and not-qualified fields and throws an ambiguity message with the potential candidates. This forces the use of qualifiers or quotes to better indicate the desired field. * Add example of aliasing the table to remove ambiguity Original commit: elastic/x-pack-elasticsearch@8b70b9c4ee40de22617b69c0156cde5c09e3eb09 --- .../xpack/sql/analysis/analyzer/Analyzer.java | 44 ++++++++------- .../analyzer/FieldAttributeTests.java | 56 ++++++++++++++++++- .../test/resources/mapping-dotted-field.json | 32 +++++++++++ 3 files changed, 111 insertions(+), 21 deletions(-) create mode 100644 plugin/sql/src/test/resources/mapping-dotted-field.json diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java index 6fdb9637559..26a2b424c31 100644 --- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java +++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java @@ -5,6 +5,7 @@ */ package org.elasticsearch.xpack.sql.analysis.analyzer; +import org.elasticsearch.common.util.Comparators; import org.elasticsearch.xpack.sql.analysis.AnalysisException; import org.elasticsearch.xpack.sql.analysis.analyzer.Verifier.Failure; import org.elasticsearch.xpack.sql.analysis.index.IndexResolution; @@ -49,16 +50,15 @@ import org.elasticsearch.xpack.sql.type.DataType; import org.elasticsearch.xpack.sql.type.DataTypeConversion; import org.elasticsearch.xpack.sql.type.DataTypes; import org.elasticsearch.xpack.sql.type.UnsupportedEsField; -import org.elasticsearch.xpack.sql.util.StringUtils; import org.joda.time.DateTimeZone; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Comparator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -144,11 +144,11 @@ public class Analyzer extends RuleExecutor { } @SuppressWarnings("unchecked") - private static E resolveExpression(E expression, LogicalPlan plan, boolean lenient) { + private static E resolveExpression(E expression, LogicalPlan plan) { return (E) expression.transformUp(e -> { if (e instanceof UnresolvedAttribute) { UnresolvedAttribute ua = (UnresolvedAttribute) e; - Attribute a = resolveAgainstList(ua, plan.output(), lenient); + Attribute a = resolveAgainstList(ua, plan.output()); return a != null ? a : e; } return e; @@ -159,17 +159,21 @@ public class Analyzer extends RuleExecutor { // Shared methods around the analyzer rules // - private static Attribute resolveAgainstList(UnresolvedAttribute u, List attrList, boolean lenient) { + private static Attribute resolveAgainstList(UnresolvedAttribute u, List attrList) { List matches = new ArrayList<>(); - // first try the qualified version + // first take into account the qualified version boolean qualified = u.qualifier() != null; for (Attribute attribute : attrList) { if (!attribute.synthetic()) { boolean match = qualified ? Objects.equals(u.qualifiedName(), attribute.qualifiedName()) : - Objects.equals(u.name(), attribute.name()); + // if the field is unqualified + // first check the names directly + (Objects.equals(u.name(), attribute.name()) + // but also if the qualifier might not be quoted and if there's any ambiguity with nested fields + || Objects.equals(u.name(), attribute.qualifiedName())); if (match) { matches.add(attribute.withLocation(u.location())); } @@ -185,15 +189,15 @@ public class Analyzer extends RuleExecutor { return matches.get(0); } - // too many references - should it be ignored? - // TODO: move away from exceptions inside the analyzer - if (!lenient) { - throw new AnalysisException(u, "Reference %s is ambiguous, matches any of %s", u.nodeString(), matches); - } - - return null; + return u.withUnresolvedMessage("Reference [" + u.qualifiedName() + + "] is ambiguous (to disambiguate use quotes or qualifiers); matches any of " + + matches.stream() + .map(a -> "\"" + a.qualifier() + "\".\"" + a.name() + "\"") + .sorted() + .collect(toList()) + ); } - + private static boolean hasStar(List exprs) { for (Expression expression : exprs) { if (expression instanceof UnresolvedStar) { @@ -304,7 +308,7 @@ public class Analyzer extends RuleExecutor { for (int i = 0; i < groupings.size(); i++) { Expression grouping = groupings.get(i); if (grouping instanceof UnresolvedAttribute) { - Attribute maybeResolved = resolveAgainstList((UnresolvedAttribute) grouping, resolved, true); + Attribute maybeResolved = resolveAgainstList((UnresolvedAttribute) grouping, resolved); if (maybeResolved != null) { changed = true; // use the matched expression (not its attribute) @@ -330,7 +334,7 @@ public class Analyzer extends RuleExecutor { OrderBy o = (OrderBy) plan; if (!o.resolved()) { List resolvedOrder = o.order().stream() - .map(or -> resolveExpression(or, o.child(), true)) + .map(or -> resolveExpression(or, o.child())) .collect(toList()); return new OrderBy(o.location(), o.child(), resolvedOrder); } @@ -347,7 +351,7 @@ public class Analyzer extends RuleExecutor { for (LogicalPlan child : plan.children()) { childrenOutput.addAll(child.output()); } - NamedExpression named = resolveAgainstList(u, childrenOutput, false); + NamedExpression named = resolveAgainstList(u, childrenOutput); // if resolved, return it; otherwise keep it in place to be resolved later if (named != null) { // if it's a object/compound type, keep it unresolved with a nice error message @@ -403,7 +407,7 @@ public class Analyzer extends RuleExecutor { if (us.qualifier() != null) { // resolve the so-called qualifier first // since this is an unresolved start we don't know whether it's a path or an actual qualifier - Attribute q = resolveAgainstList(us.qualifier(), output, false); + Attribute q = resolveAgainstList(us.qualifier(), output); // now use the resolved 'qualifier' to match for (Attribute attr : output) { @@ -648,7 +652,7 @@ public class Analyzer extends RuleExecutor { } static E tryResolveExpression(E exp, LogicalPlan plan) { - E resolved = resolveExpression(exp, plan, true); + E resolved = resolveExpression(exp, plan); if (!resolved.resolved()) { // look at unary trees but ignore subqueries if (plan.children().size() == 1 && !(plan instanceof SubQueryAlias)) { diff --git a/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java b/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java index f562a8d5063..73c59f6ef41 100644 --- a/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java +++ b/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java @@ -57,6 +57,26 @@ public class FieldAttributeTests extends ESTestCase { } private FieldAttribute attribute(String fieldName) { + // test multiple version of the attribute name + // to make sure all match the same thing + + // NB: the equality is done on the same since each plan bumps the expression counter + + // unqualified + FieldAttribute unqualified = parseQueryFor(fieldName); + // unquoted qualifier + FieldAttribute unquotedQualifier = parseQueryFor("test." + fieldName); + assertEquals(unqualified.name(), unquotedQualifier.name()); + assertEquals(unqualified.qualifiedName(), unquotedQualifier.qualifiedName()); + // quoted qualifier + FieldAttribute quotedQualifier = parseQueryFor("\"test\"." + fieldName); + assertEquals(unqualified.name(), quotedQualifier.name()); + assertEquals(unqualified.qualifiedName(), quotedQualifier.qualifiedName()); + + return randomFrom(unqualified, unquotedQualifier, quotedQualifier); + } + + private FieldAttribute parseQueryFor(String fieldName) { LogicalPlan plan = plan("SELECT " + fieldName + " FROM test"); assertThat(plan, instanceOf(Project.class)); Project p = (Project) plan; @@ -140,4 +160,38 @@ public class FieldAttributeTests extends ESTestCase { assertThat(names, not(hasItem("unsupported"))); assertThat(names, hasItems("bool", "text", "keyword", "int")); } -} + + public void testFieldAmbiguity() { + Map mapping = TypesTests.loadMapping("mapping-dotted-field.json"); + + EsIndex index = new EsIndex("test", mapping); + getIndexResult = IndexResolution.valid(index); + analyzer = new Analyzer(functionRegistry, getIndexResult, DateTimeZone.UTC); + + VerificationException ex = expectThrows(VerificationException.class, () -> plan("SELECT test.bar FROM test")); + assertEquals( + "Found 1 problem(s)\nline 1:8: Reference [test.bar] is ambiguous (to disambiguate use quotes or qualifiers); " + + "matches any of [\"test\".\"bar\", \"test\".\"test.bar\"]", + ex.getMessage()); + + ex = expectThrows(VerificationException.class, () -> plan("SELECT test.test FROM test")); + assertEquals( + "Found 1 problem(s)\nline 1:8: Reference [test.test] is ambiguous (to disambiguate use quotes or qualifiers); " + + "matches any of [\"test\".\"test\", \"test\".\"test.test\"]", + ex.getMessage()); + + LogicalPlan plan = plan("SELECT test.test FROM test AS x"); + assertThat(plan, instanceOf(Project.class)); + + plan = plan("SELECT \"test\".test.test FROM test"); + assertThat(plan, instanceOf(Project.class)); + + Project p = (Project) plan; + List projections = p.projections(); + assertThat(projections, hasSize(1)); + Attribute attribute = projections.get(0).toAttribute(); + assertThat(attribute, instanceOf(FieldAttribute.class)); + assertThat(attribute.qualifier(), is("test")); + assertThat(attribute.name(), is("test.test")); + } +} \ No newline at end of file diff --git a/plugin/sql/src/test/resources/mapping-dotted-field.json b/plugin/sql/src/test/resources/mapping-dotted-field.json new file mode 100644 index 00000000000..c48cd5c7706 --- /dev/null +++ b/plugin/sql/src/test/resources/mapping-dotted-field.json @@ -0,0 +1,32 @@ +{ + "properties" : { + "test" : { + "properties" : { + "test" : { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + }, + "bar" : { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + } + } + }, + "bar" : { + "type" : "text", + "fields" : { + "keyword" : { + "type" : "keyword" + } + } + } + } +} \ No newline at end of file