From b2631f9ac894d7e36bb328a606d20bcab42d6d70 Mon Sep 17 00:00:00 2001
From: Costin Leau <costin@users.noreply.github.com>
Date: Sun, 25 Feb 2018 04:39:48 +0200
Subject: [PATCH] SQL: Extend field resolution to deal with unquoted qualified
 columns (elastic/x-pack-elasticsearch#3894)

* foo.bar can mean either: no table field foo.bar or table foo, field bar
The resolution rule has been extended to include the latter case as
fallback.
* Always check field ambiguity
* Since field with dots can create confusion (when not qualified), the
analyzer always now always checks for both qualified and not-qualified
fields and throws an ambiguity message with the potential candidates.
This forces the use of qualifiers or quotes to better indicate the
desired field.
* Add example of aliasing the table to remove ambiguity

Original commit: elastic/x-pack-elasticsearch@8b70b9c4ee40de22617b69c0156cde5c09e3eb09
---
 .../xpack/sql/analysis/analyzer/Analyzer.java | 44 ++++++++-------
 .../analyzer/FieldAttributeTests.java         | 56 ++++++++++++++++++-
 .../test/resources/mapping-dotted-field.json  | 32 +++++++++++
 3 files changed, 111 insertions(+), 21 deletions(-)
 create mode 100644 plugin/sql/src/test/resources/mapping-dotted-field.json
diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java
index 6fdb9637559..26a2b424c31 100644
--- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java
+++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Analyzer.java
@@ -5,6 +5,7 @@
  */
 package org.elasticsearch.xpack.sql.analysis.analyzer;
 
+import org.elasticsearch.common.util.Comparators;
 import org.elasticsearch.xpack.sql.analysis.AnalysisException;
 import org.elasticsearch.xpack.sql.analysis.analyzer.Verifier.Failure;
 import org.elasticsearch.xpack.sql.analysis.index.IndexResolution;
@@ -49,16 +50,15 @@ import org.elasticsearch.xpack.sql.type.DataType;
 import org.elasticsearch.xpack.sql.type.DataTypeConversion;
 import org.elasticsearch.xpack.sql.type.DataTypes;
 import org.elasticsearch.xpack.sql.type.UnsupportedEsField;
-import org.elasticsearch.xpack.sql.util.StringUtils;
 import org.joda.time.DateTimeZone;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
@@ -144,11 +144,11 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
     }
 
     @SuppressWarnings("unchecked")
-    private static <E extends Expression> E resolveExpression(E expression, LogicalPlan plan, boolean lenient) {
+    private static <E extends Expression> E resolveExpression(E expression, LogicalPlan plan) {
         return (E) expression.transformUp(e -> {
             if (e instanceof UnresolvedAttribute) {
                 UnresolvedAttribute ua = (UnresolvedAttribute) e;
-                Attribute a = resolveAgainstList(ua, plan.output(), lenient);
+                Attribute a = resolveAgainstList(ua, plan.output());
                 return a != null ? a : e;
             }
             return e;
@@ -159,17 +159,21 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
     // Shared methods around the analyzer rules
     //
 
-    private static Attribute resolveAgainstList(UnresolvedAttribute u, List<Attribute> attrList, boolean lenient) {
+    private static Attribute resolveAgainstList(UnresolvedAttribute u, List<Attribute> attrList) {
         List<Attribute> matches = new ArrayList<>();
 
-        // first try the qualified version
+        // first take into account the qualified version
         boolean qualified = u.qualifier() != null;
 
         for (Attribute attribute : attrList) {
             if (!attribute.synthetic()) {
                 boolean match = qualified ?
                         Objects.equals(u.qualifiedName(), attribute.qualifiedName()) :
-                        Objects.equals(u.name(), attribute.name());
+                        // if the field is unqualified
+                        // first check the names directly
+                        (Objects.equals(u.name(), attribute.name())
+                             // but also if the qualifier might not be quoted and if there's any ambiguity with nested fields
+                             || Objects.equals(u.name(), attribute.qualifiedName()));
                 if (match) {
                     matches.add(attribute.withLocation(u.location()));
                 }
@@ -185,15 +189,15 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
             return matches.get(0);
         }
 
-        // too many references - should it be ignored?
-        // TODO: move away from exceptions inside the analyzer
-        if (!lenient) {
-            throw new AnalysisException(u, "Reference %s is ambiguous, matches any of %s", u.nodeString(), matches);
-        }
-
-        return null;
+        return u.withUnresolvedMessage("Reference [" + u.qualifiedName()
+                + "] is ambiguous (to disambiguate use quotes or qualifiers); matches any of " + 
+                 matches.stream()
+                 .map(a -> "\"" + a.qualifier() + "\".\"" + a.name() + "\"")
+                 .sorted()
+                 .collect(toList())
+                );
     }
-
+    
     private static boolean hasStar(List<? extends Expression> exprs) {
         for (Expression expression : exprs) {
             if (expression instanceof UnresolvedStar) {
@@ -304,7 +308,7 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
                     for (int i = 0; i < groupings.size(); i++) {
                         Expression grouping = groupings.get(i);
                         if (grouping instanceof UnresolvedAttribute) {
-                            Attribute maybeResolved = resolveAgainstList((UnresolvedAttribute) grouping, resolved, true);
+                            Attribute maybeResolved = resolveAgainstList((UnresolvedAttribute) grouping, resolved);
                             if (maybeResolved != null) {
                                 changed = true;
                                 // use the matched expression (not its attribute)
@@ -330,7 +334,7 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
                 OrderBy o = (OrderBy) plan;
                 if (!o.resolved()) {
                     List<Order> resolvedOrder = o.order().stream()
-                            .map(or -> resolveExpression(or, o.child(), true))
+                            .map(or -> resolveExpression(or, o.child()))
                             .collect(toList());
                     return new OrderBy(o.location(), o.child(), resolvedOrder);
                 }
@@ -347,7 +351,7 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
                     for (LogicalPlan child : plan.children()) {
                         childrenOutput.addAll(child.output());
                     }
-                    NamedExpression named = resolveAgainstList(u, childrenOutput, false);
+                    NamedExpression named = resolveAgainstList(u, childrenOutput);
                     // if resolved, return it; otherwise keep it in place to be resolved later
                     if (named != null) {
                         // if it's a object/compound type, keep it unresolved with a nice error message
@@ -403,7 +407,7 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
             if (us.qualifier() != null) {
                 // resolve the so-called qualifier first
                 // since this is an unresolved start we don't know whether it's a path or an actual qualifier
-                Attribute q = resolveAgainstList(us.qualifier(), output, false);
+                Attribute q = resolveAgainstList(us.qualifier(), output);
 
                 // now use the resolved 'qualifier' to match
                 for (Attribute attr : output) {
@@ -648,7 +652,7 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
         }
 
         static <E extends Expression> E tryResolveExpression(E exp, LogicalPlan plan) {
-            E resolved = resolveExpression(exp, plan, true);
+            E resolved = resolveExpression(exp, plan);
             if (!resolved.resolved()) {
                 // look at unary trees but ignore subqueries
                 if (plan.children().size() == 1 && !(plan instanceof SubQueryAlias)) {
diff --git a/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java b/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java
index f562a8d5063..73c59f6ef41 100644
--- a/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java
+++ b/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/FieldAttributeTests.java
@@ -57,6 +57,26 @@ public class FieldAttributeTests extends ESTestCase {
     }
 
     private FieldAttribute attribute(String fieldName) {
+        // test multiple version of the attribute name
+        // to make sure all match the same thing
+
+        // NB: the equality is done on the same since each plan bumps the expression counter
+
+        // unqualified
+        FieldAttribute unqualified = parseQueryFor(fieldName);
+        // unquoted qualifier
+        FieldAttribute unquotedQualifier = parseQueryFor("test." + fieldName);
+        assertEquals(unqualified.name(), unquotedQualifier.name());
+        assertEquals(unqualified.qualifiedName(), unquotedQualifier.qualifiedName());
+        // quoted qualifier
+        FieldAttribute quotedQualifier = parseQueryFor("\"test\"." + fieldName);
+        assertEquals(unqualified.name(), quotedQualifier.name());
+        assertEquals(unqualified.qualifiedName(), quotedQualifier.qualifiedName());
+
+        return randomFrom(unqualified, unquotedQualifier, quotedQualifier);
+    }
+
+    private FieldAttribute parseQueryFor(String fieldName) {
         LogicalPlan plan = plan("SELECT " + fieldName + " FROM test");
         assertThat(plan, instanceOf(Project.class));
         Project p = (Project) plan;
@@ -140,4 +160,38 @@ public class FieldAttributeTests extends ESTestCase {
         assertThat(names, not(hasItem("unsupported")));
         assertThat(names, hasItems("bool", "text", "keyword", "int"));
     }
-}
+
+    public void testFieldAmbiguity() {
+        Map<String, EsField> mapping = TypesTests.loadMapping("mapping-dotted-field.json");
+
+        EsIndex index = new EsIndex("test", mapping);
+        getIndexResult = IndexResolution.valid(index);
+        analyzer = new Analyzer(functionRegistry, getIndexResult, DateTimeZone.UTC);
+
+        VerificationException ex = expectThrows(VerificationException.class, () -> plan("SELECT test.bar FROM test"));
+        assertEquals(
+                "Found 1 problem(s)\nline 1:8: Reference [test.bar] is ambiguous (to disambiguate use quotes or qualifiers); "
+                        + "matches any of [\"test\".\"bar\", \"test\".\"test.bar\"]",
+                ex.getMessage());
+
+        ex = expectThrows(VerificationException.class, () -> plan("SELECT test.test FROM test"));
+        assertEquals(
+                "Found 1 problem(s)\nline 1:8: Reference [test.test] is ambiguous (to disambiguate use quotes or qualifiers); "
+                        + "matches any of [\"test\".\"test\", \"test\".\"test.test\"]",
+                ex.getMessage());
+
+        LogicalPlan plan = plan("SELECT test.test FROM test AS x");
+        assertThat(plan, instanceOf(Project.class));
+
+        plan = plan("SELECT \"test\".test.test FROM test");
+        assertThat(plan, instanceOf(Project.class));
+
+        Project p = (Project) plan;
+        List<? extends NamedExpression> projections = p.projections();
+        assertThat(projections, hasSize(1));
+        Attribute attribute = projections.get(0).toAttribute();
+        assertThat(attribute, instanceOf(FieldAttribute.class));
+        assertThat(attribute.qualifier(), is("test"));
+        assertThat(attribute.name(), is("test.test"));
+    }
+}
\ No newline at end of file
diff --git a/plugin/sql/src/test/resources/mapping-dotted-field.json b/plugin/sql/src/test/resources/mapping-dotted-field.json
new file mode 100644
index 00000000000..c48cd5c7706
--- /dev/null
+++ b/plugin/sql/src/test/resources/mapping-dotted-field.json
@@ -0,0 +1,32 @@
+{
+    "properties" : {
+        "test" : {
+            "properties" : {
+                "test" : {
+                    "type" : "text",
+                    "fields" : {
+                        "keyword" : {
+                            "type" : "keyword"
+                        }
+                    }
+                },
+                "bar" : {
+                    "type" : "text",
+                    "fields" : {
+                        "keyword" : {
+                            "type" : "keyword"
+                        }
+                    }
+                }
+            }
+        },
+        "bar" : {
+            "type" : "text",
+            "fields" : {
+                "keyword" : {
+                    "type" : "keyword"
+                }
+            }
+        }
+    }
+}
\ No newline at end of file