SQL: Uniquely named inner_hits sections for each nested field condition (#45341)

* Name each inner_hits section of nested queries differently and extract and combine the multiple values it generates into a single list. This also introduces a limitation (its origin it's with Elasticsearch though) on the sorting capabilities when the sorting is based on the nested fields filtered: only one of the conditions applied to nested documents will be used in the nested sorting. (cherry picked from commit cfc5cf68f6e83b07bb9006986d0903d6be418ec6)
2019-08-09 00:22:49 +03:00 · 2019-08-09 00:22:49 +03:00 · 740d58fd46
parent af908efa41
commit 740d58fd46
4 changed files with 128 additions and 24 deletions
--- a/x-pack/plugin/sql/qa/src/main/resources/nested.csv-spec
+++ b/x-pack/plugin/sql/qa/src/main/resources/nested.csv-spec
@ -297,3 +297,54 @@ SELECT dep.dep_id, dep.dep_name, first_name, emp_no FROM test_emp WHERE emp_no=1
 ---------------+----------------+---------------+---------------
 d007           |Sales           |Valter         |10099
 ;
+
+selectNestedFieldsWithTwoNestedConditions
+SELECT CONCAT(CONCAT(first_name, ' '), last_name) AS name, dep.dep_name, dep.dep_id, dep.from_date FROM test_emp WHERE dep.dep_name = 'Production' AND dep.from_date > '1990-01-01' AND first_name IS NOT NULL;
+
+
+      name:s         |  dep.dep_name:s  | dep.dep_id:s  |    dep.from_date:ts      
+---------------------+------------------+---------------+------------------------
+Parto Bamford        |Production        |d004           |1995-12-03T00:00:00.000Z
+Duangkaew Piveteau   |Production        |d004           |1996-11-24T00:00:00.000Z
+Duangkaew Piveteau   |Quality Management|d006           |2000-06-26T00:00:00.000Z
+Kazuhide Peha        |Production        |d004           |1992-07-29T00:00:00.000Z
+Mayuko Warwick       |Production        |d004           |1997-12-30T00:00:00.000Z
+Suzette Pettey       |Production        |d004           |1998-06-14T00:00:00.000Z
+Yongqiao Berztiss    |Production        |d004           |1995-03-20T00:00:00.000Z
+Otmar Herbst         |Production        |d004           |1991-09-18T00:00:00.000Z
+Otmar Herbst         |Quality Management|d006           |1999-07-08T00:00:00.000Z
+Mingsen Casley       |Production        |d004           |1994-05-21T00:00:00.000Z
+Moss Shanbhogue      |Production        |d004           |1996-11-16T00:00:00.000Z
+Hidefumi Caine       |Production        |d004           |1992-10-15T00:00:00.000Z
+Margareta Bierman    |Production        |d004           |1992-06-14T00:00:00.000Z
+Tuval Kalloufi       |Production        |d004           |1995-12-15T00:00:00.000Z
+Kenroku Malabarba    |Production        |d004           |1994-04-09T00:00:00.000Z
+Jayson Mandell       |Production        |d004           |1999-01-23T00:00:00.000Z
+Sreekrishna Servieres|Production        |d004           |1985-05-13T00:00:00.000Z
+Sreekrishna Servieres|Research          |d008           |1992-12-11T00:00:00.000Z
+Sreekrishna Servieres|Sales             |d007           |1993-05-05T00:00:00.000Z
+;
+
+selectNestedAndRootDocument_WithTwoNestedConditions_AndOneRootCondition
+SELECT last_name AS name, dep.dep_name, dep.dep_id, dep.from_date FROM test_emp WHERE dep.dep_name = 'Production' AND dep.from_date > '1990-01-01' AND last_name LIKE 'M%' ORDER BY last_name DESC;
+
+    name:s     | dep.dep_name:s | dep.dep_id:s  |    dep.from_date:ts      
+---------------+----------------+---------------+------------------------
+Mandell        |Production      |d004           |1999-01-23T00:00:00.000Z
+Malabarba      |Production      |d004           |1994-04-09T00:00:00.000Z
+;
+
+selectNestedAndRootDocument_WithMultipleConditions_AndNestedSorting
+SELECT CONCAT(CONCAT(first_name, ' '), last_name) AS name, dep.dep_name, dep.dep_id, dep.from_date, dep.to_date FROM test_emp WHERE dep.from_date > '1990-01-01' AND dep.dep_name='Production' AND dep.to_date < '2000-01-01' ORDER BY dep.dep_id, dep.from_date, name;
+
+       name:s        |  dep.dep_name:s  | dep.dep_id:s  |    dep.from_date:ts    |    dep.to_date:ts       
+---------------------+------------------+---------------+------------------------+------------------------
+Otmar Herbst         |Production        |d004           |1991-09-18T00:00:00.000Z|1999-07-08T00:00:00.000Z
+Otmar Herbst         |Quality Management|d006           |1999-07-08T00:00:00.000Z|9999-01-01T00:00:00.000Z
+Kazuhide Peha        |Production        |d004           |1992-07-29T00:00:00.000Z|9999-01-01T00:00:00.000Z
+Kazuhide Peha        |Development       |d005           |1987-04-03T00:00:00.000Z|1992-07-29T00:00:00.000Z
+Sreekrishna Servieres|Production        |d004           |1985-05-13T00:00:00.000Z|1989-06-29T00:00:00.000Z
+Sreekrishna Servieres|Customer Service  |d009           |1989-06-29T00:00:00.000Z|1992-12-11T00:00:00.000Z
+Sreekrishna Servieres|Research          |d008           |1992-12-11T00:00:00.000Z|1993-05-05T00:00:00.000Z
+Sreekrishna Servieres|Sales             |d007           |1993-05-05T00:00:00.000Z|1994-02-01T00:00:00.000Z
+;
--- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SearchHitRowSet.java
+++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SearchHitRowSet.java
@ -11,10 +11,16 @@ import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
 import org.elasticsearch.xpack.sql.execution.search.extractor.HitExtractor;
 import org.elasticsearch.xpack.sql.session.Cursor;

+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;

 /**
@ -22,6 +28,7 @@ import java.util.Set;
 */
 class SearchHitRowSet extends ResultRowSet<HitExtractor> {
    private final SearchHit[] hits;
+    private final Map<SearchHit, Map<String, SearchHit[]>> flatInnerHits = new HashMap<>();
    private final Cursor cursor;
    private final Set<String> innerHits = new LinkedHashSet<>();
    private final String innerHit;
@ -60,12 +67,13 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {

            sz = 0;
            for (SearchHit hit : hits) {
+                Map<String, SearchHit[]> innerHitsPerPath = new HashMap<>(innerHits.size());
                for (String ih : innerHits) {
-                    SearchHits sh = hit.getInnerHits().get(ih);
-                    if (sh != null) {
-                        sz += sh.getHits().length;
-                    }
+                    SearchHit[] sh = getAllInnerHits(hit, ih);
+                    innerHitsPerPath.put(ih, sh);
+                    sz += sh.length;
                }
+                flatInnerHits.put(hit, innerHitsPerPath);
            }
        }
        // page size
@ -102,8 +110,8 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
        for (int lvl = 0; lvl <= extractorLevel ; lvl++) {
            // TODO: add support for multi-nested doc
            if (hit != null) {
-                SearchHits innerHits = hit.getInnerHits().get(innerHit);
-                sh = innerHits == null ? SearchHits.EMPTY : innerHits.getHits();
+                SearchHit[] innerHits = flatInnerHits.get(hit).get(innerHit);
+                sh = innerHits == null ? SearchHits.EMPTY : innerHits;
            }
            hit = sh[indexPerLevel[lvl]];
        }
@ -111,6 +119,47 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
        return e.extract(hit);
    }

+    private SearchHit[] getAllInnerHits(SearchHit hit, String path) {
+        if (hit == null) {
+            return null;
+        }
+        
+        // multiple inner_hits results sections can match the same nested documents, thus we eliminate the duplicates by
+        // using the offset as the "deduplicator" in a HashMap
+        HashMap<Integer, SearchHit> lhm = new HashMap<>();
+        for (Entry<String, SearchHits> entry : hit.getInnerHits().entrySet()) {
+            int endOfPath = entry.getKey().lastIndexOf('_');
+            if (endOfPath >= 0 && entry.getKey().substring(0, endOfPath).equals(path)) {
+                SearchHit[] h = entry.getValue().getHits();
+                for (int i = 0; i < h.length; i++) {
+                    lhm.put(h[i].getNestedIdentity().getOffset(), h[i]);
+                }
+            }
+        }
+
+        // Then sort the resulting List based on the offset of the same inner hit. Each inner_hit match will have an offset value,
+        // relative to its location in the _source
+        List<SearchHit> sortedList = new ArrayList<>(lhm.values());
+        Collections.sort(sortedList, new NestedHitOffsetComparator());
+
+        return sortedList.toArray(new SearchHit[sortedList.size()]);
+    }
+    
+    private class NestedHitOffsetComparator implements Comparator<SearchHit> {
+        @Override
+        public int compare(SearchHit sh1, SearchHit sh2) {
+            if (sh1 == null && sh2 == null) {
+                return 0;
+            } else if (sh1 == null) {
+                return -1;
+            } else if (sh2 == null) {
+                return 1;
+            }
+
+            return Integer.valueOf(sh1.getNestedIdentity().getOffset()).compareTo(Integer.valueOf(sh2.getNestedIdentity().getOffset()));
+        }
+    }
+
    @Override
    protected boolean doHasCurrent() {
        return row < size;
@ -139,8 +188,8 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
                    // TODO: improve this for multi-nested responses
                    String path = lvl == 0 ? innerHit : null;
                    if (path != null) {
-                        SearchHits innerHits = h.getInnerHits().get(path);
-                        sh = innerHits == null ? SearchHits.EMPTY : innerHits.getHits();
+                        SearchHit[] innerHits = flatInnerHits.get(h).get(path);
+                        sh = innerHits == null ? SearchHits.EMPTY : innerHits;
                    }
                }
            }
--- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/NestedQuery.java
+++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/NestedQuery.java
@ -5,13 +5,6 @@
 */
 package org.elasticsearch.xpack.sql.querydsl.query;

-import java.util.AbstractMap;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-
 import org.apache.lucene.search.join.ScoreMode;
 import org.elasticsearch.index.query.InnerHitBuilder;
 import org.elasticsearch.index.query.NestedQueryBuilder;
@ -19,19 +12,25 @@ import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.search.fetch.StoredFieldsContext;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.search.sort.NestedSortBuilder;
-import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
 import org.elasticsearch.xpack.sql.tree.Source;

+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.singletonList;
 import static java.util.Collections.unmodifiableMap;
-
 import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;

 /**
 * A query to a nested document.
 */
 public class NestedQuery extends Query {
+    private static long COUNTER = 0;
    // TODO: make this configurable
    private static final int MAX_INNER_HITS = 99;
    private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);
@ -93,9 +92,14 @@ public class NestedQuery extends Query {
        if (false == sort.getPath().equals(path)) {
            return;
        }
+
+        //TODO: Add all filters in nested sorting when https://github.com/elastic/elasticsearch/issues/33079 is implemented
+        // Adding multiple filters to sort sections makes sense for nested queries where multiple conditions belong to the same
+        // nested query. The current functionality creates one nested query for each condition involving a nested field.
        QueryBuilder childAsBuilder = child.asBuilder();
        if (sort.getFilter() != null && false == sort.getFilter().equals(childAsBuilder)) {
-            throw new SqlIllegalArgumentException("nested query should have been grouped in one place");
+            // throw new SqlIllegalArgumentException("nested query should have been grouped in one place");
+            return;
        }
        sort.setFilter(childAsBuilder);
    }
@ -109,6 +113,7 @@ public class NestedQuery extends Query {
            InnerHitBuilder ihb = new InnerHitBuilder();
            ihb.setSize(0);
            ihb.setSize(MAX_INNER_HITS);
+            ihb.setName(path + "_" + COUNTER++);

            boolean noSourceNeeded = true;
            List<String> sourceFields = new ArrayList<>();
--- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/querydsl/query/NestedQueryTests.java
+++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/querydsl/query/NestedQueryTests.java
@ -7,7 +7,6 @@ package org.elasticsearch.xpack.sql.querydsl.query;

 import org.elasticsearch.search.sort.NestedSortBuilder;
 import org.elasticsearch.test.ESTestCase;
-import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
 import org.elasticsearch.xpack.sql.tree.Source;
 import org.elasticsearch.xpack.sql.tree.SourceTests;
 import org.elasticsearch.xpack.sql.util.StringUtils;
@ -122,11 +121,11 @@ public class NestedQueryTests extends ESTestCase {
            assertEquals(q.child().asBuilder(), sort.getFilter());
            q.enrichNestedSort(sort);

-            // But enriching using another query is not
-            NestedQuery other = new NestedQuery(SourceTests.randomSource(), q.path(), q.fields(),
-                randomValueOtherThan(q.child(), () -> randomQuery(0)));
-            Exception e = expectThrows(SqlIllegalArgumentException.class, () -> other.enrichNestedSort(sort));
-            assertEquals("nested query should have been grouped in one place", e.getMessage());
+            // But enriching using another query will keep only the first query
+            Query originalChildQuery = randomValueOtherThan(q.child(), () -> randomQuery(0));
+            NestedQuery other = new NestedQuery(SourceTests.randomSource(), q.path(), q.fields(), originalChildQuery);
+            other.enrichNestedSort(sort);
+            assertEquals(other.child().asBuilder(), originalChildQuery.asBuilder());
        }
    }