SQL: Uniquely named inner_hits sections for each nested field condition (#45341)

* Name each inner_hits section of nested queries differently and extract and combine the multiple values it generates into a single list.
This also introduces a limitation (its origin it's with Elasticsearch
though) on the sorting capabilities when the sorting is based on the
nested fields filtered: only one of the conditions applied to nested
documents will be used in the nested sorting.

(cherry picked from commit cfc5cf68f6e83b07bb9006986d0903d6be418ec6)
This commit is contained in:
Andrei Stefan 2019-08-09 00:22:49 +03:00 committed by GitHub
parent af908efa41
commit 740d58fd46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 128 additions and 24 deletions

View File

@ -297,3 +297,54 @@ SELECT dep.dep_id, dep.dep_name, first_name, emp_no FROM test_emp WHERE emp_no=1
---------------+----------------+---------------+---------------
d007 |Sales |Valter |10099
;
selectNestedFieldsWithTwoNestedConditions
SELECT CONCAT(CONCAT(first_name, ' '), last_name) AS name, dep.dep_name, dep.dep_id, dep.from_date FROM test_emp WHERE dep.dep_name = 'Production' AND dep.from_date > '1990-01-01' AND first_name IS NOT NULL;
name:s | dep.dep_name:s | dep.dep_id:s | dep.from_date:ts
---------------------+------------------+---------------+------------------------
Parto Bamford |Production |d004 |1995-12-03T00:00:00.000Z
Duangkaew Piveteau |Production |d004 |1996-11-24T00:00:00.000Z
Duangkaew Piveteau |Quality Management|d006 |2000-06-26T00:00:00.000Z
Kazuhide Peha |Production |d004 |1992-07-29T00:00:00.000Z
Mayuko Warwick |Production |d004 |1997-12-30T00:00:00.000Z
Suzette Pettey |Production |d004 |1998-06-14T00:00:00.000Z
Yongqiao Berztiss |Production |d004 |1995-03-20T00:00:00.000Z
Otmar Herbst |Production |d004 |1991-09-18T00:00:00.000Z
Otmar Herbst |Quality Management|d006 |1999-07-08T00:00:00.000Z
Mingsen Casley |Production |d004 |1994-05-21T00:00:00.000Z
Moss Shanbhogue |Production |d004 |1996-11-16T00:00:00.000Z
Hidefumi Caine |Production |d004 |1992-10-15T00:00:00.000Z
Margareta Bierman |Production |d004 |1992-06-14T00:00:00.000Z
Tuval Kalloufi |Production |d004 |1995-12-15T00:00:00.000Z
Kenroku Malabarba |Production |d004 |1994-04-09T00:00:00.000Z
Jayson Mandell |Production |d004 |1999-01-23T00:00:00.000Z
Sreekrishna Servieres|Production |d004 |1985-05-13T00:00:00.000Z
Sreekrishna Servieres|Research |d008 |1992-12-11T00:00:00.000Z
Sreekrishna Servieres|Sales |d007 |1993-05-05T00:00:00.000Z
;
selectNestedAndRootDocument_WithTwoNestedConditions_AndOneRootCondition
SELECT last_name AS name, dep.dep_name, dep.dep_id, dep.from_date FROM test_emp WHERE dep.dep_name = 'Production' AND dep.from_date > '1990-01-01' AND last_name LIKE 'M%' ORDER BY last_name DESC;
name:s | dep.dep_name:s | dep.dep_id:s | dep.from_date:ts
---------------+----------------+---------------+------------------------
Mandell |Production |d004 |1999-01-23T00:00:00.000Z
Malabarba |Production |d004 |1994-04-09T00:00:00.000Z
;
selectNestedAndRootDocument_WithMultipleConditions_AndNestedSorting
SELECT CONCAT(CONCAT(first_name, ' '), last_name) AS name, dep.dep_name, dep.dep_id, dep.from_date, dep.to_date FROM test_emp WHERE dep.from_date > '1990-01-01' AND dep.dep_name='Production' AND dep.to_date < '2000-01-01' ORDER BY dep.dep_id, dep.from_date, name;
name:s | dep.dep_name:s | dep.dep_id:s | dep.from_date:ts | dep.to_date:ts
---------------------+------------------+---------------+------------------------+------------------------
Otmar Herbst |Production |d004 |1991-09-18T00:00:00.000Z|1999-07-08T00:00:00.000Z
Otmar Herbst |Quality Management|d006 |1999-07-08T00:00:00.000Z|9999-01-01T00:00:00.000Z
Kazuhide Peha |Production |d004 |1992-07-29T00:00:00.000Z|9999-01-01T00:00:00.000Z
Kazuhide Peha |Development |d005 |1987-04-03T00:00:00.000Z|1992-07-29T00:00:00.000Z
Sreekrishna Servieres|Production |d004 |1985-05-13T00:00:00.000Z|1989-06-29T00:00:00.000Z
Sreekrishna Servieres|Customer Service |d009 |1989-06-29T00:00:00.000Z|1992-12-11T00:00:00.000Z
Sreekrishna Servieres|Research |d008 |1992-12-11T00:00:00.000Z|1993-05-05T00:00:00.000Z
Sreekrishna Servieres|Sales |d007 |1993-05-05T00:00:00.000Z|1994-02-01T00:00:00.000Z
;

View File

@ -11,10 +11,16 @@ import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.execution.search.extractor.HitExtractor;
import org.elasticsearch.xpack.sql.session.Cursor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
@ -22,6 +28,7 @@ import java.util.Set;
*/
class SearchHitRowSet extends ResultRowSet<HitExtractor> {
private final SearchHit[] hits;
private final Map<SearchHit, Map<String, SearchHit[]>> flatInnerHits = new HashMap<>();
private final Cursor cursor;
private final Set<String> innerHits = new LinkedHashSet<>();
private final String innerHit;
@ -60,12 +67,13 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
sz = 0;
for (SearchHit hit : hits) {
Map<String, SearchHit[]> innerHitsPerPath = new HashMap<>(innerHits.size());
for (String ih : innerHits) {
SearchHits sh = hit.getInnerHits().get(ih);
if (sh != null) {
sz += sh.getHits().length;
}
SearchHit[] sh = getAllInnerHits(hit, ih);
innerHitsPerPath.put(ih, sh);
sz += sh.length;
}
flatInnerHits.put(hit, innerHitsPerPath);
}
}
// page size
@ -102,8 +110,8 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
for (int lvl = 0; lvl <= extractorLevel ; lvl++) {
// TODO: add support for multi-nested doc
if (hit != null) {
SearchHits innerHits = hit.getInnerHits().get(innerHit);
sh = innerHits == null ? SearchHits.EMPTY : innerHits.getHits();
SearchHit[] innerHits = flatInnerHits.get(hit).get(innerHit);
sh = innerHits == null ? SearchHits.EMPTY : innerHits;
}
hit = sh[indexPerLevel[lvl]];
}
@ -111,6 +119,47 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
return e.extract(hit);
}
private SearchHit[] getAllInnerHits(SearchHit hit, String path) {
if (hit == null) {
return null;
}
// multiple inner_hits results sections can match the same nested documents, thus we eliminate the duplicates by
// using the offset as the "deduplicator" in a HashMap
HashMap<Integer, SearchHit> lhm = new HashMap<>();
for (Entry<String, SearchHits> entry : hit.getInnerHits().entrySet()) {
int endOfPath = entry.getKey().lastIndexOf('_');
if (endOfPath >= 0 && entry.getKey().substring(0, endOfPath).equals(path)) {
SearchHit[] h = entry.getValue().getHits();
for (int i = 0; i < h.length; i++) {
lhm.put(h[i].getNestedIdentity().getOffset(), h[i]);
}
}
}
// Then sort the resulting List based on the offset of the same inner hit. Each inner_hit match will have an offset value,
// relative to its location in the _source
List<SearchHit> sortedList = new ArrayList<>(lhm.values());
Collections.sort(sortedList, new NestedHitOffsetComparator());
return sortedList.toArray(new SearchHit[sortedList.size()]);
}
private class NestedHitOffsetComparator implements Comparator<SearchHit> {
@Override
public int compare(SearchHit sh1, SearchHit sh2) {
if (sh1 == null && sh2 == null) {
return 0;
} else if (sh1 == null) {
return -1;
} else if (sh2 == null) {
return 1;
}
return Integer.valueOf(sh1.getNestedIdentity().getOffset()).compareTo(Integer.valueOf(sh2.getNestedIdentity().getOffset()));
}
}
@Override
protected boolean doHasCurrent() {
return row < size;
@ -139,8 +188,8 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
// TODO: improve this for multi-nested responses
String path = lvl == 0 ? innerHit : null;
if (path != null) {
SearchHits innerHits = h.getInnerHits().get(path);
sh = innerHits == null ? SearchHits.EMPTY : innerHits.getHits();
SearchHit[] innerHits = flatInnerHits.get(h).get(path);
sh = innerHits == null ? SearchHits.EMPTY : innerHits;
}
}
}

View File

@ -5,13 +5,6 @@
*/
package org.elasticsearch.xpack.sql.querydsl.query;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.index.query.InnerHitBuilder;
import org.elasticsearch.index.query.NestedQueryBuilder;
@ -19,19 +12,25 @@ import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.fetch.StoredFieldsContext;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.sort.NestedSortBuilder;
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.tree.Source;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonList;
import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
/**
* A query to a nested document.
*/
public class NestedQuery extends Query {
private static long COUNTER = 0;
// TODO: make this configurable
private static final int MAX_INNER_HITS = 99;
private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);
@ -93,9 +92,14 @@ public class NestedQuery extends Query {
if (false == sort.getPath().equals(path)) {
return;
}
//TODO: Add all filters in nested sorting when https://github.com/elastic/elasticsearch/issues/33079 is implemented
// Adding multiple filters to sort sections makes sense for nested queries where multiple conditions belong to the same
// nested query. The current functionality creates one nested query for each condition involving a nested field.
QueryBuilder childAsBuilder = child.asBuilder();
if (sort.getFilter() != null && false == sort.getFilter().equals(childAsBuilder)) {
throw new SqlIllegalArgumentException("nested query should have been grouped in one place");
// throw new SqlIllegalArgumentException("nested query should have been grouped in one place");
return;
}
sort.setFilter(childAsBuilder);
}
@ -109,6 +113,7 @@ public class NestedQuery extends Query {
InnerHitBuilder ihb = new InnerHitBuilder();
ihb.setSize(0);
ihb.setSize(MAX_INNER_HITS);
ihb.setName(path + "_" + COUNTER++);
boolean noSourceNeeded = true;
List<String> sourceFields = new ArrayList<>();

View File

@ -7,7 +7,6 @@ package org.elasticsearch.xpack.sql.querydsl.query;
import org.elasticsearch.search.sort.NestedSortBuilder;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.tree.SourceTests;
import org.elasticsearch.xpack.sql.util.StringUtils;
@ -122,11 +121,11 @@ public class NestedQueryTests extends ESTestCase {
assertEquals(q.child().asBuilder(), sort.getFilter());
q.enrichNestedSort(sort);
// But enriching using another query is not
NestedQuery other = new NestedQuery(SourceTests.randomSource(), q.path(), q.fields(),
randomValueOtherThan(q.child(), () -> randomQuery(0)));
Exception e = expectThrows(SqlIllegalArgumentException.class, () -> other.enrichNestedSort(sort));
assertEquals("nested query should have been grouped in one place", e.getMessage());
// But enriching using another query will keep only the first query
Query originalChildQuery = randomValueOtherThan(q.child(), () -> randomQuery(0));
NestedQuery other = new NestedQuery(SourceTests.randomSource(), q.path(), q.fields(), originalChildQuery);
other.enrichNestedSort(sort);
assertEquals(other.child().asBuilder(), originalChildQuery.asBuilder());
}
}