SQL: Uniquely named inner_hits sections for each nested field condition (#45341)
* Name each inner_hits section of nested queries differently, then extract and combine the multiple values these sections generate into a single list. This also introduces a limitation (one that originates in Elasticsearch itself) on sorting when the sort is based on the filtered nested fields: only one of the conditions applied to nested documents will be used in the nested sorting. (cherry picked from commit cfc5cf68f6e83b07bb9006986d0903d6be418ec6)
This commit is contained in:
parent
af908efa41
commit
740d58fd46
|
@ -297,3 +297,54 @@ SELECT dep.dep_id, dep.dep_name, first_name, emp_no FROM test_emp WHERE emp_no=1
|
|||
---------------+----------------+---------------+---------------
|
||||
d007 |Sales |Valter |10099
|
||||
;
|
||||
|
||||
selectNestedFieldsWithTwoNestedConditions
|
||||
SELECT CONCAT(CONCAT(first_name, ' '), last_name) AS name, dep.dep_name, dep.dep_id, dep.from_date FROM test_emp WHERE dep.dep_name = 'Production' AND dep.from_date > '1990-01-01' AND first_name IS NOT NULL;
|
||||
|
||||
|
||||
name:s | dep.dep_name:s | dep.dep_id:s | dep.from_date:ts
|
||||
---------------------+------------------+---------------+------------------------
|
||||
Parto Bamford |Production |d004 |1995-12-03T00:00:00.000Z
|
||||
Duangkaew Piveteau |Production |d004 |1996-11-24T00:00:00.000Z
|
||||
Duangkaew Piveteau |Quality Management|d006 |2000-06-26T00:00:00.000Z
|
||||
Kazuhide Peha |Production |d004 |1992-07-29T00:00:00.000Z
|
||||
Mayuko Warwick |Production |d004 |1997-12-30T00:00:00.000Z
|
||||
Suzette Pettey |Production |d004 |1998-06-14T00:00:00.000Z
|
||||
Yongqiao Berztiss |Production |d004 |1995-03-20T00:00:00.000Z
|
||||
Otmar Herbst |Production |d004 |1991-09-18T00:00:00.000Z
|
||||
Otmar Herbst |Quality Management|d006 |1999-07-08T00:00:00.000Z
|
||||
Mingsen Casley |Production |d004 |1994-05-21T00:00:00.000Z
|
||||
Moss Shanbhogue |Production |d004 |1996-11-16T00:00:00.000Z
|
||||
Hidefumi Caine |Production |d004 |1992-10-15T00:00:00.000Z
|
||||
Margareta Bierman |Production |d004 |1992-06-14T00:00:00.000Z
|
||||
Tuval Kalloufi |Production |d004 |1995-12-15T00:00:00.000Z
|
||||
Kenroku Malabarba |Production |d004 |1994-04-09T00:00:00.000Z
|
||||
Jayson Mandell |Production |d004 |1999-01-23T00:00:00.000Z
|
||||
Sreekrishna Servieres|Production |d004 |1985-05-13T00:00:00.000Z
|
||||
Sreekrishna Servieres|Research |d008 |1992-12-11T00:00:00.000Z
|
||||
Sreekrishna Servieres|Sales |d007 |1993-05-05T00:00:00.000Z
|
||||
;
|
||||
|
||||
selectNestedAndRootDocument_WithTwoNestedConditions_AndOneRootCondition
|
||||
SELECT last_name AS name, dep.dep_name, dep.dep_id, dep.from_date FROM test_emp WHERE dep.dep_name = 'Production' AND dep.from_date > '1990-01-01' AND last_name LIKE 'M%' ORDER BY last_name DESC;
|
||||
|
||||
name:s | dep.dep_name:s | dep.dep_id:s | dep.from_date:ts
|
||||
---------------+----------------+---------------+------------------------
|
||||
Mandell |Production |d004 |1999-01-23T00:00:00.000Z
|
||||
Malabarba |Production |d004 |1994-04-09T00:00:00.000Z
|
||||
;
|
||||
|
||||
selectNestedAndRootDocument_WithMultipleConditions_AndNestedSorting
|
||||
SELECT CONCAT(CONCAT(first_name, ' '), last_name) AS name, dep.dep_name, dep.dep_id, dep.from_date, dep.to_date FROM test_emp WHERE dep.from_date > '1990-01-01' AND dep.dep_name='Production' AND dep.to_date < '2000-01-01' ORDER BY dep.dep_id, dep.from_date, name;
|
||||
|
||||
name:s | dep.dep_name:s | dep.dep_id:s | dep.from_date:ts | dep.to_date:ts
|
||||
---------------------+------------------+---------------+------------------------+------------------------
|
||||
Otmar Herbst |Production |d004 |1991-09-18T00:00:00.000Z|1999-07-08T00:00:00.000Z
|
||||
Otmar Herbst |Quality Management|d006 |1999-07-08T00:00:00.000Z|9999-01-01T00:00:00.000Z
|
||||
Kazuhide Peha |Production |d004 |1992-07-29T00:00:00.000Z|9999-01-01T00:00:00.000Z
|
||||
Kazuhide Peha |Development |d005 |1987-04-03T00:00:00.000Z|1992-07-29T00:00:00.000Z
|
||||
Sreekrishna Servieres|Production |d004 |1985-05-13T00:00:00.000Z|1989-06-29T00:00:00.000Z
|
||||
Sreekrishna Servieres|Customer Service |d009 |1989-06-29T00:00:00.000Z|1992-12-11T00:00:00.000Z
|
||||
Sreekrishna Servieres|Research |d008 |1992-12-11T00:00:00.000Z|1993-05-05T00:00:00.000Z
|
||||
Sreekrishna Servieres|Sales |d007 |1993-05-05T00:00:00.000Z|1994-02-01T00:00:00.000Z
|
||||
;
|
||||
|
|
|
@ -11,10 +11,16 @@ import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
|
|||
import org.elasticsearch.xpack.sql.execution.search.extractor.HitExtractor;
|
||||
import org.elasticsearch.xpack.sql.session.Cursor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
|
@ -22,6 +28,7 @@ import java.util.Set;
|
|||
*/
|
||||
class SearchHitRowSet extends ResultRowSet<HitExtractor> {
|
||||
private final SearchHit[] hits;
|
||||
private final Map<SearchHit, Map<String, SearchHit[]>> flatInnerHits = new HashMap<>();
|
||||
private final Cursor cursor;
|
||||
private final Set<String> innerHits = new LinkedHashSet<>();
|
||||
private final String innerHit;
|
||||
|
@ -60,12 +67,13 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
|
|||
|
||||
sz = 0;
|
||||
for (SearchHit hit : hits) {
|
||||
Map<String, SearchHit[]> innerHitsPerPath = new HashMap<>(innerHits.size());
|
||||
for (String ih : innerHits) {
|
||||
SearchHits sh = hit.getInnerHits().get(ih);
|
||||
if (sh != null) {
|
||||
sz += sh.getHits().length;
|
||||
}
|
||||
SearchHit[] sh = getAllInnerHits(hit, ih);
|
||||
innerHitsPerPath.put(ih, sh);
|
||||
sz += sh.length;
|
||||
}
|
||||
flatInnerHits.put(hit, innerHitsPerPath);
|
||||
}
|
||||
}
|
||||
// page size
|
||||
|
@ -102,8 +110,8 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
|
|||
for (int lvl = 0; lvl <= extractorLevel ; lvl++) {
|
||||
// TODO: add support for multi-nested doc
|
||||
if (hit != null) {
|
||||
SearchHits innerHits = hit.getInnerHits().get(innerHit);
|
||||
sh = innerHits == null ? SearchHits.EMPTY : innerHits.getHits();
|
||||
SearchHit[] innerHits = flatInnerHits.get(hit).get(innerHit);
|
||||
sh = innerHits == null ? SearchHits.EMPTY : innerHits;
|
||||
}
|
||||
hit = sh[indexPerLevel[lvl]];
|
||||
}
|
||||
|
@ -111,6 +119,47 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
|
|||
return e.extract(hit);
|
||||
}
|
||||
|
||||
private SearchHit[] getAllInnerHits(SearchHit hit, String path) {
|
||||
if (hit == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// multiple inner_hits results sections can match the same nested documents, thus we eliminate the duplicates by
|
||||
// using the offset as the "deduplicator" in a HashMap
|
||||
HashMap<Integer, SearchHit> lhm = new HashMap<>();
|
||||
for (Entry<String, SearchHits> entry : hit.getInnerHits().entrySet()) {
|
||||
int endOfPath = entry.getKey().lastIndexOf('_');
|
||||
if (endOfPath >= 0 && entry.getKey().substring(0, endOfPath).equals(path)) {
|
||||
SearchHit[] h = entry.getValue().getHits();
|
||||
for (int i = 0; i < h.length; i++) {
|
||||
lhm.put(h[i].getNestedIdentity().getOffset(), h[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Then sort the resulting List based on the offset of the same inner hit. Each inner_hit match will have an offset value,
|
||||
// relative to its location in the _source
|
||||
List<SearchHit> sortedList = new ArrayList<>(lhm.values());
|
||||
Collections.sort(sortedList, new NestedHitOffsetComparator());
|
||||
|
||||
return sortedList.toArray(new SearchHit[sortedList.size()]);
|
||||
}
|
||||
|
||||
private class NestedHitOffsetComparator implements Comparator<SearchHit> {
|
||||
@Override
|
||||
public int compare(SearchHit sh1, SearchHit sh2) {
|
||||
if (sh1 == null && sh2 == null) {
|
||||
return 0;
|
||||
} else if (sh1 == null) {
|
||||
return -1;
|
||||
} else if (sh2 == null) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return Integer.valueOf(sh1.getNestedIdentity().getOffset()).compareTo(Integer.valueOf(sh2.getNestedIdentity().getOffset()));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doHasCurrent() {
|
||||
return row < size;
|
||||
|
@ -139,8 +188,8 @@ class SearchHitRowSet extends ResultRowSet<HitExtractor> {
|
|||
// TODO: improve this for multi-nested responses
|
||||
String path = lvl == 0 ? innerHit : null;
|
||||
if (path != null) {
|
||||
SearchHits innerHits = h.getInnerHits().get(path);
|
||||
sh = innerHits == null ? SearchHits.EMPTY : innerHits.getHits();
|
||||
SearchHit[] innerHits = flatInnerHits.get(h).get(path);
|
||||
sh = innerHits == null ? SearchHits.EMPTY : innerHits;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,13 +5,6 @@
|
|||
*/
|
||||
package org.elasticsearch.xpack.sql.querydsl.query;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.search.join.ScoreMode;
|
||||
import org.elasticsearch.index.query.InnerHitBuilder;
|
||||
import org.elasticsearch.index.query.NestedQueryBuilder;
|
||||
|
@ -19,19 +12,25 @@ import org.elasticsearch.index.query.QueryBuilder;
|
|||
import org.elasticsearch.search.fetch.StoredFieldsContext;
|
||||
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
|
||||
import org.elasticsearch.search.sort.NestedSortBuilder;
|
||||
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
|
||||
import org.elasticsearch.xpack.sql.tree.Source;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import static java.util.Collections.emptyMap;
|
||||
import static java.util.Collections.singletonList;
|
||||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
|
||||
|
||||
/**
|
||||
* A query to a nested document.
|
||||
*/
|
||||
public class NestedQuery extends Query {
|
||||
private static long COUNTER = 0;
|
||||
// TODO: make this configurable
|
||||
private static final int MAX_INNER_HITS = 99;
|
||||
private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);
|
||||
|
@ -93,9 +92,14 @@ public class NestedQuery extends Query {
|
|||
if (false == sort.getPath().equals(path)) {
|
||||
return;
|
||||
}
|
||||
|
||||
//TODO: Add all filters in nested sorting when https://github.com/elastic/elasticsearch/issues/33079 is implemented
|
||||
// Adding multiple filters to sort sections makes sense for nested queries where multiple conditions belong to the same
|
||||
// nested query. The current functionality creates one nested query for each condition involving a nested field.
|
||||
QueryBuilder childAsBuilder = child.asBuilder();
|
||||
if (sort.getFilter() != null && false == sort.getFilter().equals(childAsBuilder)) {
|
||||
throw new SqlIllegalArgumentException("nested query should have been grouped in one place");
|
||||
// throw new SqlIllegalArgumentException("nested query should have been grouped in one place");
|
||||
return;
|
||||
}
|
||||
sort.setFilter(childAsBuilder);
|
||||
}
|
||||
|
@ -109,6 +113,7 @@ public class NestedQuery extends Query {
|
|||
InnerHitBuilder ihb = new InnerHitBuilder();
|
||||
ihb.setSize(0);
|
||||
ihb.setSize(MAX_INNER_HITS);
|
||||
ihb.setName(path + "_" + COUNTER++);
|
||||
|
||||
boolean noSourceNeeded = true;
|
||||
List<String> sourceFields = new ArrayList<>();
|
||||
|
|
|
@ -7,7 +7,6 @@ package org.elasticsearch.xpack.sql.querydsl.query;
|
|||
|
||||
import org.elasticsearch.search.sort.NestedSortBuilder;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
|
||||
import org.elasticsearch.xpack.sql.tree.Source;
|
||||
import org.elasticsearch.xpack.sql.tree.SourceTests;
|
||||
import org.elasticsearch.xpack.sql.util.StringUtils;
|
||||
|
@ -122,11 +121,11 @@ public class NestedQueryTests extends ESTestCase {
|
|||
assertEquals(q.child().asBuilder(), sort.getFilter());
|
||||
q.enrichNestedSort(sort);
|
||||
|
||||
// But enriching using another query is not
|
||||
NestedQuery other = new NestedQuery(SourceTests.randomSource(), q.path(), q.fields(),
|
||||
randomValueOtherThan(q.child(), () -> randomQuery(0)));
|
||||
Exception e = expectThrows(SqlIllegalArgumentException.class, () -> other.enrichNestedSort(sort));
|
||||
assertEquals("nested query should have been grouped in one place", e.getMessage());
|
||||
// But enriching using another query will keep only the first query
|
||||
Query originalChildQuery = randomValueOtherThan(q.child(), () -> randomQuery(0));
|
||||
NestedQuery other = new NestedQuery(SourceTests.randomSource(), q.path(), q.fields(), originalChildQuery);
|
||||
other.enrichNestedSort(sort);
|
||||
assertEquals(other.child().asBuilder(), originalChildQuery.asBuilder());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue