Add second level of field collapsing (#31808)

* Put second level collapse under inner_hits

Closes #24855
This commit is contained in:
Mayya Sharipova 2018-07-13 11:40:03 -04:00 committed by GitHub
parent f9791cf158
commit 80492cacfc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 296 additions and 4 deletions

View File

@ -116,3 +116,105 @@ The default is based on the number of data nodes and the default search thread p
WARNING: `collapse` cannot be used in conjunction with <<search-request-scroll, scroll>>,
<<search-request-rescore, rescore>> or <<search-request-search-after, search after>>.
==== Second level of collapsing
Second level of collapsing is also supported and is applied to `inner_hits`.
For example, the following request finds the top scored tweets for
each country, and within each country finds the top scored tweets
for each user.
[source,js]
--------------------------------------------------
GET /twitter/_search
{
"query": {
"match": {
"message": "elasticsearch"
}
},
"collapse" : {
"field" : "country",
"inner_hits" : {
"name": "by_location",
"collapse" : {"field" : "user"},
"size": 3
}
}
}
--------------------------------------------------
// NOTCONSOLE
Response:
[source,js]
--------------------------------------------------
{
...
"hits": [
{
"_index": "twitter",
"_type": "_doc",
"_id": "9",
"_score": ...,
"_source": {...},
"fields": {"country": ["UK"]},
"inner_hits":{
"by_location": {
"hits": {
...,
"hits": [
{
...
"fields": {"user" : ["user124"]}
},
{
...
"fields": {"user" : ["user589"]}
},
{
...
"fields": {"user" : ["user001"]}
}
]
}
}
}
},
{
"_index": "twitter",
"_type": "_doc",
"_id": "1",
"_score": ...,
"_source": {...},
"fields": {"country": ["Canada"]},
"inner_hits":{
"by_location": {
"hits": {
...,
"hits": [
{
...
"fields": {"user" : ["user444"]}
},
{
...
"fields": {"user" : ["user1111"]}
},
{
...
"fields": {"user" : ["user999"]}
}
]
}
}
}
},
...
]
}
--------------------------------------------------
// NOTCONSOLE
NOTE: Second level of collapsing doesn't allow `inner_hits`.

View File

@ -0,0 +1,141 @@
---
"two levels fields collapsing":
# Skipped on every version up to and including 6.99.99.
- skip:
version: " - 6.99.99"
reason: using multiple field collapsing from 7.0 on
# Create the `addresses` index on a single shard. `country` and `city` are
# keyword fields (the collapse keys used by the searches below); `address`
# is the text field that the queries match against.
- do:
indices.create:
index: addresses
body:
settings:
number_of_shards: 1
number_of_replicas: 1
mappings:
_doc:
properties:
country: {"type": "keyword"}
city: {"type": "keyword"}
address: {"type": "text"}
# Index 8 documents and refresh so they are searchable immediately:
#   Canada: id 1 (Saskatoon), ids 2, 3, 4 (Toronto)
#   UK:     ids 5, 6, 8 (London), id 7 (Manchester)
# Every address contains "Victoria".
- do:
bulk:
refresh: true
body:
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "1" } }'
- '{"country" : "Canada", "city" : "Saskatoon", "address" : "701 Victoria Avenue" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "2" } }'
- '{"country" : "Canada", "city" : "Toronto", "address" : "74 Victoria Street, Suite, 74 Victoria Street, Suite 300" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "3" } }'
- '{"country" : "Canada", "city" : "Toronto", "address" : "350 Victoria St" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "4" } }'
- '{"country" : "Canada", "city" : "Toronto", "address" : "20 Victoria Street" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "5" } }'
- '{"country" : "UK", "city" : "London", "address" : "58 Victoria Street" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "6" } }'
- '{"country" : "UK", "city" : "London", "address" : "Victoria Street Victoria Palace Theatre" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "7" } }'
- '{"country" : "UK", "city" : "Manchester", "address" : "75 Victoria street Westminster" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "8" } }'
- '{"country" : "UK", "city" : "London", "address" : "Victoria Station Victoria Arcade" }'
# ************* error if internal collapse contains inner_hits
# The second-level collapse carries an `inner_hits` key in addition to `field`;
# the request is expected to fail with a parse_exception.
- do:
catch: /parse_exception/
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
collapse:
field : city
inner_hits: {}
# ************* error if internal collapse contains another collapse
# The second-level collapse carries a further nested `collapse` key (a third
# level); the request is expected to fail with a parse_exception.
- do:
catch: /parse_exception/
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
collapse:
field : city
collapse: { field: city }
# ************* top scored
# Collapse the top-level hits by country and, inside each country's
# `by_location` inner hits, collapse again by city. No sort is given here.
- do:
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
name: by_location
size: 3
collapse:
field : city
# All 8 documents match; the top level is collapsed to one hit per country.
- match: { hits.total: 8 }
- length: { hits.hits: 2 }
# First group: UK. The inner total counts all 4 UK documents (before collapsing by city).
- match: { hits.hits.0.fields.country: ["UK"] }
- match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "8" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["London"] }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "7" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["Manchester"] }
# Second group: Canada. The inner total counts all 4 Canadian documents.
- match: { hits.hits.1.fields.country: ["Canada"] }
- match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "1" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Saskatoon"] }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "3" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Toronto"] }
# ************* sorted
# Same two-level collapse as the previous search, but the inner hits are
# sorted by city in descending order.
- do:
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
name: by_location
size: 3
sort: [{ "city": "desc" }]
collapse:
field : city
# Top-level expectations are the same as for the previous search.
- match: { hits.total: 8 }
- length: { hits.hits: 2 }
# UK: with city descending, Manchester is expected before London.
- match: { hits.hits.0.fields.country: ["UK"] }
- match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "7" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["Manchester"] }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "5" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["London"] }
# Canada: with city descending, Toronto is expected before Saskatoon.
- match: { hits.hits.1.fields.country: ["Canada"] }
- match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "2" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Toronto"] }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "1" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Saskatoon"] }

View File

@ -87,7 +87,8 @@ final class ExpandSearchPhase extends SearchPhase {
groupQuery.must(origQuery);
}
for (InnerHitBuilder innerHitBuilder : innerHitBuilders) {
SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder)
CollapseBuilder innerCollapseBuilder = innerHitBuilder.getInnerCollapseBuilder();
SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder, innerCollapseBuilder)
.query(groupQuery)
.postFilter(searchRequest.source().postFilter());
SearchRequest groupRequest = buildExpandSearchRequest(searchRequest, sourceBuilder);
@ -135,7 +136,7 @@ final class ExpandSearchPhase extends SearchPhase {
return groupRequest;
}
private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options) {
private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options, CollapseBuilder innerCollapseBuilder) {
SearchSourceBuilder groupSource = new SearchSourceBuilder();
groupSource.from(options.getFrom());
groupSource.size(options.getSize());
@ -167,6 +168,9 @@ final class ExpandSearchPhase extends SearchPhase {
groupSource.explain(options.isExplain());
groupSource.trackScores(options.isTrackScores());
groupSource.version(options.isVersion());
if (innerCollapseBuilder != null) {
groupSource.collapse(innerCollapseBuilder);
}
return groupSource;
}
}

View File

@ -37,6 +37,7 @@ import org.elasticsearch.search.fetch.subphase.DocValueFieldsContext.FieldAndFor
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.search.collapse.CollapseBuilder;
import java.io.IOException;
import java.util.ArrayList;
@ -55,6 +56,8 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
public static final ParseField NAME_FIELD = new ParseField("name");
public static final ParseField IGNORE_UNMAPPED = new ParseField("ignore_unmapped");
public static final QueryBuilder DEFAULT_INNER_HIT_QUERY = new MatchAllQueryBuilder();
public static final ParseField COLLAPSE_FIELD = new ParseField("collapse");
public static final ParseField FIELD_FIELD = new ParseField("field");
private static final ObjectParser<InnerHitBuilder, Void> PARSER = new ObjectParser<>("inner_hits", InnerHitBuilder::new);
@ -91,6 +94,28 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
}, SearchSourceBuilder._SOURCE_FIELD, ObjectParser.ValueType.OBJECT_ARRAY_BOOLEAN_OR_STRING);
PARSER.declareObject(InnerHitBuilder::setHighlightBuilder, (p, c) -> HighlightBuilder.fromXContent(p),
SearchSourceBuilder.HIGHLIGHT_FIELD);
// Parses the inner (second-level) collapse. The only accepted shape is an object
// holding a single string-valued "field" entry, i.e. {"field": "<name>"}; anything
// else (extra keys, wrong value type, missing field) raises a ParsingException.
PARSER.declareField((parser, builder, context) -> {
    // Short-circuit evaluation advances the parser only as far as the input keeps matching.
    final boolean hasFieldValue = parser.currentToken() == XContentParser.Token.START_OBJECT
        && parser.nextToken() == XContentParser.Token.FIELD_NAME
        && FIELD_FIELD.match(parser.currentName(), parser.getDeprecationHandler())
        && parser.nextToken() == XContentParser.Token.VALUE_STRING;

    String collapseField = null;
    boolean objectClosed = false;
    if (hasFieldValue) {
        collapseField = parser.text();
        // The object must end right after the field value; no further keys are allowed.
        objectClosed = parser.nextToken() == XContentParser.Token.END_OBJECT;
    }

    if (objectClosed == false) {
        throw new ParsingException(parser.getTokenLocation(), "Invalid token in the inner collapse");
    }
    builder.setInnerCollapse(new CollapseBuilder(collapseField));
}, COLLAPSE_FIELD, ObjectParser.ValueType.OBJECT);
}
private String name;
@ -109,6 +134,7 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
private Set<ScriptField> scriptFields;
private HighlightBuilder highlightBuilder;
private FetchSourceContext fetchSourceContext;
private CollapseBuilder innerCollapseBuilder = null;
public InnerHitBuilder() {
this.name = null;
@ -173,6 +199,9 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
boolean hasChildren = in.readBoolean();
assert hasChildren == false;
}
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
this.innerCollapseBuilder = in.readOptionalWriteable(CollapseBuilder::new);
}
}
@Override
@ -218,6 +247,9 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
}
}
out.writeOptionalWriteable(highlightBuilder);
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeOptionalWriteable(innerCollapseBuilder);
}
}
/**
@ -501,6 +533,15 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
return query;
}
/**
 * Sets the collapse definition stored on this inner hit (the second level of collapsing).
 *
 * @param innerCollapseBuilder the collapse builder to store; the value is stored as given,
 *                             without validation
 * @return this builder, so that calls can be chained
 */
public InnerHitBuilder setInnerCollapse(CollapseBuilder innerCollapseBuilder) {
this.innerCollapseBuilder = innerCollapseBuilder;
return this;
}
/**
 * Returns the collapse definition stored on this inner hit.
 *
 * @return the stored collapse builder; the field is initialised to {@code null},
 *         so {@code null} is returned when no inner collapse has been set
 */
public CollapseBuilder getInnerCollapseBuilder() {
return innerCollapseBuilder;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
@ -550,6 +591,9 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
if (highlightBuilder != null) {
builder.field(SearchSourceBuilder.HIGHLIGHT_FIELD.getPreferredName(), highlightBuilder, params);
}
if (innerCollapseBuilder != null) {
builder.field(COLLAPSE_FIELD.getPreferredName(), innerCollapseBuilder);
}
builder.endObject();
return builder;
}
@ -572,13 +616,14 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
Objects.equals(scriptFields, that.scriptFields) &&
Objects.equals(fetchSourceContext, that.fetchSourceContext) &&
Objects.equals(sorts, that.sorts) &&
Objects.equals(highlightBuilder, that.highlightBuilder);
Objects.equals(highlightBuilder, that.highlightBuilder) &&
Objects.equals(innerCollapseBuilder, that.innerCollapseBuilder);
}
@Override
public int hashCode() {
return Objects.hash(name, ignoreUnmapped, from, size, explain, version, trackScores,
storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder);
storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder, innerCollapseBuilder);
}
public static InnerHitBuilder fromXContent(XContentParser parser) throws IOException {