Fix bug where fvh fragments could be loaded from wrong doc (#66142)

This PR fixes a regression where fvh fragments could be loaded from the wrong
document _source.

Some `FragmentsBuilder` implementations contain a `SourceLookup` to load from
_source. The lookup should be positioned to load from the current hit document.
However, since `FragmentsBuilder` are cached and shared across hits, the lookup
is never updated to load from the new documents. This means we accidentally
load _source from a different document.

The regression was introduced in #60179, which started storing `SourceLookup`
on `FragmentsBuilder`.

Fixes #65533.
This commit is contained in:
Julie Tibshirani 2020-12-09 17:52:58 -08:00 committed by GitHub
parent 30f1f5c25f
commit b2d3c3f6f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 136 additions and 61 deletions

View File

@ -1,23 +1,42 @@
setup: setup:
- do: - do:
indices.create: indices.create:
index: test index: test
body: body:
mappings: mappings:
"properties": "properties":
"title": "id":
"type": "text" "type": "integer"
"term_vector": "with_positions_offsets" "title":
"description": "type": "text"
"type": "text" "term_vector": "with_positions_offsets"
"term_vector": "with_positions_offsets" "description":
"type": "text"
"term_vector": "with_positions_offsets"
"nested":
"type": "nested"
"properties":
"title":
"type": "text"
"term_vector": "with_positions_offsets"
- do: - do:
index: index:
index: test index: test
id: 1
body: body:
"title" : "The quick brown fox is brown" id: 1
"description" : "The quick pink panther is pink" "title" : "The quick brown fox is brown"
"description" : "The quick pink panther is pink"
- do:
index:
index: test
body:
id: 2
"title" : "The quick blue fox is blue"
"nested":
- "title": "purple octopus"
- "title": "purple fish"
- do: - do:
indices.refresh: {} indices.refresh: {}
@ -27,19 +46,69 @@ setup:
search: search:
rest_total_hits_as_int: true rest_total_hits_as_int: true
body: body:
highlight: highlight:
type: fvh type: fvh
fields: fields:
description: description:
type: fvh type: fvh
highlight_query: highlight_query:
prefix: prefix:
description: br description: br
title: title:
type: fvh type: fvh
highlight_query: highlight_query:
prefix: prefix:
title: br title: br
- match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"} - match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"}
- is_false: hits.hits.0.highlight.description - is_false: hits.hits.0.highlight.description
---
"Highlight multiple documents":
- skip:
version: " - 7.10.1"
reason: Bug fixed in 7.10.2
- do:
search:
rest_total_hits_as_int: true
body:
query:
match:
title: fox
sort: ["id"]
highlight:
type: fvh
fields:
title:
type: fvh
- match: {hits.hits.0.highlight.title.0: "The quick brown <em>fox</em> is brown"}
- is_false: hits.hits.0.highlight.description
- match: {hits.hits.1.highlight.title.0: "The quick blue <em>fox</em> is blue"}
- is_false: hits.hits.1.highlight.description
---
"Highlight multiple nested documents":
- skip:
version: " - 7.10.1"
reason: Bug fixed in 7.10.2
- do:
search:
rest_total_hits_as_int: true
body:
query:
nested:
path: nested
query:
match:
nested.title: purple
inner_hits:
name: nested_hits
highlight:
type: fvh
fields:
nested.title:
type: fvh
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.0.highlight.nested\\.title.0: "<em>purple</em> octopus"}
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.1.highlight.nested\\.title.0: "<em>purple</em> fish"}

View File

@ -41,6 +41,7 @@ import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
import org.elasticsearch.search.lookup.SourceLookup;
import java.io.IOException; import java.io.IOException;
import java.text.BreakIterator; import java.text.BreakIterator;
@ -48,6 +49,7 @@ import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.function.Function;
public class FastVectorHighlighter implements Highlighter { public class FastVectorHighlighter implements Highlighter {
private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner(); private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
@ -88,42 +90,15 @@ public class FastVectorHighlighter implements Highlighter {
FieldHighlightEntry entry = cache.fields.get(fieldType); FieldHighlightEntry entry = cache.fields.get(fieldType);
if (entry == null) { if (entry == null) {
FragListBuilder fragListBuilder; FragListBuilder fragListBuilder;
BaseFragmentsBuilder fragmentsBuilder;
final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
if (field.fieldOptions().numberOfFragments() == 0) { if (field.fieldOptions().numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder(); fragListBuilder = new SingleFragListBuilder();
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else { } else {
fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ? fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ?
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset()); new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
if (field.fieldOptions().scoreOrdered()) {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder =
new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
}
} }
fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(field, fieldType, forceSource);
entry = new FieldHighlightEntry(); entry = new FieldHighlightEntry();
if (field.fieldOptions().requireFieldMatch()) { if (field.fieldOptions().requireFieldMatch()) {
/* /*
@ -141,7 +116,7 @@ public class FastVectorHighlighter implements Highlighter {
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
} }
entry.fragListBuilder = fragListBuilder; entry.fragListBuilder = fragListBuilder;
entry.fragmentsBuilder = fragmentsBuilder; entry.fragmentsBuilderSupplier = fragmentsBuilderSupplier;
if (cache.fvh == null) { if (cache.fvh == null) {
// parameters to FVH are not requires since: // parameters to FVH are not requires since:
// first two booleans are not relevant since they are set on the CustomFieldQuery // first two booleans are not relevant since they are set on the CustomFieldQuery
@ -160,6 +135,7 @@ public class FastVectorHighlighter implements Highlighter {
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit()); cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());
String[] fragments; String[] fragments;
FragmentsBuilder fragmentsBuilder = entry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());
// a HACK to make highlighter do highlighting, even though its using the single frag list builder // a HACK to make highlighter do highlighting, even though its using the single frag list builder
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ?
@ -171,12 +147,12 @@ public class FastVectorHighlighter implements Highlighter {
if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) { if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize, fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize,
numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), numberOfFragments, entry.fragListBuilder, fragmentsBuilder, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder); field.fieldOptions().postTags(), encoder);
} else { } else {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder,
entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
} }
if (CollectionUtils.isEmpty(fragments) == false) { if (CollectionUtils.isEmpty(fragments) == false) {
@ -189,7 +165,7 @@ public class FastVectorHighlighter implements Highlighter {
// the normal fragmentsBuilder // the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/); FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.emptyList()); fieldFragList.add(0, noMatchSize, Collections.emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(), fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder); field.fieldOptions().postTags(), encoder);
if (CollectionUtils.isEmpty(fragments) == false) { if (CollectionUtils.isEmpty(fragments) == false) {
@ -200,6 +176,36 @@ public class FastVectorHighlighter implements Highlighter {
return null; return null;
} }
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(SearchHighlightContext.Field field,
MappedFieldType fieldType,
boolean forceSource) {
BoundaryScanner boundaryScanner = getBoundaryScanner(field);
FieldOptions options = field.fieldOptions();
Function<SourceLookup, BaseFragmentsBuilder> supplier;
if (!forceSource && fieldType.isStored()) {
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = ignored -> new ScoreOrderFragmentsBuilder(options.preTags(), options.postTags(), boundaryScanner);
} else {
supplier = ignored -> new SimpleFragmentsBuilder(fieldType,
options.preTags(), options.postTags(), boundaryScanner);
}
} else {
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = lookup -> new SourceScoreOrderFragmentsBuilder(fieldType, lookup,
options.preTags(), options.postTags(), boundaryScanner);
} else {
supplier = lookup -> new SourceSimpleFragmentsBuilder(fieldType, lookup,
options.preTags(), options.postTags(), boundaryScanner);
}
}
return lookup -> {
BaseFragmentsBuilder builder = supplier.apply(lookup);
builder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
return builder;
};
}
@Override @Override
public boolean canHighlight(MappedFieldType ft) { public boolean canHighlight(MappedFieldType ft) {
return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS; return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS;
@ -237,7 +243,7 @@ public class FastVectorHighlighter implements Highlighter {
private static class FieldHighlightEntry { private static class FieldHighlightEntry {
public FragListBuilder fragListBuilder; public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder; public Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier;
public FieldQuery noFieldMatchFieldQuery; public FieldQuery noFieldMatchFieldQuery;
public FieldQuery fieldMatchFieldQuery; public FieldQuery fieldMatchFieldQuery;
} }