Fix bug where fvh fragments could be loaded from wrong doc (#66142)

This PR fixes a regression where fvh fragments could be loaded from the wrong
document _source.

Some `FragmentsBuilder` implementations contain a `SourceLookup` to load from
_source. The lookup should be positioned to load from the current hit document.
However, since `FragmentsBuilder` are cached and shared across hits, the lookup
is never updated to load from the new documents. This means we accidentally
load _source from a different document.

The regression was introduced in #60179, which started storing `SourceLookup`
on `FragmentsBuilder`.

Fixes #65533.
This commit is contained in:
Julie Tibshirani 2020-12-09 17:52:58 -08:00 committed by GitHub
parent 30f1f5c25f
commit b2d3c3f6f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 136 additions and 61 deletions

View File

@ -5,19 +5,38 @@ setup:
body:
mappings:
"properties":
"id":
"type": "integer"
"title":
"type": "text"
"term_vector": "with_positions_offsets"
"description":
"type": "text"
"term_vector": "with_positions_offsets"
"nested":
"type": "nested"
"properties":
"title":
"type": "text"
"term_vector": "with_positions_offsets"
- do:
index:
index: test
id: 1
body:
id: 1
"title" : "The quick brown fox is brown"
"description" : "The quick pink panther is pink"
- do:
index:
index: test
body:
id: 2
"title" : "The quick blue fox is blue"
"nested":
- "title": "purple octopus"
- "title": "purple fish"
- do:
indices.refresh: {}
@ -43,3 +62,53 @@ setup:
- match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"}
- is_false: hits.hits.0.highlight.description
---
"Highlight multiple documents":
- skip:
version: " - 7.10.1"
reason: Bug fixed in 7.10.2
- do:
search:
rest_total_hits_as_int: true
body:
query:
match:
title: fox
sort: ["id"]
highlight:
type: fvh
fields:
title:
type: fvh
- match: {hits.hits.0.highlight.title.0: "The quick brown <em>fox</em> is brown"}
- is_false: hits.hits.0.highlight.description
- match: {hits.hits.1.highlight.title.0: "The quick blue <em>fox</em> is blue"}
- is_false: hits.hits.1.highlight.description
---
"Highlight multiple nested documents":
- skip:
version: " - 7.10.1"
reason: Bug fixed in 7.10.2
- do:
search:
rest_total_hits_as_int: true
body:
query:
nested:
path: nested
query:
match:
nested.title: purple
inner_hits:
name: nested_hits
highlight:
type: fvh
fields:
nested.title:
type: fvh
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.0.highlight.nested\\.title.0: "<em>purple</em> octopus"}
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.1.highlight.nested\\.title.0: "<em>purple</em> fish"}

View File

@ -41,6 +41,7 @@ import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
import org.elasticsearch.search.lookup.SourceLookup;
import java.io.IOException;
import java.text.BreakIterator;
@ -48,6 +49,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
public class FastVectorHighlighter implements Highlighter {
private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
@ -88,42 +90,15 @@ public class FastVectorHighlighter implements Highlighter {
FieldHighlightEntry entry = cache.fields.get(fieldType);
if (entry == null) {
FragListBuilder fragListBuilder;
BaseFragmentsBuilder fragmentsBuilder;
final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
if (field.fieldOptions().numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ?
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
if (field.fieldOptions().scoreOrdered()) {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder =
new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
}
}
fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(field, fieldType, forceSource);
entry = new FieldHighlightEntry();
if (field.fieldOptions().requireFieldMatch()) {
/*
@ -141,7 +116,7 @@ public class FastVectorHighlighter implements Highlighter {
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
entry.fragListBuilder = fragListBuilder;
entry.fragmentsBuilder = fragmentsBuilder;
entry.fragmentsBuilderSupplier = fragmentsBuilderSupplier;
if (cache.fvh == null) {
// parameters to FVH are not requires since:
// first two booleans are not relevant since they are set on the CustomFieldQuery
@ -160,6 +135,7 @@ public class FastVectorHighlighter implements Highlighter {
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());
String[] fragments;
FragmentsBuilder fragmentsBuilder = entry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());
// a HACK to make highlighter do highlighting, even though its using the single frag list builder
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ?
@ -171,12 +147,12 @@ public class FastVectorHighlighter implements Highlighter {
if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize,
numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(),
numberOfFragments, entry.fragListBuilder, fragmentsBuilder, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
} else {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder,
entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
}
if (CollectionUtils.isEmpty(fragments) == false) {
@ -189,7 +165,7 @@ public class FastVectorHighlighter implements Highlighter {
// the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
if (CollectionUtils.isEmpty(fragments) == false) {
@ -200,6 +176,36 @@ public class FastVectorHighlighter implements Highlighter {
return null;
}
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(SearchHighlightContext.Field field,
MappedFieldType fieldType,
boolean forceSource) {
BoundaryScanner boundaryScanner = getBoundaryScanner(field);
FieldOptions options = field.fieldOptions();
Function<SourceLookup, BaseFragmentsBuilder> supplier;
if (!forceSource && fieldType.isStored()) {
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = ignored -> new ScoreOrderFragmentsBuilder(options.preTags(), options.postTags(), boundaryScanner);
} else {
supplier = ignored -> new SimpleFragmentsBuilder(fieldType,
options.preTags(), options.postTags(), boundaryScanner);
}
} else {
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = lookup -> new SourceScoreOrderFragmentsBuilder(fieldType, lookup,
options.preTags(), options.postTags(), boundaryScanner);
} else {
supplier = lookup -> new SourceSimpleFragmentsBuilder(fieldType, lookup,
options.preTags(), options.postTags(), boundaryScanner);
}
}
return lookup -> {
BaseFragmentsBuilder builder = supplier.apply(lookup);
builder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
return builder;
};
}
@Override
public boolean canHighlight(MappedFieldType ft) {
return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS;
@ -237,7 +243,7 @@ public class FastVectorHighlighter implements Highlighter {
private static class FieldHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;
public Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier;
public FieldQuery noFieldMatchFieldQuery;
public FieldQuery fieldMatchFieldQuery;
}