Fix bug where fvh fragments could be loaded from wrong doc (#66142)
This PR fixes a regression where fvh fragments could be loaded from the wrong document _source. Some `FragmentsBuilder` implementations contain a `SourceLookup` to load from _source. The lookup should be positioned to load from the current hit document. However, since `FragmentsBuilder` are cached and shared across hits, the lookup is never updated to load from the new documents. This means we accidentally load _source from a different document. The regression was introduced in #60179, which started storing `SourceLookup` on `FragmentsBuilder`. Fixes #65533.
This commit is contained in:
parent
30f1f5c25f
commit
b2d3c3f6f9
|
@ -1,23 +1,42 @@
|
|||
setup:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
"properties":
|
||||
"title":
|
||||
"type": "text"
|
||||
"term_vector": "with_positions_offsets"
|
||||
"description":
|
||||
"type": "text"
|
||||
"term_vector": "with_positions_offsets"
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
"properties":
|
||||
"id":
|
||||
"type": "integer"
|
||||
"title":
|
||||
"type": "text"
|
||||
"term_vector": "with_positions_offsets"
|
||||
"description":
|
||||
"type": "text"
|
||||
"term_vector": "with_positions_offsets"
|
||||
"nested":
|
||||
"type": "nested"
|
||||
"properties":
|
||||
"title":
|
||||
"type": "text"
|
||||
"term_vector": "with_positions_offsets"
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
id: 1
|
||||
body:
|
||||
"title" : "The quick brown fox is brown"
|
||||
"description" : "The quick pink panther is pink"
|
||||
id: 1
|
||||
"title" : "The quick brown fox is brown"
|
||||
"description" : "The quick pink panther is pink"
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
body:
|
||||
id: 2
|
||||
"title" : "The quick blue fox is blue"
|
||||
"nested":
|
||||
- "title": "purple octopus"
|
||||
- "title": "purple fish"
|
||||
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
|
@ -27,19 +46,69 @@ setup:
|
|||
search:
|
||||
rest_total_hits_as_int: true
|
||||
body:
|
||||
highlight:
|
||||
type: fvh
|
||||
fields:
|
||||
description:
|
||||
type: fvh
|
||||
highlight_query:
|
||||
prefix:
|
||||
description: br
|
||||
title:
|
||||
type: fvh
|
||||
highlight_query:
|
||||
prefix:
|
||||
title: br
|
||||
highlight:
|
||||
type: fvh
|
||||
fields:
|
||||
description:
|
||||
type: fvh
|
||||
highlight_query:
|
||||
prefix:
|
||||
description: br
|
||||
title:
|
||||
type: fvh
|
||||
highlight_query:
|
||||
prefix:
|
||||
title: br
|
||||
|
||||
- match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"}
|
||||
- is_false: hits.hits.0.highlight.description
|
||||
|
||||
---
|
||||
"Highlight multiple documents":
|
||||
- skip:
|
||||
version: " - 7.10.1"
|
||||
reason: Bug fixed in 7.10.2
|
||||
- do:
|
||||
search:
|
||||
rest_total_hits_as_int: true
|
||||
body:
|
||||
query:
|
||||
match:
|
||||
title: fox
|
||||
sort: ["id"]
|
||||
highlight:
|
||||
type: fvh
|
||||
fields:
|
||||
title:
|
||||
type: fvh
|
||||
|
||||
- match: {hits.hits.0.highlight.title.0: "The quick brown <em>fox</em> is brown"}
|
||||
- is_false: hits.hits.0.highlight.description
|
||||
- match: {hits.hits.1.highlight.title.0: "The quick blue <em>fox</em> is blue"}
|
||||
- is_false: hits.hits.1.highlight.description
|
||||
|
||||
---
|
||||
"Highlight multiple nested documents":
|
||||
- skip:
|
||||
version: " - 7.10.1"
|
||||
reason: Bug fixed in 7.10.2
|
||||
- do:
|
||||
search:
|
||||
rest_total_hits_as_int: true
|
||||
body:
|
||||
query:
|
||||
nested:
|
||||
path: nested
|
||||
query:
|
||||
match:
|
||||
nested.title: purple
|
||||
inner_hits:
|
||||
name: nested_hits
|
||||
highlight:
|
||||
type: fvh
|
||||
fields:
|
||||
nested.title:
|
||||
type: fvh
|
||||
|
||||
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.0.highlight.nested\\.title.0: "<em>purple</em> octopus"}
|
||||
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.1.highlight.nested\\.title.0: "<em>purple</em> fish"}
|
||||
|
|
|
@ -41,6 +41,7 @@ import org.elasticsearch.index.mapper.TextSearchInfo;
|
|||
import org.elasticsearch.search.fetch.FetchSubPhase;
|
||||
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
|
||||
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
|
||||
import org.elasticsearch.search.lookup.SourceLookup;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.BreakIterator;
|
||||
|
@ -48,6 +49,7 @@ import java.util.Collections;
|
|||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class FastVectorHighlighter implements Highlighter {
|
||||
private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
|
||||
|
@ -88,42 +90,15 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
FieldHighlightEntry entry = cache.fields.get(fieldType);
|
||||
if (entry == null) {
|
||||
FragListBuilder fragListBuilder;
|
||||
BaseFragmentsBuilder fragmentsBuilder;
|
||||
|
||||
final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
|
||||
if (field.fieldOptions().numberOfFragments() == 0) {
|
||||
fragListBuilder = new SingleFragListBuilder();
|
||||
|
||||
if (!forceSource && fieldType.isStored()) {
|
||||
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
|
||||
field.fieldOptions().postTags(), boundaryScanner);
|
||||
} else {
|
||||
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
|
||||
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
|
||||
}
|
||||
} else {
|
||||
fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ?
|
||||
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
|
||||
if (field.fieldOptions().scoreOrdered()) {
|
||||
if (!forceSource && fieldType.isStored()) {
|
||||
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(),
|
||||
field.fieldOptions().postTags(), boundaryScanner);
|
||||
} else {
|
||||
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldType, hitContext.sourceLookup(),
|
||||
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
|
||||
}
|
||||
} else {
|
||||
if (!forceSource && fieldType.isStored()) {
|
||||
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(),
|
||||
field.fieldOptions().postTags(), boundaryScanner);
|
||||
} else {
|
||||
fragmentsBuilder =
|
||||
new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(),
|
||||
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
|
||||
}
|
||||
}
|
||||
}
|
||||
fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
|
||||
|
||||
Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(field, fieldType, forceSource);
|
||||
|
||||
entry = new FieldHighlightEntry();
|
||||
if (field.fieldOptions().requireFieldMatch()) {
|
||||
/*
|
||||
|
@ -141,7 +116,7 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
|
||||
}
|
||||
entry.fragListBuilder = fragListBuilder;
|
||||
entry.fragmentsBuilder = fragmentsBuilder;
|
||||
entry.fragmentsBuilderSupplier = fragmentsBuilderSupplier;
|
||||
if (cache.fvh == null) {
|
||||
// parameters to FVH are not requires since:
|
||||
// first two booleans are not relevant since they are set on the CustomFieldQuery
|
||||
|
@ -160,6 +135,7 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());
|
||||
|
||||
String[] fragments;
|
||||
FragmentsBuilder fragmentsBuilder = entry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());
|
||||
|
||||
// a HACK to make highlighter do highlighting, even though its using the single frag list builder
|
||||
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ?
|
||||
|
@ -171,12 +147,12 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
|
||||
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
|
||||
fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize,
|
||||
numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(),
|
||||
numberOfFragments, entry.fragListBuilder, fragmentsBuilder, field.fieldOptions().preTags(),
|
||||
field.fieldOptions().postTags(), encoder);
|
||||
} else {
|
||||
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
|
||||
fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder,
|
||||
entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
|
||||
fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
|
||||
}
|
||||
|
||||
if (CollectionUtils.isEmpty(fragments) == false) {
|
||||
|
@ -189,7 +165,7 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
// the normal fragmentsBuilder
|
||||
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
|
||||
fieldFragList.add(0, noMatchSize, Collections.emptyList());
|
||||
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
|
||||
fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
|
||||
fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(),
|
||||
field.fieldOptions().postTags(), encoder);
|
||||
if (CollectionUtils.isEmpty(fragments) == false) {
|
||||
|
@ -200,6 +176,36 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
return null;
|
||||
}
|
||||
|
||||
private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(SearchHighlightContext.Field field,
|
||||
MappedFieldType fieldType,
|
||||
boolean forceSource) {
|
||||
BoundaryScanner boundaryScanner = getBoundaryScanner(field);
|
||||
FieldOptions options = field.fieldOptions();
|
||||
Function<SourceLookup, BaseFragmentsBuilder> supplier;
|
||||
if (!forceSource && fieldType.isStored()) {
|
||||
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
|
||||
supplier = ignored -> new ScoreOrderFragmentsBuilder(options.preTags(), options.postTags(), boundaryScanner);
|
||||
} else {
|
||||
supplier = ignored -> new SimpleFragmentsBuilder(fieldType,
|
||||
options.preTags(), options.postTags(), boundaryScanner);
|
||||
}
|
||||
} else {
|
||||
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
|
||||
supplier = lookup -> new SourceScoreOrderFragmentsBuilder(fieldType, lookup,
|
||||
options.preTags(), options.postTags(), boundaryScanner);
|
||||
} else {
|
||||
supplier = lookup -> new SourceSimpleFragmentsBuilder(fieldType, lookup,
|
||||
options.preTags(), options.postTags(), boundaryScanner);
|
||||
}
|
||||
}
|
||||
|
||||
return lookup -> {
|
||||
BaseFragmentsBuilder builder = supplier.apply(lookup);
|
||||
builder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
|
||||
return builder;
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canHighlight(MappedFieldType ft) {
|
||||
return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS;
|
||||
|
@ -237,7 +243,7 @@ public class FastVectorHighlighter implements Highlighter {
|
|||
|
||||
private static class FieldHighlightEntry {
|
||||
public FragListBuilder fragListBuilder;
|
||||
public FragmentsBuilder fragmentsBuilder;
|
||||
public Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier;
|
||||
public FieldQuery noFieldMatchFieldQuery;
|
||||
public FieldQuery fieldMatchFieldQuery;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue