From b2d3c3f6f92ead92c515f4bb406d42c1a0b47392 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 9 Dec 2020 17:52:58 -0800 Subject: [PATCH] Fix bug where fvh fragments could be loaded from wrong doc (#66142) This PR fixes a regression where fvh fragments could be loaded from the wrong document _source. Some `FragmentsBuilder` implementations contain a `SourceLookup` to load from _source. The lookup should be positioned to load from the current hit document. However, since `FragmentsBuilder` are cached and shared across hits, the lookup is never updated to load from the new documents. This means we accidentally load _source from a different document. The regression was introduced in #60179, which started storing `SourceLookup` on `FragmentsBuilder`. Fixes #65533. --- .../test/search.highlight/20_fvh.yml | 121 ++++++++++++++---- .../highlight/FastVectorHighlighter.java | 76 ++++++----- 2 files changed, 136 insertions(+), 61 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/20_fvh.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/20_fvh.yml index d411f55106d..e9f5c195092 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/20_fvh.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/20_fvh.yml @@ -1,23 +1,42 @@ setup: - do: indices.create: - index: test - body: - mappings: - "properties": - "title": - "type": "text" - "term_vector": "with_positions_offsets" - "description": - "type": "text" - "term_vector": "with_positions_offsets" + index: test + body: + mappings: + "properties": + "id": + "type": "integer" + "title": + "type": "text" + "term_vector": "with_positions_offsets" + "description": + "type": "text" + "term_vector": "with_positions_offsets" + "nested": + "type": "nested" + "properties": + "title": + "type": "text" + "term_vector": "with_positions_offsets" - do: index: index: test - id: 1 body: - "title" : "The quick brown fox is brown" - "description" : "The quick pink panther is pink" + id: 1 + "title" : "The quick brown fox is brown" + "description" : "The quick pink panther is pink" + + - do: + index: + index: test + body: + id: 2 + "title" : "The quick blue fox is blue" + "nested": + - "title": "purple octopus" + - "title": "purple fish" + - do: indices.refresh: {} @@ -27,19 +46,69 @@ setup: search: rest_total_hits_as_int: true body: - highlight: - type: fvh - fields: - description: - type: fvh - highlight_query: - prefix: - description: br - title: - type: fvh - highlight_query: - prefix: - title: br + highlight: + type: fvh + fields: + description: + type: fvh + highlight_query: + prefix: + description: br + title: + type: fvh + highlight_query: + prefix: + title: br - match: {hits.hits.0.highlight.title.0: "The quick brown fox is brown"} - is_false: hits.hits.0.highlight.description + +--- +"Highlight multiple documents": + - skip: + version: " - 7.10.1" + reason: Bug fixed in 7.10.2 + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + title: fox + sort: ["id"] + highlight: + type: fvh + fields: + title: + type: fvh + + - match: {hits.hits.0.highlight.title.0: "The quick brown fox is brown"} + - is_false: hits.hits.0.highlight.description + - match: {hits.hits.1.highlight.title.0: "The quick blue fox is blue"} + - is_false: hits.hits.1.highlight.description + +--- +"Highlight multiple nested documents": + - skip: + version: " - 7.10.1" + reason: Bug fixed in 7.10.2 + - do: + search: + rest_total_hits_as_int: true + body: + query: + nested: + path: nested + query: + match: + nested.title: purple + inner_hits: + name: nested_hits + highlight: + type: fvh + fields: + nested.title: + type: fvh + + - match: {hits.hits.0.inner_hits.nested_hits.hits.hits.0.highlight.nested\\.title.0: "purple octopus"} + - match: {hits.hits.0.inner_hits.nested_hits.hits.hits.1.highlight.nested\\.title.0: "purple fish"} diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java index a130e1f92f5..3cfaa88126a 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/FastVectorHighlighter.java @@ -41,6 +41,7 @@ import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions; +import org.elasticsearch.search.lookup.SourceLookup; import java.io.IOException; import java.text.BreakIterator; @@ -48,6 +49,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Locale; import java.util.Map; +import java.util.function.Function; public class FastVectorHighlighter implements Highlighter { private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner(); @@ -88,42 +90,15 @@ public class FastVectorHighlighter implements Highlighter { FieldHighlightEntry entry = cache.fields.get(fieldType); if (entry == null) { FragListBuilder fragListBuilder; - BaseFragmentsBuilder fragmentsBuilder; - - final BoundaryScanner boundaryScanner = getBoundaryScanner(field); if (field.fieldOptions().numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); - - if (!forceSource && fieldType.isStored()) { - fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(), - field.fieldOptions().postTags(), boundaryScanner); - } else { - fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(), - field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); - } } else { fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ? new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset()); - if (field.fieldOptions().scoreOrdered()) { - if (!forceSource && fieldType.isStored()) { - fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(), - field.fieldOptions().postTags(), boundaryScanner); - } else { - fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldType, hitContext.sourceLookup(), - field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); - } - } else { - if (!forceSource && fieldType.isStored()) { - fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, field.fieldOptions().preTags(), - field.fieldOptions().postTags(), boundaryScanner); - } else { - fragmentsBuilder = - new SourceSimpleFragmentsBuilder(fieldType, hitContext.sourceLookup(), - field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); - } - } } - fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue); + + Function fragmentsBuilderSupplier = fragmentsBuilderSupplier(field, fieldType, forceSource); + entry = new FieldHighlightEntry(); if (field.fieldOptions().requireFieldMatch()) { /* @@ -141,7 +116,7 @@ public class FastVectorHighlighter implements Highlighter { hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); } entry.fragListBuilder = fragListBuilder; - entry.fragmentsBuilder = fragmentsBuilder; + entry.fragmentsBuilderSupplier = fragmentsBuilderSupplier; if (cache.fvh == null) { // parameters to FVH are not requires since: // first two booleans are not relevant since they are set on the CustomFieldQuery @@ -160,6 +135,7 @@ public class FastVectorHighlighter implements Highlighter { cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit()); String[] fragments; + FragmentsBuilder fragmentsBuilder = entry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup()); // a HACK to make highlighter do highlighting, even though its using the single frag list builder int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? @@ -171,12 +147,12 @@ public class FastVectorHighlighter implements Highlighter { if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) { fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize, - numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), + numberOfFragments, entry.fragListBuilder, fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); } else { fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, - entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); + fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); } if (CollectionUtils.isEmpty(fragments) == false) { @@ -189,7 +165,7 @@ public class FastVectorHighlighter implements Highlighter { // the normal fragmentsBuilder FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/); fieldFragList.add(0, noMatchSize, Collections.emptyList()); - fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), + fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); if (CollectionUtils.isEmpty(fragments) == false) { @@ -200,6 +176,36 @@ public class FastVectorHighlighter implements Highlighter { return null; } + private Function fragmentsBuilderSupplier(SearchHighlightContext.Field field, + MappedFieldType fieldType, + boolean forceSource) { + BoundaryScanner boundaryScanner = getBoundaryScanner(field); + FieldOptions options = field.fieldOptions(); + Function supplier; + if (!forceSource && fieldType.isStored()) { + if (options.numberOfFragments() != 0 && options.scoreOrdered()) { + supplier = ignored -> new ScoreOrderFragmentsBuilder(options.preTags(), options.postTags(), boundaryScanner); + } else { + supplier = ignored -> new SimpleFragmentsBuilder(fieldType, + options.preTags(), options.postTags(), boundaryScanner); + } + } else { + if (options.numberOfFragments() != 0 && options.scoreOrdered()) { + supplier = lookup -> new SourceScoreOrderFragmentsBuilder(fieldType, lookup, + options.preTags(), options.postTags(), boundaryScanner); + } else { + supplier = lookup -> new SourceSimpleFragmentsBuilder(fieldType, lookup, + options.preTags(), options.postTags(), boundaryScanner); + } + } + + return lookup -> { + BaseFragmentsBuilder builder = supplier.apply(lookup); + builder.setDiscreteMultiValueHighlighting(termVectorMultiValue); + return builder; + }; + } + @Override public boolean canHighlight(MappedFieldType ft) { return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS; @@ -237,7 +243,7 @@ public class FastVectorHighlighter implements Highlighter { private static class FieldHighlightEntry { public FragListBuilder fragListBuilder; - public FragmentsBuilder fragmentsBuilder; + public Function fragmentsBuilderSupplier; public FieldQuery noFieldMatchFieldQuery; public FieldQuery fieldMatchFieldQuery; }