Bug fix for AnnotatedTextHighlighter - port of 39525 (#39749)
Bug fix for AnnotatedTextHighlighter - port of 39525. Relates to #39395

commit 1873de5240
parent d094107592
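
Note on the change: issue #39395 is a thread-safety bug. The old AnnotatedHighlighterAnalyzer and AnnotatedTextHighlighter kept the parsed AnnotatedText[] for the hit being highlighted in instance fields (set via init()), and those instances are shared across hits and search threads, so concurrent highlight requests could overwrite each other's state. This commit threads the per-hit HitContext through getAnalyzer()/getPassageFormatter() and stashes the parsed annotations in hitContext.cache() instead. Below is a minimal, self-contained sketch of that pattern; the class, method and key names are illustrative only, not the Elasticsearch types.

import java.util.HashMap;
import java.util.Map;

// Minimal sketch of the per-hit cache pattern applied by this commit.
// Names here are illustrative, not the Elasticsearch API.
public class PerHitCacheSketch {

    // Old shape (roughly): per-hit state lives on a shared component, so two
    // hits highlighted concurrently can overwrite each other's annotations.
    static class SharedState {
        String[] annotations;                 // mutable field, shared across threads
        void init(String[] values) { this.annotations = values; }
    }

    // New shape (roughly): each hit carries its own scratch map, so no
    // per-hit mutable state is left on the shared highlighter/analyzer.
    static class HitContext {
        private final Map<String, Object> cache = new HashMap<>();
        Map<String, Object> cache() { return cache; }
    }

    public static void main(String[] args) {
        HitContext hit = new HitContext();
        // loadFieldValues-equivalent: parse once and store for this hit only
        hit.cache().put("annotations", new String[] { "[A](~MARK0)" });
        // later phases of the same hit read the values back from the same context
        String[] perHit = (String[]) hit.cache().get("annotations");
        System.out.println(perHit[0]);
    }
}
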
AnnotatedTextFieldMapper.java
@@ -54,6 +54,7 @@ import org.elasticsearch.index.mapper.StringFieldType;
 import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
 import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 
 import java.io.IOException;
 import java.io.Reader;

@@ -315,45 +316,12 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
     // When asked to tokenize plain-text versions by the highlighter it tokenizes the
     // original markup form in order to inject annotations.
     public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
-        private Analyzer delegate;
-        private AnnotatedText[] annotations;
-        public AnnotatedHighlighterAnalyzer(Analyzer delegate){
+        private final Analyzer delegate;
+        private final HitContext hitContext;
+        public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
             super(delegate.getReuseStrategy());
             this.delegate = delegate;
-        }
-
-        public void init(String[] markedUpFieldValues) {
-            this.annotations = new AnnotatedText[markedUpFieldValues.length];
-            for (int i = 0; i < markedUpFieldValues.length; i++) {
-                annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
-            }
-        }
-
-        public String [] getPlainTextValuesForHighlighter(){
-            String [] result = new String[annotations.length];
-            for (int i = 0; i < annotations.length; i++) {
-                result[i] = annotations[i].textMinusMarkup;
-            }
-            return result;
-        }
-
-        public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
-            List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
-            int fieldValueOffset =0;
-            for (AnnotatedText fieldValueAnnotations : this.annotations) {
-                //This is called from a highlighter where all of the field values are concatenated
-                // so each annotation offset will need to be adjusted so that it takes into account
-                // the previous values AND the MULTIVAL delimiter
-                for (AnnotationToken token : fieldValueAnnotations.annotations) {
-                    if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
-                        intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
-                            token.endOffset + fieldValueOffset, token.value));
-                    }
-                }
-                //add 1 for the fieldvalue separator character
-                fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
-            }
-            return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+            this.hitContext = hitContext;
         }
 
         @Override

@@ -364,10 +332,11 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
         @Override
         protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
             AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
+            AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
             AtomicInteger readerNum = new AtomicInteger(0);
             return new TokenStreamComponents(r -> {
                 String plainText = readToString(r);
-                AnnotatedText at = this.annotations[readerNum.getAndIncrement()];
+                AnnotatedText at = annotations[readerNum.getAndIncrement()];
                 assert at.textMinusMarkup.equals(plainText);
                 injector.setAnnotations(at);
                 components.getSource().accept(new StringReader(at.textMinusMarkup));

AnnotatedPassageFormatter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.uhighlight.Passage;
 import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.Snippet;
-import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
 
 import java.io.UnsupportedEncodingException;

@@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
 
     public static final String SEARCH_HIT_TYPE = "_hit_term";
     private final Encoder encoder;
-    private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
+    AnnotatedText[] annotations;
 
-    public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
-        this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
+    public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
         this.encoder = encoder;
+        this.annotations = annotations;
     }
 
     static class MarkupPassage {

@@ -158,7 +158,7 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
         int pos;
         int j = 0;
         for (Passage passage : passages) {
-            AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(),
+            AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(),
                 passage.getEndOffset());
             MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);
 

@@ -195,6 +195,27 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
         return snippets;
     }
 
+    public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
+        List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
+        int fieldValueOffset =0;
+        for (AnnotatedText fieldValueAnnotations : this.annotations) {
+            //This is called from a highlighter where all of the field values are concatenated
+            // so each annotation offset will need to be adjusted so that it takes into account
+            // the previous values AND the MULTIVAL delimiter
+            for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
+                AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
+                if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
+                    intersectingAnnotations
+                        .add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset +
+                            fieldValueOffset, token.value));
+                }
+            }
+            //add 1 for the fieldvalue separator character
+            fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
+        }
+        return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+    }
+
     private void append(StringBuilder dest, String content, int start, int end) {
         dest.append(encoder.encodeText(content.substring(start, end)));
     }
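
Note on getIntersectingAnnotations: the highlighter sees all field values concatenated with a single separator character, so annotation offsets recorded per value have to be shifted. For example, if the first value's textMinusMarkup is 7 characters long, an annotation covering offsets 0-3 of the second value is reported as 8-11 against the concatenated string (7 characters plus 1 for the MULTIVAL separator).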

AnnotatedTextHighlighter.java
@@ -25,24 +25,22 @@ import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
 
 public class AnnotatedTextHighlighter extends UnifiedHighlighter {
 
     public static final String NAME = "annotated";
 
-    AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;
-
     @Override
-    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
-        annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
-        return annotatedHighlighterAnalyzer;
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
+        return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
     }
 
     // Convert the marked-up values held on-disk to plain-text versions for highlighting

@@ -51,14 +49,26 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
             throws IOException {
         List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
         String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
-        annotatedHighlighterAnalyzer.init(fieldValuesAsString);
-        return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());
+
+        AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
+        for (int i = 0; i < fieldValuesAsString.length; i++) {
+            annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
+        }
+        // Store the annotations in the hitContext
+        hitContext.cache().put(AnnotatedText.class.getName(), annotations);
+
+        ArrayList<Object> result = new ArrayList<>(annotations.length);
+        for (int i = 0; i < annotations.length; i++) {
+            result.add(annotations[i].textMinusMarkup);
+        }
+        return result;
     }
 
     @Override
-    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
-        return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);
+    protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) {
+        // Retrieve the annotations from the hitContext
+        AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
+        return new AnnotatedPassageFormatter(annotations, encoder);
     }
 
 }

AnnotatedTextHighlighterTests.java
@@ -40,18 +40,20 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
 import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
-import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.Snippet;
 import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
 import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
 import org.elasticsearch.test.ESTestCase;
 
 import java.net.URLEncoder;
 import java.text.BreakIterator;
+import java.util.ArrayList;
 import java.util.Locale;
 
 import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;

@@ -63,13 +65,24 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
                                        Query query, Locale locale, BreakIterator breakIterator,
                                        int noMatchSize, String[] expectedPassages) throws Exception {
 
 
         // Annotated fields wrap the usual analyzer with one that injects extra tokens
         Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
-        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
-        hiliteAnalyzer.init(markedUpInputs);
-        PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder());
-        String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
+        HitContext mockHitContext = new HitContext();
+        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);
+
+        AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
+        for (int i = 0; i < markedUpInputs.length; i++) {
+            annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
+        }
+        mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);
+
+        AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());
+
+        ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
+        for (int i = 0; i < annotations.length; i++) {
+            plainTextForHighlighter.add(annotations[i].textMinusMarkup);
+        }
 
         Directory dir = newDirectory();
         IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);

@@ -94,7 +107,7 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
         iw.close();
         TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
         assertThat(topDocs.totalHits.value, equalTo(1L));
-        String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
+        String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
 
         CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
             passageFormatter, locale,

annotated_text REST test (YAML)
@@ -42,3 +42,77 @@
         body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }
 
   - match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}
+
+---
+"issue 39395 thread safety issue -requires multiple calls to reveal":
+  - skip:
+      version: " - 6.4.99"
+      reason: Annotated text type introduced in 6.5.0
+
+  - do:
+      indices.create:
+        index: annotated
+        body:
+          settings:
+            number_of_shards: "5"
+            number_of_replicas: "0"
+          mappings:
+            properties:
+              my_field:
+                type: annotated_text
+
+  - do:
+      index:
+        index: annotated
+        id: 1
+        body:
+          "my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)"
+  - do:
+      index:
+        index: annotated
+        id: 2
+        body:
+          "my_field" : "[A](~MARK0) [C](~MARK2)"
+        refresh: true
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
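
Note on the added REST test: it creates the index with five shards and then repeats the identical highlighted match_phrase search several times, asserting _shards.failed: 0 after each call. The race in the old shared-state code is timing-dependent, so, as the test name says, multiple calls are needed to give it a realistic chance of surfacing as shard failures.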

FetchSubPhase.java
@@ -74,7 +74,6 @@ public interface FetchSubPhase {
             }
             return cache;
         }
 
     }
 
     /**

UnifiedHighlighter.java
@@ -38,6 +38,7 @@ import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;

@@ -68,12 +69,13 @@ public class UnifiedHighlighter implements Highlighter {
         int numberOfFragments;
         try {
 
-            final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
+            final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
+                hitContext);
             List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
             if (fieldValues.size() == 0) {
                 return null;
             }
-            final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
+            final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder);
             final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
             final CustomUnifiedHighlighter highlighter;
             final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);

@@ -140,14 +142,14 @@ public class UnifiedHighlighter implements Highlighter {
         return null;
     }
 
-    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
+    protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) {
         CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
             field.fieldOptions().postTags()[0], encoder);
         return passageFormatter;
     }
 
 
-    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
         return HighlightUtils.getAnalyzer(docMapper, type);
     }
 