Bug fix for AnnotatedTextHighlighter - port of 39525 (#39749)

Bug fix for AnnotatedTextHighlighter - port of 39525 Relates to #39395
2019-03-06 19:02:04 +00:00 · 2019-03-06 19:02:04 +00:00 · 1873de5240
parent d094107592
commit 1873de5240
7 changed files with 153 additions and 65 deletions
--- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java
+++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java
@ -54,6 +54,7 @@ import org.elasticsearch.index.mapper.StringFieldType;
 import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
 import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

 import java.io.IOException;
 import java.io.Reader;
@ -315,45 +316,12 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
    // When asked to tokenize plain-text versions by the highlighter it tokenizes the
    // original markup form in order to inject annotations.
    public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
-        private Analyzer delegate;
-        private AnnotatedText[] annotations;
-        public AnnotatedHighlighterAnalyzer(Analyzer delegate){
+        private final Analyzer delegate;
+        private final HitContext hitContext;
+        public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
            super(delegate.getReuseStrategy());
            this.delegate = delegate;
-        }
-
-        public void init(String[] markedUpFieldValues) {
-            this.annotations = new AnnotatedText[markedUpFieldValues.length];
-            for (int i = 0; i < markedUpFieldValues.length; i++) {
-                annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
-            }
-        }
-
-        public String []  getPlainTextValuesForHighlighter(){
-            String [] result = new String[annotations.length];
-            for (int i = 0; i < annotations.length; i++) {
-                result[i] = annotations[i].textMinusMarkup;
-            }
-            return result;
-        }
-
-        public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
-            List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
-            int fieldValueOffset =0;
-            for (AnnotatedText fieldValueAnnotations : this.annotations) {
-                //This is called from a highlighter where all of the field values are concatenated
-                // so each annotation offset will need to be adjusted so that it takes into account
-                // the previous values AND the MULTIVAL delimiter
-                for (AnnotationToken token : fieldValueAnnotations.annotations) {
-                    if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
-                        intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
-                                token.endOffset + fieldValueOffset, token.value));
-                    }
-                }
-                //add 1 for the fieldvalue separator character
-                fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
-            }
-            return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+            this.hitContext = hitContext;
        }

        @Override
@ -364,10 +332,11 @@ public class AnnotatedTextFieldMapper extends FieldMapper {
        @Override
        protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
            AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
+            AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
            AtomicInteger readerNum = new AtomicInteger(0);
            return new TokenStreamComponents(r -> {
                String plainText = readToString(r);
-                AnnotatedText at = this.annotations[readerNum.getAndIncrement()];
+                AnnotatedText at = annotations[readerNum.getAndIncrement()];
                assert at.textMinusMarkup.equals(plainText);
                injector.setAnnotations(at);
                components.getSource().accept(new StringReader(at.textMinusMarkup));
--- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java
+++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java
@ -23,7 +23,7 @@ import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.uhighlight.Passage;
 import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.Snippet;
-import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;

 import java.io.UnsupportedEncodingException;
@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {

    public static final String SEARCH_HIT_TYPE = "_hit_term";
    private final Encoder encoder;
-    private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
+    AnnotatedText[] annotations;

-    public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
-        this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
+    public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
        this.encoder = encoder;
+        this.annotations = annotations;
    }

    static class MarkupPassage {
@ -158,7 +158,7 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
        int pos;
        int j = 0;
        for (Passage passage : passages) {
-            AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(), 
+            AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(), 
                    passage.getEndOffset());            
            MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);
            
@ -194,6 +194,27 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
        }                    
        return snippets;
    }
+    
+    public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
+        List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
+        int fieldValueOffset =0;
+        for (AnnotatedText fieldValueAnnotations : this.annotations) {
+            //This is called from a highlighter where all of the field values are concatenated
+            // so each annotation offset will need to be adjusted so that it takes into account 
+            // the previous values AND the MULTIVAL delimiter
+            for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
+                AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
+                if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
+                    intersectingAnnotations
+                            .add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset + 
+                                    fieldValueOffset, token.value));
+                }
+            }
+            //add 1 for the fieldvalue separator character
+            fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
+        }
+        return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+    }     

    private void append(StringBuilder dest, String content, int start, int end) {
        dest.append(encoder.encodeText(content.substring(start, end)));
--- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java
+++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java
@ -25,24 +25,22 @@ import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
 import org.elasticsearch.search.internal.SearchContext;

 import java.io.IOException;
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;

 public class AnnotatedTextHighlighter extends UnifiedHighlighter {
    
    public static final String NAME = "annotated";
-
-    AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;    
    
    @Override
-    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
-        annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
-        return annotatedHighlighterAnalyzer;
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
+        return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
    }

    // Convert the marked-up values held on-disk to plain-text versions for highlighting
@ -51,14 +49,26 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
            throws IOException {
        List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
        String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
-        annotatedHighlighterAnalyzer.init(fieldValuesAsString);
-        return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());
+        
+        AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
+        for (int i = 0; i < fieldValuesAsString.length; i++) {
+            annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
+        }
+        // Store the annotations in the hitContext
+        hitContext.cache().put(AnnotatedText.class.getName(), annotations);
+        
+        ArrayList<Object> result = new ArrayList<>(annotations.length);
+        for (int i = 0; i < annotations.length; i++) {
+            result.add(annotations[i].textMinusMarkup);
+        }
+        return result;
    }

    @Override
-    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
-        return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);
-
+    protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) {
+        // Retrieve the annotations from the hitContext
+        AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
+        return new AnnotatedPassageFormatter(annotations, encoder);
    }

 }
--- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java
+++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java
@ -40,18 +40,20 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
 import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
-import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.Snippet;
 import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
 import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
 import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
 import org.elasticsearch.test.ESTestCase;

 import java.net.URLEncoder;
 import java.text.BreakIterator;
+import java.util.ArrayList;
 import java.util.Locale;

 import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
@ -63,13 +65,24 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
            Query query, Locale locale, BreakIterator breakIterator,
            int noMatchSize, String[] expectedPassages) throws Exception {
        
+        
        // Annotated fields wrap the usual analyzer with one that injects extra tokens
        Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
-        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
-        hiliteAnalyzer.init(markedUpInputs);
-        PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder());
-        String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
+        HitContext mockHitContext = new HitContext();
+        AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);
+        
+        AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
+        for (int i = 0; i < markedUpInputs.length; i++) {
+            annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
+        }
+        mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);

+        AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());
+        
+        ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
+        for (int i = 0; i < annotations.length; i++) {
+            plainTextForHighlighter.add(annotations[i].textMinusMarkup);
+        }        
        
        Directory dir = newDirectory();
        IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
@ -94,7 +107,7 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
        iw.close();
        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
        assertThat(topDocs.totalHits.value, equalTo(1L));
-        String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
+        String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
        
        CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
                passageFormatter, locale,
--- a/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml
+++ b/plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml
@ -42,3 +42,77 @@
        body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }

  - match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}
+
+---
+"issue 39395 thread safety issue -requires multiple calls to reveal":
+  - skip:
+      version: " - 6.4.99"
+      reason: Annotated text type introduced in 6.5.0
+
+  - do:
+      indices.create:
+        index: annotated
+        body:
+          settings:
+            number_of_shards: "5"
+            number_of_replicas: "0"
+          mappings:
+            properties:
+              my_field:
+                type: annotated_text
+
+  - do:
+      index:
+        index: annotated
+        id: 1
+        body: 
+            "my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)"
+  - do:
+      index:
+        index: annotated
+        id: 2
+        body: 
+            "my_field" : "[A](~MARK0) [C](~MARK2)"
+        refresh: true
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
+  - do:
+      search:
+        request_cache: false
+        body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated",  "fields" : { "my_field" : {} } } }
+  - match: {_shards.failed: 0}
+
--- a/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java
@ -74,7 +74,6 @@ public interface FetchSubPhase {
            }
            return cache;
        }
-
    }

    /**
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java
@ -38,6 +38,7 @@ import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
 import org.elasticsearch.search.internal.SearchContext;

 import java.io.IOException;
@ -68,12 +69,13 @@ public class UnifiedHighlighter implements Highlighter {
        int numberOfFragments;
        try {

-            final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
+            final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
+                    hitContext);
            List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
            if (fieldValues.size() == 0) {
                return null;
            }
-            final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
+            final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder);
            final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
            final CustomUnifiedHighlighter highlighter;
            final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
@ -140,14 +142,14 @@ public class UnifiedHighlighter implements Highlighter {
        return null;
    }

-    protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
+    protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) {
        CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
            field.fieldOptions().postTags()[0], encoder);
        return passageFormatter;
    }

    
-    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
+    protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
        return HighlightUtils.getAnalyzer(docMapper, type);
    }