diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 60266543ada..7d8e3634e7f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -257,6 +257,10 @@ Bug Fixes * LUCENE-7676: Fixed FilterCodecReader to override more super-class methods. Also added TestFilterCodecReader class. (Christine Poerschke) +* LUCENE-7717: The UnifiedHighlighter and PostingsHighlighter were not highlighting + prefix queries with multi-byte characters. TermRangeQuery was affected too. + (Dmitry Malinin, David Smiley) + ======================= Lucene 6.4.1 ======================= Build diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java index 56345c214d9..c9733d3976f 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java @@ -87,16 +87,6 @@ class MultiTermHighlighting { list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field))); } else if (query instanceof SpanMultiTermQueryWrapper) { list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper) query).getWrappedQuery(), field))); - } else if (query instanceof AutomatonQuery) { - final AutomatonQuery aq = (AutomatonQuery) query; - if (aq.getField().equals(field)) { - list.add(new CharacterRunAutomaton(aq.getAutomaton()) { - @Override - public String toString() { - return aq.toString(); - } - }); - } } else if (query instanceof PrefixQuery) { final PrefixQuery pq = (PrefixQuery) query; Term prefix = pq.getPrefix(); @@ -182,6 +172,16 @@ class MultiTermHighlighting { } }); } + } else if (query instanceof AutomatonQuery) { + final AutomatonQuery aq = (AutomatonQuery) query; + if (aq.getField().equals(field)) { + list.add(new CharacterRunAutomaton(aq.getAutomaton()) { + @Override 
+ public String toString() { + return aq.toString(); + } + }); + } } return list.toArray(new CharacterRunAutomaton[list.size()]); } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java index 267d6039d83..89403d5628e 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java @@ -100,16 +100,6 @@ class MultiTermHighlighting { } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) { list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc))); - } else if (query instanceof AutomatonQuery) { - final AutomatonQuery aq = (AutomatonQuery) query; - if (fieldMatcher.test(aq.getField())) { - list.add(new CharacterRunAutomaton(aq.getAutomaton()) { - @Override - public String toString() { - return aq.toString(); - } - }); - } } else if (query instanceof PrefixQuery) { final PrefixQuery pq = (PrefixQuery) query; Term prefix = pq.getPrefix(); @@ -197,6 +187,16 @@ class MultiTermHighlighting { } }); } + } else if (query instanceof AutomatonQuery) { + final AutomatonQuery aq = (AutomatonQuery) query; + if (fieldMatcher.test(aq.getField())) { + list.add(new CharacterRunAutomaton(aq.getAutomaton()) { + @Override + public String toString() { + return aq.toString(); + } + }); + } } return list.toArray(new CharacterRunAutomaton[list.size()]); } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java index 10f36a74e8f..4a4b7ede196 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java +++ 
b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -668,10 +669,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { IndexSearcher searcher = newSearcher(ir); UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer); + // use a variety of common MTQ types BooleanQuery query = new BooleanQuery.Builder() - .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD) - .add(new WildcardQuery(new Term("body", "one")), BooleanClause.Occur.SHOULD) - .add(new WildcardQuery(new Term("body", "se*")), BooleanClause.Occur.SHOULD) + .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD) + .add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD) + .add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD) .build(); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits); @@ -732,8 +734,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { snippets = highlighter.highlight("body", query, topDocs); assertEquals(1, snippets.length); - // Default formatter bolds each hit: - assertEquals("Test(body:te*) a one(body:one) sentence(body:se*) document.", snippets[0]); + assertEquals("Test(body:te*) a one(body:*one*) sentence(body:zentence~~2) document.", snippets[0]); ir.close(); } @@ -1054,4 +1055,23 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase { } } + // LUCENE-7717 bug, ordering of MTQ AutomatonQuery detection + public void testRussianPrefixQuery() 
throws IOException { + Analyzer analyzer = new StandardAnalyzer(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer); + String field = "title"; + Document doc = new Document(); + doc.add(new Field(field, "я", fieldType)); // Russian char; encodes to 2 bytes in UTF-8 + iw.addDocument(doc); + IndexReader ir = iw.getReader(); + iw.close(); + + IndexSearcher searcher = newSearcher(ir); + Query query = new PrefixQuery(new Term(field, "я")); + TopDocs topDocs = searcher.search(query, 1); + UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer); + String[] snippets = highlighter.highlight(field, query, topDocs); + assertEquals("[я]", Arrays.toString(snippets)); + ir.close(); + } }