diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 60266543ada..7d8e3634e7f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -257,6 +257,10 @@ Bug Fixes
* LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
Also added TestFilterCodecReader class. (Christine Poerschke)
+* LUCENE-7717: The UnifiedHighlighter and PostingsHighlighter were not highlighting
+ prefix queries with multi-byte characters. TermRangeQuery was affected too.
+ (Dmitry Malinin, David Smiley)
+
======================= Lucene 6.4.1 =======================
Build
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
index 56345c214d9..c9733d3976f 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
@@ -87,16 +87,6 @@ class MultiTermHighlighting {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
} else if (query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper>) query).getWrappedQuery(), field)));
- } else if (query instanceof AutomatonQuery) {
- final AutomatonQuery aq = (AutomatonQuery) query;
- if (aq.getField().equals(field)) {
- list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
- @Override
- public String toString() {
- return aq.toString();
- }
- });
- }
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
@@ -182,6 +172,16 @@ class MultiTermHighlighting {
}
});
}
+ } else if (query instanceof AutomatonQuery) {
+ final AutomatonQuery aq = (AutomatonQuery) query;
+ if (aq.getField().equals(field)) {
+ list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
+ @Override
+ public String toString() {
+ return aq.toString();
+ }
+ });
+ }
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
}
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
index 267d6039d83..89403d5628e 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
@@ -100,16 +100,6 @@ class MultiTermHighlighting {
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper>) query).getWrappedQuery(),
fieldMatcher, lookInSpan, preRewriteFunc)));
- } else if (query instanceof AutomatonQuery) {
- final AutomatonQuery aq = (AutomatonQuery) query;
- if (fieldMatcher.test(aq.getField())) {
- list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
- @Override
- public String toString() {
- return aq.toString();
- }
- });
- }
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
@@ -197,6 +187,16 @@ class MultiTermHighlighting {
}
});
}
+ } else if (query instanceof AutomatonQuery) {
+ final AutomatonQuery aq = (AutomatonQuery) query;
+ if (fieldMatcher.test(aq.getField())) {
+ list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
+ @Override
+ public String toString() {
+ return aq.toString();
+ }
+ });
+ }
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
}
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 10f36a74e8f..4a4b7ede196 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -668,10 +669,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // use a variety of common MTQ types
BooleanQuery query = new BooleanQuery.Builder()
- .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD)
- .add(new WildcardQuery(new Term("body", "one")), BooleanClause.Occur.SHOULD)
- .add(new WildcardQuery(new Term("body", "se*")), BooleanClause.Occur.SHOULD)
+ .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
+ .add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
+ .add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
@@ -732,8 +734,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
- // Default formatter bolds each hit:
- assertEquals("Test(body:te*) a one(body:one) sentence(body:se*) document.", snippets[0]);
+ assertEquals("Test(body:te*) a one(body:*one*) sentence(body:zentence~~2) document.", snippets[0]);
ir.close();
}
@@ -1054,4 +1055,23 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
}
}
+ // LUCENE-7717: regression test for the ordering of the MTQ instanceof checks (PrefixQuery before AutomatonQuery)
+ public void testRussianPrefixQuery() throws IOException {
+ Analyzer analyzer = new StandardAnalyzer();
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+ String field = "title";
+ Document doc = new Document();
+ doc.add(new Field(field, "я", fieldType)); // Russian character; encodes to 2 bytes in UTF-8
+ iw.addDocument(doc);
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ Query query = new PrefixQuery(new Term(field, "я")); // prefix is the same multi-byte character that was indexed
+ TopDocs topDocs = searcher.search(query, 1);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
+ String[] snippets = highlighter.highlight(field, query, topDocs);
+ assertEquals("[я]", Arrays.toString(snippets)); // expects exactly one snippet for the single indexed document
+ ir.close();
+ }
}