mirror of https://github.com/apache/lucene.git
LUCENE-7717: UnifiedHighlighter and PostingsHighlighter bug in PrefixQuery and TermRangeQuery for multi-byte text
This commit is contained in:
parent
0baf2fa33c
commit
ec13032a94
|
@ -257,6 +257,10 @@ Bug Fixes
|
|||
* LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
|
||||
Also added TestFilterCodecReader class. (Christine Poerschke)
|
||||
|
||||
* LUCENE-7717: The UnifiedHighlighter and PostingsHighlighter were not highlighting
|
||||
prefix queries with multi-byte characters. TermRangeQuery was affected too.
|
||||
(Dmitry Malinin, David Smiley)
|
||||
|
||||
======================= Lucene 6.4.1 =======================
|
||||
|
||||
Build
|
||||
|
|
|
@ -87,16 +87,6 @@ class MultiTermHighlighting {
|
|||
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
|
||||
} else if (query instanceof SpanMultiTermQueryWrapper) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
|
||||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (aq.getField().equals(field)) {
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
}
|
||||
});
|
||||
}
|
||||
} else if (query instanceof PrefixQuery) {
|
||||
final PrefixQuery pq = (PrefixQuery) query;
|
||||
Term prefix = pq.getPrefix();
|
||||
|
@ -182,6 +172,16 @@ class MultiTermHighlighting {
|
|||
}
|
||||
});
|
||||
}
|
||||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (aq.getField().equals(field)) {
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
return list.toArray(new CharacterRunAutomaton[list.size()]);
|
||||
}
|
||||
|
|
|
@ -100,16 +100,6 @@ class MultiTermHighlighting {
|
|||
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
|
||||
fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (fieldMatcher.test(aq.getField())) {
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
}
|
||||
});
|
||||
}
|
||||
} else if (query instanceof PrefixQuery) {
|
||||
final PrefixQuery pq = (PrefixQuery) query;
|
||||
Term prefix = pq.getPrefix();
|
||||
|
@ -197,6 +187,16 @@ class MultiTermHighlighting {
|
|||
}
|
||||
});
|
||||
}
|
||||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (fieldMatcher.test(aq.getField())) {
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
return list.toArray(new CharacterRunAutomaton[list.size()]);
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
|||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -668,10 +669,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
// use a variety of common MTQ types
|
||||
BooleanQuery query = new BooleanQuery.Builder()
|
||||
.add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD)
|
||||
.add(new WildcardQuery(new Term("body", "one")), BooleanClause.Occur.SHOULD)
|
||||
.add(new WildcardQuery(new Term("body", "se*")), BooleanClause.Occur.SHOULD)
|
||||
.add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
|
||||
.add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
|
||||
.add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
|
@ -732,8 +734,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
snippets = highlighter.highlight("body", query, topDocs);
|
||||
assertEquals(1, snippets.length);
|
||||
|
||||
// Default formatter bolds each hit:
|
||||
assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);
|
||||
assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
|
||||
|
||||
ir.close();
|
||||
}
|
||||
|
@ -1054,4 +1055,23 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// LUCENE-7717 bug: AutomatonQuery must be detected after the more specific MTQ types (e.g. PrefixQuery)
|
||||
public void testRussianPrefixQuery() throws IOException {
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
|
||||
String field = "title";
|
||||
Document doc = new Document();
|
||||
doc.add(new Field(field, "я", fieldType)); // Russian char; uses 2 UTF8 bytes
|
||||
iw.addDocument(doc);
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
Query query = new PrefixQuery(new Term(field, "я"));
|
||||
TopDocs topDocs = searcher.search(query, 1);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
|
||||
String[] snippets = highlighter.highlight(field, query, topDocs);
|
||||
assertEquals("[<b>я</b>]", Arrays.toString(snippets));
|
||||
ir.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue