LUCENE-7417: Highlighter WSTE didn't handle single-term MultiPhraseQuery.

Also updated to Java 5 for-each in this method.
This commit is contained in:
David Smiley 2016-09-09 10:06:39 -04:00
parent 003a346d50
commit 3966f99821
3 changed files with 48 additions and 24 deletions

View File

@ -36,7 +36,21 @@ Other
======================= Lucene 6.3.0 =======================
(No changes)
API Changes
New Features
Bug Fixes
* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
term. (Thomas Kappler via David Smiley)
Improvements
Optimizations
Other
======================= Lucene 6.2.0 =======================

View File

@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
Term[] phraseQueryTerms = phraseQuery.getTerms();
if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
}
else {
} else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor {
// this query is TermContext sensitive.
extractWeightedTerms(terms, query, boost);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), boost, terms);
for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(clause, boost, terms);
}
} else if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions;
}
for (int j = 0; j < termArray.length; ++j) {
disjuncts.add(new SpanTermQuery(termArray[j]));
for (Term aTermArray : termArray) {
disjuncts.add(new SpanTermQuery(aTermArray));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) {
List<SpanQuery> disjuncts = disjunctLists[i];
for (List<SpanQuery> disjuncts : disjunctLists) {
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts
.toArray(new SpanQuery[disjuncts.size()]));
@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor {
}
}
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
if (clauses.length == 1) {
extractWeightedSpanTerms(terms, clauses[0], boost);
} else {
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
}
} else if (query instanceof MatchAllDocsQuery) {
//nothing

View File

@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.Test;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
helper.start();
}
@Test
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
final String fieldName = "substring";
final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });
assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
}
public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
final String fieldName = "substring";
final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
.add(new Term(fieldName, "uchu")).build();
assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
}
private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
throws IOException, InvalidTokenOffsetsException {
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new NGramTokenizer(4, 4));
}
};
final String fieldName = "substring";
final List<BytesRef> list = new ArrayList<>();
list.add(new BytesRef("uchu"));
final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
highlighter.setTextFragmenter(new SimpleFragmenter(100));
final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
assertEquals("B<b>uchu</b>ng",fragment);
final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
assertEquals(expected, fragment);
}
public void testUnRewrittenQuery() throws Exception {