diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1387d254620..9603ea5d359 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -36,7 +36,21 @@ Other ======================= Lucene 6.3.0 ======================= -(No changes) +API Changes + +New Features + +Bug Fixes + +* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when + trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one + term. (Thomas Kappler via David Smiley) + +Improvements + +Optimizations + +Other ======================= Lucene 6.2.0 ======================= diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 837201e4f55..7f79809dbff 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor { Term[] phraseQueryTerms = phraseQuery.getTerms(); if (phraseQueryTerms.length == 1) { extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost); - } - else { + } else { SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); @@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor { // this query is TermContext sensitive. extractWeightedTerms(terms, query, boost); } else if (query instanceof DisjunctionMaxQuery) { - for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { - extract(iterator.next(), boost, terms); + for (Query clause : ((DisjunctionMaxQuery) query)) { + extract(clause, boost, terms); } } else if (query instanceof ToParentBlockJoinQuery) { extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms); @@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor { disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length)); ++distinctPositions; } - for (int j = 0; j < termArray.length; ++j) { - disjuncts.add(new SpanTermQuery(termArray[j])); + for (Term aTermArray : termArray) { + disjuncts.add(new SpanTermQuery(aTermArray)); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; - for (int i = 0; i < disjunctLists.length; ++i) { - List disjuncts = disjunctLists[i]; + for (List disjuncts : disjunctLists) { if (disjuncts != null) { clauses[position++] = new SpanOrQuery(disjuncts .toArray(new SpanQuery[disjuncts.size()])); @@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor { } } - final int slop = mpq.getSlop(); - final boolean inorder = (slop == 0); + if (clauses.length == 1) { + extractWeightedSpanTerms(terms, clauses[0], boost); + } else { + final int slop = mpq.getSlop(); + final boolean inorder = (slop == 0); - SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); - extractWeightedSpanTerms(terms, sp, boost); + SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); + extractWeightedSpanTerms(terms, sp, boost); + } } } else if (query instanceof MatchAllDocsQuery) { //nothing diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index cf727d7154a..fc402bacff5 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.RegExp; -import org.junit.Test; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte helper.start(); } - @Test public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException { + final String fieldName = "substring"; + final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") }); + + assertHighlighting(query, new SimpleHTMLFormatter("", ""), "Buchung", "Buchung", fieldName); + } + + public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException { + final String fieldName = "substring"; + + final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder() + .add(new Term(fieldName, "uchu")).build(); + + assertHighlighting(mpq, new SimpleHTMLFormatter("", ""), "Buchung", "Buchung", fieldName); + } + + private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName) + throws IOException, InvalidTokenOffsetsException { final Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(new NGramTokenizer(4, 4)); } }; - final String fieldName = "substring"; - - final List list = new ArrayList<>(); - list.add(new BytesRef("uchu")); - final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()])); final QueryScorer fragmentScorer = new QueryScorer(query, fieldName); - final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("", ""); final Highlighter highlighter = new Highlighter(formatter, fragmentScorer); highlighter.setTextFragmenter(new SimpleFragmenter(100)); - final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung"); - - assertEquals("Buchung",fragment); + final String fragment = highlighter.getBestFragment(analyzer, fieldName, text); + assertEquals(expected, fragment); } public void testUnRewrittenQuery() throws Exception {