LUCENE-7417: Highlighter WSTE didn't handle single-term MultiPhraseQuery.

Also updated to Java 5 for-each in this method. (cherry picked from commit 3966f99)
2016-09-09 10:06:39 -04:00 · 2016-09-09 10:06:39 -04:00 · 514bb1bbc1
parent a0dcf389b2
commit 514bb1bbc1
3 changed files with 57 additions and 27 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -4,7 +4,22 @@ For more information on past and future Lucene versions, please see:
 http://s.apache.org/luceneversions

 ======================= Lucene 6.3.0 =======================
-(No Changes)
+
+API Changes
+
+New Features
+
+Bug Fixes
+
+* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
+  trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
+  term.  (Thomas Kappler via David Smiley)
+
+Improvements
+
+Optimizations
+
+Other

 ======================= Lucene 6.2.0 =======================

@ -204,6 +219,10 @@ New Features

 API Changes

+* LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils
+  helper class in core geo package. Also refactors LatLonPointTests to
+  TestGeoEncodingUtils (Nick Knize)
+
 * LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo
  package in core (Nick Knize)

@ -219,9 +238,6 @@ API Changes
 * LUCENE-7243: Removed the LeafReaderContext parameter from
  QueryCachingPolicy#shouldCache. (Adrien Grand)

-* LUCENE-7283: SlowCompositeReaderWrapper and the uninverting package have
-  been moved to Solr.  (Mike McCandless)
-
 Optimizations

 * LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
@ -401,6 +417,10 @@ New Features
  input tokens. Useful for normalizing short text in clustering/linking 
  tasks. (Mark Harwood, Adrien Grand)

+* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting
+  for counting ranges that align with the underlying terms as defined by the
+  NumberRangePrefixTree (e.g. familiar date units like days).  (David Smiley)
+
 * LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field
  length computations, to avoid skew from documents that don't have the field.
  (Ahmet Arslan via Robert Muir)
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
      Term[] phraseQueryTerms = phraseQuery.getTerms();
      if (phraseQueryTerms.length == 1) {
        extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
-      }
-      else {
+      } else {
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
        for (int i = 0; i < phraseQueryTerms.length; i++) {
          clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor {
      // this query is TermContext sensitive.
      extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
-      for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
-        extract(iterator.next(), boost, terms);
+      for (Query clause : ((DisjunctionMaxQuery) query)) {
+        extract(clause, boost, terms);
      }
    } else if (query instanceof ToParentBlockJoinQuery) {
      extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor {
            disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
            ++distinctPositions;
          }
-          for (int j = 0; j < termArray.length; ++j) {
-            disjuncts.add(new SpanTermQuery(termArray[j]));
+          for (Term aTermArray : termArray) {
+            disjuncts.add(new SpanTermQuery(aTermArray));
          }
        }

        int positionGaps = 0;
        int position = 0;
        final SpanQuery[] clauses = new SpanQuery[distinctPositions];
-        for (int i = 0; i < disjunctLists.length; ++i) {
-          List<SpanQuery> disjuncts = disjunctLists[i];
+        for (List<SpanQuery> disjuncts : disjunctLists) {
          if (disjuncts != null) {
            clauses[position++] = new SpanOrQuery(disjuncts
                .toArray(new SpanQuery[disjuncts.size()]));
@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor {
          }
        }

-        final int slop = mpq.getSlop();
-        final boolean inorder = (slop == 0);
+        if (clauses.length == 1) {
+          extractWeightedSpanTerms(terms, clauses[0], boost);
+        } else {
+          final int slop = mpq.getSlop();
+          final boolean inorder = (slop == 0);

-        SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
-        extractWeightedSpanTerms(terms, sp, boost);
+          SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
+          extractWeightedSpanTerms(terms, sp, boost);
+        }
      }
    } else if (query instanceof MatchAllDocsQuery) {
      //nothing
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
-import org.junit.Test;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;

@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
    helper.start();
  }

-  @Test
  public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+    final String fieldName = "substring";

+    final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });
+
+    assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
+  }
+
+  public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+    final String fieldName = "substring";
+
+    final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
+        .add(new Term(fieldName, "uchu")).build();
+
+    assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
+  }
+
+  private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
+      throws IOException, InvalidTokenOffsetsException {
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new NGramTokenizer(4, 4));
      }
    };
-    final String fieldName = "substring";
-
-    final List<BytesRef> list = new ArrayList<>();
-    list.add(new BytesRef("uchu"));
-    final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));

    final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
-    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");

    final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(100));
-    final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
-
-    assertEquals("B<b>uchu</b>ng",fragment);
+    final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);

+    assertEquals(expected, fragment);
  }

  public void testUnRewrittenQuery() throws Exception {