From 514bb1bbc19be939b2e23be3a1d647342b60945f Mon Sep 17 00:00:00 2001 From: David Smiley Date: Fri, 9 Sep 2016 10:06:39 -0400 Subject: [PATCH] LUCENE-7417: Highlighter WSTE didn't handle single-term MultiPhraseQuery. Also updated to Java 5 for-each in this method. (cherry picked from commit 3966f99) --- lucene/CHANGES.txt | 28 ++++++++++++++--- .../highlight/WeightedSpanTermExtractor.java | 26 ++++++++-------- .../search/highlight/HighlighterTest.java | 30 ++++++++++++------- 3 files changed, 57 insertions(+), 27 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5e5c95532fc..45ce2196929 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -4,7 +4,22 @@ For more information on past and future Lucene versions, please see: http://s.apache.org/luceneversions ======================= Lucene 6.3.0 ======================= -(No Changes) + +API Changes + +New Features + +Bug Fixes + +* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when + trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one + term. (Thomas Kappler via David Smiley) + +Improvements + +Optimizations + +Other ======================= Lucene 6.2.0 ======================= @@ -204,6 +219,10 @@ New Features API Changes +* LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils + helper class in core geo package. Also refactors LatLonPointTests to + TestGeoEncodingUtils (Nick Knize) + * LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo package in core (Nick Knize) @@ -219,9 +238,6 @@ API Changes * LUCENE-7243: Removed the LeafReaderContext parameter from QueryCachingPolicy#shouldCache. (Adrien Grand) -* LUCENE-7283: SlowCompositeReaderWrapper and the uninverting package have - been moved to Solr. (Mike McCandless) - Optimizations * LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10% @@ -401,6 +417,10 @@ New Features input tokens. 
Useful for normalizing short text in clustering/linking tasks. (Mark Harwood, Adrien Grand) +* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting + for counting ranges that align with the underlying terms as defined by the + NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley) + * LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field length computations, to avoid skew from documents that don't have the field. (Ahmet Arslan via Robert Muir) diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 7507bdde6e8..644dad32893 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor { Term[] phraseQueryTerms = phraseQuery.getTerms(); if (phraseQueryTerms.length == 1) { extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost); - } - else { + } else { SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); @@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor { // this query is TermContext sensitive. 
 extractWeightedTerms(terms, query, boost); } else if (query instanceof DisjunctionMaxQuery) { - for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { - extract(iterator.next(), boost, terms); + for (Query clause : ((DisjunctionMaxQuery) query)) { + extract(clause, boost, terms); } } else if (query instanceof ToParentBlockJoinQuery) { extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms); @@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor { disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length)); ++distinctPositions; } - for (int j = 0; j < termArray.length; ++j) { - disjuncts.add(new SpanTermQuery(termArray[j])); + for (Term aTermArray : termArray) { + disjuncts.add(new SpanTermQuery(aTermArray)); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; - for (int i = 0; i < disjunctLists.length; ++i) { - List<SpanQuery> disjuncts = disjunctLists[i]; + for (List<SpanQuery> disjuncts : disjunctLists) { if (disjuncts != null) { clauses[position++] = new SpanOrQuery(disjuncts .toArray(new SpanQuery[disjuncts.size()])); @@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor { } } - final int slop = mpq.getSlop(); - final boolean inorder = (slop == 0); + if (clauses.length == 1) { + extractWeightedSpanTerms(terms, clauses[0], boost); + } else { + final int slop = mpq.getSlop(); + final boolean inorder = (slop == 0); - SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); - extractWeightedSpanTerms(terms, sp, boost); + SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); + extractWeightedSpanTerms(terms, sp, boost); + } } } else if (query instanceof MatchAllDocsQuery) { //nothing diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index cf727d7154a..fc402bacff5 100644 --- 
a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.RegExp; -import org.junit.Test; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte helper.start(); } - @Test public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException { + final String fieldName = "substring"; + final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") }); + + assertHighlighting(query, new SimpleHTMLFormatter("", ""), "Buchung", "Buchung", fieldName); + } + + public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException { + final String fieldName = "substring"; + + final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder() + .add(new Term(fieldName, "uchu")).build(); + + assertHighlighting(mpq, new SimpleHTMLFormatter("", ""), "Buchung", "Buchung", fieldName); + } + + private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName) + throws IOException, InvalidTokenOffsetsException { final Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(new NGramTokenizer(4, 4)); } }; - final String fieldName = "substring"; - - final List list = new ArrayList<>(); - list.add(new BytesRef("uchu")); - final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()])); final QueryScorer fragmentScorer = new QueryScorer(query, fieldName); - final SimpleHTMLFormatter formatter = new 
SimpleHTMLFormatter("", ""); final Highlighter highlighter = new Highlighter(formatter, fragmentScorer); highlighter.setTextFragmenter(new SimpleFragmenter(100)); - final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung"); - - assertEquals("Buchung",fragment); + final String fragment = highlighter.getBestFragment(analyzer, fieldName, text); + assertEquals(expected, fragment); } public void testUnRewrittenQuery() throws Exception {