LUCENE-7417: Highlighter WSTE didn't handle single-term MultiPhraseQuery.

Also updated to Java 5 for-each in this method.

(cherry picked from commit 3966f99)
This commit is contained in:
David Smiley 2016-09-09 10:06:39 -04:00
parent a0dcf389b2
commit 514bb1bbc1
3 changed files with 57 additions and 27 deletions

View File

@ -4,7 +4,22 @@ For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions
======================= Lucene 6.3.0 =======================
(No Changes)
API Changes
New Features
Bug Fixes
* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
term. (Thomas Kappler via David Smiley)
Improvements
Optimizations
Other
======================= Lucene 6.2.0 =======================
@ -204,6 +219,10 @@ New Features
API Changes
* LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils
helper class in core geo package. Also refactors LatLonPointTests to
TestGeoEncodingUtils (Nick Knize)
* LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo
  package in core (Nick Knize)
@ -219,9 +238,6 @@ API Changes
* LUCENE-7243: Removed the LeafReaderContext parameter from
  QueryCachingPolicy#shouldCache. (Adrien Grand)
* LUCENE-7283: SlowCompositeReaderWrapper and the uninverting package have
been moved to Solr. (Mike McCandless)
Optimizations
* LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
@ -401,6 +417,10 @@ New Features
  input tokens. Useful for normalizing short text in clustering/linking
  tasks. (Mark Harwood, Adrien Grand)
* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting
for counting ranges that align with the underlying terms as defined by the
NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley)
* LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field
  length computations, to avoid skew from documents that don't have the field.
  (Ahmet Arslan via Robert Muir)

View File

@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
Term[] phraseQueryTerms = phraseQuery.getTerms(); Term[] phraseQueryTerms = phraseQuery.getTerms();
if (phraseQueryTerms.length == 1) { if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost); extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
} } else {
else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) { for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor {
// this query is TermContext sensitive. // this query is TermContext sensitive.
extractWeightedTerms(terms, query, boost); extractWeightedTerms(terms, query, boost);
} else if (query instanceof DisjunctionMaxQuery) { } else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(iterator.next(), boost, terms); extract(clause, boost, terms);
} }
} else if (query instanceof ToParentBlockJoinQuery) { } else if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms); extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length)); disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions; ++distinctPositions;
} }
for (int j = 0; j < termArray.length; ++j) { for (Term aTermArray : termArray) {
disjuncts.add(new SpanTermQuery(termArray[j])); disjuncts.add(new SpanTermQuery(aTermArray));
} }
} }
int positionGaps = 0; int positionGaps = 0;
int position = 0; int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions]; final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) { for (List<SpanQuery> disjuncts : disjunctLists) {
List<SpanQuery> disjuncts = disjunctLists[i];
if (disjuncts != null) { if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts clauses[position++] = new SpanOrQuery(disjuncts
.toArray(new SpanQuery[disjuncts.size()])); .toArray(new SpanQuery[disjuncts.size()]));
@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor {
} }
} }
final int slop = mpq.getSlop(); if (clauses.length == 1) {
final boolean inorder = (slop == 0); extractWeightedSpanTerms(terms, clauses[0], boost);
} else {
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost); extractWeightedSpanTerms(terms, sp, boost);
}
} }
} else if (query instanceof MatchAllDocsQuery) { } else if (query instanceof MatchAllDocsQuery) {
//nothing //nothing

View File

@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp; import org.apache.lucene.util.automaton.RegExp;
import org.junit.Test;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
helper.start(); helper.start();
} }
@Test
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException { public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
final String fieldName = "substring";
final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });
assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
}
public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
final String fieldName = "substring";
final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
.add(new Term(fieldName, "uchu")).build();
assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
}
private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
throws IOException, InvalidTokenOffsetsException {
final Analyzer analyzer = new Analyzer() { final Analyzer analyzer = new Analyzer() {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName) { protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new NGramTokenizer(4, 4)); return new TokenStreamComponents(new NGramTokenizer(4, 4));
} }
}; };
final String fieldName = "substring";
final List<BytesRef> list = new ArrayList<>();
list.add(new BytesRef("uchu"));
final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
final QueryScorer fragmentScorer = new QueryScorer(query, fieldName); final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
final Highlighter highlighter = new Highlighter(formatter, fragmentScorer); final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
highlighter.setTextFragmenter(new SimpleFragmenter(100)); highlighter.setTextFragmenter(new SimpleFragmenter(100));
final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung"); final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
assertEquals("B<b>uchu</b>ng",fragment);
assertEquals(expected, fragment);
} }
public void testUnRewrittenQuery() throws Exception { public void testUnRewrittenQuery() throws Exception {