mirror of https://github.com/apache/lucene.git
LUCENE-7417: Highlighter's WeightedSpanTermExtractor (WSTE) didn't handle single-term MultiPhraseQuery.
Also updated to Java 5 for-each in this method.
(cherry picked from commit 3966f99)
This commit is contained in:
parent
a0dcf389b2
commit
514bb1bbc1
|
@ -4,7 +4,22 @@ For more information on past and future Lucene versions, please see:
|
||||||
http://s.apache.org/luceneversions
|
http://s.apache.org/luceneversions
|
||||||
|
|
||||||
======================= Lucene 6.3.0 =======================
|
======================= Lucene 6.3.0 =======================
|
||||||
(No Changes)
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
New Features
|
||||||
|
|
||||||
|
Bug Fixes
|
||||||
|
|
||||||
|
* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
|
||||||
|
trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
|
||||||
|
term. (Thomas Kappler via David Smiley)
|
||||||
|
|
||||||
|
Improvements
|
||||||
|
|
||||||
|
Optimizations
|
||||||
|
|
||||||
|
Other
|
||||||
|
|
||||||
======================= Lucene 6.2.0 =======================
|
======================= Lucene 6.2.0 =======================
|
||||||
|
|
||||||
|
@ -204,6 +219,10 @@ New Features
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils
|
||||||
|
helper class in core geo package. Also refactors LatLonPointTests to
|
||||||
|
TestGeoEncodingUtils (Nick Knize)
|
||||||
|
|
||||||
* LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo
|
* LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo
|
||||||
package in core (Nick Knize)
|
package in core (Nick Knize)
|
||||||
|
|
||||||
|
@ -219,9 +238,6 @@ API Changes
|
||||||
* LUCENE-7243: Removed the LeafReaderContext parameter from
|
* LUCENE-7243: Removed the LeafReaderContext parameter from
|
||||||
QueryCachingPolicy#shouldCache. (Adrien Grand)
|
QueryCachingPolicy#shouldCache. (Adrien Grand)
|
||||||
|
|
||||||
* LUCENE-7283: SlowCompositeReaderWrapper and the uninverting package have
|
|
||||||
been moved to Solr. (Mike McCandless)
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
|
* LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
|
||||||
|
@ -401,6 +417,10 @@ New Features
|
||||||
input tokens. Useful for normalizing short text in clustering/linking
|
input tokens. Useful for normalizing short text in clustering/linking
|
||||||
tasks. (Mark Harwood, Adrien Grand)
|
tasks. (Mark Harwood, Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting
|
||||||
|
for counting ranges that align with the underlying terms as defined by the
|
||||||
|
NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley)
|
||||||
|
|
||||||
* LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field
|
* LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field
|
||||||
length computations, to avoid skew from documents that don't have the field.
|
length computations, to avoid skew from documents that don't have the field.
|
||||||
(Ahmet Arslan via Robert Muir)
|
(Ahmet Arslan via Robert Muir)
|
||||||
|
|
|
@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
|
||||||
Term[] phraseQueryTerms = phraseQuery.getTerms();
|
Term[] phraseQueryTerms = phraseQuery.getTerms();
|
||||||
if (phraseQueryTerms.length == 1) {
|
if (phraseQueryTerms.length == 1) {
|
||||||
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
|
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
|
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
|
||||||
for (int i = 0; i < phraseQueryTerms.length; i++) {
|
for (int i = 0; i < phraseQueryTerms.length; i++) {
|
||||||
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
|
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
|
||||||
|
@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor {
|
||||||
// this query is TermContext sensitive.
|
// this query is TermContext sensitive.
|
||||||
extractWeightedTerms(terms, query, boost);
|
extractWeightedTerms(terms, query, boost);
|
||||||
} else if (query instanceof DisjunctionMaxQuery) {
|
} else if (query instanceof DisjunctionMaxQuery) {
|
||||||
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
|
for (Query clause : ((DisjunctionMaxQuery) query)) {
|
||||||
extract(iterator.next(), boost, terms);
|
extract(clause, boost, terms);
|
||||||
}
|
}
|
||||||
} else if (query instanceof ToParentBlockJoinQuery) {
|
} else if (query instanceof ToParentBlockJoinQuery) {
|
||||||
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
|
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
|
||||||
|
@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor {
|
||||||
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
|
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
|
||||||
++distinctPositions;
|
++distinctPositions;
|
||||||
}
|
}
|
||||||
for (int j = 0; j < termArray.length; ++j) {
|
for (Term aTermArray : termArray) {
|
||||||
disjuncts.add(new SpanTermQuery(termArray[j]));
|
disjuncts.add(new SpanTermQuery(aTermArray));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int positionGaps = 0;
|
int positionGaps = 0;
|
||||||
int position = 0;
|
int position = 0;
|
||||||
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
|
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
|
||||||
for (int i = 0; i < disjunctLists.length; ++i) {
|
for (List<SpanQuery> disjuncts : disjunctLists) {
|
||||||
List<SpanQuery> disjuncts = disjunctLists[i];
|
|
||||||
if (disjuncts != null) {
|
if (disjuncts != null) {
|
||||||
clauses[position++] = new SpanOrQuery(disjuncts
|
clauses[position++] = new SpanOrQuery(disjuncts
|
||||||
.toArray(new SpanQuery[disjuncts.size()]));
|
.toArray(new SpanQuery[disjuncts.size()]));
|
||||||
|
@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final int slop = mpq.getSlop();
|
if (clauses.length == 1) {
|
||||||
final boolean inorder = (slop == 0);
|
extractWeightedSpanTerms(terms, clauses[0], boost);
|
||||||
|
} else {
|
||||||
|
final int slop = mpq.getSlop();
|
||||||
|
final boolean inorder = (slop == 0);
|
||||||
|
|
||||||
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
|
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
|
||||||
extractWeightedSpanTerms(terms, sp, boost);
|
extractWeightedSpanTerms(terms, sp, boost);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (query instanceof MatchAllDocsQuery) {
|
} else if (query instanceof MatchAllDocsQuery) {
|
||||||
//nothing
|
//nothing
|
||||||
|
|
|
@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.automaton.Automata;
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.RegExp;
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
import org.junit.Test;
|
|
||||||
import org.w3c.dom.Element;
|
import org.w3c.dom.Element;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
|
|
||||||
|
@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
helper.start();
|
helper.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
|
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
|
||||||
|
final String fieldName = "substring";
|
||||||
|
|
||||||
|
final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });
|
||||||
|
|
||||||
|
assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
|
||||||
|
final String fieldName = "substring";
|
||||||
|
|
||||||
|
final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
|
||||||
|
.add(new Term(fieldName, "uchu")).build();
|
||||||
|
|
||||||
|
assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
|
||||||
|
throws IOException, InvalidTokenOffsetsException {
|
||||||
final Analyzer analyzer = new Analyzer() {
|
final Analyzer analyzer = new Analyzer() {
|
||||||
@Override
|
@Override
|
||||||
protected TokenStreamComponents createComponents(String fieldName) {
|
protected TokenStreamComponents createComponents(String fieldName) {
|
||||||
return new TokenStreamComponents(new NGramTokenizer(4, 4));
|
return new TokenStreamComponents(new NGramTokenizer(4, 4));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
final String fieldName = "substring";
|
|
||||||
|
|
||||||
final List<BytesRef> list = new ArrayList<>();
|
|
||||||
list.add(new BytesRef("uchu"));
|
|
||||||
final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
|
|
||||||
|
|
||||||
final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
|
final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
|
||||||
final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
|
|
||||||
|
|
||||||
final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
|
final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
|
||||||
highlighter.setTextFragmenter(new SimpleFragmenter(100));
|
highlighter.setTextFragmenter(new SimpleFragmenter(100));
|
||||||
final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
|
final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
|
||||||
|
|
||||||
assertEquals("B<b>uchu</b>ng",fragment);
|
|
||||||
|
|
||||||
|
assertEquals(expected, fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testUnRewrittenQuery() throws Exception {
|
public void testUnRewrittenQuery() throws Exception {
|
||||||
|
|
Loading…
Reference in New Issue