mirror of https://github.com/apache/lucene.git
LUCENE-7231: WeightedSpanTermExtractor correctly deals with single-term PhraseQuery
This commit is contained in:
parent
53d96705e9
commit
35024d3edc
|
@ -114,6 +114,9 @@ Bug Fixes
|
|||
* LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan
|
||||
Woodward)
|
||||
|
||||
* LUCENE-7231: WeightedSpanTermExtractor didn't deal correctly with single-term
|
||||
phrase queries. (Eva Popenda, Alan Woodward)
|
||||
|
||||
Documentation
|
||||
|
||||
* LUCENE-7223: Improve XXXPoint javadocs to make it clear that you
|
||||
|
|
|
@ -115,24 +115,29 @@ public class WeightedSpanTermExtractor {
|
|||
} else if (query instanceof PhraseQuery) {
|
||||
PhraseQuery phraseQuery = ((PhraseQuery) query);
|
||||
Term[] phraseQueryTerms = phraseQuery.getTerms();
|
||||
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
|
||||
for (int i = 0; i < phraseQueryTerms.length; i++) {
|
||||
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
|
||||
if (phraseQueryTerms.length == 1) {
|
||||
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
|
||||
}
|
||||
else {
|
||||
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
|
||||
for (int i = 0; i < phraseQueryTerms.length; i++) {
|
||||
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
|
||||
}
|
||||
|
||||
// sum position increments beyond 1
|
||||
int positionGaps = 0;
|
||||
int[] positions = phraseQuery.getPositions();
|
||||
if (positions.length >= 2) {
|
||||
// positions are in increasing order. max(0,...) is just a safeguard.
|
||||
positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length + 1);
|
||||
// sum position increments beyond 1
|
||||
int positionGaps = 0;
|
||||
int[] positions = phraseQuery.getPositions();
|
||||
if (positions.length >= 2) {
|
||||
// positions are in increasing order. max(0,...) is just a safeguard.
|
||||
positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
|
||||
}
|
||||
|
||||
//if original slop is 0 then require inOrder
|
||||
boolean inorder = (phraseQuery.getSlop() == 0);
|
||||
|
||||
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
|
||||
extractWeightedSpanTerms(terms, sp, boost);
|
||||
}
|
||||
|
||||
//if original slop is 0 then require inOrder
|
||||
boolean inorder = (phraseQuery.getSlop() == 0);
|
||||
|
||||
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
|
||||
extractWeightedSpanTerms(terms, sp, boost);
|
||||
} else if (query instanceof TermQuery) {
|
||||
extractWeightedTerms(terms, query, boost);
|
||||
} else if (query instanceof SpanQuery) {
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.lucene.search.highlight;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
@ -28,9 +30,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
|
@ -41,13 +40,14 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
|
@ -93,6 +93,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.junit.Test;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
|
@ -1560,6 +1561,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
helper.start();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
|
||||
|
||||
final Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
return new TokenStreamComponents(new NGramTokenizer(4, 4));
|
||||
}
|
||||
};
|
||||
final String fieldName = "substring";
|
||||
|
||||
final List<BytesRef> list = new ArrayList<>();
|
||||
list.add(new BytesRef("uchu"));
|
||||
final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
|
||||
|
||||
final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
|
||||
final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
|
||||
|
||||
final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(100));
|
||||
final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
|
||||
|
||||
assertEquals("B<b>uchu</b>ng",fragment);
|
||||
|
||||
}
|
||||
|
||||
public void testUnRewrittenQuery() throws Exception {
|
||||
final TestHighlightRunner helper = new TestHighlightRunner() {
|
||||
|
||||
|
|
Loading…
Reference in New Issue