mirror of https://github.com/apache/lucene.git
LUCENE-2003: Highlighter doesn't respect position increments other than 1 with PhraseQuerys
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829129 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0f76b3139d
commit
9609a2cb4d
|
@ -95,13 +95,32 @@ public class WeightedSpanTermExtractor {
|
|||
}
|
||||
}
|
||||
} else if (query instanceof PhraseQuery) {
|
||||
Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
|
||||
PhraseQuery phraseQuery = ((PhraseQuery) query);
|
||||
Term[] phraseQueryTerms = phraseQuery.getTerms();
|
||||
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
|
||||
for (int i = 0; i < phraseQueryTerms.length; i++) {
|
||||
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
|
||||
}
|
||||
int slop = phraseQuery.getSlop();
|
||||
int[] positions = phraseQuery.getPositions();
|
||||
// add largest position increment to slop
|
||||
if (positions.length > 0) {
|
||||
int lastPos = positions[0];
|
||||
int largestInc = 0;
|
||||
int sz = positions.length;
|
||||
for (int i = 1; i < sz; i++) {
|
||||
int pos = positions[i];
|
||||
int inc = pos - lastPos;
|
||||
if (inc > largestInc) {
|
||||
largestInc = inc;
|
||||
}
|
||||
lastPos = pos;
|
||||
}
|
||||
if(largestInc > 1) {
|
||||
slop += largestInc;
|
||||
}
|
||||
}
|
||||
|
||||
int slop = ((PhraseQuery) query).getSlop();
|
||||
boolean inorder = false;
|
||||
|
||||
if (slop == 0) {
|
||||
|
|
|
@ -84,7 +84,7 @@ import org.w3c.dom.NodeList;
|
|||
*/
|
||||
public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter {
|
||||
// TODO: change to CURRENT, does not work because posIncr:
|
||||
static final Version TEST_VERSION = Version.LUCENE_24;
|
||||
static final Version TEST_VERSION = Version.LUCENE_29;
|
||||
|
||||
private IndexReader reader;
|
||||
static final String FIELD_NAME = "contents";
|
||||
|
@ -100,7 +100,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
|
||||
"JFK has been shot", "John Kennedy has been shot",
|
||||
"This text has a typo in referring to Keneddy",
|
||||
"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b" };
|
||||
"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
|
||||
|
||||
/**
|
||||
* Constructor for HighlightExtractorTest.
|
||||
|
@ -256,6 +256,51 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 3);
|
||||
|
||||
numHighlights = 0;
|
||||
doSearching("\"This piece of text refers to Kennedy\"");
|
||||
|
||||
maxNumFragmentsRequired = 2;
|
||||
|
||||
scorer = new QueryScorer(query, FIELD_NAME);
|
||||
highlighter = new Highlighter(this, scorer);
|
||||
|
||||
for (int i = 0; i < hits.totalHits; i++) {
|
||||
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
System.out.println("\t" + result);
|
||||
}
|
||||
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 4);
|
||||
|
||||
numHighlights = 0;
|
||||
doSearching("\"lets is a the lets is a the lets is a the lets\"");
|
||||
|
||||
maxNumFragmentsRequired = 2;
|
||||
|
||||
scorer = new QueryScorer(query, FIELD_NAME);
|
||||
highlighter = new Highlighter(this, scorer);
|
||||
|
||||
for (int i = 0; i < hits.totalHits; i++) {
|
||||
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
System.out.println("\t" + result);
|
||||
}
|
||||
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 4);
|
||||
|
||||
}
|
||||
|
||||
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
|
||||
|
@ -1531,6 +1576,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
public void doSearching(String queryString) throws Exception {
|
||||
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
|
||||
parser.setEnablePositionIncrements(true);
|
||||
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
query = parser.parse(queryString);
|
||||
doSearching(query);
|
||||
|
|
Loading…
Reference in New Issue