LUCENE-4804: PostingsHighlighter sometimes applies term to the wrong passage

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1450462 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-02-26 21:53:40 +00:00
parent dd4b910c02
commit 5e88698ae6
4 changed files with 36 additions and 1 deletions

View File

@ -273,6 +273,9 @@ Bug Fixes
* LUCENE-4802: Don't compute norms for drill-down facet fields. (Mike McCandless)
* LUCENE-4804: PostingsHighlighter sometimes applied terms to the wrong passage,
if they started exactly on a passage boundary. (Robert Muir)
Documentation
* LUCENE-4718: Fixed documentation of oal.queryparser.classic.

View File

@ -378,7 +378,7 @@ public final class PostingsHighlighter {
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
}
int end = dp.endOffset();
if (start > current.endOffset) {
if (start >= current.endOffset) {
if (current.startOffset >= 0) {
// finalize current
current.score *= scorer.norm(current.startOffset);

View File

@ -310,4 +310,34 @@ public class TestPostingsHighlighter extends LuceneTestCase {
ir.close();
dir.close();
}
/**
 * Regression test for LUCENE-4804: a matching term that starts exactly on a
 * passage boundary must not be applied to the wrong passage. Indexes a short
 * multi-sentence text with offsets, runs the phrase query "curious george",
 * and checks that the highlighted snippet does not contain the duplicated
 * {@code <b>Curious</b>Curious} artifact.
 */
public void testCuriousGeorge() throws Exception {
  final String fieldName = "body";
  final String story = "Its the formula for success for preschoolers—Curious George and fire trucks! " +
      "Curious George and the Firefighters is a story based on H. A. and Margret Reys " +
      "popular primate and painted in the original watercolor and charcoal style. " +
      "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";

  // Keep the creation order of the randomized test fixtures unchanged so that
  // a given test seed keeps producing the same sequence of random() draws.
  Directory directory = newDirectory();
  Analyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, mockAnalyzer);

  // Stored text field that also records offsets in the postings.
  FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
  offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

  Document doc = new Document();
  doc.add(new Field(fieldName, story, offsetsType));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher indexSearcher = newSearcher(reader);

  // Phrase query: "curious george"
  PhraseQuery phrase = new PhraseQuery();
  for (String word : new String[] {"curious", "george"}) {
    phrase.add(new Term(fieldName, word));
  }

  TopDocs hits = indexSearcher.search(phrase, 10);
  assertEquals(1, hits.totalHits);

  PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
  String[] snippets = postingsHighlighter.highlight(fieldName, phrase, indexSearcher, hits, 2);
  assertEquals(1, snippets.length);

  // The highlighted term must not be emitted next to an unhighlighted copy of itself.
  String onlySnippet = snippets[0];
  assertFalse(onlySnippet.contains("<b>Curious</b>Curious"));

  reader.close();
  directory.close();
}
}

View File

@ -172,6 +172,8 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
assertEquals("body", term.field());
int matchStart = p.getMatchStarts()[i];
assertTrue(matchStart >= 0);
// must at least start within the passage
assertTrue(matchStart < p.getEndOffset());
int matchEnd = p.getMatchEnds()[i];
assertTrue(matchEnd >= 0);
// always moving forward