mirror of https://github.com/apache/lucene.git
LUCENE-4804: PostingsHighlighter sometimes applies term to the wrong passage
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1450462 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dd4b910c02
commit
5e88698ae6
|
@ -273,6 +273,9 @@ Bug Fixes
|
|||
|
||||
* LUCENE-4802: Don't compute norms for drill-down facet fields. (Mike McCandless)
|
||||
|
||||
* LUCENE-4804: PostingsHighlighter sometimes applied terms to the wrong passage,
|
||||
if they started exactly on a passage boundary. (Robert Muir)
|
||||
|
||||
Documentation
|
||||
|
||||
* LUCENE-4718: Fixed documentation of oal.queryparser.classic.
|
||||
|
|
|
@ -378,7 +378,7 @@ public final class PostingsHighlighter {
|
|||
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
|
||||
}
|
||||
int end = dp.endOffset();
|
||||
if (start > current.endOffset) {
|
||||
if (start >= current.endOffset) {
|
||||
if (current.startOffset >= 0) {
|
||||
// finalize current
|
||||
current.score *= scorer.norm(current.startOffset);
|
||||
|
|
|
@ -310,4 +310,34 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
|||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testCuriousGeorge() throws Exception {
|
||||
String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " +
|
||||
"Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
|
||||
"popular primate and painted in the original watercolor and charcoal style. " +
|
||||
"Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
|
||||
Directory dir = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
|
||||
FieldType positionsType = new FieldType(TextField.TYPE_STORED);
|
||||
positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||
Field body = new Field("body", text, positionsType);
|
||||
Document document = new Document();
|
||||
document.add(body);
|
||||
iw.addDocument(document);
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
PhraseQuery query = new PhraseQuery();
|
||||
query.add(new Term("body", "curious"));
|
||||
query.add(new Term("body", "george"));
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter();
|
||||
String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -172,6 +172,8 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
|
|||
assertEquals("body", term.field());
|
||||
int matchStart = p.getMatchStarts()[i];
|
||||
assertTrue(matchStart >= 0);
|
||||
// must at least start within the passage
|
||||
assertTrue(matchStart < p.getEndOffset());
|
||||
int matchEnd = p.getMatchEnds()[i];
|
||||
assertTrue(matchEnd >= 0);
|
||||
// always moving forward
|
||||
|
|
Loading…
Reference in New Issue