mirror of https://github.com/apache/lucene.git
LUCENE-5166: PostingsHighlighter fails with IndexOutOfBoundsException
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1513207 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
91b24b9da3
commit
1216f64e25
|
@ -111,6 +111,10 @@ Bug Fixes
|
||||||
time we start the read loop (where we check the length) and when we actually do
|
time we start the read loop (where we check the length) and when we actually do
|
||||||
the read. (gsingers, yonik, Robert Muir, Uwe Schindler)
|
the read. (gsingers, yonik, Robert Muir, Uwe Schindler)
|
||||||
|
|
||||||
|
* LUCENE-5166: PostingsHighlighter would throw IndexOutOfBoundsException if a term spanned the maxLength
|
||||||
|
boundary, made it into the top-N and went to the formatter.
|
||||||
|
(Manuel Amoabeng, Michael McCandless, Robert Muir)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
||||||
|
|
|
@ -506,6 +506,13 @@ public class PostingsHighlighter {
|
||||||
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
|
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
|
||||||
}
|
}
|
||||||
int end = dp.endOffset();
|
int end = dp.endOffset();
|
||||||
|
// LUCENE-5166: this hit would span the content limit... however more valid
|
||||||
|
// hits may exist (they are sorted by start). so we pretend like we never
|
||||||
|
// saw this term, it won't cause a passage to be added to passageQueue or anything.
|
||||||
|
assert EMPTY.startOffset() == Integer.MAX_VALUE;
|
||||||
|
if (start < contentLength && end > contentLength) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (start >= current.endOffset) {
|
if (start >= current.endOffset) {
|
||||||
if (current.startOffset >= 0) {
|
if (current.startOffset >= 0) {
|
||||||
// finalize current
|
// finalize current
|
||||||
|
|
|
@ -87,6 +87,81 @@ public class TestPostingsHighlighter extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testFormatWithMatchExceedingContentLength() throws Exception {
|
||||||
|
|
||||||
|
int maxLength = 17;
|
||||||
|
String bodyText = "123 5678 01234 TEST";
|
||||||
|
|
||||||
|
final Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
|
||||||
|
iwc.setMergePolicy(newLogMergePolicy());
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
|
||||||
|
final FieldType fieldType = new FieldType(TextField.TYPE_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||||
|
final Field body = new Field("body", bodyText, fieldType);
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(body);
|
||||||
|
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
IndexReader ir = iw.getReader();
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
IndexSearcher searcher = newSearcher(ir);
|
||||||
|
|
||||||
|
Query query = new TermQuery(new Term("body", "test"));
|
||||||
|
|
||||||
|
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
|
||||||
|
assertEquals(1, topDocs.totalHits);
|
||||||
|
|
||||||
|
PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
|
||||||
|
String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
|
||||||
|
|
||||||
|
|
||||||
|
assertEquals(1, snippets.length);
|
||||||
|
// LUCENE-5166: the only match spans maxLength, so nothing is highlighted; expect the text truncated to maxLength
|
||||||
|
assertEquals("123 5678 01234 TE", snippets[0]);
|
||||||
|
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// simple test highlighting last word.
|
||||||
|
public void testHighlightLastWord() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||||
|
iwc.setMergePolicy(newLogMergePolicy());
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||||
|
|
||||||
|
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
|
||||||
|
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||||
|
Field body = new Field("body", "", offsetsType);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(body);
|
||||||
|
|
||||||
|
body.setStringValue("This is a test");
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
IndexReader ir = iw.getReader();
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
IndexSearcher searcher = newSearcher(ir);
|
||||||
|
PostingsHighlighter highlighter = new PostingsHighlighter();
|
||||||
|
Query query = new TermQuery(new Term("body", "test"));
|
||||||
|
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
|
||||||
|
assertEquals(1, topDocs.totalHits);
|
||||||
|
String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
|
||||||
|
assertEquals(1, snippets.length);
|
||||||
|
assertEquals("This is a <b>test</b>", snippets[0]);
|
||||||
|
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
// simple test with one sentence documents.
|
// simple test with one sentence documents.
|
||||||
public void testOneSentence() throws Exception {
|
public void testOneSentence() throws Exception {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
|
|
|
@ -173,6 +173,8 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
|
||||||
assertTrue(p.getNumMatches() > 0);
|
assertTrue(p.getNumMatches() > 0);
|
||||||
assertTrue(p.getStartOffset() >= 0);
|
assertTrue(p.getStartOffset() >= 0);
|
||||||
assertTrue(p.getStartOffset() <= content.length());
|
assertTrue(p.getStartOffset() <= content.length());
|
||||||
|
assertTrue(p.getEndOffset() >= p.getStartOffset());
|
||||||
|
assertTrue(p.getEndOffset() <= content.length());
|
||||||
// we use a very simple analyzer. so we can assert the matches are correct
|
// we use a very simple analyzer. so we can assert the matches are correct
|
||||||
int lastMatchStart = -1;
|
int lastMatchStart = -1;
|
||||||
for (int i = 0; i < p.getNumMatches(); i++) {
|
for (int i = 0; i < p.getNumMatches(); i++) {
|
||||||
|
|
Loading…
Reference in New Issue