LUCENE-5166: PostingsHighlighter fails with IndexOutOfBoundsException

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1513207 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-08-12 17:47:09 +00:00
parent 91b24b9da3
commit 1216f64e25
4 changed files with 88 additions and 0 deletions

View File

@ -111,6 +111,10 @@ Bug Fixes
time we start the read loop (where we check the length) and when we actually do
the read. (gsingers, yonik, Robert Muir, Uwe Schindler)
* LUCENE-5166: PostingsHighlighter would throw IOOBE if a term spanned the maxLength
boundary, made it into the top-N and went to the formatter.
(Manuel Amoabeng, Michael McCandless, Robert Muir)
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

View File

@ -506,6 +506,13 @@ public class PostingsHighlighter {
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
}
int end = dp.endOffset();
// LUCENE-5166: this hit would span the content limit... however more valid
// hits may exist (they are sorted by start). so we pretend like we never
// saw this term, it won't cause a passage to be added to passageQueue or anything.
assert EMPTY.startOffset() == Integer.MAX_VALUE;
if (start < contentLength && end > contentLength) {
continue;
}
if (start >= current.endOffset) {
if (current.startOffset >= 0) {
// finalize current

View File

@ -87,6 +87,81 @@ public class TestPostingsHighlighter extends LuceneTestCase {
dir.close();
}
public void testFormatWithMatchExceedingContentLength() throws Exception {
  // LUCENE-5166: a hit that straddles the maxLength boundary must never reach
  // the formatter; the highlighter should return the truncated content with
  // no <b>...</b> markup instead of throwing IndexOutOfBoundsException.
  final int maxLength = 17;
  final String bodyText = "123 5678 01234 TEST";

  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  // index the body with offsets so the postings highlighter can run
  FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
  storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Document document = new Document();
  document.add(new Field("body", bodyText, storedWithOffsets));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  Query query = new TermQuery(new Term("body", "test"));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits);

  // "TEST" starts before maxLength (17) but ends after it
  PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
  String[] snippets = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(1, snippets.length);
  // LUCENE-5166: the boundary-spanning match is dropped, so the snippet is
  // just the truncated content, unhighlighted
  assertEquals("123 5678 01234 TE", snippets[0]);

  reader.close();
  directory.close();
}
// simple test highlighting last word.
public void testHighlightLastWord() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  // offsets must be indexed for PostingsHighlighter to work
  FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
  storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  Field body = new Field("body", "", storedWithOffsets);
  Document document = new Document();
  document.add(body);

  body.setStringValue("This is a test");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  PostingsHighlighter highlighter = new PostingsHighlighter();
  Query query = new TermQuery(new Term("body", "test"));
  TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits);

  // the match is the final token of the content; it should still be wrapped
  String[] snippets = highlighter.highlight("body", query, searcher, topDocs);
  assertEquals(1, snippets.length);
  assertEquals("This is a <b>test</b>", snippets[0]);

  reader.close();
  directory.close();
}
// simple test with one sentence documents.
public void testOneSentence() throws Exception {
Directory dir = newDirectory();

View File

@ -173,6 +173,8 @@ public class TestPostingsHighlighterRanking extends LuceneTestCase {
assertTrue(p.getNumMatches() > 0);
assertTrue(p.getStartOffset() >= 0);
assertTrue(p.getStartOffset() <= content.length());
assertTrue(p.getEndOffset() >= p.getStartOffset());
assertTrue(p.getEndOffset() <= content.length());
// we use a very simple analyzer. so we can assert the matches are correct
int lastMatchStart = -1;
for (int i = 0; i < p.getNumMatches(); i++) {