From faca588e9353180eaa66cfe2a087333a7e8f9d4d Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 15 Mar 2012 19:12:41 +0000 Subject: [PATCH] LUCENE-3874: don't let bogus positions corrumpt the index git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1301155 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 6 +++++ .../lucene/index/DocInverterPerField.java | 12 +++++++--- .../index/TestIndexWriterExceptions.java | 23 +++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 08fc4719a0a..c66ae9507bc 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -937,6 +937,12 @@ Bug fixes * LUCENE-3872: IndexWriter.close() now throws IllegalStateException if you call it after calling prepareCommit() without calling commit() first. (Tim Bogaert via Mike McCandless) + +* LUCENE-3874: Throw IllegalArgumentException from IndexWriter (rather + than producing a corrupt index), if a positionIncrement would cause + integer overflow. This can happen, for example when using a buggy + TokenStream that forgets to call clearAttributes() in combination + with a StopFilter. (Robert Muir) Optimizations diff --git a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java index 47e4dc88a71..d8a3acdf230 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -109,10 +109,16 @@ final class DocInverterPerField extends DocFieldConsumerPerField { if (!hasMoreTokens) break; final int posIncr = posIncrAttribute.getPositionIncrement(); - fieldState.position += posIncr; - if (fieldState.position > 0) { - fieldState.position--; + int position = fieldState.position + posIncr; + if (position > 0) { + position--; + } else if (position < 0) { + throw new IllegalArgumentException("position overflow for field '" + field.name() + "'"); } + + // position is legal, we can safely place it in fieldState now. + // not sure if anything will use fieldState after non-aborting exc... + fieldState.position = position; if (posIncr == 0) fieldState.numOverlap++; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java index 8554f02a626..a4503dcc353 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java @@ -1494,4 +1494,27 @@ public class TestIndexWriterExceptions extends LuceneTestCase { uoe.doFail = false; d.close(); } + + public void testIllegalPositions() throws Exception { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null)); + Document doc = new Document(); + Token t1 = new Token("foo", 0, 3); + t1.setPositionIncrement(Integer.MAX_VALUE); + Token t2 = new Token("bar", 4, 7); + t2.setPositionIncrement(200); + TokenStream overflowingTokenStream = new CannedTokenStream( + new Token[] { t1, t2 } + ); + Field field = new TextField("foo", overflowingTokenStream); + doc.add(field); + try { + iw.addDocument(doc); + fail(); + } catch (IllegalArgumentException expected) { + // expected exception + } + iw.close(); + dir.close(); + } }