From dc20634a9706c8cb8e077fff5e79952f937848fb Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 8 Jan 2007 18:11:08 +0000 Subject: [PATCH] LUCENE-140: Add bounds checking to BitVector's get, set, clear methods to prevent index corruption on calling IndexReader.deleteDocument(int docNum) on a "slightly" out of bounds docNum. Other changes: * In IndexReader.deleteDocument, set hasChanges to true before calling doDelete in case an Exception is hit in doDelete. * Changed the "docs out of order" check to be tighter (<= instead of <) to catch boundary case that was missed. * Fixed small unrelated javadoc typo. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@494136 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 9 +++++ .../org/apache/lucene/index/IndexReader.java | 2 +- .../apache/lucene/index/SegmentMerger.java | 4 +- .../org/apache/lucene/util/BitVector.java | 11 +++++- .../apache/lucene/index/TestIndexReader.java | 39 +++++++++++++++++++ 5 files changed, 61 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 1bcf39e53fe..be88c25765f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -281,6 +281,15 @@ Bug fixes fixing the original FieldCache performance problem. (Chris Hostetter, Yonik Seeley) +29. LUCENE-140: Fix IndexReader.deleteDocument(int docNum) to + correctly raise ArrayIndexOutOfBoundsException when docNum is too + large. Previously, if docNum was only slightly too large (within + the same multiple of 8, ie, up to 7 ints beyond maxDoc), no + exception would be raised and instead the index would become + silently corrupted. The corruption then only appears much later, + in mergeSegments, when the corrupted segment is merged with + segment(s) after it. (Mike McCandless) + Optimizations 1. LUCENE-586: TermDocs.skipTo() is now more efficient for diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java index 200306a5e42..9be6dc915a1 100644 --- a/src/java/org/apache/lucene/index/IndexReader.java +++ b/src/java/org/apache/lucene/index/IndexReader.java @@ -541,8 +541,8 @@ public abstract class IndexReader { public final synchronized void deleteDocument(int docNum) throws IOException { if(directoryOwner) aquireWriteLock(); - doDelete(docNum); hasChanges = true; + doDelete(docNum); } diff --git a/src/java/org/apache/lucene/index/SegmentMerger.java b/src/java/org/apache/lucene/index/SegmentMerger.java index 48ec8eb58c0..f55cd55f59c 100644 --- a/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/src/java/org/apache/lucene/index/SegmentMerger.java @@ -348,9 +348,9 @@ final class SegmentMerger { doc = docMap[doc]; // map around deletions doc += base; // convert to merged space - if (doc < lastDoc) + if (lastDoc != 0 && doc <= lastDoc) throw new IllegalStateException("docs out of order (" + doc + - " < " + lastDoc + " )"); + " <= " + lastDoc + " )"); df++; diff --git a/src/java/org/apache/lucene/util/BitVector.java b/src/java/org/apache/lucene/util/BitVector.java index f0c38604ab5..d7797c8d53b 100644 --- a/src/java/org/apache/lucene/util/BitVector.java +++ b/src/java/org/apache/lucene/util/BitVector.java @@ -49,12 +49,18 @@ public final class BitVector { /** Sets the value of bit to one. */ public final void set(int bit) { + if (bit >= size) { + throw new ArrayIndexOutOfBoundsException(bit); + } bits[bit >> 3] |= 1 << (bit & 7); count = -1; } /** Sets the value of bit to zero. */ public final void clear(int bit) { + if (bit >= size) { + throw new ArrayIndexOutOfBoundsException(bit); + } bits[bit >> 3] &= ~(1 << (bit & 7)); count = -1; } @@ -62,6 +68,9 @@ public final class BitVector { /** Returns true if bit is one and false if it is zero. */ public final boolean get(int bit) { + if (bit >= size) { + throw new ArrayIndexOutOfBoundsException(bit); + } return (bits[bit >> 3] & (1 << (bit & 7))) != 0; } @@ -147,7 +156,7 @@ public final class BitVector { } } - /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or desnse, and should be saved as a bit set. */ + /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. */ private boolean isSparse() { // note: order of comparisons below set to favor smaller values (no binary range search.) // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. diff --git a/src/test/org/apache/lucene/index/TestIndexReader.java b/src/test/org/apache/lucene/index/TestIndexReader.java index 69a078c62b8..f345cd98a1b 100644 --- a/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/src/test/org/apache/lucene/index/TestIndexReader.java @@ -749,6 +749,45 @@ public class TestIndexReader extends TestCase diskFree += 10; } } + + public void testDocsOutOfOrderJIRA140() throws IOException { + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + for(int i=0;i<11;i++) { + addDoc(writer, "aaa"); + } + writer.close(); + IndexReader reader = IndexReader.open(dir); + + // Try to delete an invalid docId, yet, within range + // of the final bits of the BitVector: + + boolean gotException = false; + try { + reader.deleteDocument(11); + } catch (ArrayIndexOutOfBoundsException e) { + gotException = true; + } + reader.close(); + + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); + + // We must add more docs to get a new segment written + for(int i=0;i<11;i++) { + addDoc(writer, "aaa"); + } + + try { + writer.optimize(); + } catch (IllegalStateException e) { + e.printStackTrace(); + fail("hit unexpected illegal state exception during optimize"); + } + + if (!gotException) { + fail("delete of out-of-bounds doc number failed to hit exception"); + } + } private String arrayToString(String[] l) { String s = "";