LUCENE-140: Add bounds checking to BitVector's get, set, clear methods

to prevent index corruption on calling IndexReader.deleteDocument(int
docNum) on a "slightly" out of bounds docNum.  Other changes:

  * In IndexReader.deleteDocument, set hasChanges to true before
    calling doDelete in case an Exception is hit in doDelete.

  * Changed the "docs out of order" check to be tighter (<= instead of
    <) to catch boundary case that was missed.

  * Fixed small unrelated javadoc typo.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@494136 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2007-01-08 18:11:08 +00:00
parent 0971e23550
commit dc20634a97
5 changed files with 61 additions and 4 deletions

View File

@ -281,6 +281,15 @@ Bug fixes
fixing the original FieldCache performance problem. fixing the original FieldCache performance problem.
(Chris Hostetter, Yonik Seeley) (Chris Hostetter, Yonik Seeley)
29. LUCENE-140: Fix IndexReader.deleteDocument(int docNum) to
correctly raise ArrayIndexOutOfBoundsException when docNum is too
large. Previously, if docNum was only slightly too large (within
the same multiple of 8, ie, up to 7 ints beyond maxDoc), no
exception would be raised and instead the index would become
silently corrupted. The corruption then only appears much later,
in mergeSegments, when the corrupted segment is merged with
segment(s) after it. (Mike McCandless)
Optimizations Optimizations
1. LUCENE-586: TermDocs.skipTo() is now more efficient for 1. LUCENE-586: TermDocs.skipTo() is now more efficient for

View File

@ -541,8 +541,8 @@ public abstract class IndexReader {
public final synchronized void deleteDocument(int docNum) throws IOException { public final synchronized void deleteDocument(int docNum) throws IOException {
if(directoryOwner) if(directoryOwner)
aquireWriteLock(); aquireWriteLock();
doDelete(docNum);
hasChanges = true; hasChanges = true;
doDelete(docNum);
} }

View File

@ -348,9 +348,9 @@ final class SegmentMerger {
doc = docMap[doc]; // map around deletions doc = docMap[doc]; // map around deletions
doc += base; // convert to merged space doc += base; // convert to merged space
if (doc < lastDoc) if (lastDoc != 0 && doc <= lastDoc)
throw new IllegalStateException("docs out of order (" + doc + throw new IllegalStateException("docs out of order (" + doc +
" < " + lastDoc + " )"); " <= " + lastDoc + " )");
df++; df++;

View File

@ -49,12 +49,18 @@ public final class BitVector {
/** Sets the value of <code>bit</code> to one. */ /** Sets the value of <code>bit</code> to one. */
public final void set(int bit) { public final void set(int bit) {
if (bit >= size) {
throw new ArrayIndexOutOfBoundsException(bit);
}
bits[bit >> 3] |= 1 << (bit & 7); bits[bit >> 3] |= 1 << (bit & 7);
count = -1; count = -1;
} }
/** Sets the value of <code>bit</code> to zero. */ /** Sets the value of <code>bit</code> to zero. */
public final void clear(int bit) { public final void clear(int bit) {
if (bit >= size) {
throw new ArrayIndexOutOfBoundsException(bit);
}
bits[bit >> 3] &= ~(1 << (bit & 7)); bits[bit >> 3] &= ~(1 << (bit & 7));
count = -1; count = -1;
} }
@ -62,6 +68,9 @@ public final class BitVector {
/** Returns <code>true</code> if <code>bit</code> is one and /** Returns <code>true</code> if <code>bit</code> is one and
<code>false</code> if it is zero. */ <code>false</code> if it is zero. */
public final boolean get(int bit) { public final boolean get(int bit) {
if (bit >= size) {
throw new ArrayIndexOutOfBoundsException(bit);
}
return (bits[bit >> 3] & (1 << (bit & 7))) != 0; return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
} }
@ -147,7 +156,7 @@ public final class BitVector {
} }
} }
/** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or desnse, and should be saved as a bit set. */ /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. */
private boolean isSparse() { private boolean isSparse() {
// note: order of comparisons below set to favor smaller values (no binary range search.) // note: order of comparisons below set to favor smaller values (no binary range search.)
// note: adding 4 because we start with ((int) -1) to indicate d-gaps format. // note: adding 4 because we start with ((int) -1) to indicate d-gaps format.

View File

@ -750,6 +750,45 @@ public class TestIndexReader extends TestCase
} }
} }
public void testDocsOutOfOrderJIRA140() throws IOException {
Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
for(int i=0;i<11;i++) {
addDoc(writer, "aaa");
}
writer.close();
IndexReader reader = IndexReader.open(dir);
// Try to delete an invalid docId, yet, within range
// of the final bits of the BitVector:
boolean gotException = false;
try {
reader.deleteDocument(11);
} catch (ArrayIndexOutOfBoundsException e) {
gotException = true;
}
reader.close();
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
// We must add more docs to get a new segment written
for(int i=0;i<11;i++) {
addDoc(writer, "aaa");
}
try {
writer.optimize();
} catch (IllegalStateException e) {
e.printStackTrace();
fail("hit unexpected illegal state exception during optimize");
}
if (!gotException) {
fail("delete of out-of-bounds doc number failed to hit exception");
}
}
private String arrayToString(String[] l) { private String arrayToString(String[] l) {
String s = ""; String s = "";
for(int i=0;i<l.length;i++) { for(int i=0;i<l.length;i++) {