diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 90c498d9581..44d90a02240 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -486,6 +486,9 @@ Changes in runtime behavior
* LUCENE-2829: Improve the performance of "primary key" lookup use
case (running a TermQuery that matches one document) on a
multi-segment index. (Robert Muir, Mike McCandless)
+
+* LUCENE-2010: Segments with 100% deleted documents are now removed on
+ IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless)
API Changes
@@ -905,6 +908,9 @@ Optimizations
* LUCENE-2824: Optimize BufferedIndexInput to do less bounds checks.
(Robert Muir)
+* LUCENE-2010: Segments with 100% deleted documents are now removed on
+ IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless)
+
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
index 8be325a64a9..0be1dd2ba30 100644
--- a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
+++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
@@ -270,9 +270,9 @@ class BufferedDeletes {
}
private synchronized long applyDeletes(IndexWriter.ReaderPool readerPool,
- SegmentInfo info,
- SegmentDeletes coalescedDeletes,
- SegmentDeletes segmentDeletes) throws IOException {
+ SegmentInfo info,
+ SegmentDeletes coalescedDeletes,
+ SegmentDeletes segmentDeletes) throws IOException {
assert readerPool.infoIsLive(info);
assert coalescedDeletes == null || coalescedDeletes.docIDs.size() == 0;
diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
index 9da85ca5e6a..f339133b7ca 100644
--- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
+++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
@@ -710,6 +710,9 @@ class DirectoryReader extends IndexReader implements Cloneable {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
+ // Remove segments that contain only 100% deleted docs:
+ segmentInfos.pruneDeletedSegments();
+
// Sync all files we just wrote
directory.sync(segmentInfos.files(directory, false));
segmentInfos.commit(directory);
diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java
index eb953c687ba..7f1b736cf1e 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java
@@ -1163,7 +1163,14 @@ public abstract class IndexReader implements Cloneable,Closeable {
return n;
}
- /** Undeletes all documents currently marked as deleted in this index.
+ /** Undeletes all documents currently marked as deleted in
+ * this index.
+ *
+ *
NOTE: this is only a best-effort process. For
+ * example, if all documents in a given segment were
+ * deleted, Lucene now drops that segment from the index,
+ * which means its documents will not be recovered by this
+ * method.
*
* @throws StaleReaderException if the index has changed
* since this reader was opened
diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
index e746427eec9..710822bd15d 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
@@ -3276,6 +3276,15 @@ public class IndexWriter implements Closeable {
}
}
+ private boolean keepFullyDeletedSegments;
+
+ /** Only for testing.
+ *
+ * @lucene.internal */
+ void keepFullyDeletedSegments() {
+ keepFullyDeletedSegments = true;
+ }
+
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection files = toSync.files(directory, false);
@@ -3334,6 +3343,10 @@ public class IndexWriter implements Closeable {
readerPool.commit();
toSync = (SegmentInfos) segmentInfos.clone();
+ if (!keepFullyDeletedSegments) {
+ toSync.pruneDeletedSegments();
+ }
+
assert filesExist(toSync);
if (commitUserData != null)
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
index 896e6222266..493279ee17b 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -308,6 +308,19 @@ public final class SegmentInfos extends Vector {
}
}
+ /** Prunes any segment whose docs are all deleted. */
+ public void pruneDeletedSegments() {
+ int segIdx = 0;
+ while(segIdx < size()) {
+ final SegmentInfo info = info(segIdx);
+ if (info.getDelCount() == info.docCount) {
+ remove(segIdx);
+ } else {
+ segIdx++;
+ }
+ }
+ }
+
/**
* Returns a copy of this instance, also copying each
* SegmentInfo.
diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
index c02f4fa6f26..52d5b7d7d46 100755
--- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
+++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
@@ -428,7 +428,7 @@ public class TestAddIndexes extends LuceneTestCase {
);
writer.addIndexes(aux, new MockDirectoryWrapper(random, new RAMDirectory(aux)));
- assertEquals(1060, writer.maxDoc());
+ assertEquals(1020, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
dir.close();
@@ -480,7 +480,7 @@ public class TestAddIndexes extends LuceneTestCase {
);
writer.addIndexes(aux, aux2);
- assertEquals(1060, writer.maxDoc());
+ assertEquals(1040, writer.maxDoc());
assertEquals(1000, writer.getDocCount(0));
writer.close();
dir.close();
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
index 41fb07fbf73..ef87922f311 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
@@ -360,7 +360,7 @@ public class TestIndexReader extends LuceneTestCase
// CREATE A NEW READER and re-test
reader = IndexReader.open(dir, false);
- assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
+ assertEquals("deleted docFreq", 0, reader.docFreq(searchTerm));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
reader.close();
reader2.close();
@@ -697,7 +697,6 @@ public class TestIndexReader extends LuceneTestCase
// CREATE A NEW READER and re-test
reader = IndexReader.open(dir, false);
- assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
@@ -838,7 +837,6 @@ public class TestIndexReader extends LuceneTestCase
writer.close();
IndexReader reader = IndexReader.open(dir, false);
reader.deleteDocument(0);
- reader.deleteDocument(1);
reader.close();
reader = IndexReader.open(dir, false);
reader.undeleteAll();
@@ -855,7 +853,6 @@ public class TestIndexReader extends LuceneTestCase
writer.close();
IndexReader reader = IndexReader.open(dir, false);
reader.deleteDocument(0);
- reader.deleteDocument(1);
reader.close();
reader = IndexReader.open(dir, false);
reader.undeleteAll();
@@ -1290,9 +1287,6 @@ public class TestIndexReader extends LuceneTestCase
// Open another reader to confirm that everything is deleted
reader2 = IndexReader.open(dir, false);
- assertEquals("reopened 2", 100, reader2.docFreq(searchTerm1));
- assertEquals("reopened 2", 100, reader2.docFreq(searchTerm2));
- assertEquals("reopened 2", 100, reader2.docFreq(searchTerm3));
assertTermDocsCount("reopened 2", reader2, searchTerm1, 0);
assertTermDocsCount("reopened 2", reader2, searchTerm2, 0);
assertTermDocsCount("reopened 2", reader2, searchTerm3, 100);
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
index c6bdd8c380f..e7d87a640ca 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
@@ -1211,7 +1211,6 @@ public class TestIndexReaderReopen extends LuceneTestCase {
IndexReader r = IndexReader.open(dir, false);
assertEquals(0, r.numDocs());
- assertEquals(4, r.maxDoc());
Collection commits = IndexReader.listCommits(dir);
for (final IndexCommit commit : commits) {
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
index 554fa5bc165..a24bab5a878 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -101,19 +101,12 @@ public class TestIndexWriter extends LuceneTestCase {
}
reader.close();
- // test doc count before segments are merged/index is optimized
- writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
- assertEquals(100, writer.maxDoc());
- writer.close();
-
reader = IndexReader.open(dir, true);
- assertEquals(100, reader.maxDoc());
assertEquals(60, reader.numDocs());
reader.close();
// optimize the index and check that the new doc count is correct
writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
- assertEquals(100, writer.maxDoc());
assertEquals(60, writer.numDocs());
writer.optimize();
assertEquals(60, writer.maxDoc());
@@ -1431,7 +1424,6 @@ public class TestIndexWriter extends LuceneTestCase {
w.close();
IndexReader ir = IndexReader.open(dir, true);
- assertEquals(1, ir.maxDoc());
assertEquals(0, ir.numDocs());
ir.close();
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
index 45ae58ce989..da56333555e 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
@@ -567,24 +567,25 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
System.out.println("TEST: open reader");
}
IndexReader reader = IndexReader.open(dir, true);
- int expected = 3+(1-i)*2;
- assertEquals(expected, reader.docFreq(new Term("contents", "here")));
- assertEquals(expected, reader.maxDoc());
- int numDel = 0;
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
- assertNotNull(delDocs);
- for(int j=0;j