diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 90c498d9581..44d90a02240 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -486,6 +486,9 @@ Changes in runtime behavior * LUCENE-2829: Improve the performance of "primary key" lookup use case (running a TermQuery that matches one document) on a multi-segment index. (Robert Muir, Mike McCandless) + +* LUCENE-2010: Segments with 100% deleted documents are now removed on + IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) API Changes @@ -905,6 +908,9 @@ Optimizations * LUCENE-2824: Optimize BufferedIndexInput to do less bounds checks. (Robert Muir) +* LUCENE-2010: Segments with 100% deleted documents are now removed on + IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) + Build * LUCENE-2124: Moved the JDK-based collation support from contrib/collation diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java index 8be325a64a9..0be1dd2ba30 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java @@ -270,9 +270,9 @@ class BufferedDeletes { } private synchronized long applyDeletes(IndexWriter.ReaderPool readerPool, - SegmentInfo info, - SegmentDeletes coalescedDeletes, - SegmentDeletes segmentDeletes) throws IOException { + SegmentInfo info, + SegmentDeletes coalescedDeletes, + SegmentDeletes segmentDeletes) throws IOException { assert readerPool.infoIsLive(info); assert coalescedDeletes == null || coalescedDeletes.docIDs.size() == 0; diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index 9da85ca5e6a..f339133b7ca 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -710,6 +710,9 @@ class DirectoryReader extends IndexReader 
implements Cloneable { for (int i = 0; i < subReaders.length; i++) subReaders[i].commit(); + // Remove segments that contain only 100% deleted docs: + segmentInfos.pruneDeletedSegments(); + // Sync all files we just wrote directory.sync(segmentInfos.files(directory, false)); segmentInfos.commit(directory); diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index eb953c687ba..7f1b736cf1e 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -1163,7 +1163,14 @@ public abstract class IndexReader implements Cloneable,Closeable { return n; } - /** Undeletes all documents currently marked as deleted in this index. + /** Undeletes all documents currently marked as deleted in + * this index. + * + *

<p>NOTE: this is only a best-effort process. For + * example, if all documents in a given segment were + * deleted, Lucene now drops that segment from the index, + * which means its documents will not be recovered by this + * method. * * @throws StaleReaderException if the index has changed * since this reader was opened diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index e746427eec9..710822bd15d 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -3276,6 +3276,15 @@ public class IndexWriter implements Closeable { } } + private boolean keepFullyDeletedSegments; + + /** Only for testing. + * + * @lucene.internal */ + void keepFullyDeletedSegments() { + keepFullyDeletedSegments = true; + } + // called only from assert private boolean filesExist(SegmentInfos toSync) throws IOException { Collection<String> files = toSync.files(directory, false); @@ -3334,6 +3343,10 @@ public class IndexWriter implements Closeable { readerPool.commit(); toSync = (SegmentInfos) segmentInfos.clone(); + if (!keepFullyDeletedSegments) { + toSync.pruneDeletedSegments(); + } + assert filesExist(toSync); if (commitUserData != null) diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java index 896e6222266..493279ee17b 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfos.java @@ -308,6 +308,19 @@ public final class SegmentInfos extends Vector<SegmentInfo> { } } + /** Prunes any segment whose docs are all deleted. 
*/ + public void pruneDeletedSegments() { + int segIdx = 0; + while(segIdx < size()) { + final SegmentInfo info = info(segIdx); + if (info.getDelCount() == info.docCount) { + remove(segIdx); + } else { + segIdx++; + } + } + } + /** * Returns a copy of this instance, also copying each * SegmentInfo. diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index c02f4fa6f26..52d5b7d7d46 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -428,7 +428,7 @@ public class TestAddIndexes extends LuceneTestCase { ); writer.addIndexes(aux, new MockDirectoryWrapper(random, new RAMDirectory(aux))); - assertEquals(1060, writer.maxDoc()); + assertEquals(1020, writer.maxDoc()); assertEquals(1000, writer.getDocCount(0)); writer.close(); dir.close(); @@ -480,7 +480,7 @@ public class TestAddIndexes extends LuceneTestCase { ); writer.addIndexes(aux, aux2); - assertEquals(1060, writer.maxDoc()); + assertEquals(1040, writer.maxDoc()); assertEquals(1000, writer.getDocCount(0)); writer.close(); dir.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java index 41fb07fbf73..ef87922f311 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java @@ -360,7 +360,7 @@ public class TestIndexReader extends LuceneTestCase // CREATE A NEW READER and re-test reader = IndexReader.open(dir, false); - assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm)); + assertEquals("deleted docFreq", 0, reader.docFreq(searchTerm)); assertTermDocsCount("deleted termDocs", reader, searchTerm, 0); reader.close(); reader2.close(); @@ -697,7 +697,6 @@ public class TestIndexReader extends LuceneTestCase // CREATE A NEW READER and re-test reader = IndexReader.open(dir, false); 
- assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm)); assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2)); assertTermDocsCount("deleted termDocs", reader, searchTerm, 0); assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100); @@ -838,7 +837,6 @@ public class TestIndexReader extends LuceneTestCase writer.close(); IndexReader reader = IndexReader.open(dir, false); reader.deleteDocument(0); - reader.deleteDocument(1); reader.close(); reader = IndexReader.open(dir, false); reader.undeleteAll(); @@ -855,7 +853,6 @@ public class TestIndexReader extends LuceneTestCase writer.close(); IndexReader reader = IndexReader.open(dir, false); reader.deleteDocument(0); - reader.deleteDocument(1); reader.close(); reader = IndexReader.open(dir, false); reader.undeleteAll(); @@ -1290,9 +1287,6 @@ public class TestIndexReader extends LuceneTestCase // Open another reader to confirm that everything is deleted reader2 = IndexReader.open(dir, false); - assertEquals("reopened 2", 100, reader2.docFreq(searchTerm1)); - assertEquals("reopened 2", 100, reader2.docFreq(searchTerm2)); - assertEquals("reopened 2", 100, reader2.docFreq(searchTerm3)); assertTermDocsCount("reopened 2", reader2, searchTerm1, 0); assertTermDocsCount("reopened 2", reader2, searchTerm2, 0); assertTermDocsCount("reopened 2", reader2, searchTerm3, 100); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java index c6bdd8c380f..e7d87a640ca 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -1211,7 +1211,6 @@ public class TestIndexReaderReopen extends LuceneTestCase { IndexReader r = IndexReader.open(dir, false); assertEquals(0, r.numDocs()); - assertEquals(4, r.maxDoc()); Collection<IndexCommit> commits = IndexReader.listCommits(dir); for (final IndexCommit commit : commits) { diff --git 
a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index 554fa5bc165..a24bab5a878 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -101,19 +101,12 @@ public class TestIndexWriter extends LuceneTestCase { } reader.close(); - // test doc count before segments are merged/index is optimized - writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); - assertEquals(100, writer.maxDoc()); - writer.close(); - reader = IndexReader.open(dir, true); - assertEquals(100, reader.maxDoc()); assertEquals(60, reader.numDocs()); reader.close(); // optimize the index and check that the new doc count is correct writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer())); - assertEquals(100, writer.maxDoc()); assertEquals(60, writer.numDocs()); writer.optimize(); assertEquals(60, writer.maxDoc()); @@ -1431,7 +1424,6 @@ public class TestIndexWriter extends LuceneTestCase { w.close(); IndexReader ir = IndexReader.open(dir, true); - assertEquals(1, ir.maxDoc()); assertEquals(0, ir.numDocs()); ir.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java index 45ae58ce989..da56333555e 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java @@ -567,24 +567,25 @@ public class TestIndexWriterExceptions extends LuceneTestCase { System.out.println("TEST: open reader"); } IndexReader reader = IndexReader.open(dir, true); - int expected = 3+(1-i)*2; - assertEquals(expected, reader.docFreq(new Term("contents", "here"))); - assertEquals(expected, reader.maxDoc()); - int numDel = 0; - final Bits delDocs = MultiFields.getDeletedDocs(reader); - 
assertNotNull(delDocs); - for(int j=0;j