From 07aca888bade944ca72d2a6fb0ca4a23565f9d76 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 17 Feb 2011 11:00:45 +0000 Subject: [PATCH] LUCENE-2918: prune 100% del segments the moment they are created (in applyDeletes) instead of on commit, so that NRT usage doesn't waste time on fully deleted segs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1071569 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/xmlparser/TestParser.java | 2 + .../lucene/index/BufferedDeletesStream.java | 36 ++- .../apache/lucene/index/DirectoryReader.java | 23 +- .../apache/lucene/index/DocumentsWriter.java | 8 + .../org/apache/lucene/index/IndexWriter.java | 206 +++++++++++------- .../org/apache/lucene/index/MergePolicy.java | 4 +- .../lucene/index/RandomIndexWriter.java | 19 +- .../org/apache/lucene/search/QueryUtils.java | 15 +- .../apache/lucene/util/LuceneTestCase.java | 2 +- .../org/apache/lucene/util/_TestUtil.java | 14 ++ .../lucene/index/TestIndexWriterDelete.java | 2 +- .../index/TestIndexWriterOnDiskFull.java | 2 +- .../apache/lucene/index/TestIsCurrent.java | 2 +- .../apache/lucene/index/TestMultiFields.java | 1 + .../index/TestNRTReaderWithThreads.java | 1 + .../lucene/index/TestRollingUpdates.java | 75 +++++++ .../lucene/index/TestThreadedOptimize.java | 2 +- .../lucene/search/TestCachingSpanFilter.java | 5 +- .../search/TestCachingWrapperFilter.java | 4 +- 19 files changed, 314 insertions(+), 109 deletions(-) create mode 100644 lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java diff --git a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java index 6122b8aab6a..ae5f02be532 100644 --- a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java +++ b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java @@ -21,6 +21,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.Version; import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; +import org.junit.Assume; import org.junit.BeforeClass; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -186,6 +187,7 @@ public class TestParser extends LuceneTestCase { } public void testDuplicateFilterQueryXML() throws ParserException, IOException { + Assume.assumeTrue(searcher.getIndexReader().getSequentialSubReaders().length == 1); Query q=parse("DuplicateFilterQuery.xml"); int h = searcher.search(q, null, 1000).totalHits; assertEquals("DuplicateFilterQuery should produce 1 result ", 1,h); diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java index de3046db5dd..692496ba406 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java @@ -121,9 +121,13 @@ class BufferedDeletesStream { // Current gen, for the merged segment: public final long gen; - ApplyDeletesResult(boolean anyDeletes, long gen) { + // If non-null, contains segments that are 100% deleted + public final SegmentInfos allDeleted; + + ApplyDeletesResult(boolean anyDeletes, long gen, SegmentInfos allDeleted) { this.anyDeletes = anyDeletes; this.gen = gen; + this.allDeleted = allDeleted; } } @@ -154,14 +158,14 @@ class BufferedDeletesStream { final long t0 = System.currentTimeMillis(); if (infos.size() == 0) { - return new
ApplyDeletesResult(false, nextGen++); + return new ApplyDeletesResult(false, nextGen++, null); } assert checkDeleteStats(); if (!any()) { message("applyDeletes: no deletes; skipping"); - return new ApplyDeletesResult(false, nextGen++); + return new ApplyDeletesResult(false, nextGen++, null); } if (infoStream != null) { @@ -178,6 +182,8 @@ class BufferedDeletesStream { int infosIDX = infos2.size()-1; int delIDX = deletes.size()-1; + SegmentInfos allDeleted = null; + while (infosIDX >= 0) { //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); @@ -199,6 +205,7 @@ class BufferedDeletesStream { assert readerPool.infoIsLive(info); SegmentReader reader = readerPool.get(info, false); int delCount = 0; + final boolean segAllDeletes; try { if (coalescedDeletes != null) { //System.out.println(" del coalesced"); @@ -209,13 +216,21 @@ class BufferedDeletesStream { // Don't delete by Term here; DocumentsWriter // already did that on flush: delCount += applyQueryDeletes(packet.queriesIterable(), reader); + segAllDeletes = reader.numDocs() == 0; } finally { readerPool.release(reader); } anyNewDeletes |= delCount > 0; + if (segAllDeletes) { + if (allDeleted == null) { + allDeleted = new SegmentInfos(); + } + allDeleted.add(info); + } + if (infoStream != null) { - message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount); + message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? " 100% deleted" : "")); } if (coalescedDeletes == null) { @@ -234,16 +249,25 @@ class BufferedDeletesStream { assert readerPool.infoIsLive(info); SegmentReader reader = readerPool.get(info, false); int delCount = 0; + final boolean segAllDeletes; try { delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader); delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader); + segAllDeletes = reader.numDocs() == 0; } finally { readerPool.release(reader); } anyNewDeletes |= delCount > 0; + if (segAllDeletes) { + if (allDeleted == null) { + allDeleted = new SegmentInfos(); + } + allDeleted.add(info); + } + if (infoStream != null) { - message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount); + message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? 
" 100% deleted" : "")); } } info.setBufferedDeletesGen(nextGen); @@ -258,7 +282,7 @@ class BufferedDeletesStream { } // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any; - return new ApplyDeletesResult(anyNewDeletes, nextGen++); + return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted); } public synchronized long getNextGen() { diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index 344a91159c7..78174cdf3b6 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -146,7 +146,6 @@ class DirectoryReader extends IndexReader implements Cloneable { this.readOnly = true; this.applyAllDeletes = applyAllDeletes; // saved for reopen - segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW this.termInfosIndexDivisor = termInfosIndexDivisor; if (codecs == null) { this.codecs = CodecProvider.getDefault(); @@ -159,23 +158,33 @@ class DirectoryReader extends IndexReader implements Cloneable { // us, which ensures infos will not change; so there's // no need to process segments in reverse order final int numSegments = infos.size(); - SegmentReader[] readers = new SegmentReader[numSegments]; + + List readers = new ArrayList(); final Directory dir = writer.getDirectory(); + segmentInfos = (SegmentInfos) infos.clone(); + int infosUpto = 0; for (int i=0;i 0 || writer.getKeepFullyDeletedSegments()) { + reader.readerFinishedListeners = readerFinishedListeners; + readers.add(reader); + infosUpto++; + } else { + reader.close(); + segmentInfos.remove(infosUpto); + } success = true; } finally { if (!success) { // Close all readers we had opened: - for(i--;i>=0;i--) { + for(SegmentReader reader : readers) { try { - readers[i].close(); + reader.close(); } catch (Throwable ignore) { // keep going - we want to clean up as much as possible } @@ -186,7 +195,7 @@ class DirectoryReader extends IndexReader implements Cloneable { this.writer = writer; - initialize(readers); + initialize(readers.toArray(new SegmentReader[readers.size()])); } /** This constructor is only used for {@link #reopen()} */ diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index bb5304371fc..341891ab09b 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -648,8 +648,16 @@ final class DocumentsWriter { newSegment.setDelCount(delCount); newSegment.advanceDelGen(); final String delFileName = newSegment.getDelFileName(); + if (infoStream != null) { + message("flush: write " + delCount + " deletes to " + delFileName); + } boolean success2 = false; try { + // TODO: in the NRT case it'd be better to hand + // this del vector over to the + // shortly-to-be-opened SegmentReader and let it + // carry the changes; there's no reason to use + // filesystem as intermediary here. 
flushState.deletedDocs.write(directory, delFileName); success2 = true; } finally { diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 44d909265b3..c5e3e5776e7 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -388,8 +388,7 @@ public class IndexWriter implements Closeable { private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>(); - /** Forcefully clear changes for the specified segments, - * and remove from the pool. This is called on successful merge. */ + /** Forcefully clear changes for the specified segments. This is called on successful merge. */ synchronized void clear(SegmentInfos infos) throws IOException { if (infos == null) { for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) { ent.getValue().hasChanges = false; } } else { for (final SegmentInfo info: infos) { - if (readerMap.containsKey(info)) { - readerMap.get(info).hasChanges = false; + final SegmentReader r = readerMap.get(info); + if (r != null) { + r.hasChanges = false; } } } @@ -407,8 +407,8 @@ // used only by asserts public synchronized boolean infoIsLive(SegmentInfo info) { int idx = segmentInfos.indexOf(info); - assert idx != -1; - assert segmentInfos.get(idx) == info; + assert idx != -1: "info=" + info + " isn't in pool"; + assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos"; return true; } @@ -478,6 +478,21 @@ return false; } + + public synchronized void drop(SegmentInfos infos) throws IOException { + for(SegmentInfo info : infos) { + drop(info); + } + } + + public synchronized void drop(SegmentInfo info) throws IOException { + final SegmentReader sr = readerMap.get(info); + if (sr != null) { + sr.hasChanges = false; + readerMap.remove(info); + sr.close(); + } + } /** Remove all our references to readers, and commits * any pending changes. */ @@ -516,19 +531,18 @@ * Commit all segment readers in the pool. * @throws IOException */ - synchronized void commit() throws IOException { + synchronized void commit(SegmentInfos infos) throws IOException { // We invoke deleter.checkpoint below, so we must be // sync'd on IW: assert Thread.holdsLock(IndexWriter.this); - for (Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) { + for (SegmentInfo info : infos) { - SegmentReader sr = ent.getValue(); - if (sr.hasChanges) { - assert infoIsLive(sr.getSegmentInfo()); + final SegmentReader sr = readerMap.get(info); + if (sr != null && sr.hasChanges) { + assert infoIsLive(info); sr.doCommit(null); - // Must checkpoint w/ deleter, because this // segment reader will have created new _X_N.del // file.
@@ -2558,6 +2572,24 @@ public class IndexWriter implements Closeable { if (result.anyDeletes) { checkpoint(); } + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream != null) { + message("drop 100% deleted segments: " + result.allDeleted); + } + for(SegmentInfo info : result.allDeleted) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(info)) { + segmentInfos.remove(info); + if (readerPool != null) { + readerPool.drop(info); + } + } + } + checkpoint(); + } bufferedDeletesStream.prune(segmentInfos); assert !bufferedDeletesStream.any(); flushControl.clearDeletes(); @@ -2634,9 +2666,13 @@ public class IndexWriter implements Closeable { SegmentInfo info = sourceSegments.info(i); minGen = Math.min(info.getBufferedDeletesGen(), minGen); int docCount = info.docCount; - SegmentReader previousReader = merge.readersClone[i]; + final SegmentReader previousReader = merge.readerClones.get(i); + if (previousReader == null) { + // Reader was skipped because it was 100% deletions + continue; + } final Bits prevDelDocs = previousReader.getDeletedDocs(); - SegmentReader currentReader = merge.readers[i]; + final SegmentReader currentReader = merge.readers.get(i); final Bits currentDelDocs = currentReader.getDeletedDocs(); if (previousReader.hasDeletions()) { @@ -2719,18 +2755,21 @@ public class IndexWriter implements Closeable { return false; } - ensureValidMerge(merge); - commitMergedDeletes(merge, mergedReader); // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound // format as well: - setMergeDocStoreIsCompoundFile(merge); assert !segmentInfos.contains(merge.info); + final boolean allDeleted = mergedReader.numDocs() == 0; + + if (infoStream != null && allDeleted) { + message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? 
"" : "; skipping insert")); + } + final Set mergedAway = new HashSet(merge.segments); int segIdx = 0; int newSegIdx = 0; @@ -2739,7 +2778,7 @@ public class IndexWriter implements Closeable { while(segIdx < curSegCount) { final SegmentInfo info = segmentInfos.info(segIdx++); if (mergedAway.contains(info)) { - if (!inserted) { + if (!inserted && (!allDeleted || keepFullyDeletedSegments)) { segmentInfos.set(segIdx-1, merge.info); inserted = true; newSegIdx++; @@ -2748,7 +2787,20 @@ public class IndexWriter implements Closeable { segmentInfos.set(newSegIdx++, info); } } - assert newSegIdx == curSegCount - merge.segments.size() + 1; + + // Either we found place to insert segment, or, we did + // not, but only because all segments we merged became + // deleted while we are merging, in which case it should + // be the case that the new segment is also all deleted: + if (!inserted) { + assert allDeleted; + if (keepFullyDeletedSegments) { + segmentInfos.add(0, merge.info); + } else { + readerPool.drop(merge.info); + } + } + segmentInfos.subList(newSegIdx, segmentInfos.size()).clear(); if (infoStream != null) { @@ -2770,7 +2822,6 @@ public class IndexWriter implements Closeable { // cascade the optimize: segmentsToOptimize.add(merge.info); } - return true; } @@ -2913,8 +2964,9 @@ public class IndexWriter implements Closeable { // is running (while synchronized) to avoid race // condition where two conflicting merges from different // threads, start - for(int i=0;i BD final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments); + if (result.anyDeletes) { checkpoint(); } + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream != null) { + message("drop 100% deleted segments: " + result.allDeleted); + } + for(SegmentInfo info : result.allDeleted) { + segmentInfos.remove(info); + if (merge.segments.contains(info)) { + mergingSegments.remove(info); + merge.segments.remove(info); + } + } + if (readerPool != null) { + readerPool.drop(result.allDeleted); + } + checkpoint(); + } + merge.info.setBufferedDeletesGen(result.gen); // Lock order: IW -> BD @@ -3023,8 +3093,9 @@ public class IndexWriter implements Closeable { if (merge.registerDone) { final SegmentInfos sourceSegments = merge.segments; final int end = sourceSegments.size(); - for(int i=0;i 0) { + merger.add(clone); + } totDocCount += clone.numDocs(); + segUpto++; } if (infoStream != null) { - message("merge: total "+totDocCount+" docs"); + message("merge: total " + totDocCount + " docs"); } merge.checkAborted(directory); @@ -3160,11 +3220,11 @@ public class IndexWriter implements Closeable { if (infoStream != null) { message("merge segmentCodecs=" + merger.getSegmentCodecs()); - message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments); + message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size()); } - anyNonBulkMerges |= merger.getMatchedSubReaderCount() != numSegments; + anyNonBulkMerges |= merger.getMatchedSubReaderCount() != merge.readers.size(); - assert mergedDocCount == totDocCount; + assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount; // Very important to do this before opening the reader // because codec must know if prox was written for @@ -3347,6 +3407,10 @@ public class IndexWriter implements Closeable { keepFullyDeletedSegments = true; } + boolean getKeepFullyDeletedSegments() { + return keepFullyDeletedSegments; + } + // called 
only from assert private boolean filesExist(SegmentInfos toSync) throws IOException { Collection<String> files = toSync.files(directory, false); @@ -3402,12 +3466,8 @@ if (infoStream != null) message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); - readerPool.commit(); - + readerPool.commit(segmentInfos); toSync = (SegmentInfos) segmentInfos.clone(); - if (!keepFullyDeletedSegments) { - toSync.pruneDeletedSegments(); - } assert filesExist(toSync); diff --git a/lucene/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/src/java/org/apache/lucene/index/MergePolicy.java index 704161b09bd..088065293f7 100644 --- a/lucene/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/src/java/org/apache/lucene/index/MergePolicy.java @@ -72,8 +72,8 @@ public abstract class MergePolicy implements java.io.Closeable { long mergeGen; // used by IndexWriter boolean isExternal; // used by IndexWriter int maxNumSegmentsOptimize; // used by IndexWriter - SegmentReader[] readers; // used by IndexWriter - SegmentReader[] readersClone; // used by IndexWriter + List<SegmentReader> readers; // used by IndexWriter + List<SegmentReader> readerClones; // used by IndexWriter public final SegmentInfos segments; boolean aborted; Throwable error; diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 2faf22bb011..d0afe3f262f 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -102,6 +102,17 @@ public class RandomIndexWriter implements Closeable { } } + public void updateDocument(Term t, Document doc) throws IOException { + w.updateDocument(t, doc); + if (docCount++ == flushAt) { + if (LuceneTestCase.VERBOSE) { + System.out.println("RIW.updateDocument: now doing a commit"); + } + w.commit(); + flushAt += _TestUtil.nextInt(r, 10, 1000); + } + } + public void addIndexes(Directory...
dirs) throws CorruptIndexException, IOException { w.addIndexes(dirs); } @@ -127,17 +138,21 @@ public class RandomIndexWriter implements Closeable { } public IndexReader getReader() throws IOException { + return getReader(true); + } + + public IndexReader getReader(boolean applyDeletions) throws IOException { getReaderCalled = true; if (r.nextInt(4) == 2) w.optimize(); // If we are writing with PreFlexRW, force a full // IndexReader.open so terms are sorted in codepoint // order during searching: - if (!w.codecs.getDefaultFieldCodec().equals("PreFlex") && r.nextBoolean()) { + if (!applyDeletions || !w.codecs.getDefaultFieldCodec().equals("PreFlex") && r.nextBoolean()) { if (LuceneTestCase.VERBOSE) { System.out.println("RIW.getReader: use NRT reader"); } - return w.getReader(); + return w.getReader(applyDeletions); } else { if (LuceneTestCase.VERBOSE) { System.out.println("RIW.getReader: open new reader"); diff --git a/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java index 483106d9fef..7ccd225e113 100644 --- a/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java +++ b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java @@ -2,14 +2,13 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Random; -import java.lang.reflect.Method; import junit.framework.Assert; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; @@ -19,6 +18,7 @@ import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util._TestUtil; import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; @@ -172,16 +172,7 @@ public class QueryUtils { } w.commit(); w.deleteDocuments( new MatchAllDocsQuery() ); - try { - // Carefully invoke what is a package-private (test - // only, internal) method on IndexWriter: - Method m = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments"); - m.setAccessible(true); - m.invoke(w); - } catch (Exception e) { - // Should not happen? 
- throw new RuntimeException(e); - } + _TestUtil.keepFullyDeletedSegments(w); w.commit(); if (0 < numDeletedDocs) diff --git a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java index a10689c98be..6469ca9302b 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java @@ -1243,7 +1243,7 @@ public abstract class LuceneTestCase extends Assert { } @Override - public String toString() { + public synchronized String toString() { return "RandomCodecProvider: " + previousMappings.toString(); } } diff --git a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java index 1ad038040a2..f16a971020c 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; +import java.lang.reflect.Method; import java.util.Enumeration; import java.util.Random; import java.util.Map; @@ -305,4 +306,17 @@ public class _TestUtil { }); Assert.assertEquals("Reflection does not produce same map", reflectedValues, map); } + + public static void keepFullyDeletedSegments(IndexWriter w) { + try { + // Carefully invoke what is a package-private (test + // only, internal) method on IndexWriter: + Method m = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments"); + m.setAccessible(true); + m.invoke(w); + } catch (Exception e) { + // Should not happen? + throw new RuntimeException(e); + } + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java index f0b3fd6d8b0..0acc750376a 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java @@ -81,7 +81,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); - + modifier.setInfoStream(VERBOSE ? System.out : null); int id = 0; int value = 100; diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java index 5fc03471ecf..929bebc145f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java @@ -464,11 +464,11 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase { setReaderPooling(true). 
setMergePolicy(newLogMergePolicy(2)) ); + _TestUtil.keepFullyDeletedSegments(w); Document doc = new Document(); doc.add(newField("f", "doctor who", Field.Store.YES, Field.Index.ANALYZED)); w.addDocument(doc); - w.commit(); w.deleteDocuments(new Term("f", "who")); diff --git a/lucene/src/test/org/apache/lucene/index/TestIsCurrent.java b/lucene/src/test/org/apache/lucene/index/TestIsCurrent.java index 8e41d522607..524108d3524 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIsCurrent.java +++ b/lucene/src/test/org/apache/lucene/index/TestIsCurrent.java @@ -68,7 +68,7 @@ public class TestIsCurrent extends LuceneTestCase { // assert index has a document and reader is up2date assertEquals("One document should be in the index", 1, writer.numDocs()); - assertTrue("Document added, reader should be stale ", reader.isCurrent()); + assertTrue("One document added, reader should be current", reader.isCurrent()); // remove document Term idTerm = new Term("UUID", "1"); diff --git a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java index f1337e95191..30cb118f2f7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java +++ b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java @@ -32,6 +32,7 @@ public class TestMultiFields extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(NoMergePolicy.COMPOUND_FILES)); + _TestUtil.keepFullyDeletedSegments(w); Map<BytesRef,List<Integer>> docs = new HashMap<BytesRef,List<Integer>>(); Set<Integer> deleted = new HashSet<Integer>(); diff --git a/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java b/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java index b544f027126..c71448dbb03 100644 --- a/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java +++ b/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java @@ -36,6 +36,7 @@ public class TestNRTReaderWithThreads extends LuceneTestCase { setMaxBufferedDocs(10). setMergePolicy(newLogMergePolicy(false,2)) ); + writer.setInfoStream(VERBOSE ? System.out : null); IndexReader reader = writer.getReader(); // start pooling readers reader.close(); RunThread[] indexThreads = new RunThread[4]; diff --git a/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java new file mode 100644 index 00000000000..c38fd2d4b2c --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java @@ -0,0 +1,75 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
*/ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.store.*; +import org.apache.lucene.util.*; +import org.junit.Test; + +public class TestRollingUpdates extends LuceneTestCase { + + // Just updates the same set of N docs over and over, to + // stress out deletions + + @Test + public void testRollingUpdates() throws Exception { + final Directory dir = newDirectory(); + + final LineFileDocs docs = new LineFileDocs(random); + + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())); + final int SIZE = 200 * RANDOM_MULTIPLIER; + int id = 0; + IndexReader r = null; + final int numUpdates = (int) (SIZE * (2+random.nextDouble())); + for(int docIter=0;docIter<numUpdates;docIter++) { + final Document doc = docs.nextDoc(); + final String myID = ""+id; + if (id == SIZE-1) { + id = 0; + } else { + id++; + } + doc.getField("docid").setValue(myID); + w.updateDocument(new Term("docid", myID), doc); + + if (docIter >= SIZE && random.nextInt(50) == 17) { + if (r != null) { + r.close(); + } + final boolean applyDeletions = random.nextBoolean(); + r = w.getReader(applyDeletions); + assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE); + } + } + + if (r != null) { + r.close(); + } + + w.commit(); + assertEquals(SIZE, w.numDocs()); + + w.close(); + docs.close(); + + dir.close(); + } +} diff --git a/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java b/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java index c2f988422d3..3516079d208 100644 --- a/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java +++ b/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java @@ -71,7 +71,7 @@ public class TestThreadedOptimize extends LuceneTestCase { } ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(4); - //writer.setInfoStream(System.out); + writer.setInfoStream(VERBOSE ?
System.out : null); Thread[] threads = new Thread[NUM_THREADS]; diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java index 3424658ed27..5d8aa209107 100644 --- a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestCachingSpanFilter extends LuceneTestCase { @@ -73,7 +74,9 @@ public class TestCachingSpanFilter extends LuceneTestCase { docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); - // now delete the doc, refresh the reader, and see that it's not there + // now delete the doc, refresh the reader, and see that + // it's not there + _TestUtil.keepFullyDeletedSegments(writer.w); writer.deleteDocuments(new Term("id", "1")); reader = refreshReader(reader); diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java index 357b3df1017..6c7f7af1165 100644 --- a/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java @@ -22,8 +22,8 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.SlowMultiReaderWrapper; @@ -32,6 +32,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSetDISI; +import org.apache.lucene.util._TestUtil; public class TestCachingWrapperFilter extends LuceneTestCase { @@ -196,6 +197,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase { assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); // now delete the doc, refresh the reader, and see that it's not there + _TestUtil.keepFullyDeletedSegments(writer.w); writer.deleteDocuments(new Term("id", "1")); reader = refreshReader(reader);
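
The heart of this change, stripped of the diff noise: applyDeletes now returns, alongside the any-deletes flag and the generation, the set of segments that just became 100% deleted, and IndexWriter prunes those segments immediately instead of waiting for commit. Below is a minimal, self-contained sketch of that flow under simplified assumptions; SegInfo, ApplyResult, and DeletesSketch are illustrative stand-in names, not Lucene's actual classes or API.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Simplified stand-in for SegmentInfo: fixed doc count plus a running delete count.
class SegInfo {
  final String name;
  final int docCount;
  int delCount;
  SegInfo(String name, int docCount) { this.name = name; this.docCount = docCount; }
  int numDocs() { return docCount - delCount; }   // live docs remaining
}

// Stand-in for BufferedDeletesStream.ApplyDeletesResult.
class ApplyResult {
  final boolean anyDeletes;
  final long gen;
  final List<SegInfo> allDeleted;   // null unless some segment became 100% deleted
  ApplyResult(boolean anyDeletes, long gen, List<SegInfo> allDeleted) {
    this.anyDeletes = anyDeletes;
    this.gen = gen;
    this.allDeleted = allDeleted;
  }
}

class DeletesSketch {
  private long nextGen;

  // Like applyDeletes above: apply pending deletes per segment and report
  // which segments just dropped to zero live docs.
  ApplyResult applyDeletes(List<SegInfo> infos, Map<SegInfo,Integer> newDeletes) {
    boolean any = false;
    List<SegInfo> allDeleted = null;
    for (SegInfo info : infos) {
      final Integer del = newDeletes.get(info);
      if (del == null || del.intValue() == 0) {
        continue;
      }
      info.delCount += del.intValue();
      any = true;
      if (info.numDocs() == 0) {                  // segment is now 100% deleted
        if (allDeleted == null) {
          allDeleted = new ArrayList<SegInfo>();
        }
        allDeleted.add(info);
      }
    }
    return new ApplyResult(any, nextGen++, allDeleted);
  }

  // Like the IndexWriter side: drop fully deleted segments right away, except
  // ones a running merge has registered; the merge will skip them and drop
  // them itself once it completes.
  void pruneFullyDeleted(ApplyResult result, List<SegInfo> segmentInfos,
                         Set<SegInfo> mergingSegments, boolean keepFullyDeletedSegments) {
    if (keepFullyDeletedSegments || result.allDeleted == null) {
      return;
    }
    for (SegInfo info : result.allDeleted) {
      if (!mergingSegments.contains(info)) {
        segmentInfos.remove(info);   // readers opened after this never see it
      }
    }
  }
}

The mergingSegments check mirrors the special case in the IndexWriter hunks above: a segment a merge has already registered stays in the pool, the merge skips it at merge time (its reader has zero live docs), and commitMerge drops it at the end.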
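The NRT open path applies the same policy, which is what the DirectoryReader hunk does. A hedged sketch of that behavior, again with hypothetical stand-ins (SubReader, openSubReaders, and the list-of-names clone are not the real SegmentReader/SegmentInfos API): sub-readers whose segments have no live documents are closed rather than added, and the matching entry is removed from the reader's private clone of the segment list, so an NRT reader never carries a fully deleted segment.

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for a per-segment reader.
interface SubReader {
  int numDocs();   // live docs in this segment
  void close();
}

class NrtOpenSketch {
  // Like the new DirectoryReader NRT constructor loop: skip (and close)
  // sub-readers of 100% deleted segments unless the writer keeps them,
  // pruning the cloned segment list in step with what is kept.
  static List<SubReader> openSubReaders(List<SubReader> pooled,
                                        List<String> segmentInfosClone,
                                        boolean keepFullyDeletedSegments) {
    final List<SubReader> readers = new ArrayList<SubReader>();
    int infosUpto = 0;
    for (SubReader reader : pooled) {
      if (reader.numDocs() > 0 || keepFullyDeletedSegments) {
        readers.add(reader);
        infosUpto++;                           // segment stays in the cloned infos
      } else {
        reader.close();
        segmentInfosClone.remove(infosUpto);   // prune the fully deleted segment
      }
    }
    return readers;
  }
}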