From 858e309c7b1f9339f7102c7445ef29b9ca10e750 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Tue, 29 Jul 2014 15:34:44 +0000
Subject: [PATCH] LUCENE-5843: IndexWriter now enforces max docs in one index

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1614388 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt                                 |   6 +
 .../lucene/index/BaseCompositeReader.java          |   4 +-
 .../apache/lucene/index/DocumentsWriter.java       |   3 +-
 .../index/DocumentsWriterPerThread.java            |  36 +-
 .../org/apache/lucene/index/IndexWriter.java       |  65 ++-
 .../org/apache/lucene/index/MergePolicy.java       |   6 +-
 .../org/apache/lucene/index/Test2BDocs.java        |  83 ----
 .../lucene/index/TestIndexWriterMaxDocs.java       | 378 ++++++++++++++++++
 .../apache/lucene/util/LuceneTestCase.java         |  29 +-
 9 files changed, 510 insertions(+), 100 deletions(-)
 delete mode 100644 lucene/core/src/test/org/apache/lucene/index/Test2BDocs.java
 create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMaxDocs.java

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0a3783042ae..4fe37df567a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -133,6 +133,12 @@ New Features
   checksum against all the bytes), retrieve checksum to validate structure
   of footer, this can detect some forms of corruption such as truncation.
   (Robert Muir)
+
+* LUCENE-5843: Added IndexWriter.MAX_DOCS which is the maximum number
+  of documents allowed in a single index, and any operations that add
+  documents will now throw IllegalStateException if the max count
+  would be exceeded, instead of silently creating an unusable
+  index.  (Mike McCandless)
 
 API Changes
diff --git a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
index 7bb165ffaea..1f52b922df1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
@@ -73,8 +73,8 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends CompositeReader {
       starts[i] = maxDoc;
       final IndexReader r = subReaders[i];
       maxDoc += r.maxDoc();      // compute maxDocs
-      if (maxDoc < 0 /* overflow */) {
-        throw new IllegalArgumentException("Too many documents, composite IndexReaders cannot exceed " + Integer.MAX_VALUE);
+      if (maxDoc < 0 /* overflow */ || maxDoc > IndexWriter.getActualMaxDocs()) {
+        throw new IllegalArgumentException("Too many documents, composite IndexReaders cannot exceed " + IndexWriter.getActualMaxDocs());
       }
       numDocs += r.numDocs();    // compute numDocs
       r.registerParentReader(this);
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
index 6a3761487f6..c79d693342c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -388,7 +388,8 @@ final class DocumentsWriter implements Closeable, Accountable {
         final FieldInfos.Builder infos = new FieldInfos.Builder(
             writer.globalFieldNumberMap);
         state.dwpt = new DocumentsWriterPerThread(writer.newSegmentName(),
-                                                  directory, config, infoStream, deleteQueue, infos);
+                                                  directory, config, infoStream, deleteQueue, infos,
+                                                  writer.pendingNumDocs);
       }
     }
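The CHANGES entry above is the user-visible contract of this patch. As a quick illustration (not part of the patch) of what callers now see, the sketch below follows the style of the new tests, passing a null analyzer to IndexWriterConfig; the RAMDirectory, Version constant, and class name are illustrative only:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class MaxDocsDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // null analyzer, as in the new tests; any analyzer behaves the same here
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null));
        try {
          w.addDocument(new Document());
        } catch (IllegalStateException ise) {
          // thrown up front once the index already holds IndexWriter.MAX_DOCS
          // documents; the index itself is left intact and usable
        }
        w.close();
      }
    }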
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
index 6e7c5ae9854..048b6159045 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@@ -17,14 +17,12 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
-
 import java.io.IOException;
 import java.text.NumberFormat;
 import java.util.HashSet;
 import java.util.Locale;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
@@ -43,6 +41,9 @@ import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.MutableBits;
 import org.apache.lucene.util.RamUsageEstimator;
 
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
+
 class DocumentsWriterPerThread {
 
   /**
@@ -154,11 +155,11 @@ class DocumentsWriterPerThread {
   private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
   final Allocator byteBlockAllocator;
   final IntBlockPool.Allocator intBlockAllocator;
+  private final AtomicLong pendingNumDocs;
   private final LiveIndexWriterConfig indexWriterConfig;
-
   public DocumentsWriterPerThread(String segmentName, Directory directory, LiveIndexWriterConfig indexWriterConfig, InfoStream infoStream, DocumentsWriterDeleteQueue deleteQueue,
-                                  FieldInfos.Builder fieldInfos) throws IOException {
+                                  FieldInfos.Builder fieldInfos, AtomicLong pendingNumDocs) throws IOException {
     this.directoryOrig = directory;
     this.directory = new TrackingDirectoryWrapper(directory);
     this.fieldInfos = fieldInfos;
@@ -167,6 +168,7 @@ class DocumentsWriterPerThread {
     this.codec = indexWriterConfig.getCodec();
     this.docState = new DocState(this, infoStream);
     this.docState.similarity = indexWriterConfig.getSimilarity();
+    this.pendingNumDocs = pendingNumDocs;
     bytesUsed = Counter.newCounter();
     byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
     pendingUpdates = new BufferedUpdates();
@@ -207,6 +209,16 @@ class DocumentsWriterPerThread {
     return true;
   }
 
+  /** Anything that will add N docs to the index should reserve first to
+   *  make sure it's allowed. */
+  private void reserveDoc() {
+    if (pendingNumDocs.incrementAndGet() > IndexWriter.getActualMaxDocs()) {
+      // Reserve failed
+      pendingNumDocs.decrementAndGet();
+      throw new IllegalStateException("number of documents in the index cannot exceed " + IndexWriter.getActualMaxDocs());
+    }
+  }
+
   public void updateDocument(IndexDocument doc, Analyzer analyzer, Term delTerm) throws IOException {
     assert testPoint("DocumentsWriterPerThread addDocument start");
     assert deleteQueue != null;
@@ -216,6 +228,13 @@ class DocumentsWriterPerThread {
     if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
       infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
     }
+    // Even on exception, the document is still added (but marked
+    // deleted), so we don't need to un-reserve at that point.
+    // Aborting exceptions will actually "lose" more than one
+    // document, so the counter will be "wrong" in that case, but
+    // it's very hard to fix (we can't easily distinguish aborting
+    // vs non-aborting exceptions):
+    reserveDoc();
     boolean success = false;
     try {
       try {
@@ -250,6 +269,13 @@ class DocumentsWriterPerThread {
 
     try {
       for(IndexDocument doc : docs) {
+        // Even on exception, the document is still added (but marked
+        // deleted), so we don't need to un-reserve at that point.
+        // Aborting exceptions will actually "lose" more than one
+        // document, so the counter will be "wrong" in that case, but
+        // it's very hard to fix (we can't easily distinguish aborting
+        // vs non-aborting exceptions):
+        reserveDoc();
         docState.doc = doc;
         docState.docID = numDocsInRAM;
        docCount++;
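reserveDoc() above is the core of the enforcement on the hot indexing path: optimistically increment the shared counter, check it, and undo on overshoot. Below is a standalone sketch of just that pattern (the class and field names are mine, not the patch's), useful for seeing why it is race-free: a thread that overshoots rolls back its own increment before failing, so the counter never settles above the limit even under concurrency.

    import java.util.concurrent.atomic.AtomicLong;

    // Illustrative sketch of the reserve/rollback pattern used by reserveDoc():
    class DocCountReserver {
      private final AtomicLong pending = new AtomicLong();
      private final long limit;

      DocCountReserver(long limit) {
        this.limit = limit;
      }

      // Called before adding one document; throws if the budget is spent.
      void reserveOne() {
        if (pending.incrementAndGet() > limit) {
          pending.decrementAndGet();  // roll back our own reservation
          throw new IllegalStateException("number of documents cannot exceed " + limit);
        }
      }
    }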
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index f428b3a4fba..bdcf61ad146 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -32,11 +32,12 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
@@ -194,7 +195,31 @@ import org.apache.lucene.util.Version;
  * keeps track of the last non commit checkpoint.
  */
 public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
-  
+
+  /** Hard limit on maximum number of documents that may be added to the
+   *  index.  If you try to add more than this you'll hit {@code IllegalStateException}. */
+  // We defensively subtract 128 to be well below the lowest
+  // ArrayUtil.MAX_ARRAY_LENGTH on "typical" JVMs.  We don't just use
+  // ArrayUtil.MAX_ARRAY_LENGTH here because this can vary across JVMs:
+  public static final int MAX_DOCS = Integer.MAX_VALUE - 128;
+
+  // Use a private static var to enforce the limit so testing
+  // can use less electricity:
+  private static int actualMaxDocs = MAX_DOCS;
+
+  /** Used only for testing. */
+  static void setMaxDocs(int maxDocs) {
+    if (maxDocs > MAX_DOCS) {
+      // Cannot go higher than the hard max:
+      throw new IllegalArgumentException("maxDocs must be <= IndexWriter.MAX_DOCS=" + MAX_DOCS + "; got: " + maxDocs);
+    }
+    IndexWriter.actualMaxDocs = maxDocs;
+  }
+
+  static int getActualMaxDocs() {
+    return IndexWriter.actualMaxDocs;
+  }
+
   private static final int UNBOUNDED_MAX_MERGE_SEGMENTS = -1;
 
   /**
@@ -289,6 +314,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    *  an infoStream message about how long commit took. */
   private long startCommitTime;
 
+  /** How many documents are in the index, or are in the process of being
+   *  added (reserved).  E.g., operations like addIndexes will first reserve
+   *  the right to add N docs, before they actually change the index,
+   *  much like how hotels place an "authorization hold" on your credit
+   *  card to make sure they can later charge you when you check out. */
+  final AtomicLong pendingNumDocs = new AtomicLong();
+
   DirectoryReader getReader() throws IOException {
     return getReader(true);
   }
@@ -2437,7 +2469,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
     flush(false, true);
 
     List<SegmentCommitInfo> infos = new ArrayList<>();
-
+    int totalDocCount = 0;
     boolean success = false;
     try {
       for (Directory dir : dirs) {
@@ -2446,6 +2478,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
         }
         SegmentInfos sis = new SegmentInfos(); // read infos from dir
         sis.read(dir);
+        totalDocCount += sis.totalDocCount();
 
         for (SegmentCommitInfo info : sis) {
           assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
@@ -2482,6 +2515,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       success = false;
       try {
         ensureOpen();
+        // Make sure adding the new documents to this index won't
+        // exceed the limit:
+        reserveDocs(totalDocCount);
         success = true;
       } finally {
         if (!success) {
@@ -2566,6 +2602,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
           mergeReaders.add(ctx.reader());
         }
       }
+
+      // Make sure adding the new documents to this index won't
+      // exceed the limit:
+      reserveDocs(numDocs);
 
       final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
@@ -3119,6 +3159,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
           // it once it's done:
           if (!mergingSegments.contains(info)) {
             segmentInfos.remove(info);
+            pendingNumDocs.addAndGet(-info.info.getDocCount());
             readerPool.drop(info);
           }
         }
@@ -3491,6 +3532,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
     // merge:
     segmentInfos.applyMergeChanges(merge, dropSegment);
 
+    // Now deduct the deleted docs that we just reclaimed from this
+    // merge:
+    int delDocCount = merge.totalDocCount - merge.info.info.getDocCount();
+    assert delDocCount >= 0;
+    pendingNumDocs.addAndGet(-delDocCount);
+
     if (dropSegment) {
       assert !segmentInfos.contains(merge.info);
       readerPool.drop(merge.info);
@@ -3774,6 +3821,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
         }
         for(SegmentCommitInfo info : result.allDeleted) {
           segmentInfos.remove(info);
+          pendingNumDocs.addAndGet(-info.info.getDocCount());
           if (merge.segments.contains(info)) {
             mergingSegments.remove(info);
             merge.segments.remove(info);
@@ -4644,4 +4692,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       return false;
     }
   }
+
+  /** Anything that will add N docs to the index should reserve first to
+   *  make sure it's allowed.  This will throw {@code
+   *  IllegalStateException} if it's not allowed. */
+  private void reserveDocs(int numDocs) {
+    if (pendingNumDocs.addAndGet(numDocs) > actualMaxDocs) {
+      // Reserve failed
+      pendingNumDocs.addAndGet(-numDocs);
+      throw new IllegalStateException("number of documents in the index cannot exceed " + actualMaxDocs);
+    }
+  }
 }
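Taken together, IndexWriter's side of the accounting works in two directions: bulk operations like addIndexes reserve all incoming docs up front (rolling back if the reservation fails), while merges and dropped 100%-deleted segments hand reservations back once their deleted docs are reclaimed. A compact sketch of that bookkeeping, with illustrative names (not patch code):

    import java.util.concurrent.atomic.AtomicLong;

    class PendingDocsAccounting {
      private final AtomicLong pendingNumDocs = new AtomicLong();
      private final long maxDocs;

      PendingDocsAccounting(long maxDocs) {
        this.maxDocs = maxDocs;
      }

      // e.g. addIndexes: reserve all incoming docs before changing the index
      void reserve(int numDocs) {
        if (pendingNumDocs.addAndGet(numDocs) > maxDocs) {
          pendingNumDocs.addAndGet(-numDocs);  // reserve failed: roll back
          throw new IllegalStateException("number of documents in the index cannot exceed " + maxDocs);
        }
      }

      // e.g. commitMerge: segments totalling totalDocCount docs merged down
      // to mergedDocCount live docs, so the difference (reclaimed deletes)
      // no longer counts against the limit
      void onMergeCommit(int totalDocCount, int mergedDocCount) {
        int delDocCount = totalDocCount - mergedDocCount;
        assert delDocCount >= 0;
        pendingNumDocs.addAndGet(-delDocCount);
      }
    }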
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
index efc343eec50..cef7ce0c705 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
@@ -20,8 +20,6 @@ package org.apache.lucene.index;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MergeInfo;
 import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.SetOnce;
-import org.apache.lucene.util.SetOnce.AlreadySetException;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -92,7 +90,7 @@ public abstract class MergePolicy implements java.io.Closeable {
 
   public static class OneMerge {
 
-    SegmentCommitInfo info;      // used by IndexWriter
+    SegmentCommitInfo info;         // used by IndexWriter
     boolean registerDone;           // used by IndexWriter
     long mergeGen;                  // used by IndexWriter
     boolean isExternal;             // used by IndexWriter
@@ -109,7 +107,7 @@ public abstract class MergePolicy implements java.io.Closeable {
     /** Segments to be merged. */
     public final List<SegmentCommitInfo> segments;
 
-    /** Number of documents in the merged segment. */
+    /** Total number of documents in segments to be merged, not accounting for deletions. */
     public final int totalDocCount;
     boolean aborted;
     Throwable error;
diff --git a/lucene/core/src/test/org/apache/lucene/index/Test2BDocs.java b/lucene/core/src/test/org/apache/lucene/index/Test2BDocs.java
deleted file mode 100644
index 21a98b709ad..00000000000
--- a/lucene/core/src/test/org/apache/lucene/index/Test2BDocs.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package org.apache.lucene.index;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Arrays;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-public class Test2BDocs extends LuceneTestCase {
-  static Directory dir;
-
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    dir = newFSDirectory(createTempDir("2Bdocs"));
-    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
-    Document doc = new Document();
-    for (int i = 0; i < 262144; i++) {
-      iw.addDocument(doc);
-    }
-    iw.forceMerge(1);
-    iw.shutdown();
-  }
-
-  @AfterClass
-  public static void afterClass() throws Exception {
-    dir.close();
-    dir = null;
-  }
-
-  public void testOverflow() throws Exception {
-    DirectoryReader ir = DirectoryReader.open(dir);
-    IndexReader subReaders[] = new IndexReader[8192];
-    Arrays.fill(subReaders, ir);
-    try {
-      new MultiReader(subReaders);
-      fail();
-    } catch (IllegalArgumentException expected) {
-      // expected
-    }
-    ir.close();
-  }
-
-  public void testExactlyAtLimit() throws Exception {
-    Directory dir2 = newFSDirectory(createTempDir("2BDocs2"));
-    IndexWriter iw = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
-    Document doc = new Document();
-    for (int i = 0; i < 262143; i++) {
-      iw.addDocument(doc);
-    }
-    iw.shutdown();
-    DirectoryReader ir = DirectoryReader.open(dir);
-    DirectoryReader ir2 = DirectoryReader.open(dir2);
-    IndexReader subReaders[] = new IndexReader[8192];
-    Arrays.fill(subReaders, ir);
-    subReaders[subReaders.length-1] = ir2;
-    MultiReader mr = new MultiReader(subReaders);
-    assertEquals(Integer.MAX_VALUE, mr.maxDoc());
-    assertEquals(Integer.MAX_VALUE, mr.numDocs());
-    ir.close();
-    ir2.close();
-    dir2.close();
-  }
-}
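For reference, the arithmetic the deleted test relied on, and why it had to go: 8192 copies of a 262144-doc reader give 2^13 * 2^18 = 2^31 total docs, one past Integer.MAX_VALUE, so the composite maxDoc overflowed int (testOverflow); swapping one reader for a 262143-doc index gives exactly 2^31 - 1 = Integer.MAX_VALUE (testExactlyAtLimit). Under the new enforced limit of Integer.MAX_VALUE - 128, a composite of exactly Integer.MAX_VALUE docs is no longer legal, hence the replacement test below. A worked check (not patch code):

    public class Test2BDocsArithmetic {
      public static void main(String[] args) {
        System.out.println(8192L * 262144L);            // 2147483648, > Integer.MAX_VALUE
        System.out.println(8191L * 262144L + 262143L);  // 2147483647, == Integer.MAX_VALUE
        System.out.println(Integer.MAX_VALUE);          // 2147483647
      }
    }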
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMaxDocs.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMaxDocs.java
new file mode 100644
index 00000000000..d386966115a
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMaxDocs.java
@@ -0,0 +1,378 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase.Monster;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TimeUnits;
+import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
+
+@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
+@TimeoutSuite(millis = 6 * TimeUnits.HOUR)
+public class TestIndexWriterMaxDocs extends LuceneTestCase {
+
+  @Monster("takes a long time")
+  public void testExactlyAtTrueLimit() throws Exception {
+    Directory dir = newFSDirectory(createTempDir("2BDocs3"));
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+    Document doc = new Document();
+    doc.add(newStringField("field", "text", Field.Store.NO));
+    for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
+      iw.addDocument(doc);
+      /*
+      if (i%1000000 == 0) {
+        System.out.println((i/1000000) + " M docs...");
+      }
+      */
+    }
+    iw.commit();
+
+    // First unoptimized, then optimized:
+    for(int i=0;i<2;i++) {
+      DirectoryReader ir = DirectoryReader.open(dir);
+      assertEquals(IndexWriter.MAX_DOCS, ir.maxDoc());
+      assertEquals(IndexWriter.MAX_DOCS, ir.numDocs());
+      IndexSearcher searcher = new IndexSearcher(ir);
+      TopDocs hits = searcher.search(new TermQuery(new Term("field", "text")), 10);
+      assertEquals(IndexWriter.MAX_DOCS, hits.totalHits);
+
+      // Sort by docID reversed:
+      hits = searcher.search(new TermQuery(new Term("field", "text")), null, 10, new Sort(new SortField(null, SortField.Type.DOC, true)));
+      assertEquals(IndexWriter.MAX_DOCS, hits.totalHits);
+      assertEquals(10, hits.scoreDocs.length);
+      assertEquals(IndexWriter.MAX_DOCS-1, hits.scoreDocs[0].doc);
+      ir.close();
+
+      iw.forceMerge(1);
+    }
+
+    iw.shutdown();
+    dir.close();
+  }
+
+  public void testAddDocument() throws Exception {
+    setIndexWriterMaxDocs(10);
+    try {
+      Directory dir = newDirectory();
+      IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+      for(int i=0;i<10;i++) {
+        w.addDocument(new Document());
+      }
+
+      // 11th document should fail:
+      try {
+        w.addDocument(new Document());
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      w.close();
+      dir.close();
+    } finally {
+      restoreIndexWriterMaxDocs();
+    }
+  }
+
+  public void testAddDocuments() throws Exception {
+    setIndexWriterMaxDocs(10);
+    try {
+      Directory dir = newDirectory();
+      IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+      for(int i=0;i<10;i++) {
+        w.addDocument(new Document());
+      }
+
+      // 11th document should fail:
+      try {
+        w.addDocuments(Collections.singletonList(new Document()));
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      w.close();
+      dir.close();
+    } finally {
+      restoreIndexWriterMaxDocs();
+    }
+  }
+
+  public void testUpdateDocument() throws Exception {
+    setIndexWriterMaxDocs(10);
+    try {
+      Directory dir = newDirectory();
+      IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+      for(int i=0;i<10;i++) {
+        w.addDocument(new Document());
+      }
+
+      // 11th document should fail:
+      try {
+        w.updateDocument(new Term("field", "foo"), new Document());
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      w.close();
+      dir.close();
+    } finally {
+      restoreIndexWriterMaxDocs();
+    }
+  }
+
+  public void testUpdateDocuments() throws Exception {
+    setIndexWriterMaxDocs(10);
+    try {
+      Directory dir = newDirectory();
+      IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+      for(int i=0;i<10;i++) {
+        w.addDocument(new Document());
+      }
+
+      // 11th document should fail:
+      try {
+        w.updateDocuments(new Term("field", "foo"), Collections.singletonList(new Document()));
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      w.close();
+      dir.close();
+    } finally {
+      restoreIndexWriterMaxDocs();
+    }
+  }
+
+  public void testReclaimedDeletes() throws Exception {
+    setIndexWriterMaxDocs(10);
+    try {
+      Directory dir = newDirectory();
+      IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+      for(int i=0;i<10;i++) {
+        Document doc = new Document();
+        doc.add(newStringField("id", ""+i, Field.Store.NO));
+        w.addDocument(doc);
+      }
+
+      // Delete 5 of them:
+      for(int i=0;i<5;i++) {
+        w.deleteDocuments(new Term("id", ""+i));
+      }
+
+      w.forceMerge(1);
+
+      assertEquals(5, w.maxDoc());
+
+      // Add 5 more docs
+      for(int i=0;i<5;i++) {
+        w.addDocument(new Document());
+      }
+
+      // 11th document should fail:
+      try {
+        w.addDocument(new Document());
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      w.close();
+      dir.close();
+    } finally {
+      restoreIndexWriterMaxDocs();
+    }
+  }
+
+  // Tests that 100% deleted segments (which IW "specializes" by dropping entirely) are not mis-counted
+  public void testReclaimedDeletesWholeSegments() throws Exception {
+    setIndexWriterMaxDocs(10);
+    try {
+      Directory dir = newDirectory();
+      IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
+      iwc.setMergePolicy(NoMergePolicy.INSTANCE);
+      IndexWriter w = new IndexWriter(dir, iwc);
+      for(int i=0;i<10;i++) {
+        Document doc = new Document();
+        doc.add(newStringField("id", ""+i, Field.Store.NO));
+        w.addDocument(doc);
+        if (i % 2 == 0) {
+          // Make a new segment every 2 docs:
+          w.commit();
+        }
+      }
+
+      // Delete 5 of them:
+      for(int i=0;i<5;i++) {
+        w.deleteDocuments(new Term("id", ""+i));
+      }
+
+      w.forceMerge(1);
+
+      assertEquals(5, w.maxDoc());
+
+      // Add 5 more docs
+      for(int i=0;i<5;i++) {
+        w.addDocument(new Document());
+      }
+
+      // 11th document should fail:
+      try {
+        w.addDocument(new Document());
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      w.close();
+      dir.close();
+    } finally {
+      restoreIndexWriterMaxDocs();
+    }
+  }
+
+  public void testAddIndexes() throws Exception {
+    setIndexWriterMaxDocs(10);
+    try {
+      Directory dir = newDirectory();
+      IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+      for(int i=0;i<10;i++) {
+        w.addDocument(new Document());
+      }
+      w.shutdown();
+
+      Directory dir2 = newDirectory();
+      IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
+      w2.addDocument(new Document());
+      try {
+        w2.addIndexes(new Directory[] {dir});
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      assertEquals(1, w2.maxDoc());
+      IndexReader ir = DirectoryReader.open(dir);
+      try {
+        w2.addIndexes(new IndexReader[] {ir});
+        fail("didn't hit exception");
+      } catch (IllegalStateException ise) {
+        // expected
+      }
+      w2.close();
+      ir.close();
+      dir.close();
+      dir2.close();
+    } finally {
+      restoreIndexWriterMaxDocs();
+    }
+  }
exception"); + } catch (IllegalStateException ise) { + // expected + } + w2.close(); + ir.close(); + dir.close(); + dir2.close(); + } finally { + restoreIndexWriterMaxDocs(); + } + } + + // Make sure MultiReader lets you search exactly the limit number of docs: + public void testMultiReaderExactLimit() throws Exception { + Directory dir = newDirectory(); + Document doc = new Document(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); + for (int i = 0; i < 100000; i++) { + w.addDocument(doc); + } + w.shutdown(); + + int remainder = IndexWriter.MAX_DOCS % 100000; + Directory dir2 = newDirectory(); + w = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); + for (int i = 0; i < remainder; i++) { + w.addDocument(doc); + } + w.shutdown(); + + int copies = IndexWriter.MAX_DOCS / 100000; + + DirectoryReader ir = DirectoryReader.open(dir); + DirectoryReader ir2 = DirectoryReader.open(dir2); + IndexReader subReaders[] = new IndexReader[copies+1]; + Arrays.fill(subReaders, ir); + subReaders[subReaders.length-1] = ir2; + + MultiReader mr = new MultiReader(subReaders); + assertEquals(IndexWriter.MAX_DOCS, mr.maxDoc()); + assertEquals(IndexWriter.MAX_DOCS, mr.numDocs()); + ir.close(); + ir2.close(); + dir.close(); + dir2.close(); + } + + // Make sure MultiReader is upset if you exceed the limit + public void testMultiReaderBeyondLimit() throws Exception { + Directory dir = newDirectory(); + Document doc = new Document(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); + for (int i = 0; i < 100000; i++) { + w.addDocument(doc); + } + w.shutdown(); + + int remainder = IndexWriter.MAX_DOCS % 100000; + + // One too many: + remainder++; + + Directory dir2 = newDirectory(); + w = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); + for (int i = 0; i < remainder; i++) { + w.addDocument(doc); + } + w.shutdown(); + + int copies = IndexWriter.MAX_DOCS / 100000; + + DirectoryReader ir = DirectoryReader.open(dir); + DirectoryReader ir2 = DirectoryReader.open(dir2); + IndexReader subReaders[] = new IndexReader[copies+1]; + Arrays.fill(subReaders, ir); + subReaders[subReaders.length-1] = ir2; + + try { + new MultiReader(subReaders); + fail("didn't hit exception"); + } catch (IllegalArgumentException iae) { + // expected + } + ir.close(); + ir2.close(); + dir.close(); + dir2.close(); + } + + public void testTooLargeMaxDocs() throws Exception { + try { + IndexWriter.setMaxDocs(Integer.MAX_VALUE); + fail("didn't hit exception"); + } catch (IllegalArgumentException iae) { + // expected + } + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 4256337480f..4aa68503a62 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -77,6 +77,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader.ReaderClosedListener; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LiveIndexWriterConfig; import org.apache.lucene.index.LogByteSizeMergePolicy; @@ -663,8 +664,34 @@ public abstract class LuceneTestCase extends Assert { public void tearDown() throws Exception { 
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index 4256337480f..4aa68503a62 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -77,6 +77,7 @@ import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader.ReaderClosedListener;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LiveIndexWriterConfig;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
@@ -663,8 +664,34 @@ public abstract class LuceneTestCase extends Assert {
   public void tearDown() throws Exception {
     parentChainCallRule.teardownCalled = true;
     fieldToType.clear();
+
+    // Test is supposed to call this itself, but we do this defensively in case it forgot:
+    restoreIndexWriterMaxDocs();
   }
 
+  /** Tells {@link IndexWriter} to enforce the specified limit as the maximum number of documents in one index; call
+   *  {@link #restoreIndexWriterMaxDocs} once your test is done. */
+  public void setIndexWriterMaxDocs(int limit) {
+    Method m;
+    try {
+      m = IndexWriter.class.getDeclaredMethod("setMaxDocs", int.class);
+    } catch (NoSuchMethodException nsme) {
+      throw new RuntimeException(nsme);
+    }
+    m.setAccessible(true);
+    try {
+      m.invoke(IndexWriter.class, limit);
+    } catch (IllegalAccessException iae) {
+      throw new RuntimeException(iae);
+    } catch (InvocationTargetException ite) {
+      throw new RuntimeException(ite);
+    }
+  }
+
+  /** Restores the default {@link IndexWriter#MAX_DOCS} limit. */
+  public void restoreIndexWriterMaxDocs() {
+    setIndexWriterMaxDocs(IndexWriter.MAX_DOCS);
+  }
+
   // -----------------------------------------------------------------
   // Test facilities and facades for subclasses.
@@ -2251,8 +2278,6 @@ public abstract class LuceneTestCase extends Assert {
       // numOrds
       assertEquals(info, leftValues.getValueCount(), rightValues.getValueCount());
       // ords
-      BytesRef scratchLeft = new BytesRef();
-      BytesRef scratchRight = new BytesRef();
       for (int i = 0; i < leftValues.getValueCount(); i++) {
         final BytesRef left = BytesRef.deepCopyOf(leftValues.lookupOrd(i));
         final BytesRef right = rightValues.lookupOrd(i);
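Finally, how a test is expected to use these new test-framework hooks: setIndexWriterMaxDocs reaches the package-private IndexWriter.setMaxDocs via reflection (LuceneTestCase lives in org.apache.lucene.util, not org.apache.lucene.index), and tearDown restores the default defensively. A minimal sketch mirroring the try/finally discipline in TestIndexWriterMaxDocs (MyTest is illustrative):

    import org.apache.lucene.util.LuceneTestCase;

    public class MyTest extends LuceneTestCase {
      public void testSmallLimit() throws Exception {
        setIndexWriterMaxDocs(10);  // reflection into IndexWriter.setMaxDocs
        try {
          // ... exercise IndexWriter against the 10-doc limit ...
        } finally {
          restoreIndexWriterMaxDocs();  // tearDown() also restores, defensively
        }
      }
    }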