LUCENE-8609: Allow getting consistent docstats from IndexWriter

Today we have #numDocs() and #maxDoc() on IndexWriter. This is enough
to get all stats for the current index but it's subject to concurrency
and might return numbers that are not consistent ie. some cases can
return maxDoc < numDocs which is undesirable. This change adds a getDocStats()
method to index writer to allow fetching consistent numbers for these stats.

This change also deprecates IndexWriter#numDocs() and IndexWriter#maxDoc()
and replaces all their usages with IndexWriter#getDocStats()
This commit is contained in:
Simon Willnauer 2018-12-13 16:05:47 +01:00
parent eb4fda65e5
commit e974311d91
33 changed files with 209 additions and 138 deletions

View File

@ -282,6 +282,10 @@ Other
* LUCENE-8605: Separate bounding box spatial logic from query logic on LatLonShapeBoundingBoxQuery.
(Ignacio Vera)
* LUCENE-8609: Deprecated IndexWriter#numDocs() and IndexWriter#maxDoc() in favor of IndexWriter#getDocStats()
that allows getting consistent numDocs and maxDoc stats that are not subject to concurrent changes.
(Simon Willnauer, Nhat Nguyen)
======================= Lucene 7.6.0 =======================
Build

View File

@ -62,7 +62,7 @@ public class TestEmptyTokenStream extends BaseTokenStreamTestCase {
// this should not fail because we have no TermToBytesRefAttribute
writer.addDocument(doc);
assertEquals(1, writer.numDocs());
assertEquals(1, writer.getDocStats().numDocs);
writer.close();
directory.close();

View File

@ -1071,7 +1071,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// make sure writer sees right total -- writer seems not to know about deletes in .del?
final int expected = 45;
assertEquals("wrong doc count", expected, writer.numDocs());
assertEquals("wrong doc count", expected, writer.getDocStats().numDocs);
writer.close();
// make sure searching sees right # hits
@ -1139,7 +1139,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
for(int i=0;i<35;i++) {
addDoc(writer, i);
}
assertEquals("wrong doc count", 35, writer.maxDoc());
assertEquals("wrong doc count", 35, writer.getDocStats().maxDoc);
if (fullyMerged) {
writer.forceMerge(1);
}
@ -1599,7 +1599,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
verifyUsesDefaultCodec(dir, dvUpdatesIndex);
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())).setSoftDeletesField("__soft_delete");
IndexWriter writer = new IndexWriter(dir, conf);
int maxDoc = writer.maxDoc();
int maxDoc = writer.getDocStats().maxDoc;
writer.updateDocValues(new Term("id", "1"),new NumericDocValuesField("__soft_delete", 1));
if (random().nextBoolean()) {
@ -1607,7 +1607,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
writer.forceMerge(1);
writer.commit();
assertEquals(maxDoc-1, writer.maxDoc());
assertEquals(maxDoc-1, writer.getDocStats().maxDoc);
writer.close();
dir.close();
}

View File

@ -1134,7 +1134,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
/** Returns total number of docs in this index, including
* docs not yet flushed (still in the RAM buffer),
* not counting deletions.
* @see #numDocs */
* @see #numDocs
* @deprecated use {@link #getDocStats()} instead
* */
@Deprecated
public synchronized int maxDoc() {
ensureOpen();
return docWriter.getNumDocs() + segmentInfos.totalMaxDoc();
@ -1156,7 +1159,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
* including deletions. <b>NOTE:</b> buffered deletions
* are not counted. If you really need these to be
* counted you should call {@link #commit()} first.
* @see #numDocs */
* @see #maxDoc
* @deprecated use {@link #getDocStats()} instead
* */
@Deprecated
public synchronized int numDocs() {
ensureOpen();
int count = docWriter.getNumDocs();
@ -5289,4 +5295,46 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
final synchronized SegmentInfos cloneSegmentInfos() {
return segmentInfos.clone();
}
/**
 * Returns accurate {@link DocStats} from this writer. This is equivalent to calling {@link #numDocs()} and
 * {@link #maxDoc()} but is not subject to race conditions: numDocs, for instance, can change after maxDoc
 * is fetched, which would cause numDocs to be greater than maxDoc and makes it hard to get accurate
 * document stats from IndexWriter.
 */
public synchronized DocStats getDocStats() {
  ensureOpen();
  // Docs still buffered in RAM count toward both stats; they have no deletions yet.
  final int buffered = docWriter.getNumDocs();
  int segMaxDoc = 0;
  int segNumDocs = 0;
  for (final SegmentCommitInfo info : segmentInfos) {
    final int segDocs = info.info.maxDoc();
    segMaxDoc += segDocs;
    // live docs in this segment = total docs minus (hard + soft) deletes
    segNumDocs += segDocs - numDeletedDocs(info);
  }
  final int maxDoc = buffered + segMaxDoc;
  final int numDocs = buffered + segNumDocs;
  assert maxDoc >= numDocs : "maxDoc is less than numDocs: " + maxDoc + " < " + numDocs;
  return new DocStats(maxDoc, numDocs);
}
/**
 * DocStats for this index. An immutable snapshot of the {@code maxDoc} / {@code numDocs}
 * pair, taken atomically by {@link IndexWriter#getDocStats()} so the two values are
 * always consistent with each other (maxDoc &gt;= numDocs).
 */
public static final class DocStats {
/**
 * The total number of docs in this index, including
 * docs not yet flushed (still in the RAM buffer),
 * not counting deletions.
 */
public final int maxDoc;
/**
 * The total number of docs in this index, including
 * docs not yet flushed (still in the RAM buffer), and
 * including deletions. <b>NOTE:</b> buffered deletions
 * are not counted. If you really need these to be
 * counted you should call {@link IndexWriter#commit()} first.
 */
public final int numDocs;
// Sole constructor; only IndexWriter#getDocStats() creates instances, which keeps
// the maxDoc >= numDocs invariant guaranteed by that synchronized method.
private DocStats(int maxDoc, int numDocs) {
this.maxDoc = maxDoc;
this.numDocs = numDocs;
}
}
}

View File

@ -120,10 +120,10 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
writer.commit();
addDocs2(writer, 10);
writer.commit();
assertEquals(30, writer.maxDoc());
assertEquals(30, writer.getDocStats().maxDoc);
TestUtil.checkIndex(dir);
writer.forceMerge(1);
assertEquals(30, writer.maxDoc());
assertEquals(30, writer.getDocStats().maxDoc);
writer.close();
dir.close();
}
@ -173,7 +173,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
addDocs2(writer, 10);
writer.commit();
codec = iwconf.getCodec();
assertEquals(30, writer.maxDoc());
assertEquals(30, writer.getDocStats().maxDoc);
assertQuery(new Term("content", "bbb"), dir, 10);
assertQuery(new Term("content", "ccc"), dir, 10); ////
assertQuery(new Term("content", "aaa"), dir, 10);
@ -186,13 +186,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
assertQuery(new Term("content", "ccc"), dir, 10);
assertQuery(new Term("content", "bbb"), dir, 20);
assertQuery(new Term("content", "aaa"), dir, 10);
assertEquals(40, writer.maxDoc());
assertEquals(40, writer.getDocStats().maxDoc);
if (VERBOSE) {
System.out.println("TEST: now optimize");
}
writer.forceMerge(1);
assertEquals(40, writer.maxDoc());
assertEquals(40, writer.getDocStats().maxDoc);
writer.close();
assertQuery(new Term("content", "ccc"), dir, 10);
assertQuery(new Term("content", "bbb"), dir, 20);
@ -258,7 +258,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
writer.forceMerge(1);
}
writer.commit();
assertEquals((i + 1) * docsPerRound, writer.maxDoc());
assertEquals((i + 1) * docsPerRound, writer.getDocStats().maxDoc);
writer.close();
}
dir.close();

View File

@ -66,7 +66,7 @@ public class TestAddIndexes extends LuceneTestCase {
.setOpenMode(OpenMode.CREATE));
// add 100 documents
addDocs(writer, 100);
assertEquals(100, writer.maxDoc());
assertEquals(100, writer.getDocStats().maxDoc);
writer.close();
TestUtil.checkIndex(dir);
@ -78,20 +78,20 @@ public class TestAddIndexes extends LuceneTestCase {
);
// add 40 documents in separate files
addDocs(writer, 40);
assertEquals(40, writer.maxDoc());
assertEquals(40, writer.getDocStats().maxDoc);
writer.close();
writer = newWriter(aux2, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
// add 50 documents in compound files
addDocs2(writer, 50);
assertEquals(50, writer.maxDoc());
assertEquals(50, writer.getDocStats().maxDoc);
writer.close();
// test doc count before segments are merged
writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
assertEquals(100, writer.maxDoc());
assertEquals(100, writer.getDocStats().maxDoc);
writer.addIndexes(aux, aux2);
assertEquals(190, writer.maxDoc());
assertEquals(190, writer.getDocStats().maxDoc);
writer.close();
TestUtil.checkIndex(dir);
@ -106,14 +106,14 @@ public class TestAddIndexes extends LuceneTestCase {
writer = newWriter(aux3, newIndexWriterConfig(new MockAnalyzer(random())));
// add 40 documents
addDocs(writer, 40);
assertEquals(40, writer.maxDoc());
assertEquals(40, writer.getDocStats().maxDoc);
writer.close();
// test doc count before segments are merged
writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
assertEquals(190, writer.maxDoc());
assertEquals(190, writer.getDocStats().maxDoc);
writer.addIndexes(aux3);
assertEquals(230, writer.maxDoc());
assertEquals(230, writer.getDocStats().maxDoc);
writer.close();
// make sure the new index is correct
@ -142,9 +142,9 @@ public class TestAddIndexes extends LuceneTestCase {
writer.close();
writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
assertEquals(230, writer.maxDoc());
assertEquals(230, writer.getDocStats().maxDoc);
writer.addIndexes(aux4);
assertEquals(231, writer.maxDoc());
assertEquals(231, writer.getDocStats().maxDoc);
writer.close();
verifyNumDocs(dir, 231);
@ -284,7 +284,7 @@ public class TestAddIndexes extends LuceneTestCase {
writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
// add 100 documents
addDocs(writer, 100);
assertEquals(100, writer.maxDoc());
assertEquals(100, writer.getDocStats().maxDoc);
writer.close();
writer = newWriter(
@ -312,7 +312,7 @@ public class TestAddIndexes extends LuceneTestCase {
expectThrows(IllegalArgumentException.class, () -> {
writer2.addIndexes(aux, dir);
});
assertEquals(100, writer2.maxDoc());
assertEquals(100, writer2.getDocStats().maxDoc);
writer2.close();
// make sure the index is correct
@ -342,7 +342,7 @@ public class TestAddIndexes extends LuceneTestCase {
addDocs(writer, 10);
writer.addIndexes(aux);
assertEquals(1040, writer.maxDoc());
assertEquals(1040, writer.getDocStats().maxDoc);
assertEquals(1000, writer.maxDoc(0));
writer.close();
@ -371,7 +371,7 @@ public class TestAddIndexes extends LuceneTestCase {
addDocs(writer, 2);
writer.addIndexes(aux);
assertEquals(1032, writer.maxDoc());
assertEquals(1032, writer.getDocStats().maxDoc);
assertEquals(1000, writer.maxDoc(0));
writer.close();
@ -399,7 +399,7 @@ public class TestAddIndexes extends LuceneTestCase {
);
writer.addIndexes(aux, new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(aux)));
assertEquals(1060, writer.maxDoc());
assertEquals(1060, writer.getDocStats().maxDoc);
assertEquals(1000, writer.maxDoc(0));
writer.close();
@ -441,7 +441,7 @@ public class TestAddIndexes extends LuceneTestCase {
System.out.println("\nTEST: now addIndexes");
}
writer.addIndexes(aux, new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(aux)));
assertEquals(1020, writer.maxDoc());
assertEquals(1020, writer.getDocStats().maxDoc);
assertEquals(1000, writer.maxDoc(0));
writer.close();
dir.close();
@ -466,7 +466,7 @@ public class TestAddIndexes extends LuceneTestCase {
setMergePolicy(newLogMergePolicy(10))
);
writer.addIndexes(aux);
assertEquals(30, writer.maxDoc());
assertEquals(30, writer.getDocStats().maxDoc);
assertEquals(3, writer.getSegmentCount());
writer.close();
@ -501,7 +501,7 @@ public class TestAddIndexes extends LuceneTestCase {
);
writer.addIndexes(aux, aux2);
assertEquals(1040, writer.maxDoc());
assertEquals(1040, writer.getDocStats().maxDoc);
assertEquals(1000, writer.maxDoc(0));
writer.close();
dir.close();
@ -570,7 +570,7 @@ public class TestAddIndexes extends LuceneTestCase {
} else {
addDocs(writer, 1000);
}
assertEquals(1000, writer.maxDoc());
assertEquals(1000, writer.getDocStats().maxDoc);
assertEquals(1, writer.getSegmentCount());
writer.close();
@ -597,7 +597,7 @@ public class TestAddIndexes extends LuceneTestCase {
setMergePolicy(newLogMergePolicy(false, 10))
);
}
assertEquals(30, writer.maxDoc());
assertEquals(30, writer.getDocStats().maxDoc);
assertEquals(3, writer.getSegmentCount());
writer.close();
}
@ -815,7 +815,7 @@ public class TestAddIndexes extends LuceneTestCase {
c.joinThreads();
int expectedNumDocs = 100+NUM_COPY*(4*NUM_ITER/5)*RunAddIndexesThreads.NUM_THREADS*RunAddIndexesThreads.NUM_INIT_DOCS;
assertEquals("expected num docs don't match - failures: " + c.failures, expectedNumDocs, c.writer2.numDocs());
assertEquals("expected num docs don't match - failures: " + c.failures, expectedNumDocs, c.writer2.getDocStats().numDocs);
c.close(true);
@ -1002,7 +1002,7 @@ public class TestAddIndexes extends LuceneTestCase {
TestUtil.addIndexesSlowly(writer, r);
}
writer.commit();
assertEquals("Documents from the incoming index should not have been deleted", 1, writer.numDocs());
assertEquals("Documents from the incoming index should not have been deleted", 1, writer.getDocStats().numDocs);
writer.close();
for (Directory dir : dirs) {
@ -1037,7 +1037,7 @@ public class TestAddIndexes extends LuceneTestCase {
.setOpenMode(OpenMode.CREATE).setCodec(codec));
// add 100 documents
addDocsWithID(writer, 100, 0);
assertEquals(100, writer.maxDoc());
assertEquals(100, writer.getDocStats().maxDoc);
writer.commit();
writer.close();
TestUtil.checkIndex(dir);
@ -1052,7 +1052,7 @@ public class TestAddIndexes extends LuceneTestCase {
);
// add 40 documents in separate files
addDocs(writer, 40);
assertEquals(40, writer.maxDoc());
assertEquals(40, writer.getDocStats().maxDoc);
writer.commit();
writer.close();
@ -1064,7 +1064,7 @@ public class TestAddIndexes extends LuceneTestCase {
);
// add 40 documents in compound files
addDocs2(writer, 50);
assertEquals(50, writer.maxDoc());
assertEquals(50, writer.getDocStats().maxDoc);
writer.commit();
writer.close();
@ -1075,9 +1075,9 @@ public class TestAddIndexes extends LuceneTestCase {
setOpenMode(OpenMode.APPEND).
setCodec(codec)
);
assertEquals(100, writer.maxDoc());
assertEquals(100, writer.getDocStats().maxDoc);
writer.addIndexes(aux, aux2);
assertEquals(190, writer.maxDoc());
assertEquals(190, writer.getDocStats().maxDoc);
writer.close();
dir.close();
@ -1439,8 +1439,8 @@ public class TestAddIndexes extends LuceneTestCase {
readers[i] = (CodecReader)reader.leaves().get(i).reader();
}
writer.addIndexes(readers);
assertEquals(wrappedReader.numDocs(), writer.numDocs());
assertEquals(maxDoc, writer.maxDoc());
assertEquals(wrappedReader.numDocs(), writer.getDocStats().numDocs);
assertEquals(maxDoc, writer.getDocStats().maxDoc);
writer.commit();
SegmentCommitInfo commitInfo = writer.listOfSegmentCommitInfos().get(0);
assertEquals(maxDoc-wrappedReader.numDocs(), commitInfo.getSoftDelCount());
@ -1454,8 +1454,8 @@ public class TestAddIndexes extends LuceneTestCase {
readers[i] = (CodecReader)wrappedReader.leaves().get(i).reader();
}
writer.addIndexes(readers);
assertEquals(wrappedReader.numDocs(), writer.numDocs());
assertEquals(wrappedReader.numDocs(), writer.maxDoc());
assertEquals(wrappedReader.numDocs(), writer.getDocStats().numDocs);
assertEquals(wrappedReader.numDocs(), writer.getDocStats().maxDoc);
IOUtils.close(reader, writer, dir3, dir2, dir1);
}
}

View File

@ -552,7 +552,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
}
}.start();
while (w.numDocs() != 8) {
while (w.getDocStats().numDocs != 8) {
Thread.sleep(10);
}

View File

@ -135,7 +135,7 @@ public class TestCrash extends LuceneTestCase {
writer.close();
writer = initIndex(random(), dir, false, true);
assertEquals(314, writer.maxDoc());
assertEquals(314, writer.getDocStats().maxDoc);
crash(writer);
/*

View File

@ -427,7 +427,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy));
addDoc(writer);
assertEquals(11, writer.numDocs());
assertEquals(11, writer.getDocStats().numDocs);
writer.forceMerge(1);
writer.close();
@ -437,7 +437,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
.setIndexCommit(lastCommit));
assertEquals(10, writer.numDocs());
assertEquals(10, writer.getDocStats().numDocs);
// Should undo our rollback:
writer.rollback();
@ -451,7 +451,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexDeletionPolicy(policy)
.setIndexCommit(lastCommit));
assertEquals(10, writer.numDocs());
assertEquals(10, writer.getDocStats().numDocs);
// Commits the rollback:
writer.close();
@ -480,7 +480,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
// but this time keeping only the last commit:
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setIndexCommit(lastCommit));
assertEquals(10, writer.numDocs());
assertEquals(10, writer.getDocStats().numDocs);
// Reader still sees fully merged index, because writer
// opened on the prior commit has not yet committed:

View File

@ -96,8 +96,8 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.);
assertEquals(" all flushes must be due numThreads=" + numThreads, 0,
writer.getFlushingBytes());
assertEquals(numDocumentsToIndex, writer.numDocs());
assertEquals(numDocumentsToIndex, writer.maxDoc());
assertEquals(numDocumentsToIndex, writer.getDocStats().numDocs);
assertEquals(numDocumentsToIndex, writer.getDocStats().maxDoc);
assertTrue("peak bytes without flush exceeded watermark",
flushPolicy.peakBytesWithoutFlush <= maxRAMBytes);
assertActiveBytesAfter(flushControl);
@ -151,8 +151,8 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertEquals(" all flushes must be due numThreads=" + numThreads[i], 0,
writer.getFlushingBytes());
assertEquals(numDocumentsToIndex, writer.numDocs());
assertEquals(numDocumentsToIndex, writer.maxDoc());
assertEquals(numDocumentsToIndex, writer.getDocStats().numDocs);
assertEquals(numDocumentsToIndex, writer.getDocStats().maxDoc);
assertTrue("peak bytes without flush exceeded watermark",
flushPolicy.peakDocCountWithoutFlush <= iwc.getMaxBufferedDocs());
assertActiveBytesAfter(flushControl);
@ -195,8 +195,8 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
threads[x].join();
}
assertEquals(" all flushes must be due", 0, writer.getFlushingBytes());
assertEquals(numDocumentsToIndex, writer.numDocs());
assertEquals(numDocumentsToIndex, writer.maxDoc());
assertEquals(numDocumentsToIndex, writer.getDocStats().numDocs);
assertEquals(numDocumentsToIndex, writer.getDocStats().maxDoc);
if (flushPolicy.flushOnRAM() && !flushPolicy.flushOnDocCount()) {
final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.);
assertTrue("peak bytes without flush exceeded watermark",
@ -256,8 +256,8 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertNotNull(docsWriter);
DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
assertEquals(" all flushes must be due", 0, writer.getFlushingBytes());
assertEquals(numDocumentsToIndex, writer.numDocs());
assertEquals(numDocumentsToIndex, writer.maxDoc());
assertEquals(numDocumentsToIndex, writer.getDocStats().numDocs);
assertEquals(numDocumentsToIndex, writer.getDocStats().maxDoc);
if (numThreads[i] == 1) {
assertFalse(
"single thread must not block numThreads: " + numThreads[i],

View File

@ -61,7 +61,7 @@ public class TestIndexManyDocuments extends LuceneTestCase {
thread.join();
}
assertEquals("lost " + (numDocs - w.maxDoc()) + " documents; maxBufferedDocs=" + iwc.getMaxBufferedDocs(), numDocs, w.maxDoc());
assertEquals("lost " + (numDocs - w.getDocStats().maxDoc) + " documents; maxBufferedDocs=" + iwc.getMaxBufferedDocs(), numDocs, w.getDocStats().maxDoc);
w.close();
IndexReader r = DirectoryReader.open(dir);

View File

@ -127,16 +127,35 @@ public class TestIndexWriter extends LuceneTestCase {
// add 100 documents
for (i = 0; i < 100; i++) {
addDocWithIndex(writer,i);
if (random().nextBoolean()) {
writer.commit();
}
}
assertEquals(100, writer.maxDoc());
IndexWriter.DocStats docStats = writer.getDocStats();
assertEquals(100, docStats.maxDoc);
assertEquals(100, docStats.numDocs);
writer.close();
// delete 40 documents
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(NoMergePolicy.INSTANCE));
.setMergePolicy(new FilterMergePolicy(NoMergePolicy.INSTANCE) {
@Override
public boolean keepFullyDeletedSegment(IOSupplier<CodecReader>
readerIOSupplier) {
return true;
}
}));
for (i = 0; i < 40; i++) {
writer.deleteDocuments(new Term("id", ""+i));
if (random().nextBoolean()) {
writer.commit();
}
}
writer.flush();
docStats = writer.getDocStats();
assertEquals(100, docStats.maxDoc);
assertEquals(60, docStats.numDocs);
writer.close();
reader = DirectoryReader.open(dir);
@ -145,10 +164,11 @@ public class TestIndexWriter extends LuceneTestCase {
// merge the index down and check that the new doc count is correct
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
assertEquals(60, writer.numDocs());
assertEquals(60, writer.getDocStats().numDocs);
writer.forceMerge(1);
assertEquals(60, writer.maxDoc());
assertEquals(60, writer.numDocs());
docStats = writer.getDocStats();
assertEquals(60, docStats.maxDoc);
assertEquals(60, docStats.numDocs);
writer.close();
// check that the index reader gives the same numbers.
@ -161,8 +181,9 @@ public class TestIndexWriter extends LuceneTestCase {
// this existing one works correctly:
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setOpenMode(OpenMode.CREATE));
assertEquals(0, writer.maxDoc());
assertEquals(0, writer.numDocs());
docStats = writer.getDocStats();
assertEquals(0, docStats.maxDoc);
assertEquals(0, docStats.numDocs);
writer.close();
dir.close();
}
@ -226,7 +247,7 @@ public class TestIndexWriter extends LuceneTestCase {
// now open index for create:
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setOpenMode(OpenMode.CREATE));
assertEquals("should be zero documents", writer.maxDoc(), 0);
assertEquals("should be zero documents", writer.getDocStats().maxDoc, 0);
addDoc(writer);
writer.close();
@ -2751,7 +2772,7 @@ public class TestIndexWriter extends LuceneTestCase {
try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setIndexCommit(indexCommit))) {
writer.addDocument(new Document());
writer.commit();
assertEquals(1, writer.maxDoc());
assertEquals(1, writer.getDocStats().maxDoc);
// now check that we moved to 3
dir.openInput("segments_3", IOContext.READ).close();;
}
@ -3147,7 +3168,8 @@ public class TestIndexWriter extends LuceneTestCase {
for (SegmentCommitInfo info : writer.cloneSegmentInfos()) {
numSoftDeleted += info.getSoftDelCount();
}
assertEquals(writer.maxDoc() - writer.numDocs(), numSoftDeleted);
IndexWriter.DocStats docStats = writer.getDocStats();
assertEquals(docStats.maxDoc - docStats.numDocs, numSoftDeleted);
for (LeafReaderContext context : reader.leaves()) {
LeafReader leaf = context.reader();
assertNull(((SegmentReader) leaf).getHardLiveDocs());
@ -3300,7 +3322,8 @@ public class TestIndexWriter extends LuceneTestCase {
for (SegmentCommitInfo info : writer.cloneSegmentInfos()) {
numSoftDeleted += info.getSoftDelCount() + info.getDelCount();
}
assertEquals(writer.maxDoc() - writer.numDocs(), numSoftDeleted);
IndexWriter.DocStats docStats = writer.getDocStats();
assertEquals(docStats.maxDoc - docStats.numDocs, numSoftDeleted);
writer.commit();
try (DirectoryReader dirReader = DirectoryReader.open(dir)) {
int delCount = 0;

View File

@ -445,7 +445,7 @@ public class TestIndexWriterCommit extends LuceneTestCase {
.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)
.setIndexCommit(commit));
assertEquals(1, w.numDocs());
assertEquals(1, w.getDocStats().numDocs);
// commit IndexWriter to "third"
w.addDocument(doc);

View File

@ -947,7 +947,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
modifier.deleteDocuments(new TermQuery(new Term("nada", "nada")));
modifier.commit();
assertEquals(5, modifier.numDocs());
assertEquals(5, modifier.getDocStats().numDocs);
modifier.close();
dir.close();
}

View File

@ -47,9 +47,9 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
IndexWriter w2 = new IndexWriter(dir, iwc);
r.close();
assertEquals(1, w2.maxDoc());
assertEquals(1, w2.getDocStats().maxDoc);
w2.addDocument(new Document());
assertEquals(2, w2.maxDoc());
assertEquals(2, w2.getDocStats().maxDoc);
w2.close();
IndexReader r2 = DirectoryReader.open(dir);
@ -74,9 +74,9 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
assertEquals(1, r.maxDoc());
r.close();
assertEquals(1, w2.maxDoc());
assertEquals(1, w2.getDocStats().maxDoc);
w2.addDocument(new Document());
assertEquals(2, w2.maxDoc());
assertEquals(2, w2.getDocStats().maxDoc);
w2.close();
IndexReader r2 = DirectoryReader.open(dir);
@ -141,7 +141,7 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
// Add another doc
w.addDocument(new Document());
assertEquals(2, w.maxDoc());
assertEquals(2, w.getDocStats().maxDoc);
w.close();
IndexWriterConfig iwc = newIndexWriterConfig();
@ -249,7 +249,7 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
// rollback writer to last nrt reader
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" close writer and open new writer from non-NRT reader numDocs=" + w.numDocs());
System.out.println(" close writer and open new writer from non-NRT reader numDocs=" + w.getDocStats().numDocs);
}
w.close();
r.close();
@ -259,7 +259,7 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
nrtLiveIDs = new HashSet<>(liveIDs);
} else {
if (VERBOSE) {
System.out.println(" rollback writer and open new writer from NRT reader numDocs=" + w.numDocs());
System.out.println(" rollback writer and open new writer from NRT reader numDocs=" + w.getDocStats().numDocs);
}
w.rollback();
}
@ -383,7 +383,7 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setIndexCommit(r.getIndexCommit());
w = new IndexWriter(dir, iwc);
assertEquals(1, w.numDocs());
assertEquals(1, w.getDocStats().numDocs);
r.close();
DirectoryReader r3 = DirectoryReader.open(w);
@ -413,7 +413,7 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setIndexCommit(r.getIndexCommit());
w = new IndexWriter(dir, iwc);
assertEquals(2, w.numDocs());
assertEquals(2, w.getDocStats().numDocs);
r.close();
w.close();
@ -456,7 +456,7 @@ public class TestIndexWriterFromReader extends LuceneTestCase {
iwc = newIndexWriterConfig();
iwc.setIndexCommit(r.getIndexCommit());
IndexWriter w2 = new IndexWriter(dir, iwc);
assertEquals(2, w2.maxDoc());
assertEquals(2, w2.getDocStats().maxDoc);
IOUtils.close(r, w2, dir);
}
}

View File

@ -189,7 +189,7 @@ public class TestIndexWriterMaxDocs extends LuceneTestCase {
w.forceMerge(1);
assertEquals(5, w.maxDoc());
assertEquals(5, w.getDocStats().maxDoc);
// Add 5 more docs
for(int i=0;i<5;i++) {
@ -233,7 +233,7 @@ public class TestIndexWriterMaxDocs extends LuceneTestCase {
w.forceMerge(1);
assertEquals(5, w.maxDoc());
assertEquals(5, w.getDocStats().maxDoc);
// Add 5 more docs
for(int i=0;i<5;i++) {
@ -269,7 +269,7 @@ public class TestIndexWriterMaxDocs extends LuceneTestCase {
w2.addIndexes(new Directory[] {dir});
});
assertEquals(1, w2.maxDoc());
assertEquals(1, w2.getDocStats().maxDoc);
DirectoryReader ir = DirectoryReader.open(dir);
expectThrows(IllegalArgumentException.class, () -> {
TestUtil.addIndexesSlowly(w2, ir);

View File

@ -221,7 +221,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase {
writer.waitForMerges();
writer.commit();
checkInvariants(writer);
assertEquals(10, writer.maxDoc());
assertEquals(10, writer.getDocStats().maxDoc);
writer.close();
dir.close();

View File

@ -164,10 +164,10 @@ public class TestIndexWriterMerging extends LuceneTestCase {
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(newLogMergePolicy()));
assertEquals(8, writer.numDocs());
assertEquals(10, writer.maxDoc());
assertEquals(8, writer.getDocStats().numDocs);
assertEquals(10, writer.getDocStats().maxDoc);
writer.forceMergeDeletes();
assertEquals(8, writer.numDocs());
assertEquals(8, writer.getDocStats().numDocs);
writer.close();
ir = DirectoryReader.open(dir);
assertEquals(8, ir.maxDoc());
@ -232,7 +232,7 @@ public class TestIndexWriterMerging extends LuceneTestCase {
newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(newLogMergePolicy(3))
);
assertEquals(49, writer.numDocs());
assertEquals(49, writer.getDocStats().numDocs);
writer.forceMergeDeletes();
writer.close();
ir = DirectoryReader.open(dir);

View File

@ -385,8 +385,8 @@ public class TestIndexWriterReader extends LuceneTestCase {
addDirThreads.joinThreads();
//assertEquals(100 + numDirs * (3 * numIter / 4) * addDirThreads.numThreads
// * addDirThreads.NUM_INIT_DOCS, addDirThreads.mainWriter.numDocs());
assertEquals(addDirThreads.count.intValue(), addDirThreads.mainWriter.numDocs());
// * addDirThreads.NUM_INIT_DOCS, addDirThreads.mainwriter.getDocStats().numDocs);
assertEquals(addDirThreads.count.intValue(), addDirThreads.mainWriter.getDocStats().numDocs);
addDirThreads.close(true);

View File

@ -64,7 +64,7 @@ public class TestIsCurrent extends LuceneTestCase {
DirectoryReader reader = writer.getReader();
// assert index has a document and reader is up2date
assertEquals("One document should be in the index", 1, writer.numDocs());
assertEquals("One document should be in the index", 1, writer.getDocStats().numDocs);
assertTrue("One document added, reader should be current", reader.isCurrent());
// remove document
@ -73,7 +73,7 @@ public class TestIsCurrent extends LuceneTestCase {
writer.commit();
// assert document has been deleted (index changed), reader is stale
assertEquals("Document should be removed", 0, writer.numDocs());
assertEquals("Document should be removed", 0, writer.getDocStats().numDocs);
assertFalse("Reader should be stale", reader.isCurrent());
reader.close();
@ -89,7 +89,7 @@ public class TestIsCurrent extends LuceneTestCase {
DirectoryReader reader = writer.getReader();
// assert index has a document and reader is up2date
assertEquals("One document should be in the index", 1, writer.numDocs());
assertEquals("One document should be in the index", 1, writer.getDocStats().numDocs);
assertTrue("Document added, reader should be stale ", reader.isCurrent());
// remove all documents
@ -97,7 +97,7 @@ public class TestIsCurrent extends LuceneTestCase {
writer.commit();
// assert document has been deleted (index changed), reader is stale
assertEquals("Document should be removed", 0, writer.numDocs());
assertEquals("Document should be removed", 0, writer.getDocStats().numDocs);
assertFalse("Reader should be stale", reader.isCurrent());
reader.close();

View File

@ -136,7 +136,7 @@ public class TestRollingUpdates extends LuceneTestCase {
}
w.commit();
assertEquals(SIZE, w.numDocs());
assertEquals(SIZE, w.getDocStats().numDocs);
w.close();

View File

@ -241,16 +241,16 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
DirectoryReader reader = writer.getReader();
assertEquals(0, reader.numDocs());
assertEquals(3, reader.maxDoc());
assertEquals(0, writer.numDocs());
assertEquals(3, writer.maxDoc());
assertEquals(0, writer.getDocStats().numDocs);
assertEquals(3, writer.getDocStats().maxDoc);
assertEquals(3, reader.leaves().size());
reader.close();
writer.forceMerge(1);
reader = writer.getReader();
assertEquals(0, reader.numDocs());
assertEquals(3, reader.maxDoc());
assertEquals(0, writer.numDocs());
assertEquals(3, writer.maxDoc());
assertEquals(0, writer.getDocStats().numDocs);
assertEquals(3, writer.getDocStats().maxDoc);
assertEquals(1, reader.leaves().size());
IOUtils.close(reader, writer, dir);
}
@ -395,8 +395,8 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
// when calling forceMergeDeletes.
writer.forceMergeDeletes(true);
assertEquals(1, writer.listOfSegmentCommitInfos().size());
assertEquals(1, writer.numDocs());
assertEquals(1, writer.maxDoc());
assertEquals(1, writer.getDocStats().numDocs);
assertEquals(1, writer.getDocStats().maxDoc);
writer.close();
dir.close();
}
@ -538,8 +538,8 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
}
});
writer.forceMerge(1);
assertEquals(2, writer.numDocs());
assertEquals(2, writer.maxDoc());
assertEquals(2, writer.getDocStats().numDocs);
assertEquals(2, writer.getDocStats().maxDoc);
assertFalse(delete.get());
IOUtils.close(reader, writer, dir);
}
@ -700,8 +700,8 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
writer.softUpdateDocument(new Term("id", "bar-1"), d, new NumericDocValuesField("soft_deletes", 1));
writer.forceMerge(1);
assertEquals(2, writer.numDocs()); // foo-2, bar-2
assertEquals(3, writer.maxDoc()); // foo-1, foo-2, bar-2
assertEquals(2, writer.getDocStats().numDocs); // foo-2, bar-2
assertEquals(3, writer.getDocStats().maxDoc); // foo-1, foo-2, bar-2
IOUtils.close(writer, dir);
}

View File

@ -121,8 +121,8 @@ public class TestThreadedForceMerge extends LuceneTestCase {
final int expectedDocCount = (int) ((1+iter)*(200+8*NUM_ITER2*(NUM_THREADS/2.0)*(1+NUM_THREADS)));
assertEquals("index=" + writer.segString() + " numDocs=" + writer.numDocs() + " maxDoc=" + writer.maxDoc() + " config=" + writer.getConfig(), expectedDocCount, writer.numDocs());
assertEquals("index=" + writer.segString() + " numDocs=" + writer.numDocs() + " maxDoc=" + writer.maxDoc() + " config=" + writer.getConfig(), expectedDocCount, writer.maxDoc());
assertEquals("index=" + writer.segString() + " numDocs=" + writer.getDocStats().numDocs + " maxDoc=" + writer.getDocStats().maxDoc + " config=" + writer.getConfig(), expectedDocCount, writer.getDocStats().numDocs);
assertEquals("index=" + writer.segString() + " numDocs=" + writer.getDocStats().numDocs + " maxDoc=" + writer.getDocStats().maxDoc + " config=" + writer.getConfig(), expectedDocCount, writer.getDocStats().maxDoc);
writer.close();
writer = new IndexWriter(directory, newIndexWriterConfig(ANALYZER)

View File

@ -110,8 +110,8 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
doc.add(newTextField("content", "aaa " + (i%4), Field.Store.NO));
w.addDocument(doc);
}
assertEquals(80, w.maxDoc());
assertEquals(80, w.numDocs());
assertEquals(80, w.getDocStats().maxDoc);
assertEquals(80, w.getDocStats().numDocs);
if (VERBOSE) {
System.out.println("\nTEST: delete docs");
@ -119,16 +119,16 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
w.deleteDocuments(new Term("content", "0"));
w.forceMergeDeletes();
assertEquals(80, w.maxDoc());
assertEquals(60, w.numDocs());
assertEquals(80, w.getDocStats().maxDoc);
assertEquals(60, w.getDocStats().numDocs);
if (VERBOSE) {
System.out.println("\nTEST: forceMergeDeletes2");
}
((TieredMergePolicy) w.getConfig().getMergePolicy()).setForceMergeDeletesPctAllowed(10.0);
w.forceMergeDeletes();
assertEquals(60, w.maxDoc());
assertEquals(60, w.numDocs());
assertEquals(60, w.getDocStats().maxDoc);
assertEquals(60, w.getDocStats().numDocs);
w.close();
dir.close();
}
@ -296,8 +296,8 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
w.forceMergeDeletes();
remainingDocs -= deletedThisPass;
checkSegmentsInExpectations(w, segNamesBefore, false); // There should have been no merges
assertEquals("NumDocs should reflect removed documents ", remainingDocs, w.numDocs());
assertTrue("Should still be deleted docs in the index", w.numDocs() < w.maxDoc());
assertEquals("NumDocs should reflect removed documents ", remainingDocs, w.getDocStats().numDocs);
assertTrue("Should still be deleted docs in the index", w.getDocStats().numDocs < w.getDocStats().maxDoc);
// This time, forceMerge. By default this should respect max segment size.
// Will change for LUCENE-8236
@ -307,8 +307,8 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
// Now forceMerge down to one segment, there should be exactly remainingDocs in exactly one segment.
w.forceMerge(1);
assertEquals("There should be exaclty one segment now", 1, w.getSegmentCount());
assertEquals("maxDoc and numDocs should be identical", w.numDocs(), w.maxDoc());
assertEquals("There should be an exact number of documents in that one segment", remainingDocs, w.numDocs());
assertEquals("maxDoc and numDocs should be identical", w.getDocStats().numDocs, w.getDocStats().maxDoc);
assertEquals("There should be an exact number of documents in that one segment", remainingDocs, w.getDocStats().numDocs);
// Delete 5% and expunge, should be no change.
segNamesBefore = getSegmentNames(w);
@ -316,7 +316,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
w.forceMergeDeletes();
checkSegmentsInExpectations(w, segNamesBefore, false);
assertEquals("There should still be only one segment. ", 1, w.getSegmentCount());
assertTrue("The segment should have deleted documents", w.numDocs() < w.maxDoc());
assertTrue("The segment should have deleted documents", w.getDocStats().numDocs < w.getDocStats().maxDoc);
w.forceMerge(1); // back to one segment so deletePctDocsFromEachSeg still works
@ -325,17 +325,17 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
w.forceMergeDeletes();
assertEquals("There should still be only one segment. ", 1, w.getSegmentCount());
assertEquals("The segment should have no deleted documents", w.numDocs(), w.maxDoc());
assertEquals("The segment should have no deleted documents", w.getDocStats().numDocs, w.getDocStats().maxDoc);
// sanity check, at this point we should have an over-large segment, we know we have exactly one.
assertTrue("Our single segment should have quite a few docs", w.numDocs() > 1_000);
assertTrue("Our single segment should have quite a few docs", w.getDocStats().numDocs > 1_000);
// Delete 60% of the documents and then add a few more docs and commit. This should "singleton merge" the large segment
// created above. 60% leaves some wriggle room, LUCENE-8263 will change this assumption and should be tested
// when we deal with that JIRA.
deletedThisPass = deletePctDocsFromEachSeg(w, (w.numDocs() * 60) / 100, true);
deletedThisPass = deletePctDocsFromEachSeg(w, (w.getDocStats().numDocs * 60) / 100, true);
remainingDocs -= deletedThisPass;
for (int i = 0; i < 50; i++) {

View File

@ -171,7 +171,7 @@ public class TestBoolean2 extends LuceneTestCase {
RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
w.addIndexes(copy);
copy.close();
docCount = w.maxDoc();
docCount = w.getDocStats().maxDoc;
w.close();
mulFactor *= 2;
} while(docCount < 3000 * NUM_FILLER_DOCS);

View File

@ -583,7 +583,7 @@ public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
}
docs.close();
if (VERBOSE) {
System.out.println("TEST: index count=" + writerRef.get().maxDoc());
System.out.println("TEST: index count=" + writerRef.get().getDocStats().maxDoc);
}
} catch (IOException ioe) {
throw new RuntimeException(ioe);

View File

@ -58,7 +58,7 @@ public class TestByteBuffersDirectory extends BaseDirectoryTestCase {
writer.addDocument(doc);
}
writer.commit();
assertEquals(docs, writer.numDocs());
assertEquals(docs, writer.getDocStats().numDocs);
}
}

View File

@ -64,7 +64,7 @@ public class TestRAMDirectory extends BaseDirectoryTestCase {
doc.add(newStringField("content", English.intToEnglish(i).trim(), Field.Store.YES));
writer.addDocument(doc);
}
assertEquals(DOCS_TO_ADD, writer.maxDoc());
assertEquals(DOCS_TO_ADD, writer.getDocStats().maxDoc);
writer.close();
dir.close();

View File

@ -195,7 +195,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);
nextID = indexWriter.maxDoc();
nextID = indexWriter.getDocStats().maxDoc;
if (cache == null) {
cache = defaultTaxonomyWriterCache();
@ -968,7 +968,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
shouldRefreshReaderManager = true;
initReaderManager(); // ensure that it's initialized
refreshReaderManager();
nextID = indexWriter.maxDoc();
nextID = indexWriter.getDocStats().maxDoc;
taxoArrays = null; // must nullify so that it's re-computed next time it's needed
// need to clear the cache, so that addCategory won't accidentally return

View File

@ -262,7 +262,7 @@ public class TestMoreLikeThis extends LuceneTestCase {
doc.add(newTextField(NOT_FOR_SALE, item, Field.Store.YES));
}
writer.addDocument(doc);
return writer.numDocs() - 1;
return writer.getDocStats().numDocs - 1;
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161")

View File

@ -315,12 +315,8 @@ public class RandomIndexWriter implements Closeable {
return w.commit();
}
public int numDocs() {
return w.numDocs();
}
public int maxDoc() {
return w.maxDoc();
public IndexWriter.DocStats getDocStats() {
return w.getDocStats();
}
public long deleteAll() throws IOException {

View File

@ -635,7 +635,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
writer.commit();
assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.getDocStats().numDocs);
doClose();

View File

@ -502,6 +502,6 @@ public class ClassificationUpdateProcessorTest extends SolrTestCaseJ4 {
private int addDoc(RandomIndexWriter writer, Document doc) throws IOException {
writer.addDocument(doc);
return writer.numDocs() - 1;
return writer.getDocStats().numDocs - 1;
}
}