From 76263087b5828446fa3afd05743a8383b75893fb Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 29 May 2018 16:08:12 +0200 Subject: [PATCH] LUCENE-8338: Ensure number returned for PendingDeletes are well defined Today a call to PendingDeletes#numPendingDeletes might return 0 if the deletes are written to disk. This doesn't mean these values are committed or refreshed in the latest reader. Some places in IW use these numbers to decide whether deletes have been added since the last check (BufferedUpdatesStream), which can lead to wrong (though not fatal) decisions, i.e. kicking off new merges. Now this API is made protected and not visible outside of PendingDeletes to prevent any kind of confusion. The APIs now expose absolute numbers via getDelCount and numDocs, which have the same names and semantics as their counterparts on IndexReader/Writer and SegmentCommitInfo. --- .../lucene/index/BufferedUpdatesStream.java | 2 +- .../lucene/index/FilterMergePolicy.java | 4 +- .../lucene/index/FrozenBufferedUpdates.java | 4 +- .../org/apache/lucene/index/IndexWriter.java | 16 +++---- .../org/apache/lucene/index/MergePolicy.java | 6 +-- .../apache/lucene/index/NoMergePolicy.java | 4 +- .../apache/lucene/index/PendingDeletes.java | 46 ++++++++++++++++++- .../lucene/index/PendingSoftDeletes.java | 9 ++-- .../org/apache/lucene/index/ReaderPool.java | 3 +- .../lucene/index/ReadersAndUpdates.java | 45 +++++------------- .../SoftDeletesRetentionMergePolicy.java | 4 +- .../apache/lucene/index/TestReaderPool.java | 5 +- .../apache/lucene/index/TestTryDelete.java | 2 +- 13 files changed, 83 insertions(+), 67 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java index c93e4b689c9..dcc8bbfb985 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java +++ b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java @@ 
-259,7 +259,7 @@ final class BufferedUpdatesStream implements Accountable { SegmentState(ReadersAndUpdates rld, IOUtils.IOConsumer onClose, SegmentCommitInfo info) throws IOException { this.rld = rld; - startDelCount = rld.getPendingDeleteCount(); + startDelCount = rld.getDelCount(); delGen = info.getBufferedDeletesGen(); this.onClose = onClose; reader = rld.getReader(IOContext.READ); diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java index cbc8b187130..eb634b48a6b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java @@ -99,8 +99,8 @@ public class FilterMergePolicy extends MergePolicy { } @Override - public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount, + public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier readerSupplier) throws IOException { - return in.numDeletesToMerge(info, pendingDeleteCount, readerSupplier); + return in.numDeletesToMerge(info, delCount, readerSupplier); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index 822a497b5e7..ee36cf4d1d0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -390,8 +390,8 @@ final class FrozenBufferedUpdates { final List segmentStates = Arrays.asList(segStates); for (BufferedUpdatesStream.SegmentState segState : segmentStates) { if (success) { - totDelCount += segState.rld.getPendingDeleteCount() - segState.startDelCount; - int fullDelCount = segState.rld.info.getDelCount() + segState.rld.getPendingDeleteCount(); + totDelCount += segState.rld.getDelCount() - segState.startDelCount; + int fullDelCount = segState.rld.getDelCount(); assert 
fullDelCount <= segState.rld.info.info.maxDoc() : fullDelCount + " > " + segState.rld.info.info.maxDoc(); if (segState.rld.isFullyDeleted() && writer.getConfig().getMergePolicy().keepFullyDeletedSegment(() -> segState.reader) == false) { if (allDeleted == null) { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 96dfb64650d..bc2264b7eab 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -635,14 +635,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, public int numDeletedDocs(SegmentCommitInfo info) { ensureOpen(false); validate(info); - int delCount = info.getDelCount(); - final ReadersAndUpdates rld = getPooledInstance(info, false); if (rld != null) { - delCount += rld.getPendingDeleteCount(); + return rld.getDelCount(); // get the full count from here since SCI might change concurrently + } else { + int delCount = info.getDelCount(); + assert delCount <= info.info.maxDoc(): "delCount: " + delCount + " maxDoc: " + info.info.maxDoc(); + return delCount; } - assert delCount <= info.info.maxDoc(): "delCount: " + delCount + " maxDoc: " + info.info.maxDoc(); - return delCount; } /** @@ -3695,7 +3695,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, // Lazy init (only when we find a delete or update to carry over): final ReadersAndUpdates mergedDeletesAndUpdates = getPooledInstance(merge.info, true); - + int numDeletesBefore = mergedDeletesAndUpdates.getDelCount(); // field -> delGen -> dv field updates Map> mappedDVUpdates = new HashMap<>(); @@ -3786,7 +3786,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, if (mergedDeletesAndUpdates == null) { infoStream.message("IW", "no new deletes or field updates since merge started"); } else { - String msg = 
mergedDeletesAndUpdates.getPendingDeleteCount() + " new deletes"; + String msg = mergedDeletesAndUpdates.getDelCount() - numDeletesBefore + " new deletes"; if (anyDVUpdates) { msg += " and " + mergedDeletesAndUpdates.getNumDVUpdates() + " new field updates"; msg += " (" + mergedDeletesAndUpdates.ramBytesUsed.get() + ") bytes"; @@ -4361,7 +4361,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, ReadersAndUpdates.MergeReader mr = rld.getReaderForMerge(context); SegmentReader reader = mr.reader; - int delCount = reader.numDeletedDocs(); if (infoStream.isEnabled("IW")) { infoStream.message("IW", "seg=" + segString(info) + " reader=" + reader); @@ -4369,7 +4368,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, merge.hardLiveDocs.add(mr.hardLiveDocs); merge.readers.add(reader); - assert delCount <= info.info.maxDoc(): "delCount=" + delCount + " info.maxDoc=" + info.info.maxDoc() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount(); segUpto++; } diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index 47a09e86c87..d552d74ba49 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -638,12 +638,12 @@ public abstract class MergePolicy { * @see IndexWriter#softUpdateDocument(Term, Iterable, Field...) 
* @see IndexWriterConfig#setSoftDeletesField(String) * @param info the segment info that identifies the segment - * @param pendingDeleteCount the number of pending deletes for this segment + * @param delCount the number of deleted documents for this segment * @param readerSupplier a supplier that allows to obtain a {@link CodecReader} for this segment */ - public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount, + public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier readerSupplier) throws IOException { - return info.getDelCount() + pendingDeleteCount; + return delCount; } /** diff --git a/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java index f3449e23bc8..1480ce458fe 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java @@ -81,8 +81,8 @@ public final class NoMergePolicy extends MergePolicy { } @Override - public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount, IOSupplier readerSupplier) throws IOException { - return super.numDeletesToMerge(info, pendingDeleteCount, readerSupplier); + public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier readerSupplier) throws IOException { + return super.numDeletesToMerge(info, delCount, readerSupplier); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java index 506d397fe69..f19b05391d3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java +++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java @@ -118,7 +118,7 @@ class PendingDeletes { /** * Returns the number of pending deletes that are not written to disk. 
*/ - int numPendingDeletes() { + protected int numPendingDeletes() { return pendingDeleteCount; } @@ -232,7 +232,49 @@ class PendingDeletes { } int numDeletesToMerge(MergePolicy policy, IOSupplier readerIOSupplier) throws IOException { - return policy.numDeletesToMerge(info, numPendingDeletes(), readerIOSupplier); + return policy.numDeletesToMerge(info, getDelCount(), readerIOSupplier); } + /** + * Returns true if the given reader needs to be refreshed in order to see the latest deletes + */ + final boolean needsRefresh(CodecReader reader) { + return reader.getLiveDocs() != getLiveDocs() || reader.numDeletedDocs() != getDelCount(); + } + + /** + * Returns the number of deleted docs in the segment. + */ + final int getDelCount() { + return info.getDelCount() + numPendingDeletes(); + } + + /** + * Returns the number of live documents in this segment + */ + final int numDocs() { + return info.info.maxDoc() - getDelCount(); + } + + // Call only from assert! + boolean verifyDocCounts(CodecReader reader) { + int count = 0; + Bits liveDocs = getLiveDocs(); + if (liveDocs != null) { + for(int docID = 0; docID < info.info.maxDoc(); docID++) { + if (liveDocs.get(docID)) { + count++; + } + } + } else { + count = info.info.maxDoc(); + } + assert numDocs() == count: "info.maxDoc=" + info.info.maxDoc() + " info.getDelCount()=" + info.getDelCount() + + " pendingDeletes=" + toString() + " count=" + count; + assert reader.numDocs() == numDocs() : "reader.numDocs() = " + reader.numDocs() + " numDocs() " + numDocs(); + assert reader.numDeletedDocs() <= info.info.maxDoc(): "delCount=" + reader.numDeletedDocs() + " info.maxDoc=" + + info.info.maxDoc() + " rld.pendingDeleteCount=" + numPendingDeletes() + + " info.getDelCount()=" + info.getDelCount(); + return true; + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java index fe012f65cb2..1c32e4fa92e 100644 --- 
a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java +++ b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java @@ -58,6 +58,7 @@ final class PendingSoftDeletes extends PendingDeletes { } else { // if it was deleted subtract the delCount pendingDeleteCount--; + assert pendingDeleteCount >= 0 : " illegal pending delete count: " + pendingDeleteCount; } return true; } @@ -65,7 +66,7 @@ final class PendingSoftDeletes extends PendingDeletes { } @Override - int numPendingDeletes() { + protected int numPendingDeletes() { return super.numPendingDeletes() + hardDeletes.numPendingDeletes(); } @@ -78,11 +79,11 @@ final class PendingSoftDeletes extends PendingDeletes { if (iterator != null) { // nothing is deleted we don't have a soft deletes field in this segment assert info.info.maxDoc() > 0 : "maxDoc is 0"; pendingDeleteCount += applySoftDeletes(iterator, getMutableBits()); + assert pendingDeleteCount >= 0 : " illegal pending delete count: " + pendingDeleteCount; } dvGeneration = info.getDocValuesGen(); } - assert numPendingDeletes() + info.getDelCount() <= info.info.maxDoc() : - numPendingDeletes() + " + " + info.getDelCount() + " > " + info.info.maxDoc(); + assert getDelCount() <= info.info.maxDoc() : getDelCount() + " > " + info.info.maxDoc(); } @Override @@ -133,6 +134,7 @@ final class PendingSoftDeletes extends PendingDeletes { void onDocValuesUpdate(FieldInfo info, DocValuesFieldUpdates.Iterator iterator) throws IOException { if (this.field.equals(info.name)) { pendingDeleteCount += applySoftDeletes(iterator, getMutableBits()); + assert pendingDeleteCount >= 0 : " illegal pending delete count: " + pendingDeleteCount; assert dvGeneration < info.getDocValuesGen() : "we have seen this generation update already: " + dvGeneration + " vs. 
" + info.getDocValuesGen(); assert dvGeneration != -2 : "docValues generation is still uninitialized"; dvGeneration = info.getDocValuesGen(); @@ -208,5 +210,4 @@ final class PendingSoftDeletes extends PendingDeletes { Bits getHardLiveDocs() { return hardDeletes.getLiveDocs(); } - } diff --git a/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java b/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java index 861cfaf1c39..45f58a602ca 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java @@ -132,7 +132,7 @@ final class ReaderPool implements Closeable { */ synchronized boolean anyPendingDeletes() { for(ReadersAndUpdates rld : readerMap.values()) { - if (rld.getPendingDeleteCount() != 0) { + if (rld.anyPendingDeletes()) { return true; } } @@ -321,7 +321,6 @@ final class ReaderPool implements Closeable { /** * Returns true iff there are any buffered doc values updates. Otherwise false. - * @see #anyPendingDeletes() */ synchronized boolean anyDocValuesChanges() { for (ReadersAndUpdates rld : readerMap.values()) { diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java index 55585951454..710b74876cd 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java @@ -100,8 +100,6 @@ final class ReadersAndUpdates { *

NOTE: steals incoming ref from reader. */ ReadersAndUpdates(int indexCreatedVersionMajor, SegmentReader reader, PendingDeletes pendingDeletes) throws IOException { this(indexCreatedVersionMajor, reader.getOriginalSegmentInfo(), pendingDeletes); - assert pendingDeletes.numPendingDeletes() >= 0 - : "got " + pendingDeletes.numPendingDeletes() + " reader.numDeletedDocs()=" + reader.numDeletedDocs() + " info.getDelCount()=" + info.getDelCount() + " maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs(); this.reader = reader; pendingDeletes.onNewReader(reader, info); } @@ -122,10 +120,9 @@ final class ReadersAndUpdates { return rc; } - public synchronized int getPendingDeleteCount() { - return pendingDeletes.numPendingDeletes(); + public synchronized int getDelCount() { + return pendingDeletes.getDelCount(); } - private synchronized boolean assertNoDupGen(List fieldUpdates, DocValuesFieldUpdates update) { for (int i=0;i