mirror of https://github.com/apache/lucene.git
LUCENE-8338: Ensure numbers returned by PendingDeletes are well defined
Today a call to PendingDeletes#numPendingDeletes might return 0 if the deletes are written to disk. This doesn't mean these values are committed or refreshed in the latest reader. Some places in IW use these numbers to decide whether deletes have been added since the last time they checked (BufferedUpdatesStream), which can cause wrong (though not fatal) decisions, i.e. kicking off new merges. This API is now protected and no longer visible outside of PendingDeletes, to prevent any kind of confusion. The APIs now expose the absolute numbers through getDelCount and numDocs, which have the same names and semantics as their relatives on IndexReader/Writer and SegmentCommitInfo.
This commit is contained in:
parent
d243f35a54
commit
76263087b5
|
@ -259,7 +259,7 @@ final class BufferedUpdatesStream implements Accountable {
|
|||
|
||||
SegmentState(ReadersAndUpdates rld, IOUtils.IOConsumer<ReadersAndUpdates> onClose, SegmentCommitInfo info) throws IOException {
|
||||
this.rld = rld;
|
||||
startDelCount = rld.getPendingDeleteCount();
|
||||
startDelCount = rld.getDelCount();
|
||||
delGen = info.getBufferedDeletesGen();
|
||||
this.onClose = onClose;
|
||||
reader = rld.getReader(IOContext.READ);
|
||||
|
|
|
@ -99,8 +99,8 @@ public class FilterMergePolicy extends MergePolicy {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount,
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int delCount,
|
||||
IOSupplier<CodecReader> readerSupplier) throws IOException {
|
||||
return in.numDeletesToMerge(info, pendingDeleteCount, readerSupplier);
|
||||
return in.numDeletesToMerge(info, delCount, readerSupplier);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -390,8 +390,8 @@ final class FrozenBufferedUpdates {
|
|||
final List<BufferedUpdatesStream.SegmentState> segmentStates = Arrays.asList(segStates);
|
||||
for (BufferedUpdatesStream.SegmentState segState : segmentStates) {
|
||||
if (success) {
|
||||
totDelCount += segState.rld.getPendingDeleteCount() - segState.startDelCount;
|
||||
int fullDelCount = segState.rld.info.getDelCount() + segState.rld.getPendingDeleteCount();
|
||||
totDelCount += segState.rld.getDelCount() - segState.startDelCount;
|
||||
int fullDelCount = segState.rld.getDelCount();
|
||||
assert fullDelCount <= segState.rld.info.info.maxDoc() : fullDelCount + " > " + segState.rld.info.info.maxDoc();
|
||||
if (segState.rld.isFullyDeleted() && writer.getConfig().getMergePolicy().keepFullyDeletedSegment(() -> segState.reader) == false) {
|
||||
if (allDeleted == null) {
|
||||
|
|
|
@ -635,14 +635,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
|
|||
public int numDeletedDocs(SegmentCommitInfo info) {
|
||||
ensureOpen(false);
|
||||
validate(info);
|
||||
int delCount = info.getDelCount();
|
||||
|
||||
final ReadersAndUpdates rld = getPooledInstance(info, false);
|
||||
if (rld != null) {
|
||||
delCount += rld.getPendingDeleteCount();
|
||||
return rld.getDelCount(); // get the full count from here since SCI might change concurrently
|
||||
} else {
|
||||
int delCount = info.getDelCount();
|
||||
assert delCount <= info.info.maxDoc(): "delCount: " + delCount + " maxDoc: " + info.info.maxDoc();
|
||||
return delCount;
|
||||
}
|
||||
assert delCount <= info.info.maxDoc(): "delCount: " + delCount + " maxDoc: " + info.info.maxDoc();
|
||||
return delCount;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3695,7 +3695,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
|
|||
|
||||
// Lazy init (only when we find a delete or update to carry over):
|
||||
final ReadersAndUpdates mergedDeletesAndUpdates = getPooledInstance(merge.info, true);
|
||||
|
||||
int numDeletesBefore = mergedDeletesAndUpdates.getDelCount();
|
||||
// field -> delGen -> dv field updates
|
||||
Map<String,Map<Long,DocValuesFieldUpdates>> mappedDVUpdates = new HashMap<>();
|
||||
|
||||
|
@ -3786,7 +3786,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
|
|||
if (mergedDeletesAndUpdates == null) {
|
||||
infoStream.message("IW", "no new deletes or field updates since merge started");
|
||||
} else {
|
||||
String msg = mergedDeletesAndUpdates.getPendingDeleteCount() + " new deletes";
|
||||
String msg = mergedDeletesAndUpdates.getDelCount() - numDeletesBefore + " new deletes";
|
||||
if (anyDVUpdates) {
|
||||
msg += " and " + mergedDeletesAndUpdates.getNumDVUpdates() + " new field updates";
|
||||
msg += " (" + mergedDeletesAndUpdates.ramBytesUsed.get() + ") bytes";
|
||||
|
@ -4361,7 +4361,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
|
|||
|
||||
ReadersAndUpdates.MergeReader mr = rld.getReaderForMerge(context);
|
||||
SegmentReader reader = mr.reader;
|
||||
int delCount = reader.numDeletedDocs();
|
||||
|
||||
if (infoStream.isEnabled("IW")) {
|
||||
infoStream.message("IW", "seg=" + segString(info) + " reader=" + reader);
|
||||
|
@ -4369,7 +4368,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
|
|||
|
||||
merge.hardLiveDocs.add(mr.hardLiveDocs);
|
||||
merge.readers.add(reader);
|
||||
assert delCount <= info.info.maxDoc(): "delCount=" + delCount + " info.maxDoc=" + info.info.maxDoc() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
|
||||
segUpto++;
|
||||
}
|
||||
|
||||
|
|
|
@ -638,12 +638,12 @@ public abstract class MergePolicy {
|
|||
* @see IndexWriter#softUpdateDocument(Term, Iterable, Field...)
|
||||
* @see IndexWriterConfig#setSoftDeletesField(String)
|
||||
* @param info the segment info that identifies the segment
|
||||
* @param pendingDeleteCount the number of pending deletes for this segment
|
||||
* @param delCount the number of deleted documents for this segment
|
||||
* @param readerSupplier a supplier that allows to obtain a {@link CodecReader} for this segment
|
||||
*/
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount,
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int delCount,
|
||||
IOSupplier<CodecReader> readerSupplier) throws IOException {
|
||||
return info.getDelCount() + pendingDeleteCount;
|
||||
return delCount;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -81,8 +81,8 @@ public final class NoMergePolicy extends MergePolicy {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount, IOSupplier<CodecReader> readerSupplier) throws IOException {
|
||||
return super.numDeletesToMerge(info, pendingDeleteCount, readerSupplier);
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier<CodecReader> readerSupplier) throws IOException {
|
||||
return super.numDeletesToMerge(info, delCount, readerSupplier);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -118,7 +118,7 @@ class PendingDeletes {
|
|||
/**
|
||||
* Returns the number of pending deletes that are not written to disk.
|
||||
*/
|
||||
int numPendingDeletes() {
|
||||
protected int numPendingDeletes() {
|
||||
return pendingDeleteCount;
|
||||
}
|
||||
|
||||
|
@ -232,7 +232,49 @@ class PendingDeletes {
|
|||
}
|
||||
|
||||
int numDeletesToMerge(MergePolicy policy, IOSupplier<CodecReader> readerIOSupplier) throws IOException {
|
||||
return policy.numDeletesToMerge(info, numPendingDeletes(), readerIOSupplier);
|
||||
return policy.numDeletesToMerge(info, getDelCount(), readerIOSupplier);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given reader needs to be refreshed in order to see the latest deletes
|
||||
*/
|
||||
final boolean needsRefresh(CodecReader reader) {
|
||||
return reader.getLiveDocs() != getLiveDocs() || reader.numDeletedDocs() != getDelCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of deleted docs in the segment.
|
||||
*/
|
||||
final int getDelCount() {
|
||||
return info.getDelCount() + numPendingDeletes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of live documents in this segment
|
||||
*/
|
||||
final int numDocs() {
|
||||
return info.info.maxDoc() - getDelCount();
|
||||
}
|
||||
|
||||
// Call only from assert!
|
||||
boolean verifyDocCounts(CodecReader reader) {
|
||||
int count = 0;
|
||||
Bits liveDocs = getLiveDocs();
|
||||
if (liveDocs != null) {
|
||||
for(int docID = 0; docID < info.info.maxDoc(); docID++) {
|
||||
if (liveDocs.get(docID)) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
count = info.info.maxDoc();
|
||||
}
|
||||
assert numDocs() == count: "info.maxDoc=" + info.info.maxDoc() + " info.getDelCount()=" + info.getDelCount() +
|
||||
" pendingDeletes=" + toString() + " count=" + count;
|
||||
assert reader.numDocs() == numDocs() : "reader.numDocs() = " + reader.numDocs() + " numDocs() " + numDocs();
|
||||
assert reader.numDeletedDocs() <= info.info.maxDoc(): "delCount=" + reader.numDeletedDocs() + " info.maxDoc=" +
|
||||
info.info.maxDoc() + " rld.pendingDeleteCount=" + numPendingDeletes() +
|
||||
" info.getDelCount()=" + info.getDelCount();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,6 +58,7 @@ final class PendingSoftDeletes extends PendingDeletes {
|
|||
} else {
|
||||
// if it was deleted subtract the delCount
|
||||
pendingDeleteCount--;
|
||||
assert pendingDeleteCount >= 0 : " illegal pending delete count: " + pendingDeleteCount;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -65,7 +66,7 @@ final class PendingSoftDeletes extends PendingDeletes {
|
|||
}
|
||||
|
||||
@Override
|
||||
int numPendingDeletes() {
|
||||
protected int numPendingDeletes() {
|
||||
return super.numPendingDeletes() + hardDeletes.numPendingDeletes();
|
||||
}
|
||||
|
||||
|
@ -78,11 +79,11 @@ final class PendingSoftDeletes extends PendingDeletes {
|
|||
if (iterator != null) { // nothing is deleted we don't have a soft deletes field in this segment
|
||||
assert info.info.maxDoc() > 0 : "maxDoc is 0";
|
||||
pendingDeleteCount += applySoftDeletes(iterator, getMutableBits());
|
||||
assert pendingDeleteCount >= 0 : " illegal pending delete count: " + pendingDeleteCount;
|
||||
}
|
||||
dvGeneration = info.getDocValuesGen();
|
||||
}
|
||||
assert numPendingDeletes() + info.getDelCount() <= info.info.maxDoc() :
|
||||
numPendingDeletes() + " + " + info.getDelCount() + " > " + info.info.maxDoc();
|
||||
assert getDelCount() <= info.info.maxDoc() : getDelCount() + " > " + info.info.maxDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -133,6 +134,7 @@ final class PendingSoftDeletes extends PendingDeletes {
|
|||
void onDocValuesUpdate(FieldInfo info, DocValuesFieldUpdates.Iterator iterator) throws IOException {
|
||||
if (this.field.equals(info.name)) {
|
||||
pendingDeleteCount += applySoftDeletes(iterator, getMutableBits());
|
||||
assert pendingDeleteCount >= 0 : " illegal pending delete count: " + pendingDeleteCount;
|
||||
assert dvGeneration < info.getDocValuesGen() : "we have seen this generation update already: " + dvGeneration + " vs. " + info.getDocValuesGen();
|
||||
assert dvGeneration != -2 : "docValues generation is still uninitialized";
|
||||
dvGeneration = info.getDocValuesGen();
|
||||
|
@ -208,5 +210,4 @@ final class PendingSoftDeletes extends PendingDeletes {
|
|||
Bits getHardLiveDocs() {
|
||||
return hardDeletes.getLiveDocs();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -132,7 +132,7 @@ final class ReaderPool implements Closeable {
|
|||
*/
|
||||
synchronized boolean anyPendingDeletes() {
|
||||
for(ReadersAndUpdates rld : readerMap.values()) {
|
||||
if (rld.getPendingDeleteCount() != 0) {
|
||||
if (rld.anyPendingDeletes()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -321,7 +321,6 @@ final class ReaderPool implements Closeable {
|
|||
|
||||
/**
|
||||
* Returns <code>true</code> iff there are any buffered doc values updates. Otherwise <code>false</code>.
|
||||
* @see #anyPendingDeletes()
|
||||
*/
|
||||
synchronized boolean anyDocValuesChanges() {
|
||||
for (ReadersAndUpdates rld : readerMap.values()) {
|
||||
|
|
|
@ -100,8 +100,6 @@ final class ReadersAndUpdates {
|
|||
* <p>NOTE: steals incoming ref from reader. */
|
||||
ReadersAndUpdates(int indexCreatedVersionMajor, SegmentReader reader, PendingDeletes pendingDeletes) throws IOException {
|
||||
this(indexCreatedVersionMajor, reader.getOriginalSegmentInfo(), pendingDeletes);
|
||||
assert pendingDeletes.numPendingDeletes() >= 0
|
||||
: "got " + pendingDeletes.numPendingDeletes() + " reader.numDeletedDocs()=" + reader.numDeletedDocs() + " info.getDelCount()=" + info.getDelCount() + " maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs();
|
||||
this.reader = reader;
|
||||
pendingDeletes.onNewReader(reader, info);
|
||||
}
|
||||
|
@ -122,10 +120,9 @@ final class ReadersAndUpdates {
|
|||
return rc;
|
||||
}
|
||||
|
||||
public synchronized int getPendingDeleteCount() {
|
||||
return pendingDeletes.numPendingDeletes();
|
||||
public synchronized int getDelCount() {
|
||||
return pendingDeletes.getDelCount();
|
||||
}
|
||||
|
||||
private synchronized boolean assertNoDupGen(List<DocValuesFieldUpdates> fieldUpdates, DocValuesFieldUpdates update) {
|
||||
for (int i=0;i<fieldUpdates.size();i++) {
|
||||
DocValuesFieldUpdates oldUpdate = fieldUpdates.get(i);
|
||||
|
@ -167,24 +164,6 @@ final class ReadersAndUpdates {
|
|||
return count;
|
||||
}
|
||||
|
||||
// Call only from assert!
|
||||
public synchronized boolean verifyDocCounts() {
|
||||
int count;
|
||||
Bits liveDocs = pendingDeletes.getLiveDocs();
|
||||
if (liveDocs != null) {
|
||||
count = 0;
|
||||
for(int docID=0;docID<info.info.maxDoc();docID++) {
|
||||
if (liveDocs.get(docID)) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
count = info.info.maxDoc();
|
||||
}
|
||||
|
||||
assert info.info.maxDoc() - info.getDelCount() - pendingDeletes.numPendingDeletes() == count: "info.maxDoc=" + info.info.maxDoc() + " info.getDelCount()=" + info.getDelCount() + " pendingDeletes=" + pendingDeletes.numPendingDeletes() + " count=" + count;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Returns a {@link SegmentReader}. */
|
||||
public synchronized SegmentReader getReader(IOContext context) throws IOException {
|
||||
|
@ -235,8 +214,7 @@ final class ReadersAndUpdates {
|
|||
// force new liveDocs
|
||||
Bits liveDocs = pendingDeletes.getLiveDocs();
|
||||
if (liveDocs != null) {
|
||||
return new SegmentReader(info, reader, liveDocs,
|
||||
info.info.maxDoc() - info.getDelCount() - pendingDeletes.numPendingDeletes());
|
||||
return new SegmentReader(info, reader, liveDocs, pendingDeletes.numDocs());
|
||||
} else {
|
||||
// liveDocs == null and reader != null. That can only be if there are no deletes
|
||||
assert reader.getLiveDocs() == null;
|
||||
|
@ -254,8 +232,7 @@ final class ReadersAndUpdates {
|
|||
// get a reader and dec the ref right away we just make sure we have a reader
|
||||
getReader(IOContext.READ).decRef();
|
||||
}
|
||||
if (reader.getLiveDocs() != pendingDeletes.getLiveDocs()
|
||||
|| reader.numDeletedDocs() != info.getDelCount() - pendingDeletes.numPendingDeletes()) {
|
||||
if (pendingDeletes.needsRefresh(reader)) {
|
||||
// we have a reader but its live-docs are out of sync. let's create a temporary one that we never share
|
||||
swapNewReaderWithLatestLiveDocs();
|
||||
}
|
||||
|
@ -429,6 +406,10 @@ final class ReadersAndUpdates {
|
|||
}
|
||||
}
|
||||
|
||||
synchronized boolean anyPendingDeletes() {
|
||||
return pendingDeletes.numPendingDeletes() != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* This class merges the current on-disk DV with an incoming update DV instance and merges the two instances
|
||||
* giving the incoming update precedence in terms of values, in other words the values of the update always
|
||||
|
@ -668,8 +649,7 @@ final class ReadersAndUpdates {
|
|||
private SegmentReader createNewReaderWithLatestLiveDocs(SegmentReader reader) throws IOException {
|
||||
assert reader != null;
|
||||
assert Thread.holdsLock(this) : Thread.currentThread().getName();
|
||||
SegmentReader newReader = new SegmentReader(info, reader, pendingDeletes.getLiveDocs(),
|
||||
info.info.maxDoc() - info.getDelCount() - pendingDeletes.numPendingDeletes());
|
||||
SegmentReader newReader = new SegmentReader(info, reader, pendingDeletes.getLiveDocs(), pendingDeletes.numDocs());
|
||||
boolean success2 = false;
|
||||
try {
|
||||
pendingDeletes.onNewReader(newReader, info);
|
||||
|
@ -727,14 +707,13 @@ final class ReadersAndUpdates {
|
|||
}
|
||||
|
||||
SegmentReader reader = getReader(context);
|
||||
int delCount = pendingDeletes.numPendingDeletes() + info.getDelCount();
|
||||
if (delCount != reader.numDeletedDocs()) {
|
||||
if (pendingDeletes.needsRefresh(reader)) {
|
||||
// beware of zombies:
|
||||
assert delCount > reader.numDeletedDocs(): "delCount=" + delCount + " reader.numDeletedDocs()=" + reader.numDeletedDocs();
|
||||
assert pendingDeletes.getLiveDocs() != null;
|
||||
reader = createNewReaderWithLatestLiveDocs(reader);
|
||||
}
|
||||
assert verifyDocCounts();
|
||||
assert pendingDeletes.verifyDocCounts(reader);
|
||||
|
||||
|
||||
return new MergeReader(reader, pendingDeletes.getHardLiveDocs());
|
||||
}
|
||||
|
|
|
@ -173,8 +173,8 @@ public final class SoftDeletesRetentionMergePolicy extends OneMergeWrappingMerge
|
|||
}
|
||||
|
||||
@Override
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount, IOSupplier<CodecReader> readerSupplier) throws IOException {
|
||||
final int numDeletesToMerge = super.numDeletesToMerge(info, pendingDeleteCount, readerSupplier);
|
||||
public int numDeletesToMerge(SegmentCommitInfo info, int delCount, IOSupplier<CodecReader> readerSupplier) throws IOException {
|
||||
final int numDeletesToMerge = super.numDeletesToMerge(info, delCount, readerSupplier);
|
||||
if (numDeletesToMerge != 0) {
|
||||
final CodecReader reader = readerSupplier.get();
|
||||
if (reader.getLiveDocs() != null) {
|
||||
|
|
|
@ -178,12 +178,9 @@ public class TestReaderPool extends LuceneTestCase {
|
|||
boolean expectUpdate = false;
|
||||
int doc = -1;
|
||||
if (postings != null && postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
readersAndUpdates.delete(doc = postings.docID());
|
||||
assertTrue(readersAndUpdates.delete(doc = postings.docID()));
|
||||
expectUpdate = true;
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
|
||||
assertTrue(pool.anyPendingDeletes());
|
||||
} else {
|
||||
assertFalse(pool.anyPendingDeletes());
|
||||
}
|
||||
assertFalse(pool.anyDocValuesChanges()); // deletes are not accounted here
|
||||
readOnlyClone.close();
|
||||
|
|
|
@ -104,7 +104,7 @@ public class TestTryDelete extends LuceneTestCase
|
|||
}
|
||||
|
||||
assertTrue(writer.hasDeletions());
|
||||
|
||||
|
||||
mgr.maybeRefresh();
|
||||
|
||||
searcher = mgr.acquire();
|
||||
|
|
Loading…
Reference in New Issue