From acb3c379427193036f3d56503529400736ac5dff Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 29 Mar 2018 17:21:59 +0200 Subject: [PATCH] LUCENE-8232: Separate out PendingDeletes from ReadersAndUpdates Today ReadersAndUpdates is tightly coupled to IW and to all the handling of pending deletes. This change decouples IW and pending deletes from ReadersAndUpdates and makes PendingDeletes unit-testable. --- .../lucene/index/BufferedUpdatesStream.java | 4 +- .../org/apache/lucene/index/IndexWriter.java | 32 +-- .../apache/lucene/index/PendingDeletes.java | 193 ++++++++++++++++++ .../lucene/index/ReadersAndUpdates.java | 185 +++++------------ .../src/java/org/apache/lucene/util/Bits.java | 2 +- .../lucene/index/TestPendingDeletes.java | 142 +++++++++++++ 6 files changed, 404 insertions(+), 154 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java index e887e4dbcce..63001d4d0f5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java +++ b/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java @@ -333,8 +333,8 @@ class BufferedUpdatesStream implements Accountable { if (success) { totDelCount += segState.rld.getPendingDeleteCount() - segState.startDelCount; int fullDelCount = segState.rld.info.getDelCount() + segState.rld.getPendingDeleteCount(); - assert fullDelCount <= segState.rld.info.info.maxDoc(); - if (fullDelCount == segState.rld.info.info.maxDoc()) { + assert fullDelCount <= segState.rld.info.info.maxDoc() : fullDelCount + " > " + segState.rld.info.info.maxDoc(); + if (segState.rld.isFullyDeleted()) { if (allDeleted == null) { allDeleted = new ArrayList<>(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 40a53e0f324..3791e190625 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -604,7 +604,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (!poolReaders && rld.refCount() == 1 && readerMap.containsKey(rld.info)) { // This is the last ref to this RLD, and we're not // pooling, so remove it: - if (rld.writeLiveDocs(directory)) { + boolean changed = rld.writeLiveDocs(directory); + changed |= rld.writeFieldUpdates(directory, globalFieldNumberMap, bufferedUpdatesStream.getCompletedDelGen(), infoStream); + + if (changed) { // Make sure we only write del docs for a live segment: assert assertInfoLive == false || assertInfoIsLive(rld.info); // Must checkpoint because we just // did was move the state to disk: checkpointNoSIS(); } - - rld.writeFieldUpdates(directory, bufferedUpdatesStream.getCompletedDelGen(), infoStream); - if (rld.getNumDVUpdates() == 0) { rld.dropReaders(); readerMap.remove(rld.info); } @@ -635,13 +635,15 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { } void writeAllDocValuesUpdates() throws IOException { + assert Thread.holdsLock(IndexWriter.this); Collection<ReadersAndUpdates> copy; synchronized (this) { + // this needs to be protected by the reader pool lock otherwise we hit ConcurrentModificationException
copy = new HashSet<>(readerMap.values()); } boolean any = false; for (ReadersAndUpdates rld : copy) { - any |= rld.writeFieldUpdates(directory, bufferedUpdatesStream.getCompletedDelGen(), infoStream); + any |= rld.writeFieldUpdates(directory, globalFieldNumberMap, bufferedUpdatesStream.getCompletedDelGen(), infoStream); } if (any) { checkpoint(); @@ -649,11 +651,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { } void writeDocValuesUpdatesForMerge(List<SegmentCommitInfo> infos) throws IOException { + assert Thread.holdsLock(IndexWriter.this); boolean any = false; for (SegmentCommitInfo info : infos) { ReadersAndUpdates rld = get(info, false); if (rld != null) { - any |= rld.writeFieldUpdates(directory, bufferedUpdatesStream.getCompletedDelGen(), infoStream); + any |= rld.writeFieldUpdates(directory, globalFieldNumberMap, bufferedUpdatesStream.getCompletedDelGen(), infoStream); rld.setIsMerging(); } } @@ -706,7 +709,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { // Only acquire IW lock on each write, since this is a time-consuming operation. This way // other threads get a chance to run in between our writes. synchronized (IndexWriter.this) { - rld.writeFieldUpdates(directory, bufferedUpdatesStream.getCompletedDelGen(), infoStream); + if (rld.writeFieldUpdates(directory, globalFieldNumberMap, bufferedUpdatesStream.getCompletedDelGen(), infoStream)) { + checkpointNoSIS(); + } } long bytesUsedAfter = rld.ramBytesUsed.get(); ramBytesUsed -= bytesUsedBefore - bytesUsedAfter; @@ -789,8 +794,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (rld != null) { assert rld.info == info; boolean changed = rld.writeLiveDocs(directory); - - changed |= rld.writeFieldUpdates(directory, bufferedUpdatesStream.getCompletedDelGen(), infoStream); + changed |= rld.writeFieldUpdates(directory, globalFieldNumberMap, bufferedUpdatesStream.getCompletedDelGen(), infoStream); if (changed) { // Make sure we only write del docs for a live segment: assert assertInfoLive == false || assertInfoIsLive(rld.info); @@ -838,7 +842,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (create == false) { return null; } - rld = new ReadersAndUpdates(IndexWriter.this, info); + rld = new ReadersAndUpdates(segmentInfos.getIndexCreatedVersionMajor(), info, null, new PendingDeletes(null, info)); // Steal initial reference: readerMap.put(info, rld); } else { @@ -1147,7 +1151,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { LeafReaderContext leaf = leaves.get(i); SegmentReader segReader = (SegmentReader) leaf.reader(); SegmentReader newReader = new SegmentReader(segmentInfos.info(i), segReader, segReader.getLiveDocs(), segReader.numDocs()); - readerPool.readerMap.put(newReader.getSegmentInfo(), new ReadersAndUpdates(this, newReader)); + readerPool.readerMap.put(newReader.getSegmentInfo(), new ReadersAndUpdates(segmentInfos.getIndexCreatedVersionMajor(), newReader, new PendingDeletes(newReader, newReader.getSegmentInfo()))); } // We always assume we are carrying over incoming changes when opening from reader: @@ -1637,8 +1641,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (rld != null) { synchronized(bufferedUpdatesStream) { if (rld.delete(docID)) { - final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount(); - if (fullDelCount == rld.info.info.maxDoc()) { + if (rld.isFullyDeleted()) { dropDeletedSegment(rld.info); checkpoint(); } @@ -4000,8 +4003,7 @@ public class IndexWriter implements
Closeable, TwoPhaseCommit, Accountable { final boolean allDeleted = merge.segments.size() == 0 || merge.info.info.maxDoc() == 0 || - (mergedUpdates != null && - mergedUpdates.getPendingDeleteCount() == merge.info.info.maxDoc()); + (mergedUpdates != null && mergedUpdates.isFullyDeleted()); if (infoStream.isEnabled("IW")) { if (allDeleted) { diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java new file mode 100644 index 00000000000..74043f3f44a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.TrackingDirectoryWrapper; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.MutableBits; + +/** + * This class handles accounting and applying pending deletes for live segment readers. + */ +final class PendingDeletes { + private final SegmentCommitInfo info; + // True if the current liveDocs is referenced by an + // external NRT reader: + private boolean liveDocsShared; + // Holds the current shared (readable and writable) + // liveDocs. This is null when there are no deleted + // docs, and it's copy-on-write (cloned whenever we need + // to change it but it's been shared to an external NRT + // reader). + private Bits liveDocs; + private int pendingDeleteCount; + + PendingDeletes(SegmentReader reader, SegmentCommitInfo info) { + this.info = info; + liveDocsShared = true; + liveDocs = reader != null ? reader.getLiveDocs() : null; + if (reader != null) { + pendingDeleteCount = reader.numDeletedDocs() - info.getDelCount(); + } else { + pendingDeleteCount = 0; + } + } + + + /** + * Marks a document as deleted in this segment and returns true if the document was actually deleted, + * or false if it was already deleted.
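+ * If the live docs are currently shared with an external NRT reader, a private copy is made first (copy-on-write) so the shared instance is never mutated.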
+ */ + boolean delete(int docID) throws IOException { + assert info.info.maxDoc() > 0; + if (liveDocsShared) { + // Copy on write: this means we've cloned a + // SegmentReader sharing the current liveDocs + // instance; must now make a private clone so we can + // change it: + LiveDocsFormat liveDocsFormat = info.info.getCodec().liveDocsFormat(); + if (liveDocs == null) { + liveDocs = liveDocsFormat.newLiveDocs(info.info.maxDoc()); + } else { + liveDocs = liveDocsFormat.newLiveDocs(liveDocs); + } + liveDocsShared = false; + } + + assert liveDocs != null; + assert docID >= 0 && docID < liveDocs.length() : "out of bounds: docid=" + docID + " liveDocsLength=" + liveDocs.length() + " seg=" + info.info.name + " maxDoc=" + info.info.maxDoc(); + assert !liveDocsShared; + final boolean didDelete = liveDocs.get(docID); + if (didDelete) { + ((MutableBits) liveDocs).clear(docID); + pendingDeleteCount++; + } + return didDelete; + } + + /** + * Should be called if the live docs returned from {@link #getLiveDocs()} are shared outside of the + * {@link ReadersAndUpdates}. + */ + void liveDocsShared() { + liveDocsShared = true; + } + + /** + * Returns the current live docs, or null if all docs are live. The returned instance may be mutated behind the scenes; + * if it is shared outside of the ReadersAndUpdates, {@link #liveDocsShared()} must be called + * first. + */ + Bits getLiveDocs() { + return liveDocs; + } + + /** + * Returns the number of pending deletes that have not yet been written to disk. + */ + int numPendingDeletes() { + return pendingDeleteCount; + } + + /** + * Called once a new reader is opened for this segment, i.e. when deletes or updates are applied. + */ + void onNewReader(SegmentReader reader, SegmentCommitInfo info) { + if (liveDocs == null) { + liveDocs = reader.getLiveDocs(); + } + } + + /** + * Resets the pending delete count; called after the pending deletes have been written to disk. + */ + void reset() { + pendingDeleteCount = 0; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("PendingDeletes(seg=").append(info); + sb.append(" numPendingDeletes=").append(pendingDeleteCount); + sb.append(" liveDocsShared=").append(liveDocsShared); + return sb.toString(); + } + + /** + * Writes the live docs to disk and returns true if any new deletes were written.
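+ * On success the segment's delGen is advanced, its delete count is increased by the pending count, and the pending delete count is reset to 0.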
+ */ + boolean writeLiveDocs(Directory dir) throws IOException { + if (pendingDeleteCount == 0) { + return false; + } + + Bits liveDocs = this.liveDocs; + assert liveDocs != null; + // We have new deletes + assert liveDocs.length() == info.info.maxDoc(); + + // Do this so we can delete any created files on + // exception; this saves all codecs from having to do + // it: + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); + + // We can write directly to the actual name (vs to a + // .tmp & renaming it) because the file is not live + // until segments file is written: + boolean success = false; + try { + Codec codec = info.info.getCodec(); + codec.liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, trackingDir, info, pendingDeleteCount, IOContext.DEFAULT); + success = true; + } finally { + if (!success) { + // Advance only the nextWriteDelGen so that a 2nd + // attempt to write will write to a new file + info.advanceNextWriteDelGen(); + + // Delete any partially created file(s): + for (String fileName : trackingDir.getCreatedFiles()) { + IOUtils.deleteFilesIgnoringExceptions(dir, fileName); + } + } + } + + // If we hit an exc in the line above (eg disk full) + // then info's delGen remains pointing to the previous + // (successfully written) del docs: + info.advanceDelGen(); + info.setDelCount(info.getDelCount() + pendingDeleteCount); + reset(); + return true; + } + + /** + * Returns true iff the segment represented by this {@link PendingDeletes} is fully deleted + */ + boolean isFullyDeleted() { + return info.getDelCount() + pendingDeleteCount == info.info.maxDoc(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java index 16ea1e5b9d9..8a0e17e2b71 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -34,7 +35,6 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; -import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; @@ -43,38 +43,27 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.MutableBits; // Used by IndexWriter to hold open SegmentReaders (for // searching or merging), plus pending deletes and updates, // for a given segment -class ReadersAndUpdates { +final class ReadersAndUpdates { // Not final because we replace (clone) when we need to // change it and it's been shared: - public final SegmentCommitInfo info; + final SegmentCommitInfo info; // Tracks how many consumers are using this instance: private final AtomicInteger refCount = new AtomicInteger(1); - private final IndexWriter writer; - // Set once (null, and then maybe set, and never set again): private SegmentReader reader; - // Holds the current shared (readable and writable) - // liveDocs. 
This is null when there are no deleted - // docs, and it's copy-on-write (cloned whenever we need - // to change it but it's been shared to an external NRT - // reader). - private Bits liveDocs; - // How many further deletions we've done against // liveDocs vs when we loaded it or last wrote it: - private int pendingDeleteCount; + private final PendingDeletes pendingDeletes; - // True if the current liveDocs is referenced by an - // external NRT reader: - private boolean liveDocsShared; + // the major version this index was created with + private final int indexCreatedVersionMajor; // Indicates whether this segment is currently being merged. While a segment // is merging, all field updates are also registered in the @@ -96,25 +85,23 @@ class ReadersAndUpdates { // Only set if there are doc values updates against this segment, and the index is sorted: Sorter.DocMap sortMap; - public final AtomicLong ramBytesUsed = new AtomicLong(); - - public ReadersAndUpdates(IndexWriter writer, SegmentCommitInfo info) { - this.writer = writer; + final AtomicLong ramBytesUsed = new AtomicLong(); + + ReadersAndUpdates(int indexCreatedVersionMajor, SegmentCommitInfo info, SegmentReader reader, + PendingDeletes pendingDeletes) { this.info = info; - liveDocsShared = true; + this.pendingDeletes = pendingDeletes; + this.indexCreatedVersionMajor = indexCreatedVersionMajor; + this.reader = reader; } /** Init from a previously opened SegmentReader. * *

NOTE: steals incoming ref from reader. */ - public ReadersAndUpdates(IndexWriter writer, SegmentReader reader) { - this.writer = writer; - this.reader = reader; - info = reader.getSegmentInfo(); - liveDocs = reader.getLiveDocs(); - liveDocsShared = true; - pendingDeleteCount = reader.numDeletedDocs() - info.getDelCount(); - assert pendingDeleteCount >= 0: "got " + pendingDeleteCount + " reader.numDeletedDocs()=" + reader.numDeletedDocs() + " info.getDelCount()=" + info.getDelCount() + " maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs(); + ReadersAndUpdates(int indexCreatedVersionMajor, SegmentReader reader, PendingDeletes pendingDeletes) { + this(indexCreatedVersionMajor, reader.getSegmentInfo(), reader, pendingDeletes); + assert pendingDeletes.numPendingDeletes() >= 0 + : "got " + pendingDeletes.numPendingDeletes() + " reader.numDeletedDocs()=" + reader.numDeletedDocs() + " info.getDelCount()=" + info.getDelCount() + " maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs(); } public void incRef() { @@ -134,7 +121,7 @@ class ReadersAndUpdates { } public synchronized int getPendingDeleteCount() { - return pendingDeleteCount; + return pendingDeletes.numPendingDeletes(); } private synchronized boolean assertNoDupGen(List fieldUpdates, DocValuesFieldUpdates update) { @@ -186,6 +173,7 @@ class ReadersAndUpdates { // Call only from assert! public synchronized boolean verifyDocCounts() { int count; + Bits liveDocs = pendingDeletes.getLiveDocs(); if (liveDocs != null) { count = 0; for(int docID=0;docID writeFieldInfosGen(FieldInfos fieldInfos, Directory dir, DocValuesFormat dvFormat, - FieldInfosFormat infosFormat) throws IOException { + private synchronized Set writeFieldInfosGen(FieldInfos fieldInfos, Directory dir, + FieldInfosFormat infosFormat) throws IOException { final long nextFieldInfosGen = info.getNextFieldInfosGen(); final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX); // we write approximately that many bytes (based on Lucene46DVF): @@ -639,22 +558,15 @@ class ReadersAndUpdates { return trackingDir.getCreatedFiles(); } - public synchronized boolean writeFieldUpdates(Directory dir, long maxDelGen, InfoStream infoStream) throws IOException { - + public synchronized boolean writeFieldUpdates(Directory dir, FieldInfos.FieldNumbers fieldNumbers, long maxDelGen, InfoStream infoStream) throws IOException { long startTimeNS = System.nanoTime(); - - assert Thread.holdsLock(writer); - final Map> newDVFiles = new HashMap<>(); Set fieldInfosFiles = null; FieldInfos fieldInfos = null; - boolean any = false; - int count = 0; for (List updates : pendingDVUpdates.values()) { // Sort by increasing delGen: - Collections.sort(updates, (a, b) -> Long.compare(a.delGen, b.delGen)); - count += updates.size(); + Collections.sort(updates, Comparator.comparingLong(a -> a.delGen)); for (DocValuesFieldUpdates update : updates) { if (update.delGen <= maxDelGen && update.any()) { any = true; @@ -680,7 +592,7 @@ class ReadersAndUpdates { // IndexWriter.commitMergedDeletes). 
final SegmentReader reader; if (this.reader == null) { - reader = new SegmentReader(info, writer.segmentInfos.getIndexCreatedVersionMajor(), IOContext.READONCE); + reader = new SegmentReader(info, indexCreatedVersionMajor, IOContext.READONCE); } else { reader = this.reader; } @@ -688,7 +600,7 @@ class ReadersAndUpdates { try { // clone FieldInfos so that we can update their dvGen separately from // the reader's infos and write them to a new fieldInfos_gen file - FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap); + FieldInfos.Builder builder = new FieldInfos.Builder(fieldNumbers); // cannot use builder.add(reader.getFieldInfos()) because it does not // clone FI.attributes as well FI.dvGen for (FieldInfo fi : reader.getFieldInfos()) { @@ -713,7 +625,7 @@ class ReadersAndUpdates { handleNumericDVUpdates(fieldInfos, trackingDir, docValuesFormat, reader, newDVFiles, maxDelGen, infoStream); handleBinaryDVUpdates(fieldInfos, trackingDir, docValuesFormat, reader, newDVFiles, maxDelGen, infoStream); - fieldInfosFiles = writeFieldInfosGen(fieldInfos, trackingDir, docValuesFormat, codec.fieldInfosFormat()); + fieldInfosFiles = writeFieldInfosGen(fieldInfos, trackingDir, codec.fieldInfosFormat()); } finally { if (reader != this.reader) { reader.close(); @@ -763,11 +675,12 @@ class ReadersAndUpdates { // if there is a reader open, reopen it to reflect the updates if (reader != null) { - SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.info.maxDoc() - info.getDelCount() - pendingDeleteCount); + SegmentReader newReader = new SegmentReader(info, reader, pendingDeletes.getLiveDocs(), info.info.maxDoc() - info.getDelCount() - pendingDeletes.numPendingDeletes()); boolean success2 = false; try { reader.decRef(); reader = newReader; + pendingDeletes.onNewReader(reader, info); success2 = true; } finally { if (success2 == false) { @@ -792,14 +705,10 @@ class ReadersAndUpdates { } info.setDocValuesUpdatesFiles(newDVFiles); - // wrote new files, should checkpoint() - writer.checkpointNoSIS(); - if (infoStream.isEnabled("BD")) { infoStream.message("BD", String.format(Locale.ROOT, "done write field updates for seg=%s; took %.3fs; new files: %s", info, (System.nanoTime() - startTimeNS)/1000000000.0, newDVFiles)); } - return true; } @@ -829,12 +738,11 @@ class ReadersAndUpdates { } SegmentReader reader = getReader(context); - int delCount = pendingDeleteCount + info.getDelCount(); + int delCount = pendingDeletes.numPendingDeletes() + info.getDelCount(); if (delCount != reader.numDeletedDocs()) { - // beware of zombies: assert delCount > reader.numDeletedDocs(): "delCount=" + delCount + " reader.numDeletedDocs()=" + reader.numDeletedDocs(); - + Bits liveDocs = pendingDeletes.getLiveDocs(); assert liveDocs != null; // Create a new reader with the latest live docs: @@ -842,6 +750,7 @@ class ReadersAndUpdates { boolean success = false; try { reader.decRef(); + pendingDeletes.onNewReader(newReader, info); success = true; } finally { if (success == false) { @@ -851,7 +760,7 @@ class ReadersAndUpdates { reader = newReader; } - liveDocsShared = true; + pendingDeletes.liveDocsShared(); assert verifyDocCounts(); @@ -877,8 +786,12 @@ class ReadersAndUpdates { public String toString() { StringBuilder sb = new StringBuilder(); sb.append("ReadersAndLiveDocs(seg=").append(info); - sb.append(" pendingDeleteCount=").append(pendingDeleteCount); - sb.append(" liveDocsShared=").append(liveDocsShared); + sb.append(" pendingDeletes=").append(pendingDeletes); return sb.toString(); } + + 
public synchronized boolean isFullyDeleted() { + return pendingDeletes.isFullyDeleted(); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/util/Bits.java b/lucene/core/src/java/org/apache/lucene/util/Bits.java index 101122e628f..29935e737b8 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Bits.java +++ b/lucene/core/src/java/org/apache/lucene/util/Bits.java @@ -22,7 +22,7 @@ package org.apache.lucene.util; * @lucene.experimental */ -public interface Bits { +public interface Bits { /** * Returns the value of the bit with the specified index. * @param index index, should be non-negative and < {@link #length()}. diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java new file mode 100644 index 00000000000..39f5680a74f --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.Version; + +public class TestPendingDeletes extends LuceneTestCase { + + public void testDeleteDoc() throws IOException { + RAMDirectory dir = new RAMDirectory(); + SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(), + Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0); + PendingDeletes deletes = new PendingDeletes(null, commitInfo); + assertNull(deletes.getLiveDocs()); + int docToDelete = TestUtil.nextInt(random(), 0, 7); + assertTrue(deletes.delete(docToDelete)); + assertNotNull(deletes.getLiveDocs()); + assertEquals(1, deletes.numPendingDeletes()); + + Bits liveDocs = deletes.getLiveDocs(); + assertFalse(liveDocs.get(docToDelete)); + assertFalse(deletes.delete(docToDelete)); // delete again + + // make sure we are live, i.e. mutable + assertTrue(liveDocs.get(8)); + assertTrue(deletes.delete(8)); + assertFalse(liveDocs.get(8)); + assertEquals(2, deletes.numPendingDeletes()); + + deletes.liveDocsShared(); + + // after liveDocsShared(), a delete must copy-on-write and leave the shared instance untouched + assertTrue(liveDocs.get(9)); + assertTrue(deletes.delete(9)); + assertTrue(liveDocs.get(9)); // the old, shared instance is unchanged + liveDocs = deletes.getLiveDocs(); + assertFalse(liveDocs.get(9)); + assertFalse(liveDocs.get(8)); + assertFalse(liveDocs.get(docToDelete)); + assertEquals(3, deletes.numPendingDeletes()); + dir.close(); + } + + public void testWriteLiveDocs() throws IOException { + RAMDirectory dir = new RAMDirectory(); + SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 6, false, Codec.getDefault(), + Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0); + PendingDeletes deletes = new PendingDeletes(null, commitInfo); + assertFalse(deletes.writeLiveDocs(dir)); + assertEquals(0, dir.listAll().length); + boolean secondDocDeletes = random().nextBoolean(); + deletes.delete(5); + if (secondDocDeletes) { + deletes.liveDocsShared(); + deletes.delete(2); + } + assertEquals(0, commitInfo.getDelGen()); + assertEquals(0, commitInfo.getDelCount()); + + assertEquals(secondDocDeletes ? 2 : 1, deletes.numPendingDeletes()); + assertTrue(deletes.writeLiveDocs(dir)); + assertEquals(1, dir.listAll().length); + Bits liveDocs = Codec.getDefault().liveDocsFormat().readLiveDocs(dir, commitInfo, IOContext.DEFAULT); + assertFalse(liveDocs.get(5)); + if (secondDocDeletes) { + assertFalse(liveDocs.get(2)); + } else { + assertTrue(liveDocs.get(2)); + } + assertTrue(liveDocs.get(0)); + assertTrue(liveDocs.get(1)); + assertTrue(liveDocs.get(3)); + assertTrue(liveDocs.get(4)); + + assertEquals(0, deletes.numPendingDeletes()); + assertEquals(secondDocDeletes ? 2 : 1, commitInfo.getDelCount()); + assertEquals(1, commitInfo.getDelGen()); + + deletes.delete(0); + assertTrue(deletes.writeLiveDocs(dir)); + assertEquals(2, dir.listAll().length); + liveDocs = Codec.getDefault().liveDocsFormat().readLiveDocs(dir, commitInfo, IOContext.DEFAULT); + assertFalse(liveDocs.get(5)); + if (secondDocDeletes) { + assertFalse(liveDocs.get(2)); + } else { + assertTrue(liveDocs.get(2)); + } + assertFalse(liveDocs.get(0)); + assertTrue(liveDocs.get(1)); + assertTrue(liveDocs.get(3)); + assertTrue(liveDocs.get(4)); + + assertEquals(0, deletes.numPendingDeletes()); + assertEquals(secondDocDeletes ? 3 : 2, commitInfo.getDelCount()); + assertEquals(2, commitInfo.getDelGen()); + dir.close(); + } + + public void testIsFullyDeleted() throws IOException { + RAMDirectory dir = new RAMDirectory(); + SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(), + Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0); + PendingDeletes deletes = new PendingDeletes(null, commitInfo); + for (int i = 0; i < 3; i++) { + assertTrue(deletes.delete(i)); + if (random().nextBoolean()) { + assertTrue(deletes.writeLiveDocs(dir)); + } + assertEquals(i == 2, deletes.isFullyDeleted()); + } + dir.close(); + } +}
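
A quick sketch of the new PendingDeletes lifecycle for reviewers, written as a hypothetical extra test method in the style of TestPendingDeletes above. The method name, the "demo" segment name and the chosen doc IDs are illustrative only and not part of this patch; the assertions restate the accounting rules the patch introduces:

    public void testLifecycleSketch() throws IOException {
      RAMDirectory dir = new RAMDirectory();
      SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "demo", 10, false, Codec.getDefault(),
          Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
      SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0);
      PendingDeletes deletes = new PendingDeletes(null, commitInfo); // no SegmentReader yet

      assertTrue(deletes.delete(3));  // doc 3 was live and is now marked deleted in memory
      deletes.liveDocsShared();       // simulate an NRT reader holding on to the live docs
      assertTrue(deletes.delete(4));  // copy-on-write: the shared instance is not mutated
      assertEquals(2, deletes.numPendingDeletes());

      // Writing moves the pending count onto the commit point and advances the del generation:
      assertTrue(deletes.writeLiveDocs(dir));
      assertEquals(0, deletes.numPendingDeletes());
      assertEquals(2, commitInfo.getDelCount());
      assertEquals(1, commitInfo.getDelGen());
      assertFalse(deletes.isFullyDeleted()); // 8 of the 10 docs are still live
      dir.close();
    }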