diff --git a/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java b/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java index 980f4a19924..b792be26873 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java @@ -89,7 +89,7 @@ final class ReaderPool implements Closeable { LeafReaderContext leaf = leaves.get(i); SegmentReader segReader = (SegmentReader) leaf.reader(); SegmentReader newReader = new SegmentReader(segmentInfos.info(i), segReader, segReader.getLiveDocs(), - segReader.numDocs()); + segReader.getHardLiveDocs(), segReader.numDocs(), true); readerMap.put(newReader.getOriginalSegmentInfo(), new ReadersAndUpdates(segmentInfos.getIndexCreatedVersionMajor(), newReader, newPendingDeletes(newReader, newReader.getOriginalSegmentInfo()))); } diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java index 3453447ecce..b09338fc722 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java @@ -214,7 +214,7 @@ final class ReadersAndUpdates { // force new liveDocs Bits liveDocs = pendingDeletes.getLiveDocs(); if (liveDocs != null) { - return new SegmentReader(info, reader, liveDocs, pendingDeletes.numDocs()); + return new SegmentReader(info, reader, liveDocs, pendingDeletes.getHardLiveDocs(), pendingDeletes.numDocs(), true); } else { // liveDocs == null and reader != null. 
That can only be if there are no deletes assert reader.getLiveDocs() == null; @@ -645,7 +645,8 @@ final class ReadersAndUpdates { private SegmentReader createNewReaderWithLatestLiveDocs(SegmentReader reader) throws IOException { assert reader != null; assert Thread.holdsLock(this) : Thread.currentThread().getName(); - SegmentReader newReader = new SegmentReader(info, reader, pendingDeletes.getLiveDocs(), pendingDeletes.numDocs()); + SegmentReader newReader = new SegmentReader(info, reader, pendingDeletes.getLiveDocs(), + pendingDeletes.getHardLiveDocs(), pendingDeletes.numDocs(), true); boolean success2 = false; try { pendingDeletes.onNewReader(newReader, info); diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index 9373718dcce..b368b964b7a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -51,6 +51,7 @@ public final class SegmentReader extends CodecReader { private final SegmentCommitInfo originalSi; private final LeafMetaData metaData; private final Bits liveDocs; + private final Bits hardLiveDocs; // Normally set to si.maxDoc - si.delDocCount, unless we // were created as an NRT reader from IW, in which case IW @@ -65,7 +66,7 @@ public final class SegmentReader extends CodecReader { final DocValuesProducer docValuesProducer; final FieldInfos fieldInfos; - + /** * Constructs a new SegmentReader with a new core. 
* @throws CorruptIndexException if the index is corrupt @@ -87,16 +88,16 @@ public final class SegmentReader extends CodecReader { try { if (si.hasDeletions()) { // NOTE: the bitvector is stored using the regular directory, not cfs - liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE); + hardLiveDocs = liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE); } else { assert si.getDelCount() == 0; - liveDocs = null; + hardLiveDocs = liveDocs = null; } numDocs = si.info.maxDoc() - si.getDelCount(); fieldInfos = initFieldInfos(); docValuesProducer = initDocValuesProducer(); - + assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs); success = true; } finally { // With lock-less commits, it's entirely possible (and @@ -110,27 +111,10 @@ public final class SegmentReader extends CodecReader { } } - /** Create new SegmentReader sharing core from a previous - * SegmentReader and loading new live docs from a new - * deletes file. Used by openIfChanged. */ - SegmentReader(SegmentCommitInfo si, SegmentReader sr) throws IOException { - this(si, sr, - si.hasDeletions() ? si.info.getCodec().liveDocsFormat().readLiveDocs(si.info.dir, si, IOContext.READONCE) : null, - si.info.maxDoc() - si.getDelCount(), false); - } - - /** Create new SegmentReader sharing core from a previous - * SegmentReader and using the provided in-memory - * liveDocs. Used by IndexWriter to provide a new NRT - * reader */ - SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, int numDocs) throws IOException { - this(si, sr, liveDocs, numDocs, true); - } - /** Create new SegmentReader sharing core from a previous * SegmentReader and using the provided liveDocs, and recording * whether those liveDocs were carried in ram (isNRT=true). 
*/ - SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, int numDocs, boolean isNRT) throws IOException { + SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, Bits hardLiveDocs, int numDocs, boolean isNRT) throws IOException { if (numDocs > si.info.maxDoc()) { throw new IllegalArgumentException("numDocs=" + numDocs + " but maxDoc=" + si.info.maxDoc()); } @@ -141,6 +125,8 @@ public final class SegmentReader extends CodecReader { this.originalSi = si; this.metaData = sr.getMetaData(); this.liveDocs = liveDocs; + this.hardLiveDocs = hardLiveDocs; + assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs); this.isNRT = isNRT; this.numDocs = numDocs; this.core = sr.core; @@ -159,6 +145,15 @@ public final class SegmentReader extends CodecReader { } } + private static boolean assertLiveDocs(boolean isNRT, Bits hardLiveDocs, Bits liveDocs) { + if (isNRT) { + assert hardLiveDocs == null || liveDocs != null : " liveDocs must be non null if hardLiveDocs are non null"; + } else { + assert hardLiveDocs == liveDocs : "non-nrt case must have identical liveDocs"; + } + return true; + } + /** * init most recent DocValues for the current commit */ @@ -361,4 +356,14 @@ public final class SegmentReader extends CodecReader { SegmentCommitInfo getOriginalSegmentInfo() { return originalSi; } + + /** + * Returns the live docs that are not hard-deleted. This is an expert API to be used with + * soft-deletes to filter out documents that were hard-deleted, for instance due to aborted documents, or to distinguish + * soft- and hard-deleted documents, i.e. a rolled-back tombstone.
+ * @lucene.experimental + */ + public Bits getHardLiveDocs() { + return hardLiveDocs; + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java index 3b1b72fc7d1..5b2b049125f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java @@ -32,6 +32,7 @@ import java.util.concurrent.CopyOnWriteArraySet; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; /** Default implementation of {@link DirectoryReader}. */ @@ -174,16 +175,17 @@ public final class StandardDirectoryReader extends DirectoryReader { try { SegmentReader newReader; if (oldReader == null || commitInfo.info.getUseCompoundFile() != oldReader.getSegmentInfo().info.getUseCompoundFile()) { - // this is a new reader; in case we hit an exception we can decRef it safely newReader = new SegmentReader(commitInfo, infos.getIndexCreatedVersionMajor(), IOContext.READ); newReaders[i] = newReader; } else { if (oldReader.isNRT) { // We must load liveDocs/DV updates from disk: - newReaders[i] = new SegmentReader(commitInfo, oldReader); + Bits liveDocs = commitInfo.hasDeletions() ? 
commitInfo.info.getCodec().liveDocsFormat() + .readLiveDocs(commitInfo.info.dir, commitInfo, IOContext.READONCE) : null; + newReaders[i] = new SegmentReader(commitInfo, oldReader, liveDocs, liveDocs, + commitInfo.info.maxDoc() - commitInfo.getDelCount(), false); } else { - if (oldReader.getSegmentInfo().getDelGen() == commitInfo.getDelGen() && oldReader.getSegmentInfo().getFieldInfosGen() == commitInfo.getFieldInfosGen()) { // No change; this reader will be shared between @@ -197,10 +199,14 @@ public final class StandardDirectoryReader extends DirectoryReader { if (oldReader.getSegmentInfo().getDelGen() == commitInfo.getDelGen()) { // only DV updates - newReaders[i] = new SegmentReader(commitInfo, oldReader, oldReader.getLiveDocs(), oldReader.numDocs(), false); // this is not an NRT reader! + newReaders[i] = new SegmentReader(commitInfo, oldReader, oldReader.getLiveDocs(), + oldReader.getHardLiveDocs(), oldReader.numDocs(), false); // this is not an NRT reader! } else { // both DV and liveDocs have changed - newReaders[i] = new SegmentReader(commitInfo, oldReader); + Bits liveDocs = commitInfo.hasDeletions() ? 
commitInfo.info.getCodec().liveDocsFormat() + .readLiveDocs(commitInfo.info.dir, commitInfo, IOContext.READONCE) : null; + newReaders[i] = new SegmentReader(commitInfo, oldReader, liveDocs, liveDocs, + commitInfo.info.maxDoc() - commitInfo.getDelCount(), false); } } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 85e6979fe10..4861929d2cb 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -3144,6 +3144,10 @@ public class TestIndexWriter extends LuceneTestCase { numSoftDeleted += info.getSoftDelCount(); } assertEquals(writer.maxDoc() - writer.numDocs(), numSoftDeleted); + for (LeafReaderContext context : reader.leaves()) { + LeafReader leaf = context.reader(); + assertNull(((SegmentReader) leaf).getHardLiveDocs()); + } writer.close(); reader.close(); dir.close(); @@ -3263,6 +3267,12 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(1, topDocs.totalHits); } } + if (mixDeletes == false) { + for (LeafReaderContext context : reader.leaves()) { + LeafReader leaf = context.reader(); + assertNull(((SegmentReader) leaf).getHardLiveDocs()); + } + } mergeAwaySoftDeletes.set(true); writer.addDocument(new Document()); // add a dummy doc to trigger a segment here writer.flush(); @@ -3524,4 +3534,69 @@ public class TestIndexWriter extends LuceneTestCase { w.close(); d.close(); } + + public void testSoftAndHardLiveDocs() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); + String softDeletesField = "soft_delete"; + indexWriterConfig.setSoftDeletesField(softDeletesField); + IndexWriter writer = new IndexWriter(dir, indexWriterConfig); + Set uniqueDocs = new HashSet<>(); + for (int i = 0; i < 100; i++) { + int docId = random().nextInt(5); + uniqueDocs.add(docId); + Document doc = new 
Document(); + doc.add(new StringField("id", String.valueOf(docId), Field.Store.YES)); + if (docId % 2 == 0) { + writer.updateDocument(new Term("id", String.valueOf(docId)), doc); + } else { + writer.softUpdateDocument(new Term("id", String.valueOf(docId)), doc, + new NumericDocValuesField(softDeletesField, 0)); + } + if (random().nextBoolean()) { + assertHardLiveDocs(writer, uniqueDocs); + } + } + + if (random().nextBoolean()) { + writer.commit(); + } + assertHardLiveDocs(writer, uniqueDocs); + + + IOUtils.close(writer, dir); + } + + private void assertHardLiveDocs(IndexWriter writer, Set uniqueDocs) throws IOException { + try (DirectoryReader reader = DirectoryReader.open(writer)) { + assertEquals(uniqueDocs.size(), reader.numDocs()); + List leaves = reader.leaves(); + for (LeafReaderContext ctx : leaves) { + LeafReader leaf = ctx.reader(); + assertTrue(leaf instanceof SegmentReader); + SegmentReader sr = (SegmentReader) leaf; + if (sr.getHardLiveDocs() != null) { + Terms id = sr.terms("id"); + TermsEnum iterator = id.iterator(); + Bits hardLiveDocs = sr.getHardLiveDocs(); + Bits liveDocs = sr.getLiveDocs(); + for (Integer dId : uniqueDocs) { + boolean mustBeHardDeleted = dId % 2 == 0; + if (iterator.seekExact(new BytesRef(dId.toString()))) { + PostingsEnum postings = iterator.postings(null); + while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + if (liveDocs.get(postings.docID())) { + assertTrue(hardLiveDocs.get(postings.docID())); + } else if (mustBeHardDeleted) { + assertFalse(hardLiveDocs.get(postings.docID())); + } else { + assertTrue(hardLiveDocs.get(postings.docID())); + } + } + } + } + } + } + } + } } diff --git a/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/SegmentInfosSearcherManager.java b/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/SegmentInfosSearcherManager.java index a04464aa9da..d18ee1029d9 100644 --- a/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/SegmentInfosSearcherManager.java +++ 
b/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/SegmentInfosSearcherManager.java @@ -107,7 +107,7 @@ class SegmentInfosSearcherManager extends ReferenceManager { DirectoryReader r = StandardDirectoryReader.open(dir, currentInfos, subs); addReaderClosedListener(r); node.message("refreshed to version=" + currentInfos.getVersion() + " r=" + r); - return SearcherManager.getSearcher(searcherFactory, r, (DirectoryReader) old.getIndexReader()); + return SearcherManager.getSearcher(searcherFactory, r, old.getIndexReader()); } private void addReaderClosedListener(IndexReader r) {