From 0941cae532ddf7b9af3df55c63941f547c769108 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 26 May 2018 22:14:19 +0200 Subject: [PATCH] LUCENE-8334: Ensure SR#getSementInfo() returns snapshot The SegmentCommitInfo passed to the segment reader is mutated concurrently. An instance obtained from SR#getSegmentInfo() might return wrong delete counts or generation ids. This ensures that the SR will use a clone internally while stil maintaining the original SI since it's needed inside IW for maintainance like accessing pooled readers etc. --- .../lucene/index/FrozenBufferedUpdates.java | 4 ++-- .../org/apache/lucene/index/IndexWriter.java | 4 ++-- .../org/apache/lucene/index/ReaderPool.java | 4 ++-- .../lucene/index/ReadersAndUpdates.java | 6 ++--- .../apache/lucene/index/SegmentReader.java | 21 ++++++++++++---- .../apache/lucene/index/TestIndexWriter.java | 24 +++++++++++++++++++ 6 files changed, 50 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index c69f212ac81..822a497b5e7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -397,7 +397,7 @@ final class FrozenBufferedUpdates { if (allDeleted == null) { allDeleted = new ArrayList<>(); } - allDeleted.add(segState.reader.getSegmentInfo()); + allDeleted.add(segState.reader.getOriginalSegmentInfo()); } } } @@ -452,7 +452,7 @@ final class FrozenBufferedUpdates { if (privateSegment != null) { assert segStates.length == 1; - assert privateSegment == segStates[0].reader.getSegmentInfo(); + assert privateSegment == segStates[0].reader.getOriginalSegmentInfo(); } totalDelCount += applyTermDeletes(segStates); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index a2ca1b7e7bf..96dfb64650d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1457,7 +1457,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders"); } - final SegmentCommitInfo info = ((SegmentReader) reader).getSegmentInfo(); + final SegmentCommitInfo info = ((SegmentReader) reader).getOriginalSegmentInfo(); // TODO: this is a slow linear search, but, number of // segments should be contained unless something is @@ -4306,7 +4306,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable, final boolean drop = suppressExceptions == false; try (Closeable finalizer = merge::mergeFinished) { IOUtils.applyToAll(merge.readers, sr -> { - final ReadersAndUpdates rld = getPooledInstance(sr.getSegmentInfo(), false); + final ReadersAndUpdates rld = getPooledInstance(sr.getOriginalSegmentInfo(), false); // We still hold a ref so it should not have been removed: assert rld != null; if (drop) { diff --git a/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java b/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java index cecc31035c6..861cfaf1c39 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReaderPool.java @@ -87,8 +87,8 @@ final class ReaderPool implements Closeable { SegmentReader segReader = (SegmentReader) leaf.reader(); SegmentReader newReader = new SegmentReader(segmentInfos.info(i), segReader, segReader.getLiveDocs(), segReader.numDocs()); - readerMap.put(newReader.getSegmentInfo(), new ReadersAndUpdates(segmentInfos.getIndexCreatedVersionMajor(), - newReader, newPendingDeletes(newReader, newReader.getSegmentInfo()))); + readerMap.put(newReader.getOriginalSegmentInfo(), new ReadersAndUpdates(segmentInfos.getIndexCreatedVersionMajor(), + newReader, newPendingDeletes(newReader, newReader.getOriginalSegmentInfo()))); } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java index 848cea73160..55585951454 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java @@ -99,7 +99,7 @@ final class ReadersAndUpdates { * *

NOTE: steals incoming ref from reader. */ ReadersAndUpdates(int indexCreatedVersionMajor, SegmentReader reader, PendingDeletes pendingDeletes) throws IOException { - this(indexCreatedVersionMajor, reader.getSegmentInfo(), pendingDeletes); + this(indexCreatedVersionMajor, reader.getOriginalSegmentInfo(), pendingDeletes); assert pendingDeletes.numPendingDeletes() >= 0 : "got " + pendingDeletes.numPendingDeletes() + " reader.numDeletedDocs()=" + reader.numDeletedDocs() + " info.getDelCount()=" + info.getDelCount() + " maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs(); this.reader = reader; @@ -200,7 +200,7 @@ final class ReadersAndUpdates { } public synchronized void release(SegmentReader sr) throws IOException { - assert info == sr.getSegmentInfo(); + assert info == sr.getOriginalSegmentInfo(); sr.decRef(); } @@ -235,7 +235,7 @@ final class ReadersAndUpdates { // force new liveDocs Bits liveDocs = pendingDeletes.getLiveDocs(); if (liveDocs != null) { - return new SegmentReader(reader.getSegmentInfo(), reader, liveDocs, + return new SegmentReader(info, reader, liveDocs, info.info.maxDoc() - info.getDelCount() - pendingDeletes.numPendingDeletes()); } else { // liveDocs == null and reader != null. That can only be if there are no deletes diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index dfc7c115bc6..9373718dcce 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -45,6 +45,10 @@ import org.apache.lucene.util.IOUtils; public final class SegmentReader extends CodecReader { private final SegmentCommitInfo si; + // this is the original SI that IW uses internally but it's mutated behind the scenes + // and we don't want this SI to be used for anything. Yet, IW needs this to do maintainance + // and lookup pooled readers etc. + private final SegmentCommitInfo originalSi; private final LeafMetaData metaData; private final Bits liveDocs; @@ -67,9 +71,9 @@ public final class SegmentReader extends CodecReader { * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - // TODO: why is this public? - public SegmentReader(SegmentCommitInfo si, int createdVersionMajor, IOContext context) throws IOException { - this.si = si; + SegmentReader(SegmentCommitInfo si, int createdVersionMajor, IOContext context) throws IOException { + this.si = si.clone(); + this.originalSi = si; this.metaData = new LeafMetaData(createdVersionMajor, si.info.getMinVersion(), si.info.getIndexSort()); // We pull liveDocs/DV updates from disk: @@ -133,7 +137,8 @@ public final class SegmentReader extends CodecReader { if (liveDocs != null && liveDocs.length() != si.info.maxDoc()) { throw new IllegalArgumentException("maxDoc=" + si.info.maxDoc() + " but liveDocs.size()=" + liveDocs.length()); } - this.si = si; + this.si = si.clone(); + this.originalSi = si; this.metaData = sr.getMetaData(); this.liveDocs = liveDocs; this.isNRT = isNRT; @@ -348,4 +353,12 @@ public final class SegmentReader extends CodecReader { public LeafMetaData getMetaData() { return metaData; } + + /** + * Returns the original SegmentInfo passed to the segment reader on creation time. + * {@link #getSegmentInfo()} returns a clone of this instance. + */ + SegmentCommitInfo getOriginalSegmentInfo() { + return originalSi; + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 570cb5be11c..ad3e37963c0 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -3352,4 +3352,28 @@ public class TestIndexWriter extends LuceneTestCase { IOUtils.close(writer, dir); } + public void testSegmentInfoIsSnapshot() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setRAMBufferSizeMB(Integer.MAX_VALUE); + config.setMaxBufferedDocs(2); // no auto flush + IndexWriter writer = new IndexWriter(dir, config); + Document d = new Document(); + d.add(new StringField("id", "doc-0", Field.Store.YES)); + writer.addDocument(d); + d = new Document(); + d.add(new StringField("id", "doc-1", Field.Store.YES)); + writer.addDocument(d); + DirectoryReader reader = writer.getReader(); + SegmentCommitInfo segmentInfo = ((SegmentReader) reader.leaves().get(0).reader()).getSegmentInfo(); + SegmentCommitInfo originalInfo = ((SegmentReader) reader.leaves().get(0).reader()).getOriginalSegmentInfo(); + assertEquals(0, originalInfo.getDelCount()); + assertEquals(0, segmentInfo.getDelCount()); + writer.deleteDocuments(new Term("id", "doc-0")); + writer.commit(); + assertEquals(0, segmentInfo.getDelCount()); + assertEquals(1, originalInfo.getDelCount()); + IOUtils.close(reader, writer, dir); + } + }