LUCENE-5931: detect when segments were (illegally) replaced when re-opening an IndexReader

This commit is contained in:
Mike McCandless 2016-06-12 16:56:54 -04:00
parent 4ead0dbcc6
commit 6b0b119074
4 changed files with 276 additions and 29 deletions

View File

@ -26,6 +26,10 @@ Improvements
IndexWriter.setIndexSort, and now works with dimensional points. IndexWriter.setIndexSort, and now works with dimensional points.
(Adrien Grand, Mike McCandless) (Adrien Grand, Mike McCandless)
* LUCENE-5931: Detect when an application tries to reopen an
IndexReader after (illegally) removing the old index and
reindexing (Vitaly Funstein, Robert Muir, Mike McCandless)
Other Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien * LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien

View File

@ -53,6 +53,9 @@ public final class SegmentReader extends CodecReader {
final SegmentCoreReaders core; final SegmentCoreReaders core;
final SegmentDocValues segDocValues; final SegmentDocValues segDocValues;
/** True if we are holding RAM only liveDocs or DV updates, i.e. the SegmentCommitInfo delGen doesn't match our liveDocs. */
final boolean isNRT;
final DocValuesProducer docValuesProducer; final DocValuesProducer docValuesProducer;
final FieldInfos fieldInfos; final FieldInfos fieldInfos;
@ -64,6 +67,10 @@ public final class SegmentReader extends CodecReader {
// TODO: why is this public? // TODO: why is this public?
public SegmentReader(SegmentCommitInfo si, IOContext context) throws IOException { public SegmentReader(SegmentCommitInfo si, IOContext context) throws IOException {
this.si = si; this.si = si;
// We pull liveDocs/DV updates from disk:
this.isNRT = false;
core = new SegmentCoreReaders(si.info.dir, si, context); core = new SegmentCoreReaders(si.info.dir, si, context);
segDocValues = new SegmentDocValues(); segDocValues = new SegmentDocValues();
@ -100,8 +107,8 @@ public final class SegmentReader extends CodecReader {
* deletes file. Used by openIfChanged. */ * deletes file. Used by openIfChanged. */
SegmentReader(SegmentCommitInfo si, SegmentReader sr) throws IOException { SegmentReader(SegmentCommitInfo si, SegmentReader sr) throws IOException {
this(si, sr, this(si, sr,
si.info.getCodec().liveDocsFormat().readLiveDocs(si.info.dir, si, IOContext.READONCE), si.hasDeletions() ? si.info.getCodec().liveDocsFormat().readLiveDocs(si.info.dir, si, IOContext.READONCE) : null,
si.info.maxDoc() - si.getDelCount()); si.info.maxDoc() - si.getDelCount(), false);
} }
/** Create new SegmentReader sharing core from a previous /** Create new SegmentReader sharing core from a previous
@ -109,6 +116,13 @@ public final class SegmentReader extends CodecReader {
* liveDocs. Used by IndexWriter to provide a new NRT * liveDocs. Used by IndexWriter to provide a new NRT
* reader */ * reader */
SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, int numDocs) throws IOException { SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, int numDocs) throws IOException {
this(si, sr, liveDocs, numDocs, true);
}
/** Create new SegmentReader sharing core from a previous
* SegmentReader and using the provided liveDocs, and recording
* whether those liveDocs were carried in ram (isNRT=true). */
SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, int numDocs, boolean isNRT) throws IOException {
if (numDocs > si.info.maxDoc()) { if (numDocs > si.info.maxDoc()) {
throw new IllegalArgumentException("numDocs=" + numDocs + " but maxDoc=" + si.info.maxDoc()); throw new IllegalArgumentException("numDocs=" + numDocs + " but maxDoc=" + si.info.maxDoc());
} }
@ -117,6 +131,7 @@ public final class SegmentReader extends CodecReader {
} }
this.si = si; this.si = si;
this.liveDocs = liveDocs; this.liveDocs = liveDocs;
this.isNRT = isNRT;
this.numDocs = numDocs; this.numDocs = numDocs;
this.core = sr.core; this.core = sr.core;
core.incRef(); core.incRef();

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
@ -152,7 +153,6 @@ public final class StandardDirectoryReader extends DirectoryReader {
} }
SegmentReader[] newReaders = new SegmentReader[infos.size()]; SegmentReader[] newReaders = new SegmentReader[infos.size()];
for (int i = infos.size() - 1; i>=0; i--) { for (int i = infos.size() - 1; i>=0; i--) {
SegmentCommitInfo commitInfo = infos.info(i); SegmentCommitInfo commitInfo = infos.info(i);
@ -167,6 +167,12 @@ public final class StandardDirectoryReader extends DirectoryReader {
oldReader = (SegmentReader) oldReaders.get(oldReaderIndex.intValue()); oldReader = (SegmentReader) oldReaders.get(oldReaderIndex.intValue());
} }
// Make a best effort to detect when the app illegally "rm -rf" their
// index while a reader was open, and then called openIfChanged:
if (oldReader != null && Arrays.equals(commitInfo.info.getId(), oldReader.getSegmentInfo().info.getId()) == false) {
throw new IllegalStateException("same segment " + commitInfo.info.name + " has invalid doc count change; likely you are re-opening a reader after illegally removing index files yourself and building a new index in their place. Use IndexWriter.deleteAll or open a new IndexWriter using OpenMode.CREATE instead");
}
boolean success = false; boolean success = false;
try { try {
SegmentReader newReader; SegmentReader newReader;
@ -176,35 +182,29 @@ public final class StandardDirectoryReader extends DirectoryReader {
newReader = new SegmentReader(commitInfo, IOContext.READ); newReader = new SegmentReader(commitInfo, IOContext.READ);
newReaders[i] = newReader; newReaders[i] = newReader;
} else { } else {
if (oldReader.getSegmentInfo().getDelGen() == commitInfo.getDelGen() if (oldReader.isNRT) {
&& oldReader.getSegmentInfo().getFieldInfosGen() == commitInfo.getFieldInfosGen()) { // We must load liveDocs/DV updates from disk:
// No change; this reader will be shared between newReaders[i] = new SegmentReader(commitInfo, oldReader);
// the old and the new one, so we must incRef
// it:
oldReader.incRef();
newReaders[i] = oldReader;
} else { } else {
// Steal the ref returned by SegmentReader ctor:
assert commitInfo.info.dir == oldReader.getSegmentInfo().info.dir;
// Make a best effort to detect when the app illegally "rm -rf" their if (oldReader.getSegmentInfo().getDelGen() == commitInfo.getDelGen()
// index while a reader was open, and then called openIfChanged: && oldReader.getSegmentInfo().getFieldInfosGen() == commitInfo.getFieldInfosGen()) {
boolean illegalDocCountChange = commitInfo.info.maxDoc() != oldReader.getSegmentInfo().info.maxDoc(); // No change; this reader will be shared between
// the old and the new one, so we must incRef
boolean hasNeitherDeletionsNorUpdates = commitInfo.hasDeletions()== false && commitInfo.hasFieldUpdates() == false; // it:
oldReader.incRef();
boolean deletesWereLost = commitInfo.getDelGen() == -1 && oldReader.getSegmentInfo().getDelGen() != -1; newReaders[i] = oldReader;
if (illegalDocCountChange || hasNeitherDeletionsNorUpdates || deletesWereLost) {
throw new IllegalStateException("same segment " + commitInfo.info.name + " has invalid changes; likely you are re-opening a reader after illegally removing index files yourself and building a new index in their place. Use IndexWriter.deleteAll or OpenMode.CREATE instead");
}
if (oldReader.getSegmentInfo().getDelGen() == commitInfo.getDelGen()) {
// only DV updates
newReaders[i] = new SegmentReader(commitInfo, oldReader, oldReader.getLiveDocs(), oldReader.numDocs());
} else { } else {
// both DV and liveDocs have changed // Steal the ref returned by SegmentReader ctor:
newReaders[i] = new SegmentReader(commitInfo, oldReader); assert commitInfo.info.dir == oldReader.getSegmentInfo().info.dir;
if (oldReader.getSegmentInfo().getDelGen() == commitInfo.getDelGen()) {
// only DV updates
newReaders[i] = new SegmentReader(commitInfo, oldReader, oldReader.getLiveDocs(), oldReader.numDocs());
} else {
// both DV and liveDocs have changed
newReaders[i] = new SegmentReader(commitInfo, oldReader);
}
} }
} }
} }

View File

@ -33,6 +33,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
@ -772,4 +773,231 @@ public class TestDirectoryReaderReopen extends LuceneTestCase {
r.close(); r.close();
dir.close(); dir.close();
} }
/** test reopening backwards from a non-NRT reader (with document deletes) */
public void testNRTMdeletes() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
iwc.setIndexDeletionPolicy(snapshotter);
IndexWriter writer = new IndexWriter(dir, iwc);
writer.commit(); // make sure all index metadata is written out
Document doc = new Document();
doc.add(new StringField("key", "value1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new StringField("key", "value2", Field.Store.YES));
writer.addDocument(doc);
writer.commit();
IndexCommit ic1 = snapshotter.snapshot();
doc = new Document();
doc.add(new StringField("key", "value3", Field.Store.YES));
writer.updateDocument(new Term("key", "value1"), doc);
writer.commit();
IndexCommit ic2 = snapshotter.snapshot();
DirectoryReader latest = DirectoryReader.open(ic2);
assertEquals(2, latest.leaves().size());
// This reader will be used for searching against commit point 1
DirectoryReader oldest = DirectoryReader.openIfChanged(latest, ic1);
assertEquals(1, oldest.leaves().size());
// sharing same core
assertSame(latest.leaves().get(0).reader().getCoreCacheKey(), oldest.leaves().get(0).reader().getCoreCacheKey());
latest.close();
oldest.close();
snapshotter.release(ic1);
snapshotter.release(ic2);
writer.close();
dir.close();
}
/** test reopening backwards from an NRT reader (with document deletes) */
public void testNRTMdeletes2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
iwc.setIndexDeletionPolicy(snapshotter);
IndexWriter writer = new IndexWriter(dir, iwc);
writer.commit(); // make sure all index metadata is written out
Document doc = new Document();
doc.add(new StringField("key", "value1", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new StringField("key", "value2", Field.Store.YES));
writer.addDocument(doc);
writer.commit();
IndexCommit ic1 = snapshotter.snapshot();
doc = new Document();
doc.add(new StringField("key", "value3", Field.Store.YES));
writer.updateDocument(new Term("key", "value1"), doc);
DirectoryReader latest = DirectoryReader.open(writer);
assertEquals(2, latest.leaves().size());
// This reader will be used for searching against commit point 1
DirectoryReader oldest = DirectoryReader.openIfChanged(latest, ic1);
// This reader should not see the deletion:
assertEquals(2, oldest.numDocs());
assertFalse(oldest.hasDeletions());
snapshotter.release(ic1);
assertEquals(1, oldest.leaves().size());
// sharing same core
assertSame(latest.leaves().get(0).reader().getCoreCacheKey(), oldest.leaves().get(0).reader().getCoreCacheKey());
latest.close();
oldest.close();
writer.close();
dir.close();
}
/** test reopening backwards from a non-NRT reader with DV updates */
public void testNRTMupdates() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
iwc.setIndexDeletionPolicy(snapshotter);
IndexWriter writer = new IndexWriter(dir, iwc);
writer.commit(); // make sure all index metadata is written out
Document doc = new Document();
doc.add(new StringField("key", "value1", Field.Store.YES));
doc.add(new NumericDocValuesField("dv", 1));
writer.addDocument(doc);
writer.commit();
IndexCommit ic1 = snapshotter.snapshot();
writer.updateNumericDocValue(new Term("key", "value1"), "dv", 2);
writer.commit();
IndexCommit ic2 = snapshotter.snapshot();
DirectoryReader latest = DirectoryReader.open(ic2);
assertEquals(1, latest.leaves().size());
// This reader will be used for searching against commit point 1
DirectoryReader oldest = DirectoryReader.openIfChanged(latest, ic1);
assertEquals(1, oldest.leaves().size());
// sharing same core
assertSame(latest.leaves().get(0).reader().getCoreCacheKey(), oldest.leaves().get(0).reader().getCoreCacheKey());
assertEquals(1, getOnlyLeafReader(oldest).getNumericDocValues("dv").get(0));
assertEquals(2, getOnlyLeafReader(latest).getNumericDocValues("dv").get(0));
latest.close();
oldest.close();
snapshotter.release(ic1);
snapshotter.release(ic2);
writer.close();
dir.close();
}
/** test reopening backwards from an NRT reader with DV updates */
public void testNRTMupdates2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
iwc.setIndexDeletionPolicy(snapshotter);
IndexWriter writer = new IndexWriter(dir, iwc);
writer.commit(); // make sure all index metadata is written out
Document doc = new Document();
doc.add(new StringField("key", "value1", Field.Store.YES));
doc.add(new NumericDocValuesField("dv", 1));
writer.addDocument(doc);
writer.commit();
IndexCommit ic1 = snapshotter.snapshot();
writer.updateNumericDocValue(new Term("key", "value1"), "dv", 2);
DirectoryReader latest = DirectoryReader.open(writer);
assertEquals(1, latest.leaves().size());
// This reader will be used for searching against commit point 1
DirectoryReader oldest = DirectoryReader.openIfChanged(latest, ic1);
assertEquals(1, oldest.leaves().size());
// sharing same core
assertSame(latest.leaves().get(0).reader().getCoreCacheKey(), oldest.leaves().get(0).reader().getCoreCacheKey());
assertEquals(1, getOnlyLeafReader(oldest).getNumericDocValues("dv").get(0));
assertEquals(2, getOnlyLeafReader(latest).getNumericDocValues("dv").get(0));
latest.close();
oldest.close();
snapshotter.release(ic1);
writer.close();
dir.close();
}
// LUCENE-5931: we make a "best effort" to catch this abuse and throw a clear(er)
// exception than what would otherwise look like hard to explain index corruption during searching
public void testDeleteIndexFilesWhileReaderStillOpen() throws Exception {
RAMDirectory dir = new RAMDirectory();
IndexWriter w = new IndexWriter(dir,
new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(newStringField("field", "value", Field.Store.NO));
w.addDocument(doc);
// Creates single segment index:
w.close();
DirectoryReader r = DirectoryReader.open(dir);
// Abuse: remove all files while reader is open; one is supposed to use IW.deleteAll, or open a new IW with OpenMode.CREATE instead:
for(String file : dir.listAll()) {
dir.deleteFile(file);
}
w = new IndexWriter(dir,
new IndexWriterConfig(new MockAnalyzer(random())));
doc = new Document();
doc.add(newStringField("field", "value", Field.Store.NO));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("field", "value2", Field.Store.NO));
w.addDocument(doc);
// Writes same segment, this time with two documents:
w.commit();
w.deleteDocuments(new Term("field", "value2"));
w.addDocument(doc);
// Writes another segments file, so openIfChanged sees that the index has in fact changed:
w.close();
expectThrows(IllegalStateException.class, () -> {
DirectoryReader.openIfChanged(r);
});
}
} }