LUCENE-4738: simplify DirectoryReader.indexExists; fix IndexWriter with CREATE to succeed on a corrupted index; add random IOExceptions to MockDirectoryWrapper.openInput/createOutput

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466706 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-04-10 22:02:24 +00:00
parent 67d433ca28
commit 6e24f5adf4
13 changed files with 350 additions and 88 deletions

View File

@ -261,6 +261,15 @@ Bug Fixes
and one of the sort fields is the relevance score. Only IndexSearchers created
with an ExecutorService are concerned. (Adrien Grand)
* LUCENE-4738, LUCENE-2727, LUCENE-2812: Simplified
DirectoryReader.indexExists so that it's more robust to transient
IOExceptions (e.g. due to issues like file descriptor exhaustion),
but this will also cause it to err towards returning true for
example if the directory contains a corrupted index or an incomplete
initial commit. In addition, IndexWriter with OpenMode.CREATE will
now succeed even if the directory contains a corrupted index (Billow
Gao, Robert Muir, Mike McCandless)
Documentation
* LUCENE-4841: Added example SimpleSortedSetFacetsExample to show how

View File

@ -23,9 +23,9 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.SegmentInfos.FindSegmentsFile;
import org.apache.lucene.search.SearcherManager; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoSuchDirectoryException;
/** DirectoryReader is an implementation of {@link CompositeReader}
that can read indexes in a {@link Directory}.
@ -314,34 +314,45 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
}
/**
* Returns <code>true</code> if an index exists at the specified directory.
* Returns <code>true</code> if an index likely exists at
* the specified directory. Note that if a corrupt index
* exists, or if an index in the process of committing
* @param directory the directory to check for an index
* @return <code>true</code> if an index exists; <code>false</code> otherwise
*/
public static boolean indexExists(Directory directory) {
public static boolean indexExists(Directory directory) throws IOException {
// LUCENE-2812, LUCENE-2727, LUCENE-4738: this logic will
// return true in cases that should arguably be false,
// such as only IW.prepareCommit has been called, or a
// corrupt first commit, but it's too deadly to make
// this logic "smarter" and risk accidentally returning
// false due to various cases like file description
// exhaustion, access denited, etc., because in that
// case IndexWriter may delete the entire index. It's
// safer to err towards "index exists" than try to be
// smart about detecting not-yet-fully-committed or
// corrupt indices. This means that IndexWriter will
// throw an exception on such indices and the app must
// resolve the situation manually:
String[] files;
try {
new FindSegmentsFile(directory) {
@Override
protected Object doBody(String segmentFileName) throws IOException {
try {
new SegmentInfos().read(directory, segmentFileName);
} catch (FileNotFoundException ex) {
if (!directory.fileExists(segmentFileName)) {
throw ex;
}
/* this is ok - we might have run into a access exception here.
* or even worse like on LUCENE-4870 this is triggered due to
* too many open files on the system. In that case we rather report
* a false positive here since wrongly returning false from indexExist
* can cause data loss since IW relies on this.*/
}
return null;
}
}.run();
return true;
} catch (IOException ioe) {
files = directory.listAll();
} catch (NoSuchDirectoryException nsde) {
// Directory does not exist --> no index exists
return false;
}
// Defensive: maybe a Directory impl returns null
// instead of throwing NoSuchDirectoryException:
if (files != null) {
String prefix = IndexFileNames.SEGMENTS + "_";
for(String file : files) {
if (file.startsWith(prefix) || file.equals(IndexFileNames.SEGMENTS_GEN)) {
return true;
}
}
}
return false;
}
/**

View File

@ -123,7 +123,7 @@ final class IndexFileDeleter implements Closeable {
* @throws IOException if there is a low-level IO error
*/
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos,
InfoStream infoStream, IndexWriter writer) throws IOException {
InfoStream infoStream, IndexWriter writer, boolean initialIndexExists) throws IOException {
this.infoStream = infoStream;
this.writer = writer;
@ -209,7 +209,7 @@ final class IndexFileDeleter implements Closeable {
}
}
if (currentCommitPoint == null && currentSegmentsFile != null) {
if (currentCommitPoint == null && currentSegmentsFile != null && initialIndexExists) {
// We did not in fact see the segments_N file
// corresponding to the segmentInfos that was passed
// in. Yet, it must exist, because our caller holds
@ -221,7 +221,7 @@ final class IndexFileDeleter implements Closeable {
try {
sis.read(directory, currentSegmentsFile);
} catch (IOException e) {
throw new CorruptIndexException("failed to locate current segments_N file");
throw new CorruptIndexException("failed to locate current segments_N file \"" + currentSegmentsFile + "\"");
}
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "forced open of current segments file " + segmentInfos.getSegmentsFileName());

View File

@ -659,6 +659,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// IndexFormatTooOldException.
segmentInfos = new SegmentInfos();
boolean initialIndexExists = true;
if (create) {
// Try to read first. This is to allow create
// against an index that's currently open for
@ -669,6 +671,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
segmentInfos.clear();
} catch (IOException e) {
// Likely this means it's a fresh directory
initialIndexExists = false;
}
// Record that we have a change (zero out all
@ -709,7 +712,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
synchronized(this) {
deleter = new IndexFileDeleter(directory,
config.getIndexDeletionPolicy(),
segmentInfos, infoStream, this);
segmentInfos, infoStream, this,
initialIndexExists);
}
if (deleter.startingCommitDeleted) {

View File

@ -184,7 +184,7 @@ public abstract class Directory implements Closeable {
* <pre class="prettyprint">
* Directory to; // the directory to copy to
* for (String file : dir.listAll()) {
* dir.copy(to, file, newFile); // newFile can be either file, or a new name
* dir.copy(to, file, newFile, IOContext.DEFAULT); // newFile can be either file, or a new name
* }
* </pre>
* <p>

View File

@ -914,19 +914,6 @@ public void testFilesOpenClose() throws IOException {
dir.close();
}
// LUCENE-2812
public void testIndexExists() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
writer.addDocument(new Document());
writer.prepareCommit();
assertFalse(DirectoryReader.indexExists(dir));
writer.commit();
writer.close();
assertTrue(DirectoryReader.indexExists(dir));
dir.close();
}
// Make sure totalTermFreq works correctly in the terms
// dict cache
public void testTotalTermFreqCached() throws Exception {
@ -1149,4 +1136,12 @@ public void testFilesOpenClose() throws IOException {
dir.close();
}
public void testIndexExistsOnNonExistentDirectory() throws Exception {
File tempDir = _TestUtil.getTempDir("testIndexExistsOnNonExistentDirectory");
tempDir.delete();
Directory dir = newFSDirectory(tempDir);
System.out.println("dir=" + dir);
assertFalse(DirectoryReader.indexExists(dir));
dir.close();
}
}

View File

@ -52,7 +52,9 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
@ -1477,7 +1479,7 @@ public class TestIndexWriter extends LuceneTestCase {
}
public void testNoSegmentFile() throws IOException {
Directory dir = newDirectory();
BaseDirectoryWrapper dir = newDirectory();
dir.setLockFactory(NoLockFactory.getNoLockFactory());
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(2));
@ -1497,6 +1499,10 @@ public class TestIndexWriter extends LuceneTestCase {
w2.close();
// If we don't do that, the test fails on Windows
w.rollback();
// This test leaves only segments.gen, which causes
// DirectoryReader.indexExists to return true:
dir.setCheckIndexOnClose(false);
dir.close();
}
@ -2115,5 +2121,52 @@ public class TestIndexWriter extends LuceneTestCase {
}
}
// LUCENE-2727/LUCENE-2812/LUCENE-4738:
public void testCorruptFirstCommit() throws Exception {
for(int i=0;i<6;i++) {
BaseDirectoryWrapper dir = newDirectory();
dir.createOutput("segments_0", IOContext.DEFAULT).close();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
int mode = i/2;
if (mode == 0) {
iwc.setOpenMode(OpenMode.CREATE);
} else if (mode == 1) {
iwc.setOpenMode(OpenMode.APPEND);
} else if (mode == 2) {
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
if (VERBOSE) {
System.out.println("\nTEST: i=" + i);
}
try {
if ((i & 1) == 0) {
new IndexWriter(dir, iwc).close();
} else {
new IndexWriter(dir, iwc).rollback();
}
if (mode != 0) {
fail("expected exception");
}
} catch (IOException ioe) {
// OpenMode.APPEND should throw an exception since no
// index exists:
if (mode == 0) {
// Unexpected
throw ioe;
}
}
if (VERBOSE) {
System.out.println(" at close: " + Arrays.toString(dir.listAll()));
}
if (mode != 0) {
dir.setCheckIndexOnClose(false);
}
dir.close();
}
}
}

View File

@ -40,7 +40,6 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

View File

@ -1496,7 +1496,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
if (doFail && name.startsWith("segments_")) {
StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
if ("indexExists".equals(trace[i].getMethodName())) {
if ("read".equals(trace[i].getMethodName())) {
throw new UnsupportedOperationException("expected UOE");
}
}
@ -1516,8 +1516,8 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
fail("should have gotten a UOE");
} catch (UnsupportedOperationException expected) {
}
uoe.doFail = false;
d.close();
}

View File

@ -0,0 +1,158 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util._TestUtil;
public class TestIndexWriterOutOfFileDescriptors extends LuceneTestCase {
public void test() throws Exception {
MockDirectoryWrapper dir = newMockFSDirectory(_TestUtil.getTempDir("TestIndexWriterOutOfFileDescriptors"));
dir.setPreventDoubleWrite(false);
double rate = random().nextDouble()*0.01;
//System.out.println("rate=" + rate);
dir.setRandomIOExceptionRateOnOpen(rate);
int iters = atLeast(20);
LineFileDocs docs = new LineFileDocs(random());
IndexReader r = null;
DirectoryReader r2 = null;
boolean any = false;
MockDirectoryWrapper dirCopy = null;
int lastNumDocs = 0;
for(int iter=0;iter<iters;iter++) {
IndexWriter w = null;
if (VERBOSE) {
System.out.println("TEST: iter=" + iter);
}
try {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
if (VERBOSE) {
// Do this ourselves instead of relying on LTC so
// we see incrementing messageID:
iwc.setInfoStream(new PrintStreamInfoStream(System.out));
}
MergeScheduler ms = iwc.getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).setSuppressExceptions();
}
w = new IndexWriter(dir, iwc);
if (r != null && random().nextInt(5) == 3) {
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: addIndexes IR[]");
}
w.addIndexes(new IndexReader[] {r});
} else {
if (VERBOSE) {
System.out.println("TEST: addIndexes Directory[]");
}
w.addIndexes(new Directory[] {dirCopy});
}
} else {
if (VERBOSE) {
System.out.println("TEST: addDocument");
}
w.addDocument(docs.nextDoc());
}
w.close();
w = null;
// NOTE: This is O(N^2)! Only enable for temporary debugging:
//dir.setRandomIOExceptionRateOnOpen(0.0);
//_TestUtil.checkIndex(dir);
//dir.setRandomIOExceptionRateOnOpen(rate);
// Verify numDocs only increases, to catch IndexWriter
// accidentally deleting the index:
dir.setRandomIOExceptionRateOnOpen(0.0);
assertTrue(DirectoryReader.indexExists(dir));
if (r2 == null) {
r2 = DirectoryReader.open(dir);
} else {
DirectoryReader r3 = DirectoryReader.openIfChanged(r2);
if (r3 != null) {
r2.close();
r2 = r3;
}
}
assertTrue("before=" + lastNumDocs + " after=" + r2.numDocs(), r2.numDocs() >= lastNumDocs);
lastNumDocs = r2.numDocs();
//System.out.println("numDocs=" + lastNumDocs);
dir.setRandomIOExceptionRateOnOpen(rate);
any = true;
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + ": success");
}
} catch (IOException ioe) {
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + ": exception");
ioe.printStackTrace();
}
if (w != null) {
// NOTE: leave random IO exceptions enabled here,
// to verify that rollback does not try to write
// anything:
w.rollback();
}
}
if (any && r == null && random().nextBoolean()) {
// Make a copy of a non-empty index so we can use
// it to addIndexes later:
dir.setRandomIOExceptionRateOnOpen(0.0);
r = DirectoryReader.open(dir);
dirCopy = newMockFSDirectory(_TestUtil.getTempDir("TestIndexWriterOutOfFileDescriptors.copy"));
Set<String> files = new HashSet<String>();
for (String file : dir.listAll()) {
dir.copy(dirCopy, file, file, IOContext.DEFAULT);
files.add(file);
}
dirCopy.sync(files);
// Have IW kiss the dir so we remove any leftover
// files ... we can easily have leftover files at
// the time we take a copy because we are holding
// open a reader:
new IndexWriter(dirCopy, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))).close();
dirCopy.setRandomIOExceptionRate(rate);
dir.setRandomIOExceptionRateOnOpen(rate);
}
}
if (r2 != null) {
r2.close();
}
if (r != null) {
r.close();
dirCopy.close();
}
dir.close();
}
}

View File

@ -181,13 +181,27 @@ public class TestTransactions extends LuceneTestCase {
@Override
public void doWork() throws Throwable {
IndexReader r1, r2;
IndexReader r1=null, r2=null;
synchronized(lock) {
r1 = DirectoryReader.open(dir1);
r2 = DirectoryReader.open(dir2);
try {
r1 = DirectoryReader.open(dir1);
r2 = DirectoryReader.open(dir2);
} catch (IOException e) {
if (!e.getMessage().contains("on purpose")) {
throw e;
}
if (r1 != null) {
r1.close();
}
if (r2 != null) {
r2.close();
}
return;
}
}
if (r1.numDocs() != r2.numDocs())
if (r1.numDocs() != r2.numDocs()) {
throw new RuntimeException("doc counts differ: r1=" + r1.numDocs() + " r2=" + r2.numDocs());
}
r1.close();
r2.close();
}

View File

@ -20,6 +20,7 @@ package org.apache.lucene.store;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.util._TestUtil;
/**
@ -42,7 +43,7 @@ public class BaseDirectoryWrapper extends Directory {
@Override
public void close() throws IOException {
isOpen = false;
if (checkIndexOnClose && indexPossiblyExists()) {
if (checkIndexOnClose && DirectoryReader.indexExists(this)) {
_TestUtil.checkIndex(this, crossCheckTermVectorsOnClose);
}
delegate.close();
@ -52,27 +53,6 @@ public class BaseDirectoryWrapper extends Directory {
return isOpen;
}
/**
* don't rely upon DirectoryReader.fileExists to determine if we should
* checkIndex() or not. It might mask real problems, where we silently
* don't checkindex at all. instead we look for a segments file.
*/
protected boolean indexPossiblyExists() {
String files[];
try {
files = listAll();
} catch (IOException ex) {
// this means directory doesn't exist, which is ok. return false
return false;
}
for (String f : files) {
if (f.startsWith("segments_")) {
return true;
}
}
return false;
}
/**
* Set whether or not checkindex should be run
* on close

View File

@ -67,6 +67,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
// Max actual bytes used. This is set by MockRAMOutputStream:
long maxUsedSize;
double randomIOExceptionRate;
double randomIOExceptionRateOnOpen;
Random randomState;
boolean noDeleteOpenFile = true;
boolean preventDoubleWrite = true;
@ -322,23 +323,50 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
public void setRandomIOExceptionRate(double rate) {
randomIOExceptionRate = rate;
}
public double getRandomIOExceptionRate() {
return randomIOExceptionRate;
}
/**
* If 0.0, no exceptions will be thrown during openInput
* and createOutput. Else this should
* be a double 0.0 - 1.0 and we will randomly throw an
* IOException in openInput and createOutput with
* this probability.
*/
public void setRandomIOExceptionRateOnOpen(double rate) {
randomIOExceptionRateOnOpen = rate;
}
public double getRandomIOExceptionRateOnOpen() {
return randomIOExceptionRateOnOpen;
}
void maybeThrowIOException() throws IOException {
maybeThrowIOException(null);
}
void maybeThrowIOException(String message) throws IOException {
if (randomIOExceptionRate > 0.0) {
int number = Math.abs(randomState.nextInt() % 1000);
if (number < randomIOExceptionRate*1000) {
if (LuceneTestCase.VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception" + (message == null ? "" : " (" + message + ")"));
new Throwable().printStackTrace(System.out);
}
throw new IOException("a random IOException" + (message == null ? "" : "(" + message + ")"));
if (randomState.nextDouble() < randomIOExceptionRate) {
if (LuceneTestCase.VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception" + (message == null ? "" : " (" + message + ")"));
new Throwable().printStackTrace(System.out);
}
throw new IOException("a random IOException" + (message == null ? "" : "(" + message + ")"));
}
}
void maybeThrowIOExceptionOnOpen() throws IOException {
if (randomState.nextDouble() < randomIOExceptionRateOnOpen) {
if (LuceneTestCase.VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception during open");
new Throwable().printStackTrace(System.out);
}
if (randomState.nextBoolean()) {
throw new IOException("a random IOException");
} else {
throw new FileNotFoundException("a random IOException");
}
}
}
@ -403,22 +431,28 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
@Override
public synchronized IndexOutput createOutput(String name, IOContext context) throws IOException {
maybeThrowDeterministicException();
maybeThrowIOExceptionOnOpen();
maybeYield();
if (failOnCreateOutput) {
maybeThrowDeterministicException();
}
if (crashed)
if (crashed) {
throw new IOException("cannot createOutput after crash");
}
init();
synchronized(this) {
if (preventDoubleWrite && createdFiles.contains(name) && !name.equals("segments.gen"))
if (preventDoubleWrite && createdFiles.contains(name) && !name.equals("segments.gen")) {
throw new IOException("file \"" + name + "\" was already written to");
}
}
if (noDeleteOpenFile && openFiles.containsKey(name))
if (noDeleteOpenFile && openFiles.containsKey(name)) {
throw new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open: cannot overwrite");
}
if (crashed)
if (crashed) {
throw new IOException("cannot createOutput after crash");
}
unSyncedFiles.add(name);
createdFiles.add(name);
@ -428,9 +462,9 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
RAMFile existing = ramdir.fileMap.get(name);
// Enforce write once:
if (existing!=null && !name.equals("segments.gen") && preventDoubleWrite)
if (existing!=null && !name.equals("segments.gen") && preventDoubleWrite) {
throw new IOException("file " + name + " already exists");
else {
} else {
if (existing!=null) {
ramdir.sizeInBytes.getAndAdd(-existing.sizeInBytes);
existing.directory = null;
@ -484,6 +518,8 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
@Override
public synchronized IndexInput openInput(String name, IOContext context) throws IOException {
maybeThrowDeterministicException();
maybeThrowIOExceptionOnOpen();
maybeYield();
if (failOnOpenInput) {
maybeThrowDeterministicException();
@ -587,9 +623,12 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
if (noDeleteOpenFile && openLocks.size() > 0) {
throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open locks: " + openLocks);
}
isOpen = false;
if (getCheckIndexOnClose()) {
if (indexPossiblyExists()) {
randomIOExceptionRate = 0.0;
randomIOExceptionRateOnOpen = 0.0;
if (DirectoryReader.indexExists(this)) {
if (LuceneTestCase.VERBOSE) {
System.out.println("\nNOTE: MockDirectoryWrapper: now crash");
}
@ -793,7 +832,7 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
}
}
}
@Override
public synchronized String[] listAll() throws IOException {
maybeYield();