[STORE] Don't catch FNF/NSF exception when reading metadata

When reading metadata we do catch FileNotFound and NoSuchFileExceptions
today, log the event and return an empty metadata object. Yet, in some cases
this might be the wrong thing to do, i.e. if a commit point is provided these
situations are actually an error and should be rethrown. This commit
pushes the responsibility to the caller to handle this exception.

Closes #8207
This commit is contained in:
Simon Willnauer 2014-10-23 14:10:47 +02:00
parent 4a14c635c8
commit c09af6df61
6 changed files with 84 additions and 42 deletions

View File

@ -22,9 +22,11 @@ package org.elasticsearch.index.snapshots.blobstore;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.io.ByteStreams;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.*; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RateLimiter;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.ExceptionsHelper;
@ -32,7 +34,10 @@ import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.metadata.SnapshotId; import org.elasticsearch.cluster.metadata.SnapshotId;
import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.blobstore.*; import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.Lucene;
@ -52,7 +57,10 @@ import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.repositories.RepositoryName; import org.elasticsearch.repositories.RepositoryName;
import org.elasticsearch.repositories.RepositoryVerificationException; import org.elasticsearch.repositories.RepositoryVerificationException;
import java.io.*; import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.*; import java.util.*;
import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CopyOnWriteArrayList;
@ -696,7 +704,7 @@ public class BlobStoreIndexShardRepository extends AbstractComponent implements
/** /**
* Performs restore operation * Performs restore operation
*/ */
public void restore() { public void restore() throws IOException {
store.incRef(); store.incRef();
try { try {
logger.debug("[{}] [{}] restoring to [{}] ...", snapshotId, repositoryName, shardId); logger.debug("[{}] [{}] restoring to [{}] ...", snapshotId, repositoryName, shardId);
@ -707,15 +715,12 @@ public class BlobStoreIndexShardRepository extends AbstractComponent implements
long totalSize = 0; long totalSize = 0;
int numberOfReusedFiles = 0; int numberOfReusedFiles = 0;
long reusedTotalSize = 0; long reusedTotalSize = 0;
Store.MetadataSnapshot recoveryTargetMetadata = Store.MetadataSnapshot.EMPTY; final Store.MetadataSnapshot recoveryTargetMetadata;
try { try {
recoveryTargetMetadata = store.getMetadata(); recoveryTargetMetadata = store.getMetadataOrEmpty();
} catch (CorruptIndexException e) { } catch (CorruptIndexException e) {
logger.warn("{} Can't read metadata from store", e, shardId); logger.warn("{} Can't read metadata from store", e, shardId);
throw new IndexShardRestoreFailedException(shardId, "Can't restore corrupted shard", e); throw new IndexShardRestoreFailedException(shardId, "Can't restore corrupted shard", e);
} catch (Throwable e) {
// if the index is broken we might not be able to read it
logger.warn("{} Can't read metadata from store", e, shardId);
} }
final List<FileInfo> filesToRecover = Lists.newArrayList(); final List<FileInfo> filesToRecover = Lists.newArrayList();

View File

@ -145,8 +145,31 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
} }
} }
/**
* Returns a new MetadataSnapshot for the latest commit in this store or
* an empty snapshot if no index exists or can not be opened.
* @throws CorruptIndexException if the lucene index is corrupted. This can be caused by a checksum mismatch or an
* unexpected exception when opening the index reading the segments file.
*/
public MetadataSnapshot getMetadataOrEmpty() throws IOException {
try {
return getMetadata(null);
} catch (IndexNotFoundException ex) {
// that's fine - happens all the time no need to log
} catch (FileNotFoundException | NoSuchFileException ex) {
logger.info("Failed to open / find files while reading metadata snapshot");
}
return MetadataSnapshot.EMPTY;
}
/** /**
* Returns a new MetadataSnapshot for the latest commit in this store. * Returns a new MetadataSnapshot for the latest commit in this store.
*
* @throws CorruptIndexException if the lucene index is corrupted. This can be caused by a checksum mismatch or an
* unexpected exception when opening the index reading the segments file.
* @throws FileNotFoundException if one or more files referenced by a commit are not present.
* @throws NoSuchFileException if one or more files referenced by a commit are not present.
* @throws IndexNotFoundException if no index / valid commit-point can be found in this store
*/ */
public MetadataSnapshot getMetadata() throws IOException { public MetadataSnapshot getMetadata() throws IOException {
return getMetadata(null); return getMetadata(null);
@ -155,6 +178,12 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
/** /**
* Returns a new MetadataSnapshot for the given commit. If the given commit is <code>null</code> * Returns a new MetadataSnapshot for the given commit. If the given commit is <code>null</code>
* the latest commit point is used. * the latest commit point is used.
*
* @throws CorruptIndexException if the lucene index is corrupted. This can be caused by a checksum mismatch or an
* unexpected exception when opening the index reading the segments file.
* @throws FileNotFoundException if one or more files referenced by a commit are not present.
* @throws NoSuchFileException if one or more files referenced by a commit are not present.
* @throws IndexNotFoundException if the commit point can't be found in this store
*/ */
public MetadataSnapshot getMetadata(IndexCommit commit) throws IOException { public MetadataSnapshot getMetadata(IndexCommit commit) throws IOException {
ensureOpen(); ensureOpen();
@ -266,6 +295,10 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
} }
/**
* Reads a MetadataSnapshot from the given index locations or returns an empty snapshot if it can't be read.
* @throws IOException if the index we try to read is corrupted
*/
public static MetadataSnapshot readMetadataSnapshot(File[] indexLocations, ESLogger logger) throws IOException { public static MetadataSnapshot readMetadataSnapshot(File[] indexLocations, ESLogger logger) throws IOException {
final Directory[] dirs = new Directory[indexLocations.length]; final Directory[] dirs = new Directory[indexLocations.length];
try { try {
@ -275,9 +308,14 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
DistributorDirectory dir = new DistributorDirectory(dirs); DistributorDirectory dir = new DistributorDirectory(dirs);
failIfCorrupted(dir, new ShardId("", 1)); failIfCorrupted(dir, new ShardId("", 1));
return new MetadataSnapshot(null, dir, logger); return new MetadataSnapshot(null, dir, logger);
} catch (IndexNotFoundException ex) {
// that's fine - happens all the time no need to log
} catch (FileNotFoundException | NoSuchFileException ex) {
logger.info("Failed to open / find files while reading metadata snapshot");
} finally { } finally {
IOUtils.close(dirs); IOUtils.close(dirs);
} }
return MetadataSnapshot.EMPTY;
} }
/** /**
@ -489,10 +527,6 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
} }
} catch (CorruptIndexException ex) { } catch (CorruptIndexException ex) {
throw ex; throw ex;
} catch (FileNotFoundException | NoSuchFileException ex) {
// can't open index | no commit present -- we might open a snapshot index that is not fully restored?
logger.warn("Can't open file to read checksums", ex);
return ImmutableMap.of();
} catch (Throwable ex) { } catch (Throwable ex) {
try { try {
// Lucene checks the checksum after it tries to lookup the codec etc. // Lucene checks the checksum after it tries to lookup the codec etc.

View File

@ -67,10 +67,7 @@ import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.indices.recovery.RecoveryTarget; import org.elasticsearch.indices.recovery.RecoveryTarget;
import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPool;
import java.util.HashMap; import java.util.*;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
@ -729,7 +726,6 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
// For replicas: we are recovering a backup from a primary // For replicas: we are recovering a backup from a primary
RecoveryState.Type type = shardRouting.primary() ? RecoveryState.Type.RELOCATION : RecoveryState.Type.REPLICA; RecoveryState.Type type = shardRouting.primary() ? RecoveryState.Type.RELOCATION : RecoveryState.Type.REPLICA;
recoveryTarget.startRecovery(indexShard, type, sourceNode, new PeerRecoveryListener(shardRouting, indexService, indexMetaData)); recoveryTarget.startRecovery(indexShard, type, sourceNode, new PeerRecoveryListener(shardRouting, indexService, indexMetaData));
} catch (Throwable e) { } catch (Throwable e) {
indexShard.engine().failEngine("corrupted preexisting index", e); indexShard.engine().failEngine("corrupted preexisting index", e);
handleRecoveryFailure(indexService, indexMetaData, shardRouting, true, e); handleRecoveryFailure(indexService, indexMetaData, shardRouting, true, e);

View File

@ -19,7 +19,6 @@
package org.elasticsearch.indices.recovery; package org.elasticsearch.indices.recovery;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
@ -155,7 +154,7 @@ public class RecoveryTarget extends AbstractComponent {
logger.trace("collecting local files for {}", recoveryStatus); logger.trace("collecting local files for {}", recoveryStatus);
final Map<String, StoreFileMetaData> existingFiles; final Map<String, StoreFileMetaData> existingFiles;
try { try {
existingFiles = recoveryStatus.store().getMetadata().asMap(); existingFiles = recoveryStatus.store().getMetadataOrEmpty().asMap();
} catch (Exception e) { } catch (Exception e) {
logger.debug("error while listing local files, recovery as if there are none", e); logger.debug("error while listing local files, recovery as if there are none", e);
onGoingRecoveries.failRecovery(recoveryStatus.recoveryId(), onGoingRecoveries.failRecovery(recoveryStatus.recoveryId(),

View File

@ -50,10 +50,7 @@ import org.elasticsearch.transport.TransportService;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator; import java.util.*;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.concurrent.atomic.AtomicReferenceArray;
/** /**
@ -149,10 +146,10 @@ public class TransportNodesListShardStoreMetaData extends TransportNodesOperatio
if (indexService != null) { if (indexService != null) {
InternalIndexShard indexShard = (InternalIndexShard) indexService.shard(shardId.id()); InternalIndexShard indexShard = (InternalIndexShard) indexService.shard(shardId.id());
if (indexShard != null) { if (indexShard != null) {
Store store = indexShard.store(); final Store store = indexShard.store();
store.incRef(); store.incRef();
try { try {
return new StoreFilesMetaData(true, shardId, indexShard.store().getMetadata().asMap()); return new StoreFilesMetaData(true, shardId, store.getMetadataOrEmpty().asMap());
} finally { } finally {
store.decRef(); store.decRef();
} }
@ -182,8 +179,7 @@ public class TransportNodesListShardStoreMetaData extends TransportNodesOperatio
if (!exists) { if (!exists) {
return new StoreFilesMetaData(false, shardId, ImmutableMap.<String, StoreFileMetaData>of()); return new StoreFilesMetaData(false, shardId, ImmutableMap.<String, StoreFileMetaData>of());
} }
final Store.MetadataSnapshot storeFileMetaDatas = Store.readMetadataSnapshot(shardIndexLocations, logger); return new StoreFilesMetaData(false, shardId, Store.readMetadataSnapshot(shardIndexLocations, logger).asMap());
return new StoreFilesMetaData(false, shardId, storeFileMetaDatas.asMap());
} }
@Override @Override

View File

@ -40,9 +40,7 @@ import java.io.IOException;
import java.nio.file.NoSuchFileException; import java.nio.file.NoSuchFileException;
import java.util.*; import java.util.*;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean; import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
import static org.hamcrest.Matchers.*; import static org.hamcrest.Matchers.*;
public class StoreTest extends ElasticsearchLuceneTestCase { public class StoreTest extends ElasticsearchLuceneTestCase {
@ -184,9 +182,15 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
if (random().nextBoolean()) { if (random().nextBoolean()) {
DirectoryReader.open(writer, random().nextBoolean()).close(); // flush DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
} }
Store.MetadataSnapshot metadata;
// check before we committed // check before we committed
Store.MetadataSnapshot metadata = store.getMetadata(); try {
assertThat(metadata.asMap().isEmpty(), is(true)); // nothing committed store.getMetadata();
fail("no index present - expected exception");
} catch (IndexNotFoundException ex) {
// expected
}
assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
writer.close(); writer.close();
Store.LegacyChecksums checksums = new Store.LegacyChecksums(); Store.LegacyChecksums checksums = new Store.LegacyChecksums();
@ -247,10 +251,15 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
if (random().nextBoolean()) { if (random().nextBoolean()) {
DirectoryReader.open(writer, random().nextBoolean()).close(); // flush DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
} }
Store.MetadataSnapshot metadata;
// check before we committed // check before we committed
Store.MetadataSnapshot metadata = store.getMetadata(); try {
assertThat(metadata.asMap().isEmpty(), is(true)); // nothing committed store.getMetadata();
fail("no index present - expected exception");
} catch (IndexNotFoundException ex) {
// expected
}
assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
writer.commit(); writer.commit();
writer.close(); writer.close();
metadata = store.getMetadata(); metadata = store.getMetadata();
@ -302,9 +311,15 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
if (random().nextBoolean()) { if (random().nextBoolean()) {
DirectoryReader.open(writer, random().nextBoolean()).close(); // flush DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
} }
Store.MetadataSnapshot metadata;
// check before we committed // check before we committed
Store.MetadataSnapshot metadata = store.getMetadata(); try {
assertThat(metadata.asMap().isEmpty(), is(true)); // nothing committed store.getMetadata();
fail("no index present - expected exception");
} catch (IndexNotFoundException ex) {
// expected
}
assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
writer.commit(); writer.commit();
writer.close(); writer.close();
Store.LegacyChecksums checksums = new Store.LegacyChecksums(); Store.LegacyChecksums checksums = new Store.LegacyChecksums();
@ -707,7 +722,4 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
store.deleteContent(); store.deleteContent();
IOUtils.close(store); IOUtils.close(store);
} }
} }