Prevent interruption while store checks lucene files for consistency

This can cause premature closing of the underlying file descriptor immediately
if at the same time the thread is blocked on IO. The file descriptor will remain closed
and subsequent access to NIOFSDirectory will throw a ClosedChannelException.
This commit is contained in:
Simon Willnauer 2016-01-29 12:09:00 +01:00
parent ce89039926
commit f6b922a97a

View File

@ -252,7 +252,6 @@ public class RecoverySourceHandler {
final AtomicLong bytesSinceLastPause = new AtomicLong(); final AtomicLong bytesSinceLastPause = new AtomicLong();
final Function<StoreFileMetaData, OutputStream> outputStreamFactories = (md) -> new BufferedOutputStream(new RecoveryOutputStream(md, bytesSinceLastPause, translogView), chunkSizeInBytes); final Function<StoreFileMetaData, OutputStream> outputStreamFactories = (md) -> new BufferedOutputStream(new RecoveryOutputStream(md, bytesSinceLastPause, translogView), chunkSizeInBytes);
sendFiles(store, phase1Files.toArray(new StoreFileMetaData[phase1Files.size()]), outputStreamFactories); sendFiles(store, phase1Files.toArray(new StoreFileMetaData[phase1Files.size()]), outputStreamFactories);
cancellableThreads.execute(() -> {
// Send the CLEAN_FILES request, which takes all of the files that // Send the CLEAN_FILES request, which takes all of the files that
// were transferred and renames them from their temporary file // were transferred and renames them from their temporary file
// names to the actual file names. It also writes checksums for // names to the actual file names. It also writes checksums for
@ -262,10 +261,12 @@ public class RecoverySourceHandler {
// related to this recovery (out of date segments, for example) // related to this recovery (out of date segments, for example)
// are deleted // are deleted
try { try {
cancellableThreads.execute(() -> {
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES, transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES,
new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), recoverySourceMetadata, translogView.totalOperations()), new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), recoverySourceMetadata, translogView.totalOperations()),
TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(), TransportRequestOptions.builder().withTimeout(recoverySettings.internalActionTimeout()).build(),
EmptyTransportResponseHandler.INSTANCE_SAME).txGet(); EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
});
} catch (RemoteTransportException remoteException) { } catch (RemoteTransportException remoteException) {
final IOException corruptIndexException; final IOException corruptIndexException;
// we realized that after the index was copied and we wanted to finalize the recovery // we realized that after the index was copied and we wanted to finalize the recovery
@ -277,13 +278,11 @@ public class RecoverySourceHandler {
final Store.MetadataSnapshot recoverySourceMetadata1 = store.getMetadata(snapshot); final Store.MetadataSnapshot recoverySourceMetadata1 = store.getMetadata(snapshot);
StoreFileMetaData[] metadata = StoreFileMetaData[] metadata =
StreamSupport.stream(recoverySourceMetadata1.spliterator(), false).toArray(size -> new StoreFileMetaData[size]); StreamSupport.stream(recoverySourceMetadata1.spliterator(), false).toArray(size -> new StoreFileMetaData[size]);
ArrayUtil.timSort(metadata, new Comparator<StoreFileMetaData>() { ArrayUtil.timSort(metadata, (o1, o2) -> {
@Override
public int compare(StoreFileMetaData o1, StoreFileMetaData o2) {
return Long.compare(o1.length(), o2.length()); // check small files first return Long.compare(o1.length(), o2.length()); // check small files first
}
}); });
for (StoreFileMetaData md : metadata) { for (StoreFileMetaData md : metadata) {
cancellableThreads.checkForCancel();
logger.debug("{} checking integrity for file {} after remove corruption exception", shard.shardId(), md); logger.debug("{} checking integrity for file {} after remove corruption exception", shard.shardId(), md);
if (store.checkIntegrityNoException(md) == false) { // we are corrupted on the primary -- fail! if (store.checkIntegrityNoException(md) == false) { // we are corrupted on the primary -- fail!
shard.failShard("recovery", corruptIndexException); shard.failShard("recovery", corruptIndexException);
@ -305,7 +304,6 @@ public class RecoverySourceHandler {
throw remoteException; throw remoteException;
} }
} }
});
} }
prepareTargetForTranslog(translogView.totalOperations()); prepareTargetForTranslog(translogView.totalOperations());