HDFS-7996. After swapping a volume, BlockReceiver reports ReplicaNotFoundException (Lei (Eddy) Xu via Colin P. McCabe)
(cherry picked from commit023133cef9
) (cherry picked from commit0c5069c432
)
This commit is contained in:
parent
6855bbc6ca
commit
520043b438
|
@ -959,6 +959,9 @@ Release 2.7.0 - UNRELEASED
|
||||||
HDFS-8039. Fix TestDebugAdmin#testRecoverLease and
|
HDFS-8039. Fix TestDebugAdmin#testRecoverLease and
|
||||||
testVerifyBlockChecksumCommand on Windows. (Xiaoyu Yao via cnauroth)
|
testVerifyBlockChecksumCommand on Windows. (Xiaoyu Yao via cnauroth)
|
||||||
|
|
||||||
|
HDFS-7996. After swapping a volume, BlockReceiver reports
|
||||||
|
ReplicaNotFoundException (Lei (Eddy) Xu via Colin P. McCabe)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
|
BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
|
||||||
|
|
||||||
HDFS-7720. Quota by Storage Type API, tools and ClientNameNode
|
HDFS-7720. Quota by Storage Type API, tools and ClientNameNode
|
||||||
|
|
|
@ -281,7 +281,7 @@ class BlockReceiver implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* close files.
|
* close files and release volume reference.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
@ -798,6 +798,8 @@ class BlockReceiver implements Closeable {
|
||||||
// then finalize block or convert temporary to RBW.
|
// then finalize block or convert temporary to RBW.
|
||||||
// For client-writes, the block is finalized in the PacketResponder.
|
// For client-writes, the block is finalized in the PacketResponder.
|
||||||
if (isDatanode || isTransfer) {
|
if (isDatanode || isTransfer) {
|
||||||
|
// Hold a volume reference to finalize block.
|
||||||
|
try (ReplicaHandler handler = claimReplicaHandler()) {
|
||||||
// close the block/crc files
|
// close the block/crc files
|
||||||
close();
|
close();
|
||||||
block.setNumBytes(replicaInfo.getNumBytes());
|
block.setNumBytes(replicaInfo.getNumBytes());
|
||||||
|
@ -810,6 +812,7 @@ class BlockReceiver implements Closeable {
|
||||||
// Finalize the block.
|
// Finalize the block.
|
||||||
datanode.data.finalizeBlock(block);
|
datanode.data.finalizeBlock(block);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
datanode.metrics.incrBlocksWritten();
|
datanode.metrics.incrBlocksWritten();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -981,6 +984,13 @@ class BlockReceiver implements Closeable {
|
||||||
return partialCrc;
|
return partialCrc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** The caller claims the ownership of the replica handler. */
|
||||||
|
private ReplicaHandler claimReplicaHandler() {
|
||||||
|
ReplicaHandler handler = replicaHandler;
|
||||||
|
replicaHandler = null;
|
||||||
|
return handler;
|
||||||
|
}
|
||||||
|
|
||||||
private static enum PacketResponderType {
|
private static enum PacketResponderType {
|
||||||
NON_PIPELINE, LAST_IN_PIPELINE, HAS_DOWNSTREAM_IN_PIPELINE
|
NON_PIPELINE, LAST_IN_PIPELINE, HAS_DOWNSTREAM_IN_PIPELINE
|
||||||
}
|
}
|
||||||
|
@ -1280,11 +1290,14 @@ class BlockReceiver implements Closeable {
|
||||||
* @param startTime time when BlockReceiver started receiving the block
|
* @param startTime time when BlockReceiver started receiving the block
|
||||||
*/
|
*/
|
||||||
private void finalizeBlock(long startTime) throws IOException {
|
private void finalizeBlock(long startTime) throws IOException {
|
||||||
|
long endTime = 0;
|
||||||
|
// Hold a volume reference to finalize block.
|
||||||
|
try (ReplicaHandler handler = BlockReceiver.this.claimReplicaHandler()) {
|
||||||
BlockReceiver.this.close();
|
BlockReceiver.this.close();
|
||||||
final long endTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime()
|
endTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
|
||||||
: 0;
|
|
||||||
block.setNumBytes(replicaInfo.getNumBytes());
|
block.setNumBytes(replicaInfo.getNumBytes());
|
||||||
datanode.data.finalizeBlock(block);
|
datanode.data.finalizeBlock(block);
|
||||||
|
}
|
||||||
|
|
||||||
if (pinning) {
|
if (pinning) {
|
||||||
datanode.data.setPinning(block);
|
datanode.data.setPinning(block);
|
||||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
|
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
||||||
|
@ -64,6 +65,8 @@ import java.util.concurrent.TimeoutException;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
|
import org.mockito.invocation.InvocationOnMock;
|
||||||
|
import org.mockito.stubbing.Answer;
|
||||||
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
|
||||||
import static org.hamcrest.CoreMatchers.anyOf;
|
import static org.hamcrest.CoreMatchers.anyOf;
|
||||||
|
@ -77,6 +80,7 @@ import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
import static org.mockito.Matchers.any;
|
import static org.mockito.Matchers.any;
|
||||||
import static org.mockito.Matchers.anyString;
|
import static org.mockito.Matchers.anyString;
|
||||||
|
import static org.mockito.Mockito.doAnswer;
|
||||||
import static org.mockito.Mockito.timeout;
|
import static org.mockito.Mockito.timeout;
|
||||||
|
|
||||||
public class TestDataNodeHotSwapVolumes {
|
public class TestDataNodeHotSwapVolumes {
|
||||||
|
@ -577,6 +581,7 @@ public class TestDataNodeHotSwapVolumes {
|
||||||
final DataNode dn = cluster.getDataNodes().get(dataNodeIdx);
|
final DataNode dn = cluster.getDataNodes().get(dataNodeIdx);
|
||||||
final FileSystem fs = cluster.getFileSystem();
|
final FileSystem fs = cluster.getFileSystem();
|
||||||
final Path testFile = new Path("/test");
|
final Path testFile = new Path("/test");
|
||||||
|
final long lastTimeDiskErrorCheck = dn.getLastDiskErrorCheck();
|
||||||
|
|
||||||
FSDataOutputStream out = fs.create(testFile, REPLICATION);
|
FSDataOutputStream out = fs.create(testFile, REPLICATION);
|
||||||
|
|
||||||
|
@ -586,6 +591,23 @@ public class TestDataNodeHotSwapVolumes {
|
||||||
out.write(writeBuf);
|
out.write(writeBuf);
|
||||||
out.hflush();
|
out.hflush();
|
||||||
|
|
||||||
|
// Make FsDatasetSpi#finalizeBlock a time-consuming operation. So if the
|
||||||
|
// BlockReceiver releases volume reference before finalizeBlock(), the blocks
|
||||||
|
// on the volume will be removed, and finalizeBlock() throws IOE.
|
||||||
|
final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
|
||||||
|
dn.data = Mockito.spy(data);
|
||||||
|
doAnswer(new Answer<Object>() {
|
||||||
|
public Object answer(InvocationOnMock invocation)
|
||||||
|
throws IOException, InterruptedException {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
// Bypass the argument to FsDatasetImpl#finalizeBlock to verify that
|
||||||
|
// the block is not removed, since the volume reference should not
|
||||||
|
// be released at this point.
|
||||||
|
data.finalizeBlock((ExtendedBlock) invocation.getArguments()[0]);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}).when(dn.data).finalizeBlock(any(ExtendedBlock.class));
|
||||||
|
|
||||||
final CyclicBarrier barrier = new CyclicBarrier(2);
|
final CyclicBarrier barrier = new CyclicBarrier(2);
|
||||||
|
|
||||||
List<String> oldDirs = getDataDirs(dn);
|
List<String> oldDirs = getDataDirs(dn);
|
||||||
|
@ -612,13 +634,19 @@ public class TestDataNodeHotSwapVolumes {
|
||||||
out.hflush();
|
out.hflush();
|
||||||
out.close();
|
out.close();
|
||||||
|
|
||||||
|
reconfigThread.join();
|
||||||
|
|
||||||
// Verify the file has sufficient replications.
|
// Verify the file has sufficient replications.
|
||||||
DFSTestUtil.waitReplication(fs, testFile, REPLICATION);
|
DFSTestUtil.waitReplication(fs, testFile, REPLICATION);
|
||||||
// Read the content back
|
// Read the content back
|
||||||
byte[] content = DFSTestUtil.readFileBuffer(fs, testFile);
|
byte[] content = DFSTestUtil.readFileBuffer(fs, testFile);
|
||||||
assertEquals(BLOCK_SIZE, content.length);
|
assertEquals(BLOCK_SIZE, content.length);
|
||||||
|
|
||||||
reconfigThread.join();
|
// If an IOException thrown from BlockReceiver#run, it triggers
|
||||||
|
// DataNode#checkDiskError(). So we can test whether checkDiskError() is called,
|
||||||
|
// to see whether there is IOException in BlockReceiver#run().
|
||||||
|
assertEquals(lastTimeDiskErrorCheck, dn.getLastDiskErrorCheck());
|
||||||
|
|
||||||
if (!exceptions.isEmpty()) {
|
if (!exceptions.isEmpty()) {
|
||||||
throw new IOException(exceptions.get(0).getCause());
|
throw new IOException(exceptions.get(0).getCause());
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue