HDFS-15795. EC: Wrong checksum when reconstruction was failed by exception. Contributed by Yushi Hayasaka (#2657)
This commit is contained in:
parent
9bf2ac07bb
commit
18978f2e20
|
@ -480,8 +480,9 @@ final class BlockChecksumHelper {
|
|||
// Before populating the blockChecksum at this index, record the byte
|
||||
// offset where it will begin.
|
||||
blockChecksumPositions[idx] = blockChecksumBuf.getLength();
|
||||
ExtendedBlock block = null;
|
||||
try {
|
||||
ExtendedBlock block = getInternalBlock(numDataUnits, idx);
|
||||
block = getInternalBlock(numDataUnits, idx);
|
||||
|
||||
LiveBlockInfo liveBlkInfo = liveDns.get((byte) idx);
|
||||
if (liveBlkInfo == null) {
|
||||
|
@ -502,7 +503,9 @@ final class BlockChecksumHelper {
|
|||
break; // done with the computation, simply return.
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed to get the checksum", e);
|
||||
LOG.warn("Failed to get the checksum for block {} at index {} "
|
||||
+ "in blockGroup {}", block, idx, blockGroup, e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -106,6 +106,12 @@ public class DataNodeFaultInjector {
|
|||
*/
|
||||
public void stripedBlockReconstruction() throws IOException {}
|
||||
|
||||
/**
|
||||
* Used as a hook to inject failure in erasure coding checksum reconstruction
|
||||
* process.
|
||||
*/
|
||||
public void stripedBlockChecksumReconstruction() throws IOException {}
|
||||
|
||||
/**
|
||||
* Used as a hook to inject latency when reading a block
|
||||
* in erasure coding reconstruction process.
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
|
||||
import org.apache.hadoop.io.DataOutputBuffer;
|
||||
|
||||
/**
|
||||
|
@ -75,6 +76,7 @@ public abstract class StripedBlockChecksumReconstructor
|
|||
prepareDigester();
|
||||
long maxTargetLength = getMaxTargetLength();
|
||||
while (requestedLen > 0 && getPositionInBlock() < maxTargetLength) {
|
||||
DataNodeFaultInjector.get().stripedBlockChecksumReconstruction();
|
||||
long remaining = maxTargetLength - getPositionInBlock();
|
||||
final int toReconstructLen = (int) Math
|
||||
.min(getStripedReader().getBufferSize(), remaining);
|
||||
|
@ -225,4 +227,4 @@ public abstract class StripedBlockChecksumReconstructor
|
|||
getStripedReader().close();
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
|
|||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
|
@ -46,6 +47,8 @@ import java.io.IOException;
|
|||
import java.util.Random;
|
||||
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
|
||||
import static org.mockito.Mockito.doThrow;
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
/**
|
||||
* This test serves as a prototype to demo the idea proposed so far. It creates two
|
||||
|
@ -517,6 +520,37 @@ public class TestFileChecksum {
|
|||
bytesPerCRC - 1);
|
||||
}
|
||||
|
||||
@Test(timeout = 90000)
|
||||
public void testStripedFileChecksumWithReconstructFail()
|
||||
throws Exception {
|
||||
String stripedFile4 = ecDir + "/stripedFileChecksum4";
|
||||
prepareTestFiles(fileSize, new String[] {stripedFile4});
|
||||
|
||||
// get checksum
|
||||
FileChecksum fileChecksum = getFileChecksum(stripedFile4, -1, false);
|
||||
|
||||
DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
|
||||
DataNodeFaultInjector newInjector = mock(DataNodeFaultInjector.class);
|
||||
doThrow(new IOException())
|
||||
.doNothing()
|
||||
.when(newInjector)
|
||||
.stripedBlockChecksumReconstruction();
|
||||
DataNodeFaultInjector.set(newInjector);
|
||||
|
||||
try {
|
||||
// Get checksum again with reconstruction.
|
||||
// If the reconstruction task fails, the client fails to get the result and
|
||||
// then tries to get the checksum from another DN which has a block of the
|
||||
// same block group.
|
||||
FileChecksum fileChecksum1 = getFileChecksum(stripedFile4, -1, true);
|
||||
|
||||
Assert.assertEquals("checksum should be same", fileChecksum,
|
||||
fileChecksum1);
|
||||
} finally {
|
||||
DataNodeFaultInjector.set(oldInjector);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 90000)
|
||||
public void testMixedBytesPerChecksum() throws Exception {
|
||||
int fileLength = bytesPerCRC * 3;
|
||||
|
|
Loading…
Reference in New Issue