HDFS-15795. EC: Wrong checksum when reconstruction was failed by exception. Contributed by Yushi Hayasaka (#2657)
(cherry picked from commit 18978f2e20)
This commit is contained in:
parent
f97709beaa
commit
e0f8462b39
|
@ -480,8 +480,9 @@ final class BlockChecksumHelper {
|
||||||
// Before populating the blockChecksum at this index, record the byte
|
// Before populating the blockChecksum at this index, record the byte
|
||||||
// offset where it will begin.
|
// offset where it will begin.
|
||||||
blockChecksumPositions[idx] = blockChecksumBuf.getLength();
|
blockChecksumPositions[idx] = blockChecksumBuf.getLength();
|
||||||
|
ExtendedBlock block = null;
|
||||||
try {
|
try {
|
||||||
ExtendedBlock block = getInternalBlock(numDataUnits, idx);
|
block = getInternalBlock(numDataUnits, idx);
|
||||||
|
|
||||||
LiveBlockInfo liveBlkInfo = liveDns.get((byte) idx);
|
LiveBlockInfo liveBlkInfo = liveDns.get((byte) idx);
|
||||||
if (liveBlkInfo == null) {
|
if (liveBlkInfo == null) {
|
||||||
|
@ -502,7 +503,9 @@ final class BlockChecksumHelper {
|
||||||
break; // done with the computation, simply return.
|
break; // done with the computation, simply return.
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("Failed to get the checksum", e);
|
LOG.warn("Failed to get the checksum for block {} at index {} "
|
||||||
|
+ "in blockGroup {}", block, idx, blockGroup, e);
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -96,6 +96,12 @@ public class DataNodeFaultInjector {
|
||||||
*/
|
*/
|
||||||
public void stripedBlockReconstruction() throws IOException {}
|
public void stripedBlockReconstruction() throws IOException {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used as a hook to inject failure in erasure coding checksum reconstruction
|
||||||
|
* process.
|
||||||
|
*/
|
||||||
|
public void stripedBlockChecksumReconstruction() throws IOException {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used as a hook to inject latency when reading a block
|
* Used as a hook to inject latency when reading a block
|
||||||
* in erasure coding reconstruction process.
|
* in erasure coding reconstruction process.
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
|
||||||
import org.apache.hadoop.io.DataOutputBuffer;
|
import org.apache.hadoop.io.DataOutputBuffer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -75,6 +76,7 @@ public abstract class StripedBlockChecksumReconstructor
|
||||||
prepareDigester();
|
prepareDigester();
|
||||||
long maxTargetLength = getMaxTargetLength();
|
long maxTargetLength = getMaxTargetLength();
|
||||||
while (requestedLen > 0 && getPositionInBlock() < maxTargetLength) {
|
while (requestedLen > 0 && getPositionInBlock() < maxTargetLength) {
|
||||||
|
DataNodeFaultInjector.get().stripedBlockChecksumReconstruction();
|
||||||
long remaining = maxTargetLength - getPositionInBlock();
|
long remaining = maxTargetLength - getPositionInBlock();
|
||||||
final int toReconstructLen = (int) Math
|
final int toReconstructLen = (int) Math
|
||||||
.min(getStripedReader().getBufferSize(), remaining);
|
.min(getStripedReader().getBufferSize(), remaining);
|
||||||
|
@ -225,4 +227,4 @@ public abstract class StripedBlockChecksumReconstructor
|
||||||
getStripedReader().close();
|
getStripedReader().close();
|
||||||
cleanup();
|
cleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
@ -43,6 +44,8 @@ import java.io.IOException;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
|
||||||
|
import static org.mockito.Mockito.doThrow;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This test serves as a prototype to demo the idea proposed so far. It creates two
|
* This test serves as a prototype to demo the idea proposed so far. It creates two
|
||||||
|
@ -534,6 +537,37 @@ public class TestFileChecksum {
|
||||||
bytesPerCRC - 1);
|
bytesPerCRC - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 90000)
|
||||||
|
public void testStripedFileChecksumWithReconstructFail()
|
||||||
|
throws Exception {
|
||||||
|
String stripedFile4 = ecDir + "/stripedFileChecksum4";
|
||||||
|
prepareTestFiles(fileSize, new String[] {stripedFile4});
|
||||||
|
|
||||||
|
// get checksum
|
||||||
|
FileChecksum fileChecksum = getFileChecksum(stripedFile4, -1, false);
|
||||||
|
|
||||||
|
DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
|
||||||
|
DataNodeFaultInjector newInjector = mock(DataNodeFaultInjector.class);
|
||||||
|
doThrow(new IOException())
|
||||||
|
.doNothing()
|
||||||
|
.when(newInjector)
|
||||||
|
.stripedBlockChecksumReconstruction();
|
||||||
|
DataNodeFaultInjector.set(newInjector);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Get checksum again with reconstruction.
|
||||||
|
// If the reconstruction task fails, the client tries to get the checksum from
|
||||||
|
// another DN which has a block of the block group because of a failure of
|
||||||
|
// getting result.
|
||||||
|
FileChecksum fileChecksum1 = getFileChecksum(stripedFile4, -1, true);
|
||||||
|
|
||||||
|
Assert.assertEquals("checksum should be same", fileChecksum,
|
||||||
|
fileChecksum1);
|
||||||
|
} finally {
|
||||||
|
DataNodeFaultInjector.set(oldInjector);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout = 90000)
|
@Test(timeout = 90000)
|
||||||
public void testMixedBytesPerChecksum() throws Exception {
|
public void testMixedBytesPerChecksum() throws Exception {
|
||||||
int fileLength = bytesPerCRC * 3;
|
int fileLength = bytesPerCRC * 3;
|
||||||
|
|
Loading…
Reference in New Issue