HDFS-5153. Datanode should send block reports for each storage in a separate message. (Arpit Agarwal)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1563254 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Arpit Agarwal 2014-01-31 21:00:33 +00:00
parent 5a2428d39f
commit 5beeb30169
12 changed files with 516 additions and 248 deletions

View File

@ -297,6 +297,9 @@ Release 2.4.0 - UNRELEASED
HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and
the corresponding byte value. (jing9)
HDFS-5153. Datanode should send block reports for each storage in a
separate message. (Arpit Agarwal)
OPTIMIZATIONS
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery

View File

@ -399,6 +399,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final long DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT = 60 * 60 * 1000;
public static final String DFS_BLOCKREPORT_INITIAL_DELAY_KEY = "dfs.blockreport.initialDelay";
public static final int DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT = 0;
public static final String DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY = "dfs.blockreport.split.threshold";
public static final long DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT = 1000 * 1000;
public static final String DFS_CACHEREPORT_INTERVAL_MSEC_KEY = "dfs.cachereport.intervalMsec";
public static final long DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT = 10 * 1000;
public static final String DFS_BLOCK_INVALIDATE_LIMIT_KEY = "dfs.block.invalidate.limit";

View File

@ -1616,15 +1616,19 @@ public class BlockManager {
/**
* The given storage is reporting all its blocks.
* Update the (storage-->block list) and (block-->storage list) maps.
*
* @return true if all known storages of the given DN have finished reporting.
* @throws IOException
*/
public void processReport(final DatanodeID nodeID,
public boolean processReport(final DatanodeID nodeID,
final DatanodeStorage storage, final String poolId,
final BlockListAsLongs newReport) throws IOException {
namesystem.writeLock();
final long startTime = Time.now(); //after acquiring write lock
final long endTime;
DatanodeDescriptor node;
try {
final DatanodeDescriptor node = datanodeManager.getDatanode(nodeID);
node = datanodeManager.getDatanode(nodeID);
if (node == null || !node.isAlive) {
throw new IOException(
"ProcessReport from dead or unregistered node: " + nodeID);
@ -1632,13 +1636,21 @@ public class BlockManager {
// To minimize startup time, we discard any second (or later) block reports
// that we receive while still in startup phase.
final DatanodeStorageInfo storageInfo = node.updateStorage(storage);
DatanodeStorageInfo storageInfo = node.getStorageInfo(storage.getStorageID());
if (storageInfo == null) {
// We handle this for backwards compatibility.
storageInfo = node.updateStorage(storage);
LOG.warn("Unknown storageId " + storage.getStorageID() +
", updating storageMap. This indicates a buggy " +
"DataNode that isn't heartbeating correctly.");
}
if (namesystem.isInStartupSafeMode()
&& storageInfo.getBlockReportCount() > 0) {
blockLog.info("BLOCK* processReport: "
+ "discarded non-initial block report from " + nodeID
+ " because namenode still in startup phase");
return;
return !node.hasStaleStorages();
}
if (storageInfo.numBlocks() == 0) {
@ -1655,7 +1667,7 @@ public class BlockManager {
storageInfo.receivedBlockReport();
if (staleBefore && !storageInfo.areBlockContentsStale()) {
LOG.info("BLOCK* processReport: Received first block report from "
+ node + " after starting up or becoming active. Its block "
+ storage + " after starting up or becoming active. Its block "
+ "contents are no longer considered stale");
rescanPostponedMisreplicatedBlocks();
}
@ -1670,9 +1682,10 @@ public class BlockManager {
if (metrics != null) {
metrics.addBlockReport((int) (endTime - startTime));
}
blockLog.info("BLOCK* processReport: from "
+ nodeID + ", blocks: " + newReport.getNumberOfBlocks()
blockLog.info("BLOCK* processReport: from storage " + storage.getStorageID()
+ " node " + nodeID + ", blocks: " + newReport.getNumberOfBlocks()
+ ", processing time: " + (endTime - startTime) + " msecs");
return !node.hasStaleStorages();
}
/**
@ -1827,7 +1840,7 @@ public class BlockManager {
Collection<BlockToMarkCorrupt> toCorrupt, // add to corrupt replicas list
Collection<StatefulBlockInfo> toUC) { // add to under-construction list
final DatanodeStorageInfo storageInfo = dn.updateStorage(storage);
final DatanodeStorageInfo storageInfo = dn.getStorageInfo(storage.getStorageID());
// place a delimiter in the list which separates blocks
// that have been reported from those that have not

View File

@ -257,6 +257,17 @@ public class DatanodeDescriptor extends DatanodeInfo {
}
}
boolean hasStaleStorages() {
synchronized (storageMap) {
for (DatanodeStorageInfo storage : storageMap.values()) {
if (storage.areBlockContentsStale()) {
return true;
}
}
return false;
}
}
/**
* Remove block from the list of blocks belonging to the data-node. Remove
* data-node from the block.

View File

@ -22,11 +22,9 @@ import static org.apache.hadoop.util.Time.now;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.*;
import com.google.common.base.Joiner;
import org.apache.commons.logging.Log;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
@ -437,75 +435,100 @@ class BPServiceActor implements Runnable {
/**
* Report the list blocks to the Namenode
* @return DatanodeCommands returned by the NN. May be null.
* @throws IOException
*/
DatanodeCommand blockReport() throws IOException {
List<DatanodeCommand> blockReport() throws IOException {
// send block report if timer has expired.
DatanodeCommand cmd = null;
long startTime = now();
if (startTime - lastBlockReport > dnConf.blockReportInterval) {
// Flush any block information that precedes the block report. Otherwise
// we have a chance that we will miss the delHint information
// or we will report an RBW replica after the BlockReport already reports
// a FINALIZED one.
reportReceivedDeletedBlocks();
// Send one block report per known storage.
// Create block report
long brCreateStartTime = now();
long totalBlockCount = 0;
Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
dn.getFSDataset().getBlockReports(bpos.getBlockPoolId());
// Send block report
long brSendStartTime = now();
StorageBlockReport[] reports =
new StorageBlockReport[perVolumeBlockLists.size()];
int i = 0;
for(Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
DatanodeStorage dnStorage = kvPair.getKey();
BlockListAsLongs blockList = kvPair.getValue();
totalBlockCount += blockList.getNumberOfBlocks();
reports[i++] =
new StorageBlockReport(
dnStorage, blockList.getBlockListAsLongs());
}
cmd = bpNamenode.blockReport(bpRegistration, bpos.getBlockPoolId(), reports);
// Log the block report processing stats from Datanode perspective
long brSendCost = now() - brSendStartTime;
long brCreateCost = brSendStartTime - brCreateStartTime;
dn.getMetrics().addBlockReport(brSendCost);
LOG.info("BlockReport of " + totalBlockCount
+ " blocks took " + brCreateCost + " msec to generate and "
+ brSendCost + " msecs for RPC and NN processing");
// If we have sent the first block report, then wait a random
// time before we start the periodic block reports.
if (resetBlockReportTime) {
lastBlockReport = startTime - DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval));
resetBlockReportTime = false;
} else {
/* say the last block report was at 8:20:14. The current report
* should have started around 9:20:14 (default 1 hour interval).
* If current time is :
* 1) normal like 9:20:18, next report should be at 10:20:14
* 2) unexpected like 11:35:43, next report should be at 12:20:14
*/
lastBlockReport += (now() - lastBlockReport) /
dnConf.blockReportInterval * dnConf.blockReportInterval;
}
LOG.info("sent block report, processed command:" + cmd);
final long startTime = now();
if (startTime - lastBlockReport <= dnConf.blockReportInterval) {
return null;
}
return cmd;
ArrayList<DatanodeCommand> cmds = new ArrayList<DatanodeCommand>();
// Flush any block information that precedes the block report. Otherwise
// we have a chance that we will miss the delHint information
// or we will report an RBW replica after the BlockReport already reports
// a FINALIZED one.
reportReceivedDeletedBlocks();
lastDeletedReport = startTime;
long brCreateStartTime = now();
Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
dn.getFSDataset().getBlockReports(bpos.getBlockPoolId());
// Convert the reports to the format expected by the NN.
int i = 0;
int totalBlockCount = 0;
StorageBlockReport reports[] =
new StorageBlockReport[perVolumeBlockLists.size()];
for(Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
BlockListAsLongs blockList = kvPair.getValue();
reports[i++] = new StorageBlockReport(
kvPair.getKey(), blockList.getBlockListAsLongs());
totalBlockCount += blockList.getNumberOfBlocks();
}
// Send the reports to the NN.
int numReportsSent;
long brSendStartTime = now();
if (totalBlockCount < dnConf.blockReportSplitThreshold) {
// Below split threshold, send all reports in a single message.
numReportsSent = 1;
DatanodeCommand cmd =
bpNamenode.blockReport(bpRegistration, bpos.getBlockPoolId(), reports);
if (cmd != null) {
cmds.add(cmd);
}
} else {
// Send one block report per message.
numReportsSent = i;
for (StorageBlockReport report : reports) {
StorageBlockReport singleReport[] = { report };
DatanodeCommand cmd = bpNamenode.blockReport(
bpRegistration, bpos.getBlockPoolId(), singleReport);
if (cmd != null) {
cmds.add(cmd);
}
}
}
// Log the block report processing stats from Datanode perspective
long brSendCost = now() - brSendStartTime;
long brCreateCost = brSendStartTime - brCreateStartTime;
dn.getMetrics().addBlockReport(brSendCost);
LOG.info("Sent " + numReportsSent + " blockreports " + totalBlockCount +
" blocks total. Took " + brCreateCost +
" msec to generate and " + brSendCost +
" msecs for RPC and NN processing. " +
" Got back commands " +
(cmds.size() == 0 ? "none" : Joiner.on("; ").join(cmds)));
scheduleNextBlockReport(startTime);
return cmds.size() == 0 ? null : cmds;
}
private void scheduleNextBlockReport(long previousReportStartTime) {
// If we have sent the first set of block reports, then wait a random
// time before we start the periodic block reports.
if (resetBlockReportTime) {
lastBlockReport = previousReportStartTime -
DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval));
resetBlockReportTime = false;
} else {
/* say the last block report was at 8:20:14. The current report
* should have started around 9:20:14 (default 1 hour interval).
* If current time is :
* 1) normal like 9:20:18, next report should be at 10:20:14
* 2) unexpected like 11:35:43, next report should be at 12:20:14
*/
lastBlockReport += (now() - lastBlockReport) /
dnConf.blockReportInterval * dnConf.blockReportInterval;
}
}
DatanodeCommand cacheReport() throws IOException {
// If caching is disabled, do not send a cache report
if (dn.getFSDataset().getCacheCapacity() == 0) {
@ -513,7 +536,7 @@ class BPServiceActor implements Runnable {
}
// send cache report if timer has expired.
DatanodeCommand cmd = null;
long startTime = Time.monotonicNow();
final long startTime = Time.monotonicNow();
if (startTime - lastCacheReport > dnConf.cacheReportInterval) {
if (LOG.isDebugEnabled()) {
LOG.debug("Sending cacheReport from service actor: " + this);
@ -613,7 +636,7 @@ class BPServiceActor implements Runnable {
//
while (shouldRun()) {
try {
long startTime = now();
final long startTime = now();
//
// Every so often, send heartbeat or block-report
@ -659,10 +682,10 @@ class BPServiceActor implements Runnable {
lastDeletedReport = startTime;
}
DatanodeCommand cmd = blockReport();
processCommand(new DatanodeCommand[]{ cmd });
List<DatanodeCommand> cmds = blockReport();
processCommand(cmds == null ? null : cmds.toArray(new DatanodeCommand[cmds.size()]));
cmd = cacheReport();
DatanodeCommand cmd = cacheReport();
processCommand(new DatanodeCommand[]{ cmd });
// Now safe to start scanning the block pool.

View File

@ -23,6 +23,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
@ -70,6 +72,7 @@ public class DNConf {
final long readaheadLength;
final long heartBeatInterval;
final long blockReportInterval;
final long blockReportSplitThreshold;
final long deleteReportInterval;
final long initialBlockReportDelay;
final long cacheReportInterval;
@ -117,6 +120,8 @@ public class DNConf {
DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT);
this.blockReportInterval = conf.getLong(DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT);
this.blockReportSplitThreshold = conf.getLong(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY,
DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT);
this.cacheReportInterval = conf.getLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY,
DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT);

View File

@ -982,13 +982,18 @@ class NameNodeRpcServer implements NamenodeProtocols {
+ "from " + nodeReg + ", reports.length=" + reports.length);
}
final BlockManager bm = namesystem.getBlockManager();
boolean hasStaleStorages = true;
for(StorageBlockReport r : reports) {
final BlockListAsLongs blocks = new BlockListAsLongs(r.getBlocks());
bm.processReport(nodeReg, r.getStorage(), poolId, blocks);
hasStaleStorages = bm.processReport(nodeReg, r.getStorage(), poolId, blocks);
}
if (nn.getFSImage().isUpgradeFinalized() && !nn.isStandbyState())
if (nn.getFSImage().isUpgradeFinalized() &&
!nn.isStandbyState() &&
!hasStaleStorages) {
return new FinalizeCommand(poolId);
}
return null;
}

View File

@ -482,6 +482,20 @@
<description>Delay for first block report in seconds.</description>
</property>
<property>
<name>dfs.blockreport.split.threshold</name>
<value>1000000</value>
<description>If the number of blocks on the DataNode is below this
threshold then it will send block reports for all Storage Directories
in a single message.
If the number of blocks exceeds this threshold then the DataNode will
send block reports for each Storage Directory in separate messages.
Set to zero to always split.
</description>
</property>
<property>
<name>dfs.datanode.directoryscan.interval</name>
<value>21600</value>

View File

@ -52,7 +52,6 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
@ -69,20 +68,16 @@ import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
/**
* This test simulates a variety of situations when blocks are being
* intentionally corrupted, unexpectedly modified, and so on before a block
* report is happening.
* This is the base class for simulating a variety of situations
* when blocks are being intentionally corrupted, unexpectedly modified,
* and so on before a block report is happening.
*
* For each test case it runs two variations:
* #1 - For a given DN, the first variation sends block reports for all
* storages in a single call to the NN.
* #2 - For a given DN, the second variation sends block reports for each
* storage in a separate call.
*
* The behavior should be the same in either variation.
* By overriding {@link #sendBlockReports}, derived classes can test
* different variations of how block reports are split across storages
* and messages.
*/
public class TestBlockReport {
public static final Log LOG = LogFactory.getLog(TestBlockReport.class);
public abstract class BlockReportTestBase {
public static final Log LOG = LogFactory.getLog(BlockReportTestBase.class);
private static short REPL_FACTOR = 1;
private static final int RAND_LIMIT = 2000;
@ -91,12 +86,11 @@ public class TestBlockReport {
private static final int DN_N0 = 0;
private static final int FILE_START = 0;
static final int BLOCK_SIZE = 1024;
static final int NUM_BLOCKS = 10;
static final int FILE_SIZE = NUM_BLOCKS * BLOCK_SIZE + 1;
static String bpid;
private static final int BLOCK_SIZE = 1024;
private static final int NUM_BLOCKS = 10;
private static final int FILE_SIZE = NUM_BLOCKS * BLOCK_SIZE + 1;
private MiniDFSCluster cluster;
protected MiniDFSCluster cluster;
private DistributedFileSystem fs;
private static Random rand = new Random(RAND_LIMIT);
@ -112,8 +106,7 @@ public class TestBlockReport {
public void startUpCluster() throws IOException {
REPL_FACTOR = 1; //Reset if case a test has modified the value
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPL_FACTOR).build();
fs = (DistributedFileSystem) cluster.getFileSystem();
bpid = cluster.getNamesystem().getBlockPoolId();
fs = cluster.getFileSystem();
}
@After
@ -123,6 +116,15 @@ public class TestBlockReport {
cluster.shutdown();
}
protected static void resetConfiguration() {
conf = new Configuration();
int customPerChecksumSize = 512;
int customBlockSize = customPerChecksumSize * 3;
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, DN_RESCAN_INTERVAL);
}
// Generate a block report, optionally corrupting the generation
// stamp and/or length of one block.
private static StorageBlockReport[] getBlockReports(
@ -172,106 +174,11 @@ public class TestBlockReport {
* @param dnR
* @param poolId
* @param reports
* @param needtoSplit
* @throws IOException
*/
private void sendBlockReports(DatanodeRegistration dnR, String poolId,
StorageBlockReport[] reports, boolean needtoSplit) throws IOException {
if (!needtoSplit) {
LOG.info("Sending combined block reports for " + dnR);
cluster.getNameNodeRpc().blockReport(dnR, poolId, reports);
} else {
for (StorageBlockReport report : reports) {
LOG.info("Sending block report for storage " + report.getStorage().getStorageID());
StorageBlockReport[] singletonReport = { report };
cluster.getNameNodeRpc().blockReport(dnR, poolId, singletonReport);
}
}
}
protected abstract void sendBlockReports(DatanodeRegistration dnR, String poolId,
StorageBlockReport[] reports) throws IOException;
/**
* Test variations blockReport_01 through blockReport_09 with combined
* and split block reports.
*/
@Test
public void blockReportCombined_01() throws IOException {
blockReport_01(false);
}
@Test
public void blockReportSplit_01() throws IOException {
blockReport_01(true);
}
@Test
public void blockReportCombined_02() throws IOException {
blockReport_02(false);
}
@Test
public void blockReportSplit_02() throws IOException {
blockReport_02(true);
}
@Test
public void blockReportCombined_03() throws IOException {
blockReport_03(false);
}
@Test
public void blockReportSplit_03() throws IOException {
blockReport_03(true);
}
@Test
public void blockReportCombined_04() throws IOException {
blockReport_04(false);
}
@Test
public void blockReportSplit_04() throws IOException {
blockReport_04(true);
}
@Test
public void blockReportCombined_06() throws Exception {
blockReport_06(false);
}
@Test
public void blockReportSplit_06() throws Exception {
blockReport_06(true);
}
@Test
public void blockReportCombined_07() throws Exception {
blockReport_07(false);
}
@Test
public void blockReportSplit_07() throws Exception {
blockReport_07(true);
}
@Test
public void blockReportCombined_08() throws Exception {
blockReport_08(false);
}
@Test
public void blockReportSplit_08() throws Exception {
blockReport_08(true);
}
@Test
public void blockReportCombined_09() throws Exception {
blockReport_09(false);
}
@Test
public void blockReportSplit_09() throws Exception {
blockReport_09(true);
}
/**
* Test write a file, verifies and closes it. Then the length of the blocks
* are messed up and BlockReport is forced.
@ -279,7 +186,8 @@ public class TestBlockReport {
*
* @throws java.io.IOException on an error
*/
private void blockReport_01(boolean splitBlockReports) throws IOException {
@Test(timeout=300000)
public void blockReport_01() throws IOException {
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
@ -312,7 +220,7 @@ public class TestBlockReport {
String poolId = cluster.getNamesystem().getBlockPoolId();
DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
List<LocatedBlock> blocksAfterReport =
DFSTestUtil.getAllBlocks(fs.open(filePath));
@ -338,7 +246,8 @@ public class TestBlockReport {
*
* @throws IOException in case of errors
*/
private void blockReport_02(boolean splitBlockReports) throws IOException {
@Test(timeout=300000)
public void blockReport_02() throws IOException {
final String METHOD_NAME = GenericTestUtils.getMethodName();
LOG.info("Running test " + METHOD_NAME);
@ -393,7 +302,7 @@ public class TestBlockReport {
String poolId = cluster.getNamesystem().getBlockPoolId();
DatanodeRegistration dnR = dn0.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn0, poolId, false, false);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
BlockManagerTestUtil.getComputedDatanodeWork(cluster.getNamesystem()
.getBlockManager());
@ -414,17 +323,18 @@ public class TestBlockReport {
*
* @throws IOException in case of an error
*/
private void blockReport_03(boolean splitBlockReports) throws IOException {
@Test(timeout=300000)
public void blockReport_03() throws IOException {
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
ArrayList<Block> blocks = writeFile(METHOD_NAME, FILE_SIZE, filePath);
// all blocks belong to the same file, hence same BP
DataNode dn = cluster.getDataNodes().get(DN_N0);
String poolId = cluster.getNamesystem().getBlockPoolId();
DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn, poolId, true, false);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
printStats();
assertThat("Wrong number of corrupt blocks",
@ -441,7 +351,8 @@ public class TestBlockReport {
*
* @throws IOException in case of an error
*/
private void blockReport_04(boolean splitBlockReports) throws IOException {
@Test(timeout=300000)
public void blockReport_04() throws IOException {
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
DFSTestUtil.createFile(fs, filePath,
@ -459,7 +370,7 @@ public class TestBlockReport {
DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
printStats();
assertThat("Wrong number of corrupt blocks",
@ -476,7 +387,8 @@ public class TestBlockReport {
*
* @throws IOException in case of an error
*/
private void blockReport_06(boolean splitBlockReports) throws Exception {
@Test(timeout=300000)
public void blockReport_06() throws Exception {
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
final int DN_N1 = DN_N0 + 1;
@ -489,7 +401,7 @@ public class TestBlockReport {
String poolId = cluster.getNamesystem().getBlockPoolId();
DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
printStats();
assertEquals("Wrong number of PendingReplication Blocks",
0, cluster.getNamesystem().getUnderReplicatedBlocks());
@ -508,7 +420,8 @@ public class TestBlockReport {
*
* @throws IOException in case of an error
*/
private void blockReport_07(boolean splitBlockReports) throws Exception {
@Test(timeout=300000)
public void blockReport_07() throws Exception {
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
final int DN_N1 = DN_N0 + 1;
@ -522,7 +435,7 @@ public class TestBlockReport {
String poolId = cluster.getNamesystem().getBlockPoolId();
DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn, poolId, true, false);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
printStats();
assertThat("Wrong number of corrupt blocks",
@ -533,7 +446,7 @@ public class TestBlockReport {
cluster.getNamesystem().getPendingReplicationBlocks(), is(0L));
reports = getBlockReports(dn, poolId, true, true);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
printStats();
assertThat("Wrong number of corrupt blocks",
@ -559,7 +472,8 @@ public class TestBlockReport {
*
* @throws IOException in case of an error
*/
private void blockReport_08(boolean splitBlockReports) throws IOException {
@Test(timeout=300000)
public void blockReport_08() throws IOException {
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
final int DN_N1 = DN_N0 + 1;
@ -578,13 +492,13 @@ public class TestBlockReport {
bc.start();
waitForTempReplica(bl, DN_N1);
// all blocks belong to the same file, hence same BP
DataNode dn = cluster.getDataNodes().get(DN_N1);
String poolId = cluster.getNamesystem().getBlockPoolId();
DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
printStats();
assertEquals("Wrong number of PendingReplication blocks",
blocks.size(), cluster.getNamesystem().getPendingReplicationBlocks());
@ -600,7 +514,8 @@ public class TestBlockReport {
// Similar to BlockReport_08 but corrupts GS and len of the TEMPORARY's
// replica block. Expect the same behaviour: NN should simply ignore this
// block
private void blockReport_09(boolean splitBlockReports) throws IOException {
@Test(timeout=300000)
public void blockReport_09() throws IOException {
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
final int DN_N1 = DN_N0 + 1;
@ -620,17 +535,17 @@ public class TestBlockReport {
bc.start();
waitForTempReplica(bl, DN_N1);
// all blocks belong to the same file, hence same BP
DataNode dn = cluster.getDataNodes().get(DN_N1);
String poolId = cluster.getNamesystem().getBlockPoolId();
DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
StorageBlockReport[] reports = getBlockReports(dn, poolId, true, true);
sendBlockReports(dnR, poolId, reports, splitBlockReports);
sendBlockReports(dnR, poolId, reports);
printStats();
assertEquals("Wrong number of PendingReplication blocks",
2, cluster.getNamesystem().getPendingReplicationBlocks());
try {
bc.join();
} catch (InterruptedException e) {}
@ -638,7 +553,7 @@ public class TestBlockReport {
resetConfiguration(); // return the initial state of the configuration
}
}
/**
* Test for the case where one of the DNs in the pipeline is in the
* process of doing a block report exactly when the block is closed.
@ -648,7 +563,7 @@ public class TestBlockReport {
* corrupt.
* This is a regression test for HDFS-2791.
*/
@Test
@Test(timeout=300000)
public void testOneReplicaRbwReportArrivesAfterBlockCompleted() throws Exception {
final CountDownLatch brFinished = new CountDownLatch(1);
DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
@ -663,7 +578,7 @@ public class TestBlockReport {
}
}
};
final String METHOD_NAME = GenericTestUtils.getMethodName();
Path filePath = new Path("/" + METHOD_NAME + ".dat");
@ -671,9 +586,9 @@ public class TestBlockReport {
// what happens when one of the DNs is slowed for some reason.
REPL_FACTOR = 2;
startDNandWait(null, false);
NameNode nn = cluster.getNameNode();
FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
try {
AppendTestUtil.write(out, 0, 10);
@ -684,19 +599,19 @@ public class TestBlockReport {
DataNode dn = cluster.getDataNodes().get(0);
DatanodeProtocolClientSideTranslatorPB spy =
DataNodeTestUtils.spyOnBposToNN(dn, nn);
Mockito.doAnswer(delayer)
.when(spy).blockReport(
Mockito.<DatanodeRegistration>anyObject(),
Mockito.anyString(),
Mockito.<StorageBlockReport[]>anyObject());
// Force a block report to be generated. The block report will have
// an RBW replica in it. Wait for the RPC to be sent, but block
// it before it gets to the NN.
dn.scheduleAllBlockReport(0);
delayer.waitForCall();
} finally {
IOUtils.closeStream(out);
}
@ -705,22 +620,22 @@ public class TestBlockReport {
// state.
delayer.proceed();
brFinished.await();
// Verify that no replicas are marked corrupt, and that the
// file is still readable.
BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
DFSTestUtil.readFile(fs, filePath);
// Ensure that the file is readable even from the DN that we futzed with.
cluster.stopDataNode(1);
DFSTestUtil.readFile(fs, filePath);
DFSTestUtil.readFile(fs, filePath);
}
private void waitForTempReplica(Block bl, int DN_N1) throws IOException {
final boolean tooLongWait = false;
final int TIMEOUT = 40000;
if(LOG.isDebugEnabled()) {
LOG.debug("Wait for datanode " + DN_N1 + " to appear");
}
@ -731,7 +646,7 @@ public class TestBlockReport {
LOG.debug("Total number of DNs " + cluster.getDataNodes().size());
}
cluster.waitActive();
// Look about specified DN for the replica of the block from 1st DN
final DataNode dn1 = cluster.getDataNodes().get(DN_N1);
String bpid = cluster.getNamesystem().getBlockPoolId();
@ -789,7 +704,7 @@ public class TestBlockReport {
return blocks;
}
private void startDNandWait(Path filePath, boolean waitReplicas)
private void startDNandWait(Path filePath, boolean waitReplicas)
throws IOException, InterruptedException, TimeoutException {
if (LOG.isDebugEnabled()) {
LOG.debug("Before next DN start: " + cluster.getDataNodes().size());
@ -802,7 +717,7 @@ public class TestBlockReport {
if (LOG.isDebugEnabled()) {
int lastDn = datanodes.size() - 1;
LOG.debug("New datanode "
+ cluster.getDataNodes().get(lastDn).getDisplayName()
+ cluster.getDataNodes().get(lastDn).getDisplayName()
+ " has been started");
}
if (waitReplicas) {
@ -898,7 +813,7 @@ public class TestBlockReport {
((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL);
((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL);
((Log4JLogger) TestBlockReport.LOG).getLogger().setLevel(Level.ALL);
((Log4JLogger) BlockReportTestBase.LOG).getLogger().setLevel(Level.ALL);
}
private Block findBlock(Path path, long size) throws IOException {
@ -918,11 +833,11 @@ public class TestBlockReport {
private class BlockChecker extends Thread {
Path filePath;
public BlockChecker(final Path filePath) {
this.filePath = filePath;
}
@Override
public void run() {
try {
@ -933,13 +848,4 @@ public class TestBlockReport {
}
}
}
private static void resetConfiguration() {
conf = new Configuration();
int customPerChecksumSize = 512;
int customBlockSize = customPerChecksumSize * 3;
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, DN_RESCAN_INTERVAL);
}
}

View File

@ -0,0 +1,205 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.IOException;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.*;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;
import static org.mockito.Matchers.*;
import static org.mockito.Mockito.times;
/**
* Tests that the DataNode respects
* {@link DFSConfigKeys#DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY}
*/
public class TestDnRespectsBlockReportSplitThreshold {
public static final Log LOG = LogFactory.getLog(TestStorageReport.class);
private static final int BLOCK_SIZE = 1024;
private static final short REPL_FACTOR = 1;
private static final long seed = 0xFEEDFACE;
private static final int BLOCKS_IN_FILE = 5;
private static Configuration conf;
private MiniDFSCluster cluster;
private DistributedFileSystem fs;
static String bpid;
public void startUpCluster(long splitThreshold) throws IOException {
conf = new HdfsConfiguration();
conf.setLong(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY, splitThreshold);
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(REPL_FACTOR)
.build();
fs = cluster.getFileSystem();
bpid = cluster.getNamesystem().getBlockPoolId();
}
@After
public void shutDownCluster() throws IOException {
if (cluster != null) {
fs.close();
cluster.shutdown();
cluster = null;
}
}
private void createFile(String filenamePrefix, int blockCount)
throws IOException {
Path path = new Path("/" + filenamePrefix + ".dat");
DFSTestUtil.createFile(fs, path, BLOCK_SIZE,
blockCount * BLOCK_SIZE, BLOCK_SIZE, REPL_FACTOR, seed);
}
private void verifyCapturedArguments(
ArgumentCaptor<StorageBlockReport[]> captor,
int expectedReportsPerCall,
int expectedTotalBlockCount) {
List<StorageBlockReport[]> listOfReports = captor.getAllValues();
int numBlocksReported = 0;
for (StorageBlockReport[] reports : listOfReports) {
assertThat(reports.length, is(expectedReportsPerCall));
for (StorageBlockReport report : reports) {
BlockListAsLongs blockList = new BlockListAsLongs(report.getBlocks());
numBlocksReported += blockList.getNumberOfBlocks();
}
}
assert(numBlocksReported >= expectedTotalBlockCount);
}
/**
* Test that if splitThreshold is zero, then we always get a separate
* call per storage.
*/
@Test(timeout=300000)
public void testAlwaysSplit() throws IOException, InterruptedException {
startUpCluster(0);
NameNode nn = cluster.getNameNode();
DataNode dn = cluster.getDataNodes().get(0);
// Create a file with a few blocks.
createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
// Insert a spy object for the NN RPC.
DatanodeProtocolClientSideTranslatorPB nnSpy =
DataNodeTestUtils.spyOnBposToNN(dn, nn);
// Trigger a block report so there is an interaction with the spy
// object.
DataNodeTestUtils.triggerBlockReport(dn);
ArgumentCaptor<StorageBlockReport[]> captor =
ArgumentCaptor.forClass(StorageBlockReport[].class);
Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport(
any(DatanodeRegistration.class),
anyString(),
captor.capture());
verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
}
/**
* Tests the behavior when the count of blocks is exactly one less than
* the threshold.
*/
@Test(timeout=300000)
public void testCornerCaseUnderThreshold() throws IOException, InterruptedException {
startUpCluster(BLOCKS_IN_FILE + 1);
NameNode nn = cluster.getNameNode();
DataNode dn = cluster.getDataNodes().get(0);
// Create a file with a few blocks.
createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
// Insert a spy object for the NN RPC.
DatanodeProtocolClientSideTranslatorPB nnSpy =
DataNodeTestUtils.spyOnBposToNN(dn, nn);
// Trigger a block report so there is an interaction with the spy
// object.
DataNodeTestUtils.triggerBlockReport(dn);
ArgumentCaptor<StorageBlockReport[]> captor =
ArgumentCaptor.forClass(StorageBlockReport[].class);
Mockito.verify(nnSpy, times(1)).blockReport(
any(DatanodeRegistration.class),
anyString(),
captor.capture());
verifyCapturedArguments(captor, MiniDFSCluster.DIRS_PER_DATANODE, BLOCKS_IN_FILE);
}
/**
* Tests the behavior when the count of blocks is exactly equal to the
* threshold.
*/
@Test(timeout=300000)
public void testCornerCaseAtThreshold() throws IOException, InterruptedException {
startUpCluster(BLOCKS_IN_FILE);
NameNode nn = cluster.getNameNode();
DataNode dn = cluster.getDataNodes().get(0);
// Create a file with a few blocks.
createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
// Insert a spy object for the NN RPC.
DatanodeProtocolClientSideTranslatorPB nnSpy =
DataNodeTestUtils.spyOnBposToNN(dn, nn);
// Trigger a block report so there is an interaction with the spy
// object.
DataNodeTestUtils.triggerBlockReport(dn);
ArgumentCaptor<StorageBlockReport[]> captor =
ArgumentCaptor.forClass(StorageBlockReport[].class);
Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport(
any(DatanodeRegistration.class),
anyString(),
captor.capture());
verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
}
}

View File

@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.IOException;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
/**
* Runs all tests in BlockReportTestBase, sending one block per storage.
* This is the default DataNode behavior post HDFS-2832.
*/
public class TestNNHandlesBlockReportPerStorage extends BlockReportTestBase {
@Override
protected void sendBlockReports(DatanodeRegistration dnR, String poolId,
StorageBlockReport[] reports) throws IOException {
for (StorageBlockReport report : reports) {
LOG.info("Sending block report for storage " + report.getStorage().getStorageID());
StorageBlockReport[] singletonReport = { report };
cluster.getNameNodeRpc().blockReport(dnR, poolId, singletonReport);
}
}
}

View File

@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.IOException;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
/**
* Runs all tests in BlockReportTestBase, sending one block report
* per DataNode. This tests that the NN can handle the legacy DN
* behavior where it presents itself as a single logical storage.
*/
public class TestNNHandlesCombinedBlockReport extends BlockReportTestBase {
@Override
protected void sendBlockReports(DatanodeRegistration dnR, String poolId,
StorageBlockReport[] reports) throws IOException {
LOG.info("Sending combined block reports for " + dnR);
cluster.getNameNodeRpc().blockReport(dnR, poolId, reports);
}
}