HDFS-2826. Add test case for HDFS-1476 (safemode can initialize replication queues before exiting) (todd)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1235068 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
cfff5ff7f4
commit
d6ebd72325
|
@ -113,6 +113,9 @@ Release 0.23.1 - UNRELEASED
|
||||||
HDFS-2825. Add test hook to turn off the writer preferring its local
|
HDFS-2825. Add test hook to turn off the writer preferring its local
|
||||||
DN. (todd)
|
DN. (todd)
|
||||||
|
|
||||||
|
HDFS-2826. Add test case for HDFS-1476 (safemode can initialize
|
||||||
|
replication queues before exiting) (todd)
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
HDFS-2541. For a sufficiently large value of blocks, the DN Scanner
|
HDFS-2541. For a sufficiently large value of blocks, the DN Scanner
|
||||||
|
|
|
@ -171,6 +171,8 @@ import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.util.VersionInfo;
|
import org.apache.hadoop.util.VersionInfo;
|
||||||
import org.mortbay.util.ajax.JSON;
|
import org.mortbay.util.ajax.JSON;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
/***************************************************
|
/***************************************************
|
||||||
* FSNamesystem does the actual bookkeeping work for the
|
* FSNamesystem does the actual bookkeeping work for the
|
||||||
* DataNode.
|
* DataNode.
|
||||||
|
@ -2814,7 +2816,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
/** Total number of blocks. */
|
/** Total number of blocks. */
|
||||||
int blockTotal;
|
int blockTotal;
|
||||||
/** Number of safe blocks. */
|
/** Number of safe blocks. */
|
||||||
private int blockSafe;
|
int blockSafe;
|
||||||
/** Number of blocks needed to satisfy safe mode threshold condition */
|
/** Number of blocks needed to satisfy safe mode threshold condition */
|
||||||
private int blockThreshold;
|
private int blockThreshold;
|
||||||
/** Number of blocks needed before populating replication queues */
|
/** Number of blocks needed before populating replication queues */
|
||||||
|
@ -2822,7 +2824,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
/** time of the last status printout */
|
/** time of the last status printout */
|
||||||
private long lastStatusReport = 0;
|
private long lastStatusReport = 0;
|
||||||
/** flag indicating whether replication queues have been initialized */
|
/** flag indicating whether replication queues have been initialized */
|
||||||
private boolean initializedReplQueues = false;
|
boolean initializedReplQueues = false;
|
||||||
/** Was safemode entered automatically because available resources were low. */
|
/** Was safemode entered automatically because available resources were low. */
|
||||||
private boolean resourcesLow = false;
|
private boolean resourcesLow = false;
|
||||||
|
|
||||||
|
@ -2952,9 +2954,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
*/
|
*/
|
||||||
private synchronized void initializeReplQueues() {
|
private synchronized void initializeReplQueues() {
|
||||||
LOG.info("initializing replication queues");
|
LOG.info("initializing replication queues");
|
||||||
if (isPopulatingReplQueues()) {
|
assert !isPopulatingReplQueues() : "Already initialized repl queues";
|
||||||
LOG.warn("Replication queues already initialized.");
|
|
||||||
}
|
|
||||||
long startTimeMisReplicatedScan = now();
|
long startTimeMisReplicatedScan = now();
|
||||||
blockManager.processMisReplicatedBlocks();
|
blockManager.processMisReplicatedBlocks();
|
||||||
initializedReplQueues = true;
|
initializedReplQueues = true;
|
||||||
|
@ -4384,4 +4384,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
byte[] password) throws InvalidToken {
|
byte[] password) throws InvalidToken {
|
||||||
getDelegationTokenSecretManager().verifyToken(identifier, password);
|
getDelegationTokenSecretManager().verifyToken(identifier, password);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public SafeModeInfo getSafeModeInfoForTests() {
|
||||||
|
return safeMode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,22 +26,29 @@ import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.base.Supplier;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests to verify safe mode correctness.
|
* Tests to verify safe mode correctness.
|
||||||
*/
|
*/
|
||||||
public class TestSafeMode {
|
public class TestSafeMode {
|
||||||
|
private static final Path TEST_PATH = new Path("/test");
|
||||||
private static final int BLOCK_SIZE = 1024;
|
private static final int BLOCK_SIZE = 1024;
|
||||||
Configuration conf;
|
Configuration conf;
|
||||||
MiniDFSCluster cluster;
|
MiniDFSCluster cluster;
|
||||||
|
@ -92,7 +99,7 @@ public class TestSafeMode {
|
||||||
|
|
||||||
// create two files with one block each.
|
// create two files with one block each.
|
||||||
DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0);
|
DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0);
|
||||||
DFSTestUtil.createFile(fs, file2, 2000, (short)1, 0);
|
DFSTestUtil.createFile(fs, file2, 1000, (short)1, 0);
|
||||||
fs.close();
|
fs.close();
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
|
|
||||||
|
@ -137,6 +144,66 @@ public class TestSafeMode {
|
||||||
assertEquals("", status);
|
assertEquals("", status);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that the NN initializes its under-replicated blocks queue
|
||||||
|
* before it is ready to exit safemode (HDFS-1476)
|
||||||
|
*/
|
||||||
|
@Test(timeout=45000)
|
||||||
|
public void testInitializeReplQueuesEarly() throws Exception {
|
||||||
|
// Spray the blocks around the cluster when we add DNs instead of
|
||||||
|
// concentrating all blocks on the first node.
|
||||||
|
BlockManagerTestUtil.setWritingPrefersLocalNode(
|
||||||
|
cluster.getNamesystem().getBlockManager(), false);
|
||||||
|
|
||||||
|
cluster.startDataNodes(conf, 2, true, StartupOption.REGULAR, null);
|
||||||
|
cluster.waitActive();
|
||||||
|
DFSTestUtil.createFile(fs, TEST_PATH, 15*BLOCK_SIZE, (short)1, 1L);
|
||||||
|
|
||||||
|
|
||||||
|
List<DataNodeProperties> dnprops = Lists.newLinkedList();
|
||||||
|
dnprops.add(cluster.stopDataNode(0));
|
||||||
|
dnprops.add(cluster.stopDataNode(0));
|
||||||
|
dnprops.add(cluster.stopDataNode(0));
|
||||||
|
|
||||||
|
cluster.getConfiguration(0).setFloat(
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 1f/15f);
|
||||||
|
|
||||||
|
cluster.restartNameNode();
|
||||||
|
final NameNode nn = cluster.getNameNode();
|
||||||
|
|
||||||
|
String status = nn.getNamesystem().getSafemode();
|
||||||
|
assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
|
||||||
|
"15 blocks to reach the threshold 0.9990 of total blocks 15. " +
|
||||||
|
"Safe mode will be turned off automatically.", status);
|
||||||
|
assertFalse("Mis-replicated block queues should not be initialized " +
|
||||||
|
"until threshold is crossed",
|
||||||
|
NameNodeAdapter.safeModeInitializedReplQueues(nn));
|
||||||
|
|
||||||
|
cluster.restartDataNode(dnprops.remove(0));
|
||||||
|
|
||||||
|
// Wait for the block report from the restarted DN to come in.
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
return NameNodeAdapter.getSafeModeSafeBlocks(nn) > 0;
|
||||||
|
}
|
||||||
|
}, 10, 10000);
|
||||||
|
// SafeMode uses fine-grained synchronization, so the processMisReplicatedBlocks
|
||||||
|
// call is still going on at this point - wait until it's done by grabbing
|
||||||
|
// the lock.
|
||||||
|
nn.getNamesystem().writeLock();
|
||||||
|
nn.getNamesystem().writeUnlock();
|
||||||
|
int safe = NameNodeAdapter.getSafeModeSafeBlocks(nn);
|
||||||
|
assertTrue("Expected first block report to make some but not all blocks " +
|
||||||
|
"safe. Got: " + safe, safe >= 1 && safe < 15);
|
||||||
|
BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
|
||||||
|
|
||||||
|
assertTrue(NameNodeAdapter.safeModeInitializedReplQueues(nn));
|
||||||
|
assertEquals(15 - safe, nn.getNamesystem().getUnderReplicatedBlocks());
|
||||||
|
|
||||||
|
cluster.restartDataNodes();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that, when under-replicated blocks are processed at the end of
|
* Test that, when under-replicated blocks are processed at the end of
|
||||||
* safe-mode, blocks currently under construction are not considered
|
* safe-mode, blocks currently under construction are not considered
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.ipc.Server;
|
import org.apache.hadoop.ipc.Server;
|
||||||
|
|
||||||
|
@ -97,4 +98,28 @@ public class NameNodeAdapter {
|
||||||
ns.readUnlock();
|
ns.readUnlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the number of blocks marked safe by safemode, or -1
|
||||||
|
* if safemode is not running.
|
||||||
|
*/
|
||||||
|
public static int getSafeModeSafeBlocks(NameNode nn) {
|
||||||
|
SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
|
||||||
|
if (smi == null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return smi.blockSafe;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if safemode is not running, or if safemode has already
|
||||||
|
* initialized the replication queues
|
||||||
|
*/
|
||||||
|
public static boolean safeModeInitializedReplQueues(NameNode nn) {
|
||||||
|
SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
|
||||||
|
if (smi == null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return smi.initializedReplQueues;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue