HDFS-3931. TestDatanodeBlockScanner#testBlockCorruptionPolicy2 is broken. Contributed by Andy Isaacson

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1388331 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-09-21 05:48:42 +00:00
parent 7dee7f11f6
commit 3ea3508319
3 changed files with 40 additions and 24 deletions

View File

@ -254,6 +254,9 @@ Release 2.0.3-alpha - Unreleased
HDFS-3932. NameNode Web UI broken if the rpc-address is set to the wildcard.
(Colin Patrick McCabe via eli)
HDFS-3931. TestDatanodeBlockScanner#testBlockCorruptionPolicy2 is broken.
(Andy Isaacson via eli)
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES

View File

@ -506,7 +506,7 @@ public class DFSTestUtil {
public static void waitReplication(FileSystem fs, Path fileName, short replFactor) public static void waitReplication(FileSystem fs, Path fileName, short replFactor)
throws IOException, InterruptedException, TimeoutException { throws IOException, InterruptedException, TimeoutException {
boolean correctReplFactor; boolean correctReplFactor;
final int ATTEMPTS = 20; final int ATTEMPTS = 40;
int count = 0; int count = 0;
do { do {

View File

@ -269,6 +269,7 @@ public class TestDatanodeBlockScanner {
conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 3); conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 3);
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 3L); conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 3L);
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false);
conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, 5L);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
cluster.waitActive(); cluster.waitActive();
@ -276,35 +277,47 @@ public class TestDatanodeBlockScanner {
Path file1 = new Path("/tmp/testBlockCorruptRecovery/file"); Path file1 = new Path("/tmp/testBlockCorruptRecovery/file");
DFSTestUtil.createFile(fs, file1, 1024, numReplicas, 0); DFSTestUtil.createFile(fs, file1, 1024, numReplicas, 0);
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1); ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
final int ITERATIONS = 10;
// Wait until block is replicated to numReplicas // Wait until block is replicated to numReplicas
DFSTestUtil.waitReplication(fs, file1, numReplicas); DFSTestUtil.waitReplication(fs, file1, numReplicas);
// Corrupt numCorruptReplicas replicas of block for (int k = 0; ; k++) {
int[] corruptReplicasDNIDs = new int[numCorruptReplicas]; // Corrupt numCorruptReplicas replicas of block
for (int i=0, j=0; (j != numCorruptReplicas) && (i < numDataNodes); i++) { int[] corruptReplicasDNIDs = new int[numCorruptReplicas];
if (corruptReplica(block, i)) { for (int i=0, j=0; (j != numCorruptReplicas) && (i < numDataNodes); i++) {
corruptReplicasDNIDs[j++] = i; if (corruptReplica(block, i)) {
LOG.info("successfully corrupted block " + block + " on node " corruptReplicasDNIDs[j++] = i;
+ i + " " + cluster.getDataNodes().get(i).getDisplayName()); LOG.info("successfully corrupted block " + block + " on node "
+ i + " " + cluster.getDataNodes().get(i).getDisplayName());
}
} }
}
// Restart the datanodes containing corrupt replicas // Restart the datanodes containing corrupt replicas
// so they would be reported to namenode and re-replicated // so they would be reported to namenode and re-replicated
// They MUST be restarted in reverse order from highest to lowest index, // They MUST be restarted in reverse order from highest to lowest index,
// because the act of restarting them removes them from the ArrayList // because the act of restarting them removes them from the ArrayList
// and causes the indexes of all nodes above them in the list to change. // and causes the indexes of all nodes above them in the list to change.
for (int i = numCorruptReplicas - 1; i >= 0 ; i--) { for (int i = numCorruptReplicas - 1; i >= 0 ; i--) {
LOG.info("restarting node with corrupt replica: position " LOG.info("restarting node with corrupt replica: position "
+ i + " node " + corruptReplicasDNIDs[i] + " " + i + " node " + corruptReplicasDNIDs[i] + " "
+ cluster.getDataNodes().get(corruptReplicasDNIDs[i]).getDisplayName()); + cluster.getDataNodes().get(corruptReplicasDNIDs[i]).getDisplayName());
cluster.restartDataNode(corruptReplicasDNIDs[i]); cluster.restartDataNode(corruptReplicasDNIDs[i]);
} }
// Loop until all corrupt replicas are reported // Loop until all corrupt replicas are reported
DFSTestUtil.waitCorruptReplicas(fs, cluster.getNamesystem(), file1, try {
block, numCorruptReplicas); DFSTestUtil.waitCorruptReplicas(fs, cluster.getNamesystem(), file1,
block, numCorruptReplicas);
} catch(TimeoutException e) {
if (k > ITERATIONS) {
throw e;
}
LOG.info("Timed out waiting for corrupt replicas, trying again, iteration " + k);
continue;
}
break;
}
// Loop until the block recovers after replication // Loop until the block recovers after replication
DFSTestUtil.waitReplication(fs, file1, numReplicas); DFSTestUtil.waitReplication(fs, file1, numReplicas);