HDFS-9500. Fix software version counts for DataNodes during rolling upgrade. Contributed by Erik Krogen.

(cherry picked from commit f3ac1f41b8)
This commit is contained in:
Erik Krogen 2016-10-27 15:14:21 -07:00 committed by Konstantin V Shvachko
parent edaa37177b
commit d002e4d10b
3 changed files with 53 additions and 6 deletions

View File

@ -181,6 +181,9 @@ Release 2.7.4 - UNRELEASED
HDFS-11015. Enforce timeout in balancer. (kihwal via zhz)
HDFS-9500. Fix software version counts for DataNodes during rolling upgrade.
(Erik Krogen via shv)
Release 2.7.3 - 2016-08-25
INCOMPATIBLE CHANGES

View File

@ -657,19 +657,26 @@ public class DatanodeManager {
}
}
/**
 * Decides whether a datanode takes part in the per-version software counts.
 *
 * Returns true for every datanode that has a non-null software version and
 * whose {@code isAlive} flag is set, indicating the node has not yet been
 * removed. Only {@code isAlive} is consulted, deliberately without
 * {@link DatanodeManager#isDatanodeDead(DatanodeDescriptor)}, to ensure that
 * the version is decremented even if the datanode hasn't issued a heartbeat
 * recently.
 *
 * @param node The datanode in question
 * @return True iff the node's software version should be counted (and
 *         therefore decremented again once the node is removed)
 */
private boolean shouldCountVersion(DatanodeDescriptor node) {
  // A single return: the heartbeat-based isDatanodeDead() check is
  // intentionally not part of this predicate (see Javadoc above).
  return node.getSoftwareVersion() != null && node.isAlive;
}
private void countSoftwareVersions() {
synchronized(datanodeMap) {
HashMap<String, Integer> versionCount = new HashMap<String, Integer>();
for(DatanodeDescriptor dn: datanodeMap.values()) {
// Check isAlive too because right after removeDatanode(),
// isDatanodeDead() is still true
if(shouldCountVersion(dn))
{
if (shouldCountVersion(dn)) {
Integer num = versionCount.get(dn.getSoftwareVersion());
num = num == null ? 1 : num+1;
versionCount.put(dn.getSoftwareVersion(), num);

View File

@ -77,6 +77,43 @@ public class TestDatanodeManager {
return HostFileManager.parseEntry("dummy", "dummy", host);
}
/**
 * This test checks that if a node is re-registered with a new software
 * version after the heartbeat expiry interval but before the HeartbeatManager
 * has a chance to detect this and remove it, the node's version will still
 * be correctly decremented.
 *
 * @throws IOException if registering the datanode fails
 * @throws InterruptedException if the sleep between registrations is
 *         interrupted
 */
@Test
public void testNumVersionsCorrectAfterReregister()
    throws IOException, InterruptedException {
  // Create the DatanodeManager which will be tested
  FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
  Mockito.when(fsn.hasWriteLock()).thenReturn(true);
  Configuration conf = new Configuration();
  // NOTE(review): these two settings presumably shrink the heartbeat expiry
  // window below the 25 ms sleep used further down - confirm against
  // DatanodeManager's expiry computation.
  conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 0);
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 10);
  DatanodeManager dm = mockDatanodeManager(fsn, conf);

  String storageID = "someStorageID1";
  String ip = "someIP" + storageID;

  // Register then reregister the same node but with a different version
  for (int i = 0; i <= 1; i++) {
    dm.registerDatanode(new DatanodeRegistration(
        new DatanodeID(ip, "", storageID, 9000, 0, 0, 0),
        null, null, "version" + i));
    if (i == 0) {
      // Pause only between the two registrations so the second one arrives
      // after the first registration has had time to go stale.
      Thread.sleep(25);
    }
  }

  // Verify DatanodeManager has the correct count
  Map<String, Integer> mapToCheck = dm.getDatanodesSoftwareVersions();
  assertNull("should be no more version0 nodes", mapToCheck.get("version0"));
  // Expected value goes first; comparing the boxed value directly turns a
  // missing "version1" entry into an assertion failure instead of an NPE.
  assertEquals("should be one version1 node",
      Integer.valueOf(1), mapToCheck.get("version1"));
}
/**
* This test sends a random sequence of node registrations and node removals
* to the DatanodeManager (of nodes with different IDs and versions), and