HDFS-16540. Data locality is lost when DataNode pod restarts in kubernetes (#4170) (#4246)

Cherry-pick backport of 9ed8d60511
This commit is contained in:
Michael Stack 2022-05-15 21:32:48 -07:00 committed by GitHub
parent d58f9d438f
commit 1d226874d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 3 additions and 2 deletions

View File

@@ -1172,6 +1172,7 @@ public class DatanodeManager {
DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr( DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr(
nodeReg.getIpAddr(), nodeReg.getXferPort()); nodeReg.getIpAddr(), nodeReg.getXferPort());
// Non-change. REMOVE. Number 2
if (nodeN != null && nodeN != nodeS) { if (nodeN != null && nodeN != nodeS) {
NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN); NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN);
// nodeN previously served a different data storage, // nodeN previously served a different data storage,
@@ -1193,14 +1194,14 @@ public class DatanodeManager {
} }
} else { } else {
// nodeS is found // nodeS is found
/* The registering datanode is a replacement node for the existing /* The registering datanode is a replacement node for the existing
data storage, which from now on will be served by a new node. data storage, which from now on will be served by a new node.
If this message repeats, both nodes might have same storageID If this message repeats, both nodes might have same storageID
by (insanely rare) random chance. User needs to restart one of the by (insanely rare) random chance. User needs to restart one of the
nodes with its data cleared (or user can just remove the StorageID nodes with its data cleared (or user can just remove the StorageID
value in "VERSION" file under the data directory of the datanode, value in "VERSION" file under the data directory of the datanode,
but this might not work if VERSION file format has changed). but this might not work if VERSION file format has changed).
*/ */
NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS
+ " is replaced by " + nodeReg + " with the same storageID " + " is replaced by " + nodeReg + " with the same storageID "
+ nodeReg.getDatanodeUuid()); + nodeReg.getDatanodeUuid());