HDFS-8995. Flaw in registration bookkeeping can make DN die on reconnect. (Kihwal Lee via yliu)

This commit is contained in:
yliu 2015-09-02 08:58:51 +08:00
parent 4620767156
commit 5652131d2e
4 changed files with 9 additions and 9 deletions

View File

@ -1302,6 +1302,9 @@ Release 2.7.2 - UNRELEASED
HDFS-8891. HDFS concat should keep srcs order. (Yong Zhang via jing9) HDFS-8891. HDFS concat should keep srcs order. (Yong Zhang via jing9)
HDFS-8995. Flaw in registration bookkeeping can make DN die on reconnect.
(Kihwal Lee via yliu)
Release 2.7.1 - 2015-07-06 Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -351,9 +351,8 @@ class BPOfferService {
reg.getStorageInfo().getNamespaceID(), "namespace ID"); reg.getStorageInfo().getNamespaceID(), "namespace ID");
checkNSEquality(bpRegistration.getStorageInfo().getClusterID(), checkNSEquality(bpRegistration.getStorageInfo().getClusterID(),
reg.getStorageInfo().getClusterID(), "cluster ID"); reg.getStorageInfo().getClusterID(), "cluster ID");
} else {
bpRegistration = reg;
} }
bpRegistration = reg;
dn.bpRegistrationSucceeded(bpRegistration, getBlockPoolId()); dn.bpRegistrationSucceeded(bpRegistration, getBlockPoolId());
// Add the initial block token secret keys to the DN's secret manager. // Add the initial block token secret keys to the DN's secret manager.

View File

@ -767,15 +767,16 @@ class BPServiceActor implements Runnable {
void register(NamespaceInfo nsInfo) throws IOException { void register(NamespaceInfo nsInfo) throws IOException {
// The handshake() phase loaded the block pool storage // The handshake() phase loaded the block pool storage
// off disk - so update the bpRegistration object from that info // off disk - so update the bpRegistration object from that info
bpRegistration = bpos.createRegistration(); DatanodeRegistration newBpRegistration = bpos.createRegistration();
LOG.info(this + " beginning handshake with NN"); LOG.info(this + " beginning handshake with NN");
while (shouldRun()) { while (shouldRun()) {
try { try {
// Use returned registration from namenode with updated fields // Use returned registration from namenode with updated fields
bpRegistration = bpNamenode.registerDatanode(bpRegistration); newBpRegistration = bpNamenode.registerDatanode(newBpRegistration);
bpRegistration.setNamespaceInfo(nsInfo); newBpRegistration.setNamespaceInfo(nsInfo);
bpRegistration = newBpRegistration;
break; break;
} catch(EOFException e) { // namenode might have just restarted } catch(EOFException e) { // namenode might have just restarted
LOG.info("Problem connecting to server: " + nnAddr + " :" LOG.info("Problem connecting to server: " + nnAddr + " :"

View File

@ -1261,10 +1261,7 @@ public class DataNode extends ReconfigurableBase
*/ */
synchronized void bpRegistrationSucceeded(DatanodeRegistration bpRegistration, synchronized void bpRegistrationSucceeded(DatanodeRegistration bpRegistration,
String blockPoolId) throws IOException { String blockPoolId) throws IOException {
// Set the ID if we haven't already id = bpRegistration;
if (null == id) {
id = bpRegistration;
}
if(!storage.getDatanodeUuid().equals(bpRegistration.getDatanodeUuid())) { if(!storage.getDatanodeUuid().equals(bpRegistration.getDatanodeUuid())) {
throw new IOException("Inconsistent Datanode IDs. Name-node returned " throw new IOException("Inconsistent Datanode IDs. Name-node returned "