From 3d15728ff5301296801e541d9b23bd1687c4adad Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Tue, 10 Feb 2015 10:43:08 +0530 Subject: [PATCH] HDFS-7714. Simultaneous restart of HA NameNodes and DataNode can cause DataNode to register successfully with only one NameNode. (Contributed by Vinayakumar B) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../apache/hadoop/hdfs/server/datanode/BPServiceActor.java | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 4a6bc11f746..1ca2263df58 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -896,6 +896,9 @@ Release 2.7.0 - UNRELEASED HDFS-7718. Store KeyProvider in ClientContext to avoid leaking key provider threads when using FileContext (Arun Suresh via Colin P. McCabe) + HDFS-7714. Simultaneous restart of HA NameNodes and DataNode can cause + DataNode to register successfully with only one NameNode.(vinayakumarb) + Release 2.6.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index e3967270f8b..917b5dde7c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -19,6 +19,7 @@ import static org.apache.hadoop.util.Time.now; +import java.io.EOFException; import java.io.IOException; import java.net.InetSocketAddress; import java.net.SocketTimeoutException; @@ -797,6 +798,10 @@ void register() throws IOException { // Use returned registration from namenode with updated fields bpRegistration = bpNamenode.registerDatanode(bpRegistration); break; + } catch(EOFException e) { // 
namenode might have just restarted + LOG.info("Problem connecting to server: " + nnAddr + " :" + + e.getLocalizedMessage()); + sleepAndLogInterrupts(1000, "connecting to server"); } catch(SocketTimeoutException e) { // namenode is busy LOG.info("Problem connecting to server: " + nnAddr); sleepAndLogInterrupts(1000, "connecting to server");