From 2777f777e9a05f804695d58bce6ad2d82692438f Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Fri, 1 Aug 2014 21:45:38 +0000 Subject: [PATCH] Revert HDFS-6788, bad merge. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1615239 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 - .../hdfs/server/datanode/BPOfferService.java | 299 +++++++----------- 2 files changed, 113 insertions(+), 189 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index ffe3101adf5..f7b4bba5c81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -97,9 +97,6 @@ Release 2.6.0 - UNRELEASED HDFS-6802. Some tests in TestDFSClientFailover are missing @Test annotation. (Akira Ajisaka via wang) - HDFS-6788. Improve synchronization in BPOfferService with read write lock. - (Yongjun Zhang via wang) - OPTIMIZATIONS HDFS-6690. Deduplicate xattr names in memory. (wang) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index d35db9b8f0f..ed541756b79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -21,7 +21,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; - import org.apache.commons.logging.Log; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; @@ -39,8 +38,6 @@ import java.util.List; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; /** * One instance per block-pool/namespace on the DN, which handles the @@ -94,28 +91,6 @@ class BPOfferService { */ private long lastActiveClaimTxId = -1; - private final ReentrantReadWriteLock mReadWriteLock = - new ReentrantReadWriteLock(); - private final Lock mReadLock = mReadWriteLock.readLock(); - private final Lock mWriteLock = mReadWriteLock.writeLock(); - - // utility methods to acquire and release read lock and write lock - void readLock() { - mReadLock.lock(); - } - - void readUnlock() { - mReadLock.unlock(); - } - - void writeLock() { - mWriteLock.lock(); - } - - void writeUnlock() { - mWriteLock.unlock(); - } - BPOfferService(List nnAddrs, DataNode dn) { Preconditions.checkArgument(!nnAddrs.isEmpty(), "Must pass at least one NN."); @@ -160,19 +135,14 @@ boolean isAlive() { } return false; } - - String getBlockPoolId() { - readLock(); - try { - if (bpNSInfo != null) { - return bpNSInfo.getBlockPoolID(); - } else { - LOG.warn("Block pool ID needed, but service not yet registered with NN", - new Exception("trace")); - return null; - } - } finally { - readUnlock(); + + synchronized String getBlockPoolId() { + if (bpNSInfo != null) { + return bpNSInfo.getBlockPoolID(); + } else { + LOG.warn("Block pool ID needed, but service not yet registered with NN", + new Exception("trace")); + return null; } } @@ -180,37 +150,27 @@ boolean hasBlockPoolId() { return getNamespaceInfo() != null; } - NamespaceInfo getNamespaceInfo() { - readLock(); - try { - return bpNSInfo; - } finally { - readUnlock(); - } + synchronized NamespaceInfo getNamespaceInfo() { + return bpNSInfo; } @Override - public String toString() { - readLock(); - try { - if (bpNSInfo == null) { - // If we haven't yet connected to our NN, we don't yet know our - // own block pool ID. - // If _none_ of the block pools have connected yet, we don't even - // know the DatanodeID ID of this DN. - String datanodeUuid = dn.getDatanodeUuid(); + public synchronized String toString() { + if (bpNSInfo == null) { + // If we haven't yet connected to our NN, we don't yet know our + // own block pool ID. + // If _none_ of the block pools have connected yet, we don't even + // know the DatanodeID ID of this DN. + String datanodeUuid = dn.getDatanodeUuid(); - if (datanodeUuid == null || datanodeUuid.isEmpty()) { - datanodeUuid = "unassigned"; - } - return "Block pool (Datanode Uuid " + datanodeUuid + ")"; - } else { - return "Block pool " + getBlockPoolId() + - " (Datanode Uuid " + dn.getDatanodeUuid() + - ")"; + if (datanodeUuid == null || datanodeUuid.isEmpty()) { + datanodeUuid = "unassigned"; } - } finally { - readUnlock(); + return "Block pool (Datanode Uuid " + datanodeUuid + ")"; + } else { + return "Block pool " + getBlockPoolId() + + " (Datanode Uuid " + dn.getDatanodeUuid() + + ")"; } } @@ -306,37 +266,32 @@ DataNode getDataNode() { * verifies that this namespace matches (eg to prevent a misconfiguration * where a StandbyNode from a different cluster is specified) */ - void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException { - writeLock(); - try { - if (this.bpNSInfo == null) { - this.bpNSInfo = nsInfo; - boolean success = false; + synchronized void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException { + if (this.bpNSInfo == null) { + this.bpNSInfo = nsInfo; + boolean success = false; - // Now that we know the namespace ID, etc, we can pass this to the DN. - // The DN can now initialize its local storage if we are the - // first BP to handshake, etc. - try { - dn.initBlockPool(this); - success = true; - } finally { - if (!success) { - // The datanode failed to initialize the BP. We need to reset - // the namespace info so that other BPService actors still have - // a chance to set it, and re-initialize the datanode. - this.bpNSInfo = null; - } + // Now that we know the namespace ID, etc, we can pass this to the DN. + // The DN can now initialize its local storage if we are the + // first BP to handshake, etc. + try { + dn.initBlockPool(this); + success = true; + } finally { + if (!success) { + // The datanode failed to initialize the BP. We need to reset + // the namespace info so that other BPService actors still have + // a chance to set it, and re-initialize the datanode. + this.bpNSInfo = null; } - } else { - checkNSEquality(bpNSInfo.getBlockPoolID(), nsInfo.getBlockPoolID(), - "Blockpool ID"); - checkNSEquality(bpNSInfo.getNamespaceID(), nsInfo.getNamespaceID(), - "Namespace ID"); - checkNSEquality(bpNSInfo.getClusterID(), nsInfo.getClusterID(), - "Cluster ID"); } - } finally { - writeUnlock(); + } else { + checkNSEquality(bpNSInfo.getBlockPoolID(), nsInfo.getBlockPoolID(), + "Blockpool ID"); + checkNSEquality(bpNSInfo.getNamespaceID(), nsInfo.getNamespaceID(), + "Namespace ID"); + checkNSEquality(bpNSInfo.getClusterID(), nsInfo.getClusterID(), + "Cluster ID"); } } @@ -345,27 +300,22 @@ void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException { * NN, it calls this function to verify that the NN it connected to * is consistent with other NNs serving the block-pool. */ - void registrationSucceeded(BPServiceActor bpServiceActor, + synchronized void registrationSucceeded(BPServiceActor bpServiceActor, DatanodeRegistration reg) throws IOException { - writeLock(); - try { - if (bpRegistration != null) { - checkNSEquality(bpRegistration.getStorageInfo().getNamespaceID(), - reg.getStorageInfo().getNamespaceID(), "namespace ID"); - checkNSEquality(bpRegistration.getStorageInfo().getClusterID(), - reg.getStorageInfo().getClusterID(), "cluster ID"); - } else { - bpRegistration = reg; - } - - dn.bpRegistrationSucceeded(bpRegistration, getBlockPoolId()); - // Add the initial block token secret keys to the DN's secret manager. - if (dn.isBlockTokenEnabled) { - dn.blockPoolTokenSecretManager.addKeys(getBlockPoolId(), - reg.getExportedKeys()); - } - } finally { - writeUnlock(); + if (bpRegistration != null) { + checkNSEquality(bpRegistration.getStorageInfo().getNamespaceID(), + reg.getStorageInfo().getNamespaceID(), "namespace ID"); + checkNSEquality(bpRegistration.getStorageInfo().getClusterID(), + reg.getStorageInfo().getClusterID(), "cluster ID"); + } else { + bpRegistration = reg; + } + + dn.bpRegistrationSucceeded(bpRegistration, getBlockPoolId()); + // Add the initial block token secret keys to the DN's secret manager. + if (dn.isBlockTokenEnabled) { + dn.blockPoolTokenSecretManager.addKeys(getBlockPoolId(), + reg.getExportedKeys()); } } @@ -383,35 +333,25 @@ private static void checkNSEquality( } } - DatanodeRegistration createRegistration() { - writeLock(); - try { - Preconditions.checkState(bpNSInfo != null, - "getRegistration() can only be called after initial handshake"); - return dn.createBPRegistration(bpNSInfo); - } finally { - writeUnlock(); - } + synchronized DatanodeRegistration createRegistration() { + Preconditions.checkState(bpNSInfo != null, + "getRegistration() can only be called after initial handshake"); + return dn.createBPRegistration(bpNSInfo); } /** * Called when an actor shuts down. If this is the last actor * to shut down, shuts down the whole blockpool in the DN. */ - void shutdownActor(BPServiceActor actor) { - writeLock(); - try { - if (bpServiceToActive == actor) { - bpServiceToActive = null; - } + synchronized void shutdownActor(BPServiceActor actor) { + if (bpServiceToActive == actor) { + bpServiceToActive = null; + } - bpServices.remove(actor); + bpServices.remove(actor); - if (bpServices.isEmpty()) { - dn.shutdownBlockPool(this); - } - } finally { - writeUnlock(); + if (bpServices.isEmpty()) { + dn.shutdownBlockPool(this); } } @@ -453,16 +393,11 @@ void reportRemoteBadBlock(DatanodeInfo dnInfo, ExtendedBlock block) { * @return a proxy to the active NN, or null if the BPOS has not * acknowledged any NN as active yet. */ - DatanodeProtocolClientSideTranslatorPB getActiveNN() { - readLock(); - try { - if (bpServiceToActive != null) { - return bpServiceToActive.bpNamenode; - } else { - return null; - } - } finally { - readUnlock(); + synchronized DatanodeProtocolClientSideTranslatorPB getActiveNN() { + if (bpServiceToActive != null) { + return bpServiceToActive.bpNamenode; + } else { + return null; } } @@ -490,50 +425,45 @@ void signalRollingUpgrade(boolean inProgress) { * @param actor the actor which received the heartbeat * @param nnHaState the HA-related heartbeat contents */ - void updateActorStatesFromHeartbeat( + synchronized void updateActorStatesFromHeartbeat( BPServiceActor actor, NNHAStatusHeartbeat nnHaState) { - writeLock(); - try { - final long txid = nnHaState.getTxId(); - - final boolean nnClaimsActive = - nnHaState.getState() == HAServiceState.ACTIVE; - final boolean bposThinksActive = bpServiceToActive == actor; - final boolean isMoreRecentClaim = txid > lastActiveClaimTxId; - - if (nnClaimsActive && !bposThinksActive) { - LOG.info("Namenode " + actor + " trying to claim ACTIVE state with " + - "txid=" + txid); - if (!isMoreRecentClaim) { - // Split-brain scenario - an NN is trying to claim active - // state when a different NN has already claimed it with a higher - // txid. - LOG.warn("NN " + actor + " tried to claim ACTIVE state at txid=" + - txid + " but there was already a more recent claim at txid=" + - lastActiveClaimTxId); - return; + final long txid = nnHaState.getTxId(); + + final boolean nnClaimsActive = + nnHaState.getState() == HAServiceState.ACTIVE; + final boolean bposThinksActive = bpServiceToActive == actor; + final boolean isMoreRecentClaim = txid > lastActiveClaimTxId; + + if (nnClaimsActive && !bposThinksActive) { + LOG.info("Namenode " + actor + " trying to claim ACTIVE state with " + + "txid=" + txid); + if (!isMoreRecentClaim) { + // Split-brain scenario - an NN is trying to claim active + // state when a different NN has already claimed it with a higher + // txid. + LOG.warn("NN " + actor + " tried to claim ACTIVE state at txid=" + + txid + " but there was already a more recent claim at txid=" + + lastActiveClaimTxId); + return; + } else { + if (bpServiceToActive == null) { + LOG.info("Acknowledging ACTIVE Namenode " + actor); } else { - if (bpServiceToActive == null) { - LOG.info("Acknowledging ACTIVE Namenode " + actor); - } else { - LOG.info("Namenode " + actor + " taking over ACTIVE state from " + - bpServiceToActive + " at higher txid=" + txid); - } - bpServiceToActive = actor; + LOG.info("Namenode " + actor + " taking over ACTIVE state from " + + bpServiceToActive + " at higher txid=" + txid); } - } else if (!nnClaimsActive && bposThinksActive) { - LOG.info("Namenode " + actor + " relinquishing ACTIVE state with " + - "txid=" + nnHaState.getTxId()); - bpServiceToActive = null; + bpServiceToActive = actor; } - - if (bpServiceToActive == actor) { - assert txid >= lastActiveClaimTxId; - lastActiveClaimTxId = txid; - } - } finally { - writeUnlock(); + } else if (!nnClaimsActive && bposThinksActive) { + LOG.info("Namenode " + actor + " relinquishing ACTIVE state with " + + "txid=" + nnHaState.getTxId()); + bpServiceToActive = null; + } + + if (bpServiceToActive == actor) { + assert txid >= lastActiveClaimTxId; + lastActiveClaimTxId = txid; } } @@ -604,14 +534,11 @@ boolean processCommandFromActor(DatanodeCommand cmd, actor.reRegister(); return true; } - writeLock(); - try { + synchronized (this) { if (actor == bpServiceToActive) { return processCommandFromActive(cmd, actor); } else { return processCommandFromStandby(cmd, actor); - } finally { - writeUnlock(); } } }