From 4d83618ec62c6a4682c71fd7fc1e4abf98514c0a Mon Sep 17 00:00:00 2001
From: Jing Zhao
Date: Fri, 11 Apr 2014 16:45:06 +0000
Subject: [PATCH] HDFS-6229. Merge r1586714 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1586715 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/hadoop/ipc/RetryCache.java     | 25 ++++++++++++++++---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 +++
 .../hdfs/server/namenode/FSNamesystem.java    | 16 ++++++++++--
 .../hadoop/hdfs/server/namenode/NameNode.java |  2 ++
 4 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java
index 2b8ad123cae..d35ed950cf4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.ipc;
 
 import java.util.Arrays;
 import java.util.UUID;
+import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -183,6 +184,8 @@ public class RetryCache {
   private final long expirationTime;
   private String cacheName;
 
+  private final ReentrantLock lock = new ReentrantLock();
+
   /**
    * Constructor
    * @param cacheName name to identify the cache by
@@ -206,6 +209,13 @@ public class RetryCache {
         || Arrays.equals(Server.getClientId(), RpcConstants.DUMMY_CLIENT_ID);
   }
 
+  public void lock() {
+    this.lock.lock();
+  }
+
+  public void unlock() {
+    this.lock.unlock();
+  }
 
   private void incrCacheClearedCounter() {
     retryCacheMetrics.incrCacheCleared();
@@ -247,7 +257,8 @@ public class RetryCache {
    */
   private CacheEntry waitForCompletion(CacheEntry newEntry) {
     CacheEntry mapEntry = null;
-    synchronized (this) {
+    lock.lock();
+    try {
       mapEntry = set.get(newEntry);
       // If an entry in the cache does not exist, add a new one
       if (mapEntry == null) {
@@ -262,6 +273,8 @@
       } else {
         retryCacheMetrics.incrCacheHit();
       }
+    } finally {
+      lock.unlock();
     }
     // Entry already exists in cache. Wait for completion and return its state
     Preconditions.checkNotNull(mapEntry,
@@ -292,8 +305,11 @@
   public void addCacheEntry(byte[] clientId, int callId) {
     CacheEntry newEntry = new CacheEntry(clientId, callId, System.nanoTime()
         + expirationTime, true);
-    synchronized(this) {
+    lock.lock();
+    try {
       set.put(newEntry);
+    } finally {
+      lock.unlock();
     }
     retryCacheMetrics.incrCacheUpdated();
   }
@@ -303,8 +319,11 @@
     // since the entry is loaded from editlog, we can assume it succeeded.
     CacheEntry newEntry = new CacheEntryWithPayload(clientId, callId, payload,
         System.nanoTime() + expirationTime, true);
-    synchronized(this) {
+    lock.lock();
+    try {
       set.put(newEntry);
+    } finally {
+      lock.unlock();
     }
     retryCacheMetrics.incrCacheUpdated();
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 97676e14f18..13b4e54d372 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -131,6 +131,9 @@ Release 2.4.1 - UNRELEASED
     HDFS-6235. TestFileJournalManager can fail on Windows due to file locking
     if tests run out of order. (cnauroth)
 
+    HDFS-6229. Race condition in failover can cause RetryCache fail to work.
+    (jing9)
+
 Release 2.4.0 - 2014-04-07
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 9839c17e6c4..1f956f1dd52 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -791,7 +791,19 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   public RetryCache getRetryCache() {
     return retryCache;
   }
-
+
+  void lockRetryCache() {
+    if (retryCache != null) {
+      retryCache.lock();
+    }
+  }
+
+  void unlockRetryCache() {
+    if (retryCache != null) {
+      retryCache.unlock();
+    }
+  }
+
   /** Whether or not retry cache is enabled */
   boolean hasRetryCache() {
     return retryCache != null;
@@ -6922,8 +6934,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     if (cacheEntry != null && cacheEntry.isSuccess()) {
       return (String) cacheEntry.getPayload();
     }
-    writeLock();
     String snapshotPath = null;
+    writeLock();
     try {
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
index 64ee08e9aea..20bc74089c2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
@@ -1569,10 +1569,12 @@ public class NameNode implements NameNodeStatusMXBean {
     @Override
     public void writeLock() {
       namesystem.writeLock();
+      namesystem.lockRetryCache();
     }
 
     @Override
     public void writeUnlock() {
+      namesystem.unlockRetryCache();
       namesystem.writeUnlock();
     }
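
The change wires three pieces together: RetryCache trades its synchronized(this) blocks for an explicit ReentrantLock and exposes lock()/unlock(); FSNamesystem forwards them through lockRetryCache()/unlockRetryCache(); and the HA context in NameNode takes the retry-cache lock right after the namespace write lock and releases it just before, so the cache cannot be mutated while a failover-related operation holds the write lock. The standalone Java sketch below is a minimal illustration of that ordering, not the actual Hadoop implementation; SimpleRetryCache, SimpleNamesystem and their members are illustrative names only.

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * Minimal sketch of the lock pairing introduced by the patch: a coarse
 * write lock that also takes the retry cache's own lock, plus cache
 * operations that guard themselves with that same lock in try/finally.
 * Names are illustrative; this is not Hadoop code.
 */
public class RetryCacheLockingSketch {

  /** Simplified stand-in for RetryCache: a map guarded by a ReentrantLock. */
  static class SimpleRetryCache {
    private final ReentrantLock lock = new ReentrantLock();
    private final Map<Long, Boolean> entries = new HashMap<>();

    public void lock() {
      this.lock.lock();
    }

    public void unlock() {
      this.lock.unlock();
    }

    /** Mirrors addCacheEntry(): mutate only while holding the cache lock. */
    public void addEntry(long callId, boolean success) {
      lock.lock();
      try {
        entries.put(callId, success);
      } finally {
        lock.unlock();
      }
    }
  }

  /** Simplified stand-in for the namesystem/NameNode side of the patch. */
  static class SimpleNamesystem {
    private final ReentrantReadWriteLock fsLock = new ReentrantReadWriteLock();
    private final SimpleRetryCache retryCache = new SimpleRetryCache();

    /** Like NameNode.writeLock() after the patch: namespace lock, then cache lock. */
    public void writeLock() {
      fsLock.writeLock().lock();
      retryCache.lock();
    }

    /** Like NameNode.writeUnlock() after the patch: release in reverse order. */
    public void writeUnlock() {
      retryCache.unlock();
      fsLock.writeLock().unlock();
    }

    public SimpleRetryCache getRetryCache() {
      return retryCache;
    }
  }

  public static void main(String[] args) {
    SimpleNamesystem ns = new SimpleNamesystem();

    // An RPC handler records a completed call; it takes only the cache lock.
    ns.getRetryCache().addEntry(42L, true);

    // A state transition takes the combined lock; concurrent addEntry()
    // calls now block until writeUnlock(), which is the point of the fix.
    ns.writeLock();
    try {
      // ... work that must not race with retry cache updates ...
    } finally {
      ns.writeUnlock();
    }
  }
}

Acquiring in one order (namespace lock, then cache lock) and releasing in the reverse order keeps the lock ordering consistent for any path that takes both, while RPC handlers that only update the cache keep taking just the cache lock.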