diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3ce62b56d85..0fa946e7cf2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -194,6 +194,8 @@ Release 2.4.0 - UNRELEASED
HDFS-6050. NFS does not handle exceptions correctly in a few places
(brandonli)
+ HDFS-5138. Support HDFS upgrade in HA. (atm via todd)
+
OPTIMIZATIONS
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
index fa2c4d4471c..73326b9af9b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
@@ -189,5 +189,10 @@
+
+
+
+
+
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java
index b88f9dccddf..dff2647a482 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.contrib.bkjournal;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.JournalManager;
import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
@@ -665,6 +667,37 @@ public class BookKeeperJournalManager implements JournalManager {
throw new UnsupportedOperationException();
}
+ @Override
+ public void doPreUpgrade() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doUpgrade(Storage storage) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getJournalCTime() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doFinalize() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+ int targetLayoutVersion) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doRollback() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
@Override
public void close() throws IOException {
try {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperAsHASharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperAsHASharedDir.java
index 0a14e785758..5611bb88a26 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperAsHASharedDir.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperAsHASharedDir.java
@@ -316,7 +316,7 @@ public class TestBookKeeperAsHASharedDir {
} catch (IOException ioe) {
LOG.info("Got expected exception", ioe);
GenericTestUtils.assertExceptionContains(
- "Cannot start an HA namenode with name dirs that need recovery", ioe);
+ "storage directory does not exist or is not accessible", ioe);
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
index 5fddeeb42f6..76f38b5e6cf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
@@ -42,6 +42,7 @@ import java.net.URI;
import java.net.URISyntaxException;
import java.security.SecureRandom;
import java.text.SimpleDateFormat;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
@@ -52,7 +53,6 @@ import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;
-import java.util.concurrent.TimeUnit;
import javax.net.SocketFactory;
@@ -71,7 +71,6 @@ import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@@ -634,10 +633,24 @@ public class DFSUtil {
}
return ret;
}
+
+ /**
+ * Get all of the RPC addresses of the individual NNs in a given nameservice.
+ *
+ * @param conf Configuration
+ * @param nsId the nameservice whose NNs addresses we want.
+ * @param defaultValue default address to return in case key is not found.
+ * @return A map from nnId -> RPC address of each NN in the nameservice.
+ */
+ public static Map<String, InetSocketAddress> getRpcAddressesForNameserviceId(
+ Configuration conf, String nsId, String defaultValue) {
+ return getAddressesForNameserviceId(conf, nsId, defaultValue,
+ DFS_NAMENODE_RPC_ADDRESS_KEY);
+ }
private static Map<String, InetSocketAddress> getAddressesForNameserviceId(
Configuration conf, String nsId, String defaultValue,
- String[] keys) {
+ String... keys) {
Collection<String> nnIds = getNameNodeIds(conf, nsId);
Map<String, InetSocketAddress> ret = Maps.newHashMap();
for (String nnId : emptyAsSingletonNull(nnIds)) {
@@ -1693,4 +1706,32 @@ public class DFSUtil {
}
return ttl*1000;
}
+
+ /**
+ * Assert that all objects in the collection are equal. Returns silently if
+ * so, throws an AssertionError if any object is not equal. All null values
+ * are considered equal.
+ *
+ * @param objects the collection of objects to check for equality.
+ */
+ public static void assertAllResultsEqual(Collection<?> objects) {
+ Object[] resultsArray = objects.toArray();
+
+ if (resultsArray.length == 0)
+ return;
+
+ for (int i = 0; i < resultsArray.length; i++) {
+ if (i == 0)
+ continue;
+ else {
+ Object currElement = resultsArray[i];
+ Object lastElement = resultsArray[i - 1];
+ if ((currElement == null && currElement != lastElement) ||
+ (currElement != null && !currElement.equals(lastElement))) {
+ throw new AssertionError("Not all elements match in results: " +
+ Arrays.toString(resultsArray));
+ }
+ }
+ }
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
index fd908a4cad9..d0f43ea2ef8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
@@ -26,7 +26,6 @@ import static org.apache.hadoop.hdfs.protocol.HdfsConstants.HA_DT_SERVICE_PREFIX
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URI;
-import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@@ -38,14 +37,14 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-
-import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
-
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
@@ -301,4 +300,55 @@ public class HAUtil {
DFSClient dfsClient = dfs.getClient();
return RPC.getServerAddress(dfsClient.getNamenode());
}
+
+ /**
+ * Get an RPC proxy for each NN in an HA nameservice. Used when a given RPC
+ * call should be made on every NN in an HA nameservice, not just the active.
+ *
+ * @param conf configuration
+ * @param nsId the nameservice to get all of the proxies for.
+ * @return a list of RPC proxies for each NN in the nameservice.
+ * @throws IOException in the event of error.
+ */
+ public static List<ClientProtocol> getProxiesForAllNameNodesInNameservice(
+ Configuration conf, String nsId) throws IOException {
+ Map<String, InetSocketAddress> nnAddresses =
+ DFSUtil.getRpcAddressesForNameserviceId(conf, nsId, null);
+
+ List<ClientProtocol> namenodes = new ArrayList<ClientProtocol>();
+ for (InetSocketAddress nnAddress : nnAddresses.values()) {
+ NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo = null;
+ proxyInfo = NameNodeProxies.createNonHAProxy(conf,
+ nnAddress, ClientProtocol.class,
+ UserGroupInformation.getCurrentUser(), false);
+ namenodes.add(proxyInfo.getProxy());
+ }
+ return namenodes;
+ }
+
+ /**
+ * Used to ensure that at least one of the given HA NNs is currently in the
+ * active state.
+ *
+ * @param namenodes list of RPC proxies for each NN to check.
+ * @return true if at least one NN is active, false if all are in the standby state.
+ * @throws IOException in the event of error.
+ */
+ public static boolean isAtLeastOneActive(List<ClientProtocol> namenodes)
+ throws IOException {
+ for (ClientProtocol namenode : namenodes) {
+ try {
+ namenode.getFileInfo("/");
+ return true;
+ } catch (RemoteException re) {
+ IOException cause = re.unwrapRemoteException();
+ if (cause instanceof StandbyException) {
+ // This is expected to happen for a standby NN.
+ } else {
+ throw re;
+ }
+ }
+ }
+ return false;
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java
index ef5e1d78d64..6f11911eb3d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochR
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
@@ -153,5 +154,18 @@ interface AsyncLogger {
*/
public void appendReport(StringBuilder sb);
+ public ListenableFuture<Void> doPreUpgrade();
+
+ public ListenableFuture<Void> doUpgrade(StorageInfo sInfo);
+
+ public ListenableFuture<Void> doFinalize();
+
+ public ListenableFuture<Boolean> canRollBack(StorageInfo storage,
+ StorageInfo prevStorage, int targetLayoutVersion);
+
+ public ListenableFuture<Void> doRollback();
+
+ public ListenableFuture<Long> getJournalCTime();
+
public ListenableFuture<Void> discardSegments(long startTxId);
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java
index 1f968d94696..8d97c67eb60 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJourna
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
@@ -317,4 +318,71 @@ class AsyncLoggerSet {
}
return QuorumCall.create(calls);
}
+
+ QuorumCall<AsyncLogger, Void> doPreUpgrade() {
+ Map<AsyncLogger, ListenableFuture<Void>> calls =
+ Maps.newHashMap();
+ for (AsyncLogger logger : loggers) {
+ ListenableFuture<Void> future =
+ logger.doPreUpgrade();
+ calls.put(logger, future);
+ }
+ return QuorumCall.create(calls);
+ }
+
+ public QuorumCall<AsyncLogger, Void> doUpgrade(StorageInfo sInfo) {
+ Map<AsyncLogger, ListenableFuture<Void>> calls =
+ Maps.newHashMap();
+ for (AsyncLogger logger : loggers) {
+ ListenableFuture<Void> future =
+ logger.doUpgrade(sInfo);
+ calls.put(logger, future);
+ }
+ return QuorumCall.create(calls);
+ }
+
+ public QuorumCall<AsyncLogger, Void> doFinalize() {
+ Map<AsyncLogger, ListenableFuture<Void>> calls =
+ Maps.newHashMap();
+ for (AsyncLogger logger : loggers) {
+ ListenableFuture<Void> future =
+ logger.doFinalize();
+ calls.put(logger, future);
+ }
+ return QuorumCall.create(calls);
+ }
+
+ public QuorumCall<AsyncLogger, Boolean> canRollBack(StorageInfo storage,
+ StorageInfo prevStorage, int targetLayoutVersion) {
+ Map<AsyncLogger, ListenableFuture<Boolean>> calls =
+ Maps.newHashMap();
+ for (AsyncLogger logger : loggers) {
+ ListenableFuture<Boolean> future =
+ logger.canRollBack(storage, prevStorage, targetLayoutVersion);
+ calls.put(logger, future);
+ }
+ return QuorumCall.create(calls);
+ }
+
+ public QuorumCall<AsyncLogger, Void> doRollback() {
+ Map<AsyncLogger, ListenableFuture<Void>> calls =
+ Maps.newHashMap();
+ for (AsyncLogger logger : loggers) {
+ ListenableFuture<Void> future =
+ logger.doRollback();
+ calls.put(logger, future);
+ }
+ return QuorumCall.create(calls);
+ }
+
+ public QuorumCall<AsyncLogger, Long> getJournalCTime() {
+ Map<AsyncLogger, ListenableFuture<Long>> calls =
+ Maps.newHashMap();
+ for (AsyncLogger logger : loggers) {
+ ListenableFuture<Long> future = logger.getJournalCTime();
+ calls.put(logger, future);
+ }
+ return QuorumCall.create(calls);
+ }
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
index 18ed5d1291c..996d702572b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolPB;
import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolTranslatorPB;
import org.apache.hadoop.hdfs.qjournal.server.GetJournalEditServlet;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -574,6 +575,72 @@ public class IPCLoggerChannel implements AsyncLogger {
}
});
}
+
+ @Override
+ public ListenableFuture<Void> doPreUpgrade() {
+ return executor.submit(new Callable<Void>() {
+ @Override
+ public Void call() throws IOException {
+ getProxy().doPreUpgrade(journalId);
+ return null;
+ }
+ });
+ }
+
+ @Override
+ public ListenableFuture<Void> doUpgrade(final StorageInfo sInfo) {
+ return executor.submit(new Callable<Void>() {
+ @Override
+ public Void call() throws IOException {
+ getProxy().doUpgrade(journalId, sInfo);
+ return null;
+ }
+ });
+ }
+
+ @Override
+ public ListenableFuture<Void> doFinalize() {
+ return executor.submit(new Callable<Void>() {
+ @Override
+ public Void call() throws IOException {
+ getProxy().doFinalize(journalId);
+ return null;
+ }
+ });
+ }
+
+ @Override
+ public ListenableFuture<Boolean> canRollBack(final StorageInfo storage,
+ final StorageInfo prevStorage, final int targetLayoutVersion) {
+ return executor.submit(new Callable<Boolean>() {
+ @Override
+ public Boolean call() throws IOException {
+ return getProxy().canRollBack(journalId, storage, prevStorage,
+ targetLayoutVersion);
+ }
+ });
+ }
+
+ @Override
+ public ListenableFuture<Void> doRollback() {
+ return executor.submit(new Callable<Void>() {
+ @Override
+ public Void call() throws IOException {
+ getProxy().doRollback(journalId);
+ return null;
+ }
+ });
+ }
+
+ @Override
+ public ListenableFuture<Long> getJournalCTime() {
+ return executor.submit(new Callable<Long>() {
+ @Override
+ public Long call() throws IOException {
+ return getProxy().getJournalCTime(journalId);
+ }
+ });
+ }
@Override
public String toString() {
@@ -646,4 +713,5 @@ public class IPCLoggerChannel implements AsyncLogger {
private boolean hasHttpServerEndPoint() {
return httpServerURL != null;
}
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
index 7bacf48969c..438cd385cbe 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
@@ -34,10 +34,13 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream;
import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
@@ -77,8 +80,14 @@ public class QuorumJournalManager implements JournalManager {
// Since these don't occur during normal operation, we can
// use rather lengthy timeouts, and don't need to make them
// configurable.
- private static final int FORMAT_TIMEOUT_MS = 60000;
- private static final int HASDATA_TIMEOUT_MS = 60000;
+ private static final int FORMAT_TIMEOUT_MS = 60000;
+ private static final int HASDATA_TIMEOUT_MS = 60000;
+ private static final int CAN_ROLL_BACK_TIMEOUT_MS = 60000;
+ private static final int FINALIZE_TIMEOUT_MS = 60000;
+ private static final int PRE_UPGRADE_TIMEOUT_MS = 60000;
+ private static final int ROLL_BACK_TIMEOUT_MS = 60000;
+ private static final int UPGRADE_TIMEOUT_MS = 60000;
+ private static final int GET_JOURNAL_CTIME_TIMEOUT_MS = 60000;
private static final int DISCARD_SEGMENTS_TIMEOUT_MS = 60000;
private final Configuration conf;
@@ -495,6 +504,134 @@ public class QuorumJournalManager implements JournalManager {
return loggers;
}
+ @Override
+ public void doPreUpgrade() throws IOException {
+ QuorumCall<AsyncLogger, Void> call = loggers.doPreUpgrade();
+ try {
+ call.waitFor(loggers.size(), loggers.size(), 0, PRE_UPGRADE_TIMEOUT_MS,
+ "doPreUpgrade");
+
+ if (call.countExceptions() > 0) {
+ call.rethrowException("Could not do pre-upgrade of one or more JournalNodes");
+ }
+ } catch (InterruptedException e) {
+ throw new IOException("Interrupted waiting for doPreUpgrade() response");
+ } catch (TimeoutException e) {
+ throw new IOException("Timed out waiting for doPreUpgrade() response");
+ }
+ }
+
+ @Override
+ public void doUpgrade(Storage storage) throws IOException {
+ QuorumCall<AsyncLogger, Void> call = loggers.doUpgrade(storage);
+ try {
+ call.waitFor(loggers.size(), loggers.size(), 0, UPGRADE_TIMEOUT_MS,
+ "doUpgrade");
+
+ if (call.countExceptions() > 0) {
+ call.rethrowException("Could not perform upgrade of one or more JournalNodes");
+ }
+ } catch (InterruptedException e) {
+ throw new IOException("Interrupted waiting for doUpgrade() response");
+ } catch (TimeoutException e) {
+ throw new IOException("Timed out waiting for doUpgrade() response");
+ }
+ }
+
+ @Override
+ public void doFinalize() throws IOException {
+ QuorumCall<AsyncLogger, Void> call = loggers.doFinalize();
+ try {
+ call.waitFor(loggers.size(), loggers.size(), 0, FINALIZE_TIMEOUT_MS,
+ "doFinalize");
+
+ if (call.countExceptions() > 0) {
+ call.rethrowException("Could not finalize one or more JournalNodes");
+ }
+ } catch (InterruptedException e) {
+ throw new IOException("Interrupted waiting for doFinalize() response");
+ } catch (TimeoutException e) {
+ throw new IOException("Timed out waiting for doFinalize() response");
+ }
+ }
+
+ @Override
+ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+ int targetLayoutVersion) throws IOException {
+ QuorumCall<AsyncLogger, Boolean> call = loggers.canRollBack(storage,
+ prevStorage, targetLayoutVersion);
+ try {
+ call.waitFor(loggers.size(), loggers.size(), 0, CAN_ROLL_BACK_TIMEOUT_MS,
+ "canRollBack");
+
+ if (call.countExceptions() > 0) {
+ call.rethrowException("Could not check if roll back is possible for"
+ + " one or more JournalNodes");
+ }
+
+ // Either they all return the same thing or this call fails, so we can
+ // just return the first result.
+ DFSUtil.assertAllResultsEqual(call.getResults().values());
+ for (Boolean result : call.getResults().values()) {
+ return result;
+ }
+ } catch (InterruptedException e) {
+ throw new IOException("Interrupted waiting for canRollBack() " +
+ "response");
+ } catch (TimeoutException e) {
+ throw new IOException("Timed out waiting for canRollBack() " +
+ "response");
+ }
+
+ throw new AssertionError("Unreachable code.");
+ }
+
+ @Override
+ public void doRollback() throws IOException {
+ QuorumCall<AsyncLogger, Void> call = loggers.doRollback();
+ try {
+ call.waitFor(loggers.size(), loggers.size(), 0, ROLL_BACK_TIMEOUT_MS,
+ "doRollback");
+
+ if (call.countExceptions() > 0) {
+ call.rethrowException("Could not perform rollback of one or more JournalNodes");
+ }
+ } catch (InterruptedException e) {
+ throw new IOException("Interrupted waiting for doRollback() response");
+ } catch (TimeoutException e) {
+ throw new IOException("Timed out waiting for doRollback() response");
+ }
+ }
+
+ @Override
+ public long getJournalCTime() throws IOException {
+ QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime();
+ try {
+ call.waitFor(loggers.size(), loggers.size(), 0,
+ GET_JOURNAL_CTIME_TIMEOUT_MS, "getJournalCTime");
+
+ if (call.countExceptions() > 0) {
+ call.rethrowException("Could not get journal CTime for one or "
+ + "more JournalNodes");
+ }
+
+ // Either they all return the same thing or this call fails, so we can
+ // just return the first result.
+ DFSUtil.assertAllResultsEqual(call.getResults().values());
+ for (Long result : call.getResults().values()) {
+ return result;
+ }
+ } catch (InterruptedException e) {
+ throw new IOException("Interrupted waiting for getJournalCTime() " +
+ "response");
+ } catch (TimeoutException e) {
+ throw new IOException("Timed out waiting for getJournalCTime() " +
+ "response");
+ }
+
+ throw new AssertionError("Unreachable code.");
+ }
+
@Override
public void discardSegments(long startTxId) throws IOException {
QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java
index c0d715cb19b..5e5f94c1db3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochR
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
import org.apache.hadoop.hdfs.qjournal.server.JournalNode;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.JournalManager;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.io.retry.Idempotent;
@@ -146,6 +147,19 @@ public interface QJournalProtocol {
public void acceptRecovery(RequestInfo reqInfo,
SegmentStateProto stateToAccept, URL fromUrl) throws IOException;
+ public void doPreUpgrade(String journalId) throws IOException;
+
+ public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException;
+
+ public void doFinalize(String journalId) throws IOException;
+
+ public Boolean canRollBack(String journalId, StorageInfo storage,
+ StorageInfo prevStorage, int targetLayoutVersion) throws IOException;
+
+ public void doRollback(String journalId) throws IOException;
+
+ public Long getJournalCTime(String journalId) throws IOException;
+
/**
* Discard journal segments whose first TxId is greater than or equal to the
* given txid.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java
index 5b13f2be2e4..3a4e3924392 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java
@@ -28,14 +28,26 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DiscardSegmentsRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DiscardSegmentsResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoRollbackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoRollbackResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoUpgradeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoUpgradeResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FinalizeLogSegmentRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FinalizeLogSegmentResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FormatRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FormatResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.HeartbeatRequestProto;
@@ -54,6 +66,8 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PurgeLogs
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
@@ -263,4 +277,79 @@ public class QJournalProtocolServerSideTranslatorPB implements QJournalProtocolP
throw new ServiceException(e);
}
}
+
+
+ @Override
+ public DoPreUpgradeResponseProto doPreUpgrade(RpcController controller,
+ DoPreUpgradeRequestProto request) throws ServiceException {
+ try {
+ impl.doPreUpgrade(convert(request.getJid()));
+ return DoPreUpgradeResponseProto.getDefaultInstance();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
+ @Override
+ public DoUpgradeResponseProto doUpgrade(RpcController controller,
+ DoUpgradeRequestProto request) throws ServiceException {
+ StorageInfo si = PBHelper.convert(request.getSInfo(), NodeType.JOURNAL_NODE);
+ try {
+ impl.doUpgrade(convert(request.getJid()), si);
+ return DoUpgradeResponseProto.getDefaultInstance();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
+ @Override
+ public DoFinalizeResponseProto doFinalize(RpcController controller,
+ DoFinalizeRequestProto request) throws ServiceException {
+ try {
+ impl.doFinalize(convert(request.getJid()));
+ return DoFinalizeResponseProto.getDefaultInstance();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
+ @Override
+ public CanRollBackResponseProto canRollBack(RpcController controller,
+ CanRollBackRequestProto request) throws ServiceException {
+ try {
+ StorageInfo si = PBHelper.convert(request.getStorage(), NodeType.JOURNAL_NODE);
+ Boolean result = impl.canRollBack(convert(request.getJid()), si,
+ PBHelper.convert(request.getPrevStorage(), NodeType.JOURNAL_NODE),
+ request.getTargetLayoutVersion());
+ return CanRollBackResponseProto.newBuilder()
+ .setCanRollBack(result)
+ .build();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
+ @Override
+ public DoRollbackResponseProto doRollback(RpcController controller, DoRollbackRequestProto request)
+ throws ServiceException {
+ try {
+ impl.doRollback(convert(request.getJid()));
+ return DoRollbackResponseProto.getDefaultInstance();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
+
+ @Override
+ public GetJournalCTimeResponseProto getJournalCTime(RpcController controller,
+ GetJournalCTimeRequestProto request) throws ServiceException {
+ try {
+ Long resultCTime = impl.getJournalCTime(convert(request.getJid()));
+ return GetJournalCTimeResponseProto.newBuilder()
+ .setResultCTime(resultCTime)
+ .build();
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java
index 85d593c8f06..25260a2a7c5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java
@@ -28,11 +28,19 @@ import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DiscardSegmentsRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoRollbackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoUpgradeRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FinalizeLogSegmentRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FormatRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.HeartbeatRequestProto;
@@ -49,6 +57,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.RequestIn
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentRequestProto;
import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.ipc.ProtobufHelper;
@@ -279,6 +288,87 @@ public class QJournalProtocolTranslatorPB implements ProtocolMetaInterface,
RPC.getProtocolVersion(QJournalProtocolPB.class), methodName);
}
+ @Override
+ public void doPreUpgrade(String jid) throws IOException {
+ try {
+ rpcProxy.doPreUpgrade(NULL_CONTROLLER,
+ DoPreUpgradeRequestProto.newBuilder()
+ .setJid(convertJournalId(jid))
+ .build());
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
+
+ @Override
+ public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException {
+ try {
+ rpcProxy.doUpgrade(NULL_CONTROLLER,
+ DoUpgradeRequestProto.newBuilder()
+ .setJid(convertJournalId(journalId))
+ .setSInfo(PBHelper.convert(sInfo))
+ .build());
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
+
+ @Override
+ public void doFinalize(String jid) throws IOException {
+ try {
+ rpcProxy.doFinalize(NULL_CONTROLLER,
+ DoFinalizeRequestProto.newBuilder()
+ .setJid(convertJournalId(jid))
+ .build());
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
+
+ @Override
+ public Boolean canRollBack(String journalId, StorageInfo storage,
+ StorageInfo prevStorage, int targetLayoutVersion) throws IOException {
+ try {
+ CanRollBackResponseProto response = rpcProxy.canRollBack(
+ NULL_CONTROLLER,
+ CanRollBackRequestProto.newBuilder()
+ .setJid(convertJournalId(journalId))
+ .setStorage(PBHelper.convert(storage))
+ .setPrevStorage(PBHelper.convert(prevStorage))
+ .setTargetLayoutVersion(targetLayoutVersion)
+ .build());
+ return response.getCanRollBack();
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
+
+ @Override
+ public void doRollback(String journalId) throws IOException {
+ try {
+ rpcProxy.doRollback(NULL_CONTROLLER,
+ DoRollbackRequestProto.newBuilder()
+ .setJid(convertJournalId(journalId))
+ .build());
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
+
+ @Override
+ public Long getJournalCTime(String journalId) throws IOException {
+ try {
+ GetJournalCTimeResponseProto response = rpcProxy.getJournalCTime(
+ NULL_CONTROLLER,
+ GetJournalCTimeRequestProto.newBuilder()
+ .setJid(convertJournalId(journalId))
+ .build());
+ return response.getResultCTime();
+ } catch (ServiceException e) {
+ throw ProtobufHelper.getRemoteException(e);
+ }
+ }
+
@Override
public void discardSegments(String journalId, long startTxId)
throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java
index b1dff736255..e9387d7a0e0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager;
import org.apache.hadoop.hdfs.server.common.JspHelper;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import org.apache.hadoop.hdfs.server.namenode.ImageServlet;
@@ -139,20 +140,26 @@ public class GetJournalEditServlet extends HttpServlet {
private boolean checkStorageInfoOrSendError(JNStorage storage,
HttpServletRequest request, HttpServletResponse response)
throws IOException {
- String myStorageInfoString = storage.toColonSeparatedString();
+ int myNsId = storage.getNamespaceID();
+ String myClusterId = storage.getClusterID();
+
String theirStorageInfoString = StringEscapeUtils.escapeHtml(
request.getParameter(STORAGEINFO_PARAM));
- if (theirStorageInfoString != null
- && !myStorageInfoString.equals(theirStorageInfoString)) {
- String msg = "This node has storage info '" + myStorageInfoString
- + "' but the requesting node expected '"
- + theirStorageInfoString + "'";
-
- response.sendError(HttpServletResponse.SC_FORBIDDEN, msg);
- LOG.warn("Received an invalid request file transfer request from " +
- request.getRemoteAddr() + ": " + msg);
- return false;
+ if (theirStorageInfoString != null) {
+ int theirNsId = StorageInfo.getNsIdFromColonSeparatedString(
+ theirStorageInfoString);
+ String theirClusterId = StorageInfo.getClusterIdFromColonSeparatedString(
+ theirStorageInfoString);
+ if (myNsId != theirNsId || !myClusterId.equals(theirClusterId)) {
+ String msg = "This node has namespaceId '" + myNsId + "' and clusterId '"
+ + myClusterId + "' but the requesting node expected '" + theirNsId
+ + "' and '" + theirClusterId + "'";
+ response.sendError(HttpServletResponse.SC_FORBIDDEN, msg);
+ LOG.warn("Received an invalid request file transfer request from " +
+ request.getRemoteAddr() + ": " + msg);
+ return false;
+ }
}
return true;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
index 347ac53a1d8..e972fe03af1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
@@ -130,6 +130,10 @@ class JNStorage extends Storage {
return new File(sd.getCurrentDir(), "paxos");
}
+ File getRoot() {
+ return sd.getRoot();
+ }
+
/**
* Remove any log files and associated paxos files which are older than
* the given txid.
@@ -182,12 +186,15 @@ class JNStorage extends Storage {
unlockAll();
sd.clearDirectory();
writeProperties(sd);
+ createPaxosDir();
+ analyzeStorage();
+ }
+
+ void createPaxosDir() throws IOException {
if (!getPaxosDir().mkdirs()) {
throw new IOException("Could not create paxos dir: " + getPaxosDir());
}
- analyzeStorage();
}
-
void analyzeStorage() throws IOException {
this.state = sd.analyzeStorage(StartupOption.REGULAR, this);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
index c92de2032ea..b2093b9130f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
@@ -37,12 +37,14 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.qjournal.protocol.JournalNotFormattedException;
import org.apache.hadoop.hdfs.qjournal.protocol.JournalOutOfSyncException;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
@@ -73,7 +75,7 @@ import com.google.protobuf.TextFormat;
* Each such journal is entirely independent despite being hosted by
* the same JVM.
*/
-class Journal implements Closeable {
+public class Journal implements Closeable {
static final Log LOG = LogFactory.getLog(Journal.class);
@@ -122,8 +124,8 @@ class Journal implements Closeable {
*/
private BestEffortLongFile committedTxnId;
- private static final String LAST_PROMISED_FILENAME = "last-promised-epoch";
- private static final String LAST_WRITER_EPOCH = "last-writer-epoch";
+ public static final String LAST_PROMISED_FILENAME = "last-promised-epoch";
+ public static final String LAST_WRITER_EPOCH = "last-writer-epoch";
private static final String COMMITTED_TXID_FILENAME = "committed-txid";
private final FileJournalManager fjm;
@@ -627,7 +629,7 @@ class Journal implements Closeable {
}
/**
- * @see QJournalProtocol#getEditLogManifest(String, long)
+ * @see QJournalProtocol#getEditLogManifest(String, long, boolean)
*/
public RemoteEditLogManifest getEditLogManifest(long sinceTxId,
boolean inProgressOk) throws IOException {
@@ -729,7 +731,7 @@ class Journal implements Closeable {
}
/**
- * @see QJournalProtocol#acceptRecovery(RequestInfo, SegmentStateProto, URL)
+ * @see QJournalProtocol#acceptRecovery(RequestInfo, QJournalProtocolProtos.SegmentStateProto, URL)
*/
public synchronized void acceptRecovery(RequestInfo reqInfo,
SegmentStateProto segment, URL fromUrl)
@@ -987,4 +989,62 @@ class Journal implements Closeable {
// we delete all the segments after the startTxId. let's reset committedTxnId
committedTxnId.set(startTxId - 1);
}
+
+ public synchronized void doPreUpgrade() throws IOException {
+ storage.getJournalManager().doPreUpgrade();
+ }
+
+ public synchronized void doUpgrade(StorageInfo sInfo) throws IOException {
+ long oldCTime = storage.getCTime();
+ storage.cTime = sInfo.cTime;
+ int oldLV = storage.getLayoutVersion();
+ storage.layoutVersion = sInfo.layoutVersion;
+ LOG.info("Starting upgrade of edits directory " + storage.getRoot()
+ + ".\n old LV = " + oldLV
+ + "; old CTime = " + oldCTime
+ + ".\n new LV = " + storage.getLayoutVersion()
+ + "; new CTime = " + storage.getCTime());
+ storage.getJournalManager().doUpgrade(storage);
+ storage.createPaxosDir();
+
+ // Copy over the contents of the epoch data files to the new dir.
+ File currentDir = storage.getSingularStorageDir().getCurrentDir();
+ File previousDir = storage.getSingularStorageDir().getPreviousDir();
+
+ PersistentLongFile prevLastPromisedEpoch = new PersistentLongFile(
+ new File(previousDir, LAST_PROMISED_FILENAME), 0);
+ PersistentLongFile prevLastWriterEpoch = new PersistentLongFile(
+ new File(previousDir, LAST_WRITER_EPOCH), 0);
+
+ lastPromisedEpoch = new PersistentLongFile(
+ new File(currentDir, LAST_PROMISED_FILENAME), 0);
+ lastWriterEpoch = new PersistentLongFile(
+ new File(currentDir, LAST_WRITER_EPOCH), 0);
+
+ lastPromisedEpoch.set(prevLastPromisedEpoch.get());
+ lastWriterEpoch.set(prevLastWriterEpoch.get());
+ }
+
+ public synchronized void doFinalize() throws IOException {
+ LOG.info("Finalizing upgrade for journal "
+ + storage.getRoot() + "."
+ + (storage.getLayoutVersion()==0 ? "" :
+ "\n cur LV = " + storage.getLayoutVersion()
+ + "; cur CTime = " + storage.getCTime()));
+ storage.getJournalManager().doFinalize();
+ }
+
+ public Boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+ int targetLayoutVersion) throws IOException {
+ return this.storage.getJournalManager().canRollBack(storage, prevStorage,
+ targetLayoutVersion);
+ }
+
+ public void doRollback() throws IOException {
+ storage.getJournalManager().doRollback();
+ }
+
+ public Long getJournalCTime() throws IOException {
+ return storage.getJournalManager().getJournalCTime();
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java
index 8bd991cab25..19c48ba9ed7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.source.JvmMetrics;
@@ -290,4 +291,31 @@ public class JournalNode implements Tool, Configurable, JournalNodeMXBean {
throws IOException {
getOrCreateJournal(journalId).discardSegments(startTxId);
}
+
+ public void doPreUpgrade(String journalId) throws IOException {
+ getOrCreateJournal(journalId).doPreUpgrade();
+ }
+
+ public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException {
+ getOrCreateJournal(journalId).doUpgrade(sInfo);
+ }
+
+ public void doFinalize(String journalId) throws IOException {
+ getOrCreateJournal(journalId).doFinalize();
+ }
+
+ public Boolean canRollBack(String journalId, StorageInfo storage,
+ StorageInfo prevStorage, int targetLayoutVersion) throws IOException {
+ return getOrCreateJournal(journalId).canRollBack(storage, prevStorage,
+ targetLayoutVersion);
+ }
+
+ public void doRollback(String journalId) throws IOException {
+ getOrCreateJournal(journalId).doRollback();
+ }
+
+ public Long getJournalCTime(String journalId) throws IOException {
+ return getOrCreateJournal(journalId).getJournalCTime();
+ }
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java
index 0c0ed01d4b1..aef0c19063d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentSt
import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolPB;
import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -205,6 +206,38 @@ class JournalNodeRpcServer implements QJournalProtocol {
.acceptRecovery(reqInfo, log, fromUrl);
}
+ @Override
+ public void doPreUpgrade(String journalId) throws IOException {
+ jn.doPreUpgrade(journalId);
+ }
+
+ @Override
+ public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException {
+ jn.doUpgrade(journalId, sInfo);
+ }
+
+ @Override
+ public void doFinalize(String journalId) throws IOException {
+ jn.doFinalize(journalId);
+ }
+
+ @Override
+ public Boolean canRollBack(String journalId, StorageInfo storage,
+ StorageInfo prevStorage, int targetLayoutVersion)
+ throws IOException {
+ return jn.canRollBack(journalId, storage, prevStorage, targetLayoutVersion);
+ }
+
+ @Override
+ public void doRollback(String journalId) throws IOException {
+ jn.doRollback(journalId);
+ }
+
+ @Override
+ public Long getJournalCTime(String journalId) throws IOException {
+ return jn.getJournalCTime(journalId);
+ }
+
@Override
public void discardSegments(String journalId, long startTxId)
throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
index 3a065b5d26e..3656aa2b3b3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hdfs.server.common;
import java.io.File;
-import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
@@ -78,7 +77,6 @@ public abstract class Storage extends StorageInfo {
public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};
public static final String STORAGE_FILE_LOCK = "in_use.lock";
- protected static final String STORAGE_FILE_VERSION = "VERSION";
public static final String STORAGE_DIR_CURRENT = "current";
public static final String STORAGE_DIR_PREVIOUS = "previous";
public static final String STORAGE_TMP_REMOVED = "removed.tmp";
@@ -121,22 +119,24 @@ public abstract class Storage extends StorageInfo {
private class DirIterator implements Iterator<StorageDirectory> {
StorageDirType dirType;
+ boolean includeShared;
int prevIndex; // for remove()
int nextIndex; // for next()
- DirIterator(StorageDirType dirType) {
+ DirIterator(StorageDirType dirType, boolean includeShared) {
this.dirType = dirType;
this.nextIndex = 0;
this.prevIndex = 0;
+ this.includeShared = includeShared;
}
@Override
public boolean hasNext() {
if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
return false;
- if (dirType != null) {
+ if (dirType != null || !includeShared) {
while (nextIndex < storageDirs.size()) {
- if (getStorageDir(nextIndex).getStorageDirType().isOfType(dirType))
+ if (shouldReturnNextDir())
break;
nextIndex++;
}
@@ -151,9 +151,9 @@ public abstract class Storage extends StorageInfo {
StorageDirectory sd = getStorageDir(nextIndex);
prevIndex = nextIndex;
nextIndex++;
- if (dirType != null) {
+ if (dirType != null || !includeShared) {
while (nextIndex < storageDirs.size()) {
- if (getStorageDir(nextIndex).getStorageDirType().isOfType(dirType))
+ if (shouldReturnNextDir())
break;
nextIndex++;
}
@@ -167,6 +167,12 @@ public abstract class Storage extends StorageInfo {
storageDirs.remove(prevIndex); // remove last returned element
hasNext(); // reset nextIndex to correct place
}
+
+ private boolean shouldReturnNextDir() {
+ StorageDirectory sd = getStorageDir(nextIndex);
+ return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
+ (includeShared || !sd.isShared());
+ }
}
/**
@@ -198,7 +204,27 @@ public abstract class Storage extends StorageInfo {
* them via the Iterator
*/
public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
- return new DirIterator(dirType);
+ return dirIterator(dirType, true);
+ }
+
+ /**
+ * Return all entries in storageDirs, potentially excluding shared dirs.
+ * @param includeShared whether or not to include shared dirs.
+ * @return an iterator over the configured storage dirs.
+ */
+ public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
+ return dirIterator(null, includeShared);
+ }
+
+ /**
+ * @param dirType all entries will be of this type of dir
+ * @param includeShared true to include any shared directories,
+ * false otherwise
+ * @return an iterator over the configured storage dirs.
+ */
+ public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
+ boolean includeShared) {
+ return new DirIterator(dirType, includeShared);
}
public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
@@ -228,7 +254,9 @@ public abstract class Storage extends StorageInfo {
@InterfaceAudience.Private
public static class StorageDirectory implements FormatConfirmable {
final File root; // root directory
- final boolean useLock; // flag to enable storage lock
+ // whether or not this dir is shared between two separate NNs for HA, or
+ // between multiple block pools in the case of federation.
+ final boolean isShared;
final StorageDirType dirType; // storage dir type
FileLock lock; // storage lock
@@ -236,11 +264,11 @@ public abstract class Storage extends StorageInfo {
public StorageDirectory(File dir) {
// default dirType is null
- this(dir, null, true);
+ this(dir, null, false);
}
public StorageDirectory(File dir, StorageDirType dirType) {
- this(dir, dirType, true);
+ this(dir, dirType, false);
}
public void setStorageUuid(String storageUuid) {
@@ -255,14 +283,14 @@ public abstract class Storage extends StorageInfo {
* Constructor
* @param dir directory corresponding to the storage
* @param dirType storage directory type
- * @param useLock true - enables locking on the storage directory and false
- * disables locking
+ * @param isShared whether or not this dir is shared between two NNs. true
+ * disables locking on the storage directory, false enables locking
*/
- public StorageDirectory(File dir, StorageDirType dirType, boolean useLock) {
+ public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
this.root = dir;
this.lock = null;
this.dirType = dirType;
- this.useLock = useLock;
+ this.isShared = isShared;
}
/**
@@ -616,6 +644,10 @@ public abstract class Storage extends StorageInfo {
return true;
}
+
+ public boolean isShared() {
+ return isShared;
+ }
/**
@@ -630,7 +662,7 @@ public abstract class Storage extends StorageInfo {
* @throws IOException if locking fails
*/
public void lock() throws IOException {
- if (!useLock) {
+ if (isShared()) {
LOG.info("Locking is disabled");
return;
}
@@ -900,33 +932,21 @@ public abstract class Storage extends StorageInfo {
props.setProperty("cTime", String.valueOf(cTime));
}
- /**
- * Read properties from the VERSION file in the given storage directory.
- */
- public void readProperties(StorageDirectory sd) throws IOException {
- Properties props = readPropertiesFile(sd.getVersionFile());
- setFieldsFromProperties(props, sd);
- }
-
- /**
- * Read properties from the the previous/VERSION file in the given storage directory.
- */
- public void readPreviousVersionProperties(StorageDirectory sd)
- throws IOException {
- Properties props = readPropertiesFile(sd.getPreviousVersionFile());
- setFieldsFromProperties(props, sd);
- }
-
/**
* Write properties to the VERSION file in the given storage directory.
*/
public void writeProperties(StorageDirectory sd) throws IOException {
writeProperties(sd.getVersionFile(), sd);
}
-
+
public void writeProperties(File to, StorageDirectory sd) throws IOException {
Properties props = new Properties();
setPropertiesFromFields(props, sd);
+ writeProperties(to, sd, props);
+ }
+
+ public static void writeProperties(File to, StorageDirectory sd,
+ Properties props) throws IOException {
RandomAccessFile file = new RandomAccessFile(to, "rws");
FileOutputStream out = null;
try {
@@ -953,23 +973,6 @@ public abstract class Storage extends StorageInfo {
file.close();
}
}
-
- public static Properties readPropertiesFile(File from) throws IOException {
- RandomAccessFile file = new RandomAccessFile(from, "rws");
- FileInputStream in = null;
- Properties props = new Properties();
- try {
- in = new FileInputStream(file.getFD());
- file.seek(0);
- props.load(in);
- } finally {
- if (in != null) {
- in.close();
- }
- file.close();
- }
- return props;
- }
public static void rename(File from, File to) throws IOException {
if (!from.renameTo(to))
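As an illustrative aside (not part of the patch): the new dirIterator(includeShared) overloads added above let callers walk only the local, non-shared storage directories, which is how the upgrade and rollback paths later in this change use them. The sketch below is hypothetical glue code and assumes an already-initialized Storage instance (for example an NNStorage).

    import java.util.Iterator;

    import org.apache.hadoop.hdfs.server.common.Storage;
    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;

    class DirIteratorSketch {
      // Walk only the local (non-shared) storage dirs; shared edits dirs are skipped.
      static void listLocalDirs(Storage storage) {
        for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
          StorageDirectory sd = it.next();
          System.out.println(sd.getRoot() + " (shared=" + sd.isShared() + ")");
        }
      }
    }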
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java
index 7f37339d4f6..c87baee959c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java
@@ -17,7 +17,10 @@
*/
package org.apache.hadoop.hdfs.server.common;
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.io.RandomAccessFile;
import java.util.Map;
import java.util.Properties;
import java.util.SortedSet;
@@ -208,4 +211,46 @@ public class StorageInfo {
}
return property;
}
+
+ public static int getNsIdFromColonSeparatedString(String in) {
+ return Integer.parseInt(in.split(":")[1]);
+ }
+
+ public static String getClusterIdFromColonSeparatedString(String in) {
+ return in.split(":")[3];
+ }
+
+ /**
+ * Read properties from the VERSION file in the given storage directory.
+ */
+ public void readProperties(StorageDirectory sd) throws IOException {
+ Properties props = readPropertiesFile(sd.getVersionFile());
+ setFieldsFromProperties(props, sd);
+ }
+
+ /**
+ * Read properties from the previous/VERSION file in the given storage directory.
+ */
+ public void readPreviousVersionProperties(StorageDirectory sd)
+ throws IOException {
+ Properties props = readPropertiesFile(sd.getPreviousVersionFile());
+ setFieldsFromProperties(props, sd);
+ }
+
+ public static Properties readPropertiesFile(File from) throws IOException {
+ RandomAccessFile file = new RandomAccessFile(from, "rws");
+ FileInputStream in = null;
+ Properties props = new Properties();
+ try {
+ in = new FileInputStream(file.getFD());
+ file.seek(0);
+ props.load(in);
+ } finally {
+ if (in != null) {
+ in.close();
+ }
+ file.close();
+ }
+ return props;
+ }
}
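A minimal, hypothetical usage sketch of the helpers moved into StorageInfo above. The colon-separated string is assumed to follow the layoutVersion:namespaceID:cTime:clusterID form produced elsewhere in StorageInfo; the VERSION file path and property key below are illustrative only, not taken from this patch.

    import java.io.File;
    import java.io.IOException;
    import java.util.Properties;

    import org.apache.hadoop.hdfs.server.common.StorageInfo;

    public class StorageInfoHelpersSketch {
      public static void main(String[] args) throws IOException {
        // Assumed format: layoutVersion:namespaceID:cTime:clusterID
        String colonSeparated = "-56:1394849922:0:CID-example";
        int nsId = StorageInfo.getNsIdFromColonSeparatedString(colonSeparated);
        String clusterId = StorageInfo.getClusterIdFromColonSeparatedString(colonSeparated);
        System.out.println("nsId=" + nsId + ", clusterId=" + clusterId);

        // Read a VERSION file directly (path is illustrative).
        Properties props = StorageInfo.readPropertiesFile(
            new File("/tmp/hadoop/dfs/name/current/VERSION"));
        System.out.println("layoutVersion=" + props.getProperty("layoutVersion"));
      }
    }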
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java
index 9b74c720084..d684f210166 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java
@@ -111,7 +111,7 @@ public class BlockPoolSliceStorage extends Storage {
dataDirs.size());
for (Iterator it = dataDirs.iterator(); it.hasNext();) {
File dataDir = it.next();
- StorageDirectory sd = new StorageDirectory(dataDir, null, false);
+ StorageDirectory sd = new StorageDirectory(dataDir, null, true);
StorageState curState;
try {
curState = sd.analyzeStorage(startOpt, this);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java
index f9d40642088..e8adde252f0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.namenode;
import java.io.IOException;
import java.util.Collection;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.JournalInfo;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
@@ -103,4 +105,35 @@ class BackupJournalManager implements JournalManager {
public void discardSegments(long startTxId) throws IOException {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public void doPreUpgrade() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doUpgrade(Storage storage) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doFinalize() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+ int targetLayoutVersion) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doRollback() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getJournalCTime() throws IOException {
+ throw new UnsupportedOperationException();
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
index a5d41bbb1fa..85691550545 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
@@ -415,7 +415,8 @@ public class BackupNode extends NameNode {
return DFSUtil.getBackupNameServiceId(conf);
}
- protected HAState createHAState() {
+ @Override
+ protected HAState createHAState(StartupOption startOpt) {
return new BackupState();
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
index f9a4d2794d7..d8213d44385 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
+import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
@@ -72,6 +73,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
@@ -83,7 +85,6 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
-import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
@@ -256,10 +257,12 @@ public class FSEditLog implements LogsPurgeable {
if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
StorageDirectory sd = storage.getStorageDirectory(u);
if (sd != null) {
- journalSet.add(new FileJournalManager(conf, sd, storage), required);
+ journalSet.add(new FileJournalManager(conf, sd, storage),
+ required, sharedEditsDirs.contains(u));
}
} else {
- journalSet.add(createJournal(u), required);
+ journalSet.add(createJournal(u), required,
+ sharedEditsDirs.contains(u));
}
}
@@ -1330,7 +1333,59 @@ public class FSEditLog implements LogsPurgeable {
// TODO: are we sure this is OK?
}
}
-
+
+ public long getSharedLogCTime() throws IOException {
+ for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+ if (jas.isShared()) {
+ return jas.getManager().getJournalCTime();
+ }
+ }
+ throw new IOException("No shared log found.");
+ }
+
+ public synchronized void doPreUpgradeOfSharedLog() throws IOException {
+ for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+ if (jas.isShared()) {
+ jas.getManager().doPreUpgrade();
+ }
+ }
+ }
+
+ public synchronized void doUpgradeOfSharedLog() throws IOException {
+ for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+ if (jas.isShared()) {
+ jas.getManager().doUpgrade(storage);
+ }
+ }
+ }
+
+ public synchronized void doFinalizeOfSharedLog() throws IOException {
+ for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+ if (jas.isShared()) {
+ jas.getManager().doFinalize();
+ }
+ }
+ }
+
+ public synchronized boolean canRollBackSharedLog(Storage prevStorage,
+ int targetLayoutVersion) throws IOException {
+ for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+ if (jas.isShared()) {
+ return jas.getManager().canRollBack(storage, prevStorage,
+ targetLayoutVersion);
+ }
+ }
+ throw new IOException("No shared log found.");
+ }
+
+ public synchronized void doRollback() throws IOException {
+ for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+ if (jas.isShared()) {
+ jas.getManager().doRollback();
+ }
+ }
+ }
+
public synchronized void discardSegments(long markerTxid)
throws IOException {
for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
@@ -1469,4 +1524,5 @@ public class FSEditLog implements LogsPurgeable {
+ uri, e);
}
}
+
}
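The new FSEditLog methods above all follow the same pattern: locate the shared JournalAndStream and delegate the corresponding upgrade step to its JournalManager. The sketch below is hypothetical glue code, not part of the patch; it shows the order in which FSImage drives these hooks during an HA upgrade and assumes an already-initialized FSEditLog with the shared journal registered.

    import java.io.IOException;

    import org.apache.hadoop.hdfs.server.namenode.FSEditLog;

    class SharedLogUpgradeSketch {
      static void upgradeSharedLog(FSEditLog editLog, boolean haEnabled)
          throws IOException {
        if (!haEnabled) {
          return; // non-HA: local edits dirs are upgraded like any other storage dir
        }
        editLog.doPreUpgradeOfSharedLog(); // move the shared journal's current state aside
        // ... local storage dirs are pre-upgraded and a new fsimage is saved here ...
        editLog.doUpgradeOfSharedLog();    // commit the upgraded layout on the shared journal
        // Much later, once the operator runs `hdfs dfsadmin -finalizeUpgrade`:
        editLog.doFinalizeOfSharedLog();   // discard the pre-upgrade state; rollback is no longer possible
      }
    }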
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
index 5e1d6972ffb..e0559870795 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
@@ -181,7 +181,8 @@ public class FSImage implements Closeable {
* @return true if the image needs to be saved or false otherwise
*/
boolean recoverTransitionRead(StartupOption startOpt, FSNamesystem target,
- MetaRecoveryContext recovery) throws IOException {
+ MetaRecoveryContext recovery)
+ throws IOException {
assert startOpt != StartupOption.FORMAT :
"NameNode formatting should be performed before reading the image";
@@ -260,8 +261,8 @@ public class FSImage implements Closeable {
doImportCheckpoint(target);
return false; // import checkpoint saved image already
case ROLLBACK:
- doRollback();
- break;
+ throw new AssertionError("Rollback is now a standalone command, "
+ + "NameNode should not be starting with this option.");
case REGULAR:
default:
// just load the image
@@ -280,17 +281,15 @@ public class FSImage implements Closeable {
private boolean recoverStorageDirs(StartupOption startOpt,
Map dataDirStates) throws IOException {
boolean isFormatted = false;
+ // This loop needs to be over all storage dirs, even shared dirs, to make
+ // sure that we properly examine their state, but we make sure we don't
+ // mutate the shared dir below in the actual loop.
for (Iterator it =
storage.dirIterator(); it.hasNext();) {
StorageDirectory sd = it.next();
StorageState curState;
try {
curState = sd.analyzeStorage(startOpt, storage);
- String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
- if (curState != StorageState.NORMAL && HAUtil.isHAEnabled(conf, nameserviceId)) {
- throw new IOException("Cannot start an HA namenode with name dirs " +
- "that need recovery. Dir: " + sd + " state: " + curState);
- }
// sd is locked but not opened
switch(curState) {
case NON_EXISTENT:
@@ -302,7 +301,7 @@ public class FSImage implements Closeable {
case NORMAL:
break;
default: // recovery is possible
- sd.doRecover(curState);
+ sd.doRecover(curState);
}
if (curState != StorageState.NOT_FORMATTED
&& startOpt != StartupOption.ROLLBACK) {
@@ -327,7 +326,7 @@ public class FSImage implements Closeable {
void checkUpgrade(FSNamesystem target) throws IOException {
// Upgrade or rolling upgrade is allowed only if there are
// no previous fs states in any of the directories
- for (Iterator it = storage.dirIterator(); it.hasNext();) {
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
StorageDirectory sd = it.next();
if (sd.getPreviousDir().exists())
throw new InconsistentFSStateException(sd.getRoot(),
@@ -356,7 +355,7 @@ public class FSImage implements Closeable {
checkUpgrade(target);
// load the latest image
- this.loadFSImage(target, null, null);
+ this.loadFSImage(target, StartupOption.UPGRADE, null);
// Do upgrade for each directory
target.checkRollingUpgrade("upgrade namenode");
@@ -368,28 +367,17 @@ public class FSImage implements Closeable {
List errorSDs =
Collections.synchronizedList(new ArrayList());
- for (Iterator it = storage.dirIterator(); it.hasNext();) {
+ assert !editLog.isSegmentOpen() : "Edits log must not be open.";
+ LOG.info("Starting upgrade of local storage directories."
+ + "\n old LV = " + oldLV
+ + "; old CTime = " + oldCTime
+ + ".\n new LV = " + storage.getLayoutVersion()
+ + "; new CTime = " + storage.getCTime());
+ // Do upgrade for each directory
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
StorageDirectory sd = it.next();
- LOG.info("Starting upgrade of image directory " + sd.getRoot()
- + ".\n old LV = " + oldLV
- + "; old CTime = " + oldCTime
- + ".\n new LV = " + storage.getLayoutVersion()
- + "; new CTime = " + storage.getCTime());
try {
- File curDir = sd.getCurrentDir();
- File prevDir = sd.getPreviousDir();
- File tmpDir = sd.getPreviousTmp();
- assert curDir.exists() : "Current directory must exist.";
- assert !prevDir.exists() : "previous directory must not exist.";
- assert !tmpDir.exists() : "previous.tmp directory must not exist.";
- assert !editLog.isSegmentOpen() : "Edits log must not be open.";
-
- // rename current to tmp
- NNStorage.rename(curDir, tmpDir);
-
- if (!curDir.mkdir()) {
- throw new IOException("Cannot create directory " + curDir);
- }
+ NNUpgradeUtil.doPreUpgrade(sd);
} catch (Exception e) {
LOG.error("Failed to move aside pre-upgrade storage " +
"in image directory " + sd.getRoot(), e);
@@ -397,41 +385,38 @@ public class FSImage implements Closeable {
continue;
}
}
+ if (target.isHaEnabled()) {
+ editLog.doPreUpgradeOfSharedLog();
+ }
storage.reportErrorsOnDirectories(errorSDs);
errorSDs.clear();
saveFSImageInAllDirs(target, editLog.getLastWrittenTxId());
- for (Iterator it = storage.dirIterator(); it.hasNext();) {
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
StorageDirectory sd = it.next();
try {
- // Write the version file, since saveFsImage above only makes the
- // fsimage_, and the directory is otherwise empty.
- storage.writeProperties(sd);
-
- File prevDir = sd.getPreviousDir();
- File tmpDir = sd.getPreviousTmp();
- // rename tmp to previous
- NNStorage.rename(tmpDir, prevDir);
+ NNUpgradeUtil.doUpgrade(sd, storage);
} catch (IOException ioe) {
- LOG.error("Unable to rename temp to previous for " + sd.getRoot(), ioe);
errorSDs.add(sd);
continue;
}
- LOG.info("Upgrade of " + sd.getRoot() + " is complete.");
+ }
+ if (target.isHaEnabled()) {
+ editLog.doUpgradeOfSharedLog();
}
storage.reportErrorsOnDirectories(errorSDs);
-
+
isUpgradeFinalized = false;
if (!storage.getRemovedStorageDirs().isEmpty()) {
- //during upgrade, it's a fatal error to fail any storage directory
+ // during upgrade, it's a fatal error to fail any storage directory
throw new IOException("Upgrade failed in "
+ storage.getRemovedStorageDirs().size()
+ " storage directory(ies), previously logged.");
}
}
- private void doRollback() throws IOException {
+ void doRollback(FSNamesystem fsns) throws IOException {
// Rollback is allowed only if there is
// a previous fs states in at least one of the storage directories.
// Directories that don't have previous state do not rollback
@@ -439,85 +424,46 @@ public class FSImage implements Closeable {
FSImage prevState = new FSImage(conf);
try {
prevState.getStorage().layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
- for (Iterator it = storage.dirIterator(); it.hasNext();) {
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
StorageDirectory sd = it.next();
- File prevDir = sd.getPreviousDir();
- if (!prevDir.exists()) { // use current directory then
- LOG.info("Storage directory " + sd.getRoot()
- + " does not contain previous fs state.");
- // read and verify consistency with other directories
- storage.readProperties(sd);
+ if (!NNUpgradeUtil.canRollBack(sd, storage, prevState.getStorage(),
+ HdfsConstants.NAMENODE_LAYOUT_VERSION)) {
continue;
}
-
- // read and verify consistency of the prev dir
- prevState.getStorage().readPreviousVersionProperties(sd);
-
- if (prevState.getLayoutVersion() != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
- throw new IOException(
- "Cannot rollback to storage version " +
- prevState.getLayoutVersion() +
- " using this version of the NameNode, which uses storage version " +
- HdfsConstants.NAMENODE_LAYOUT_VERSION + ". " +
- "Please use the previous version of HDFS to perform the rollback.");
- }
canRollback = true;
}
+
+ if (fsns.isHaEnabled()) {
+ // If HA is enabled, check if the shared log can be rolled back as well.
+ editLog.initJournalsForWrite();
+ canRollback |= editLog.canRollBackSharedLog(prevState.getStorage(),
+ HdfsConstants.NAMENODE_LAYOUT_VERSION);
+ }
+
if (!canRollback)
throw new IOException("Cannot rollback. None of the storage "
+ "directories contain previous fs state.");
-
+
// Now that we know all directories are going to be consistent
// Do rollback for each directory containing previous state
- for (Iterator it = storage.dirIterator(); it.hasNext();) {
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
StorageDirectory sd = it.next();
- File prevDir = sd.getPreviousDir();
- if (!prevDir.exists())
- continue;
-
LOG.info("Rolling back storage directory " + sd.getRoot()
- + ".\n new LV = " + prevState.getStorage().getLayoutVersion()
- + "; new CTime = " + prevState.getStorage().getCTime());
- File tmpDir = sd.getRemovedTmp();
- assert !tmpDir.exists() : "removed.tmp directory must not exist.";
- // rename current to tmp
- File curDir = sd.getCurrentDir();
- assert curDir.exists() : "Current directory must exist.";
- NNStorage.rename(curDir, tmpDir);
- // rename previous to current
- NNStorage.rename(prevDir, curDir);
-
- // delete tmp dir
- NNStorage.deleteDir(tmpDir);
- LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
+ + ".\n new LV = " + prevState.getStorage().getLayoutVersion()
+ + "; new CTime = " + prevState.getStorage().getCTime());
+ NNUpgradeUtil.doRollBack(sd);
}
+ if (fsns.isHaEnabled()) {
+ // If HA is enabled, try to roll back the shared log as well.
+ editLog.doRollback();
+ }
+
isUpgradeFinalized = true;
} finally {
prevState.close();
}
}
- private void doFinalize(StorageDirectory sd) throws IOException {
- File prevDir = sd.getPreviousDir();
- if (!prevDir.exists()) { // already discarded
- LOG.info("Directory " + prevDir + " does not exist.");
- LOG.info("Finalize upgrade for " + sd.getRoot()+ " is not required.");
- return;
- }
- LOG.info("Finalizing upgrade for storage directory "
- + sd.getRoot() + "."
- + (storage.getLayoutVersion()==0 ? "" :
- "\n cur LV = " + storage.getLayoutVersion()
- + "; cur CTime = " + storage.getCTime()));
- assert sd.getCurrentDir().exists() : "Current directory must exist.";
- final File tmpDir = sd.getFinalizedTmp();
- // rename previous to tmp and remove
- NNStorage.rename(prevDir, tmpDir);
- NNStorage.deleteDir(tmpDir);
- isUpgradeFinalized = true;
- LOG.info("Finalize upgrade for " + sd.getRoot()+ " is complete.");
- }
-
/**
* Load image from a checkpoint directory and save it into the current one.
* @param target the NameSystem to import into
@@ -561,12 +507,23 @@ public class FSImage implements Closeable {
saveNamespace(target);
getStorage().writeAll();
}
-
- void finalizeUpgrade() throws IOException {
- for (Iterator it = storage.dirIterator(); it.hasNext();) {
+
+ void finalizeUpgrade(boolean finalizeEditLog) throws IOException {
+ LOG.info("Finalizing upgrade for local dirs. " +
+ (storage.getLayoutVersion() == 0 ? "" :
+ "\n cur LV = " + storage.getLayoutVersion()
+ + "; cur CTime = " + storage.getCTime()));
+ for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
StorageDirectory sd = it.next();
- doFinalize(sd);
+ NNUpgradeUtil.doFinalize(sd);
}
+ if (finalizeEditLog) {
+ // We only do this in the case that HA is enabled and we're active. In any
+ // other case the NN will have done the upgrade of the edits directories
+ // already by virtue of the fact that they're local.
+ editLog.doFinalizeOfSharedLog();
+ }
+ isUpgradeFinalized = true;
}
boolean isUpgradeFinalized() {
@@ -763,18 +720,33 @@ public class FSImage implements Closeable {
}
}
- public void initEditLog(StartupOption startOpt) {
+ public void initEditLog(StartupOption startOpt) throws IOException {
Preconditions.checkState(getNamespaceID() != 0,
"Must know namespace ID before initting edit log");
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
- if (!HAUtil.isHAEnabled(conf, nameserviceId) ||
- (HAUtil.isHAEnabled(conf, nameserviceId) &&
- RollingUpgradeStartupOption.ROLLBACK.matches(startOpt))) {
- // If this NN is not HA or this NN is HA, but we're doing a rollback of
- // rolling upgrade so init the edit log for write.
+ if (!HAUtil.isHAEnabled(conf, nameserviceId)) {
+ // If this NN is not HA
editLog.initJournalsForWrite();
editLog.recoverUnclosedStreams();
+ } else if (HAUtil.isHAEnabled(conf, nameserviceId)
+ && (startOpt == StartupOption.UPGRADE
+ || RollingUpgradeStartupOption.ROLLBACK.matches(startOpt))) {
+ // This NN is HA, but we're doing an upgrade or a rollback of rolling
+ // upgrade so init the edit log for write.
+ editLog.initJournalsForWrite();
+ if (startOpt == StartupOption.UPGRADE) {
+ long sharedLogCTime = editLog.getSharedLogCTime();
+ if (this.storage.getCTime() < sharedLogCTime) {
+ throw new IOException("It looks like the shared log is already " +
+ "being upgraded but this NN has not been upgraded yet. You " +
+ "should restart this NameNode with the '" +
+ StartupOption.BOOTSTRAPSTANDBY.getName() + "' option to bring " +
+ "this NN in sync with the other.");
+ }
+ }
+ editLog.recoverUnclosedStreams();
} else {
+ // This NN is HA and we're not doing an upgrade.
editLog.initSharedJournalsForRead();
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index d9e8faa343e..3e894861ed7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -159,6 +159,8 @@ import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
+import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
+import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@@ -170,8 +172,6 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
-import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
-import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
@@ -556,6 +556,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return leaseManager;
}
+ boolean isHaEnabled() {
+ return haEnabled;
+ }
+
/**
* Check the supplied configuration for correctness.
* @param conf Supplies the configuration to validate.
@@ -891,7 +895,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
// This will start a new log segment and write to the seen_txid file, so
// we shouldn't do it when coming up in standby state
- if (!haEnabled) {
+ if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)) {
fsImage.openEditLogForWrite();
}
success = true;
@@ -1017,6 +1021,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
dir.fsImage.editLog.openForWrite();
}
+
if (haEnabled) {
// Renew all of the leases before becoming active.
// This is because, while we were in standby mode,
@@ -1052,14 +1057,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
initializedReplQueues = true;
}
+ private boolean inActiveState() {
+ return haContext != null &&
+ haContext.getState().getServiceState() == HAServiceState.ACTIVE;
+ }
+
/**
* @return Whether the namenode is transitioning to active state and is in the
* middle of the {@link #startActiveServices()}
*/
public boolean inTransitionToActive() {
- return haEnabled && haContext != null
- && haContext.getState().getServiceState() == HAServiceState.ACTIVE
- && startingActiveService;
+ return haEnabled && inActiveState() && startingActiveService;
}
private boolean shouldUseDelegationTokens() {
@@ -4587,11 +4595,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void finalizeUpgrade() throws IOException {
checkSuperuserPrivilege();
- checkOperation(OperationCategory.WRITE);
+ checkOperation(OperationCategory.UNCHECKED);
writeLock();
try {
- checkOperation(OperationCategory.WRITE);
- getFSImage().finalizeUpgrade();
+ checkOperation(OperationCategory.UNCHECKED);
+ getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
} finally {
writeUnlock();
}
@@ -7758,5 +7766,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
logger.addAppender(asyncAppender);
}
}
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java
index 52fb2d0abad..05f0ca72399 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java
@@ -17,35 +17,37 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
import java.io.File;
import java.io.IOException;
import java.util.Collection;
-import java.util.List;
-import java.util.Comparator;
import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
+import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
-import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
+import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import com.google.common.collect.ComparisonChain;
+import com.google.common.collect.Lists;
/**
* Journal manager for the common case of edits files being written
@@ -531,4 +533,49 @@ public class FileJournalManager implements JournalManager {
public void discardSegments(long startTxid) throws IOException {
discardEditLogSegments(startTxid);
}
+
+ @Override
+ public void doPreUpgrade() throws IOException {
+ LOG.info("Starting upgrade of edits directory " + sd.getRoot());
+ try {
+ NNUpgradeUtil.doPreUpgrade(sd);
+ } catch (IOException ioe) {
+ LOG.error("Failed to move aside pre-upgrade storage " +
+ "in image directory " + sd.getRoot(), ioe);
+ throw ioe;
+ }
+ }
+
+ /**
+ * This method assumes that the fields of the {@link Storage} object have
+ * already been updated to the appropriate new values for the upgrade.
+ */
+ @Override
+ public void doUpgrade(Storage storage) throws IOException {
+ NNUpgradeUtil.doUpgrade(sd, storage);
+ }
+
+ @Override
+ public void doFinalize() throws IOException {
+ NNUpgradeUtil.doFinalize(sd);
+ }
+
+ @Override
+ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+ int targetLayoutVersion) throws IOException {
+ return NNUpgradeUtil.canRollBack(sd, storage,
+ prevStorage, targetLayoutVersion);
+ }
+
+ @Override
+ public void doRollback() throws IOException {
+ NNUpgradeUtil.doRollBack(sd);
+ }
+
+ @Override
+ public long getJournalCTime() throws IOException {
+ StorageInfo sInfo = new StorageInfo((NodeType)null);
+ sInfo.readProperties(sd);
+ return sInfo.getCTime();
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java
index eb1315af467..fed3530e702 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java
@@ -22,7 +22,9 @@ import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
/**
@@ -65,6 +67,54 @@ public interface JournalManager extends Closeable, LogsPurgeable,
* Recover segments which have not been finalized.
*/
void recoverUnfinalizedSegments() throws IOException;
+
+ /**
+ * Perform any steps that must succeed across all JournalManagers involved in
+ * an upgrade before proceeding onto the actual upgrade stage. If a call to
+ * any JM's doPreUpgrade method fails, then doUpgrade will not be called for
+ * any JM.
+ */
+ void doPreUpgrade() throws IOException;
+
+ /**
+ * Perform the actual upgrade of the JM. After this is completed, the NN can
+ * begin to use the new upgraded metadata. This metadata may later be either
+ * finalized or rolled back to the previous state.
+ *
+ * @param storage info about the new upgraded versions.
+ */
+ void doUpgrade(Storage storage) throws IOException;
+
+ /**
+ * Finalize the upgrade. JMs should purge any state that they had been keeping
+ * around during the upgrade process. After this is completed, rollback is no
+ * longer allowed.
+ */
+ void doFinalize() throws IOException;
+
+ /**
+ * Return true if this JM can roll back to the previous storage state, false
+ * otherwise. The NN will refuse to run the rollback operation unless at least
+ * one JM or fsimage storage directory can roll back.
+ *
+ * @param storage the storage info for the current state
+ * @param prevStorage the storage info for the previous (unupgraded) state
+ * @param targetLayoutVersion the layout version we intend to roll back to
+ * @return true if this JM can roll back, false otherwise.
+ */
+ boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+ int targetLayoutVersion) throws IOException;
+
+ /**
+ * Perform the rollback to the previous FS state. JMs which do not need to
+ * roll back their state should just return without error.
+ */
+ void doRollback() throws IOException;
+
+ /**
+ * @return the CTime of the journal manager.
+ */
+ long getJournalCTime() throws IOException;
/**
* Discard the segments whose first txid is >= the given txid.
@@ -93,4 +143,5 @@ public interface JournalManager extends Closeable, LogsPurgeable,
super(reason);
}
}
+
}
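To make the contract described in the javadoc above concrete, here is a hypothetical driver (not part of the patch) showing the intended call order across a set of JournalManagers: every doPreUpgrade() must succeed before any doUpgrade(Storage) runs, and doFinalize() happens only once the operator finalizes the upgrade.

    import java.io.IOException;
    import java.util.List;

    import org.apache.hadoop.hdfs.server.common.Storage;
    import org.apache.hadoop.hdfs.server.namenode.JournalManager;

    class JournalUpgradeDriverSketch {
      static void upgradeAll(List<JournalManager> jms, Storage upgradedStorageInfo)
          throws IOException {
        // Stage 1: every JM must succeed at pre-upgrade before anything is committed.
        for (JournalManager jm : jms) {
          jm.doPreUpgrade();
        }
        // Stage 2: write the upgraded state everywhere.
        for (JournalManager jm : jms) {
          jm.doUpgrade(upgradedStorageInfo);
        }
        // Stage 3 (later, on operator request): either finalize or roll back.
        // for (JournalManager jm : jms) { jm.doFinalize(); }
      }
    }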
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
index bce54e85c28..a7203f95746 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
@@ -33,6 +33,8 @@ import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
@@ -78,11 +80,14 @@ public class JournalSet implements JournalManager {
private final JournalManager journal;
private boolean disabled = false;
private EditLogOutputStream stream;
- private boolean required = false;
+ private final boolean required;
+ private final boolean shared;
- public JournalAndStream(JournalManager manager, boolean required) {
+ public JournalAndStream(JournalManager manager, boolean required,
+ boolean shared) {
this.journal = manager;
this.required = required;
+ this.shared = shared;
}
public void startLogSegment(long txId, int layoutVersion) throws IOException {
@@ -164,6 +169,10 @@ public class JournalSet implements JournalManager {
public boolean isRequired() {
return required;
}
+
+ public boolean isShared() {
+ return shared;
+ }
}
// COW implementation is necessary since some users (eg the web ui) call
@@ -179,7 +188,7 @@ public class JournalSet implements JournalManager {
@Override
public void format(NamespaceInfo nsInfo) throws IOException {
- // The iteration is done by FSEditLog itself
+ // The operation is done by FSEditLog itself
throw new UnsupportedOperationException();
}
@@ -539,9 +548,13 @@ public class JournalSet implements JournalManager {
}
return jList;
}
-
+
void add(JournalManager j, boolean required) {
- JournalAndStream jas = new JournalAndStream(j, required);
+ add(j, required, false);
+ }
+
+ void add(JournalManager j, boolean required, boolean shared) {
+ JournalAndStream jas = new JournalAndStream(j, required, shared);
journals.add(jas);
}
@@ -663,4 +676,40 @@ public class JournalSet implements JournalManager {
// This operation is handled by FSEditLog directly.
throw new UnsupportedOperationException();
}
+
+ @Override
+ public void doPreUpgrade() throws IOException {
+ // This operation is handled by FSEditLog directly.
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doUpgrade(Storage storage) throws IOException {
+ // This operation is handled by FSEditLog directly.
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doFinalize() throws IOException {
+ // This operation is handled by FSEditLog directly.
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+ int targetLayoutVersion) throws IOException {
+ // This operation is handled by FSEditLog directly.
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void doRollback() throws IOException {
+ // This operation is handled by FSEditLog directly.
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getJournalCTime() throws IOException {
+ // This operation is handled by FSEditLog directly.
+ throw new UnsupportedOperationException();
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
index 912827a64b4..0a5594b6b8d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
@@ -301,7 +301,7 @@ public class NNStorage extends Storage implements Closeable,
if(dirName.getScheme().compareTo("file") == 0) {
this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
dirType,
- !sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
+ sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
}
}
@@ -312,7 +312,7 @@ public class NNStorage extends Storage implements Closeable,
// URI is of type file://
if(dirName.getScheme().compareTo("file") == 0)
this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
- NameNodeDirType.EDITS, !sharedEditsDirs.contains(dirName)));
+ NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
}
}
@@ -1007,7 +1007,7 @@ public class NNStorage extends Storage implements Closeable,
StringBuilder layoutVersions = new StringBuilder();
// First determine what range of layout versions we're going to inspect
- for (Iterator it = dirIterator();
+ for (Iterator<StorageDirectory> it = dirIterator(false);
it.hasNext();) {
StorageDirectory sd = it.next();
if (!sd.getVersionFile().exists()) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
new file mode 100644
index 00000000000..1c491e5dcea
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
+
+abstract class NNUpgradeUtil {
+
+ private static final Log LOG = LogFactory.getLog(NNUpgradeUtil.class);
+
+ /**
+ * Return true if this storage dir can roll back to the previous storage
+ * state, false otherwise. The NN will refuse to run the rollback operation
+ * unless at least one JM or fsimage storage directory can roll back.
+ *
+ * @param sd the storage directory to check
+ * @param storage the storage info for the current state
+ * @param prevStorage the storage info for the previous (unupgraded) state
+ * @param targetLayoutVersion the layout version we intend to roll back to
+ * @return true if this storage dir can roll back, false otherwise.
+ * @throws IOException in the event of error
+ */
+ static boolean canRollBack(StorageDirectory sd, StorageInfo storage,
+ StorageInfo prevStorage, int targetLayoutVersion) throws IOException {
+ File prevDir = sd.getPreviousDir();
+ if (!prevDir.exists()) { // use current directory then
+ LOG.info("Storage directory " + sd.getRoot()
+ + " does not contain previous fs state.");
+ // read and verify consistency with other directories
+ storage.readProperties(sd);
+ return false;
+ }
+
+ // read and verify consistency of the prev dir
+ prevStorage.readPreviousVersionProperties(sd);
+
+ if (prevStorage.getLayoutVersion() != targetLayoutVersion) {
+ throw new IOException(
+ "Cannot rollback to storage version " +
+ prevStorage.getLayoutVersion() +
+ " using this version of the NameNode, which uses storage version " +
+ targetLayoutVersion + ". " +
+ "Please use the previous version of HDFS to perform the rollback.");
+ }
+
+ return true;
+ }
+
+ /**
+ * Finalize the upgrade. The previous dir, if any, will be renamed and
+ * removed. After this is completed, rollback is no longer allowed.
+ *
+ * @param sd the storage directory to finalize
+ * @throws IOException in the event of error
+ */
+ static void doFinalize(StorageDirectory sd) throws IOException {
+ File prevDir = sd.getPreviousDir();
+ if (!prevDir.exists()) { // already discarded
+ LOG.info("Directory " + prevDir + " does not exist.");
+ LOG.info("Finalize upgrade for " + sd.getRoot()+ " is not required.");
+ return;
+ }
+ LOG.info("Finalizing upgrade of storage directory " + sd.getRoot());
+ assert sd.getCurrentDir().exists() : "Current directory must exist.";
+ final File tmpDir = sd.getFinalizedTmp();
+ // rename previous to tmp and remove
+ NNStorage.rename(prevDir, tmpDir);
+ NNStorage.deleteDir(tmpDir);
+ LOG.info("Finalize upgrade for " + sd.getRoot()+ " is complete.");
+ }
+
+ /**
+ * Perform any steps that must succeed across all storage dirs/JournalManagers
+ * involved in an upgrade before proceeding onto the actual upgrade stage. If
+ * a call to any JM's or local storage dir's doPreUpgrade method fails, then
+ * doUpgrade will not be called for any JM. The existing current dir is
+ * renamed to previous.tmp, and then a new, empty current dir is created.
+ *
+ * @param sd the storage directory in which to perform the pre-upgrade procedure.
+ * @throws IOException in the event of error
+ */
+ static void doPreUpgrade(StorageDirectory sd) throws IOException {
+ LOG.info("Starting upgrade of storage directory " + sd.getRoot());
+ File curDir = sd.getCurrentDir();
+ File prevDir = sd.getPreviousDir();
+ File tmpDir = sd.getPreviousTmp();
+ assert curDir.exists() : "Current directory must exist.";
+ assert !prevDir.exists() : "previous directory must not exist.";
+ assert !tmpDir.exists() : "previous.tmp directory must not exist.";
+
+ // rename current to tmp
+ NNStorage.rename(curDir, tmpDir);
+
+ if (!curDir.mkdir()) {
+ throw new IOException("Cannot create directory " + curDir);
+ }
+ }
+
+ /**
+ * Perform the upgrade of the storage dir to the given storage info. The new
+ * storage info is written into the current directory, and the previous.tmp
+ * directory is renamed to previous.
+ *
+ * @param sd the storage directory to upgrade
+ * @param storage info about the new upgraded versions.
+ * @throws IOException in the event of error
+ */
+ static void doUpgrade(StorageDirectory sd, Storage storage) throws
+ IOException {
+ LOG.info("Performing upgrade of storage directory " + sd.getRoot());
+ try {
+ // Write the version file, since saveFsImage only makes the
+ // fsimage_<txid>, and the directory is otherwise empty.
+ storage.writeProperties(sd);
+
+ File prevDir = sd.getPreviousDir();
+ File tmpDir = sd.getPreviousTmp();
+ // rename tmp to previous
+ NNStorage.rename(tmpDir, prevDir);
+ } catch (IOException ioe) {
+ LOG.error("Unable to rename temp to previous for " + sd.getRoot(), ioe);
+ throw ioe;
+ }
+ }
+
+ /**
+ * Perform rollback of the storage dir to the previous state. The existing
+ * current dir is removed, and the previous dir is renamed to current.
+ *
+ * @param sd the storage directory to roll back.
+ * @throws IOException in the event of error
+ */
+ static void doRollBack(StorageDirectory sd)
+ throws IOException {
+ File prevDir = sd.getPreviousDir();
+ if (!prevDir.exists())
+ return;
+
+ File tmpDir = sd.getRemovedTmp();
+ assert !tmpDir.exists() : "removed.tmp directory must not exist.";
+ // rename current to tmp
+ File curDir = sd.getCurrentDir();
+ assert curDir.exists() : "Current directory must exist.";
+ NNStorage.rename(curDir, tmpDir);
+ // rename previous to current
+ NNStorage.rename(prevDir, curDir);
+
+ // delete tmp dir
+ NNStorage.deleteDir(tmpDir);
+ LOG.info("Rollback of " + sd.getRoot() + " is complete.");
+ }
+
+}
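A hypothetical walk-through of the directory lifecycle NNUpgradeUtil implements for a single storage dir. Because the class and its methods are package-private, the sketch lives in the same package; the wrapper method itself is illustrative and not part of the patch.

    package org.apache.hadoop.hdfs.server.namenode;

    import java.io.IOException;

    import org.apache.hadoop.hdfs.server.common.Storage;
    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;

    class NNUpgradeUtilSketch {
      static void upgradeOneDir(StorageDirectory sd, Storage upgradedStorage)
          throws IOException {
        // current/ is renamed to previous.tmp/ and an empty current/ is recreated.
        NNUpgradeUtil.doPreUpgrade(sd);
        // The new VERSION is written into current/ and previous.tmp/ becomes previous/.
        NNUpgradeUtil.doUpgrade(sd, upgradedStorage);
        // Exactly one of the following ends the upgrade window:
        // NNUpgradeUtil.doFinalize(sd); // previous/ is discarded; no rollback after this
        // NNUpgradeUtil.doRollBack(sd); // previous/ becomes current/ again
      }
    }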
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
index 06e3f1e2f3b..856491679cb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
@@ -662,7 +662,7 @@ public class NameNode implements NameNodeStatusMXBean {
String nsId = getNameServiceId(conf);
String namenodeId = HAUtil.getNameNodeId(conf, nsId);
this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
- state = createHAState();
+ state = createHAState(getStartupOption(conf));
this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
this.haContext = createHAContext();
try {
@@ -684,8 +684,12 @@ public class NameNode implements NameNodeStatusMXBean {
}
}
- protected HAState createHAState() {
- return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
+ protected HAState createHAState(StartupOption startOpt) {
+ if (!haEnabled || startOpt == StartupOption.UPGRADE) {
+ return ACTIVE_STATE;
+ } else {
+ return STANDBY_STATE;
+ }
}
protected HAContext createHAContext() {
@@ -1037,26 +1041,28 @@ public class NameNode implements NameNodeStatusMXBean {
}
}
}
-
- private static boolean finalize(Configuration conf,
- boolean isConfirmationNeeded
- ) throws IOException {
+
+ @VisibleForTesting
+ public static boolean doRollback(Configuration conf,
+ boolean isConfirmationNeeded) throws IOException {
String nsId = DFSUtil.getNamenodeNameServiceId(conf);
String namenodeId = HAUtil.getNameNodeId(conf, nsId);
initializeGenericKeys(conf, nsId, namenodeId);
FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
System.err.print(
- "\"finalize\" will remove the previous state of the files system.\n"
- + "Recent upgrade will become permanent.\n"
- + "Rollback option will not be available anymore.\n");
+ "\"rollBack\" will remove the current state of the file system,\n"
+ + "returning you to the state prior to initiating your recent.\n"
+ + "upgrade. This action is permanent and cannot be undone. If you\n"
+ + "are performing a rollback in an HA environment, you should be\n"
+ + "certain that no NameNode process is running on any host.");
if (isConfirmationNeeded) {
- if (!confirmPrompt("Finalize filesystem state?")) {
- System.err.println("Finalize aborted.");
+ if (!confirmPrompt("Roll back file system state?")) {
+ System.err.println("Rollback aborted.");
return true;
}
}
- nsys.dir.fsImage.finalizeUpgrade();
+ nsys.dir.fsImage.doRollback(nsys);
return false;
}
@@ -1244,14 +1250,6 @@ public class NameNode implements NameNodeStatusMXBean {
return null;
}
setStartupOption(conf, startOpt);
-
- if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) &&
- (startOpt == StartupOption.UPGRADE ||
- startOpt == StartupOption.ROLLBACK ||
- startOpt == StartupOption.FINALIZE)) {
- throw new HadoopIllegalArgumentException("Invalid startup option. " +
- "Cannot perform DFS upgrade with HA enabled.");
- }
switch (startOpt) {
case FORMAT: {
@@ -1267,10 +1265,17 @@ public class NameNode implements NameNodeStatusMXBean {
return null;
}
case FINALIZE: {
- boolean aborted = finalize(conf, true);
- terminate(aborted ? 1 : 0);
+ System.err.println("Use of the argument '" + StartupOption.FINALIZE +
+ "' is no longer supported. To finalize an upgrade, start the NN " +
+ " and then run `hdfs dfsadmin -finalizeUpgrade'");
+ terminate(1);
return null; // avoid javac warning
}
+ case ROLLBACK: {
+ boolean aborted = doRollback(conf, true);
+ terminate(aborted ? 1 : 0);
+ return null; // avoid warning
+ }
case BOOTSTRAPSTANDBY: {
String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
int rc = BootstrapStandby.run(toolArgs, conf);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
index 1f6c2705097..57dfd1a104e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
@@ -44,9 +44,9 @@ import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
+import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
-import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.tools.DFSHAAdmin;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
index 2339acf59cb..b260277b15b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
+import java.net.URI;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
@@ -47,6 +48,7 @@ import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -871,7 +873,24 @@ public class DFSAdmin extends FsShell {
*/
public int finalizeUpgrade() throws IOException {
DistributedFileSystem dfs = getDFS();
- dfs.finalizeUpgrade();
+
+ Configuration dfsConf = dfs.getConf();
+ URI dfsUri = dfs.getUri();
+ boolean isHaEnabled = HAUtil.isLogicalUri(dfsConf, dfsUri);
+ if (isHaEnabled) {
+ // In the case of HA, run finalizeUpgrade for all NNs in this nameservice
+ String nsId = dfsUri.getHost();
+ List<ClientProtocol> namenodes =
+ HAUtil.getProxiesForAllNameNodesInNameservice(dfsConf, nsId);
+ if (!HAUtil.isAtLeastOneActive(namenodes)) {
+ throw new IOException("Cannot finalize with no NameNode active");
+ }
+ for (ClientProtocol haNn : namenodes) {
+ haNn.finalizeUpgrade();
+ }
+ } else {
+ dfs.finalizeUpgrade();
+ }
return 0;
}
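The HA branch added above boils down to the pattern sketched below: resolve every NameNode behind the logical URI and issue finalizeUpgrade to each, requiring at least one to be active. The generic types elided in the diff are assumed to be List<ClientProtocol>; the standalone method is illustrative only.

    import java.io.IOException;
    import java.net.URI;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.HAUtil;
    import org.apache.hadoop.hdfs.protocol.ClientProtocol;

    class FinalizeUpgradeHaSketch {
      static void finalizeAll(Configuration conf, URI logicalUri) throws IOException {
        String nsId = logicalUri.getHost(); // the nameservice ID is the host of the logical URI
        List<ClientProtocol> namenodes =
            HAUtil.getProxiesForAllNameNodesInNameservice(conf, nsId);
        if (!HAUtil.isAtLeastOneActive(namenodes)) {
          throw new IOException("Cannot finalize with no NameNode active");
        }
        for (ClientProtocol nn : namenodes) {
          // Each NN finalizes its local dirs; the active one also finalizes the shared log.
          nn.finalizeUpgrade();
        }
      }
    }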
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto
index c31739995c9..e4903ba4955 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto
@@ -145,6 +145,72 @@ message DiscardSegmentsRequestProto {
message DiscardSegmentsResponseProto {
}
+/**
+ * getJournalCTime()
+ */
+message GetJournalCTimeRequestProto {
+ required JournalIdProto jid = 1;
+}
+
+message GetJournalCTimeResponseProto {
+ required int64 resultCTime = 1;
+}
+
+/**
+ * doPreUpgrade()
+ */
+message DoPreUpgradeRequestProto {
+ required JournalIdProto jid = 1;
+}
+
+message DoPreUpgradeResponseProto {
+}
+
+/**
+ * doUpgrade()
+ */
+message DoUpgradeRequestProto {
+ required JournalIdProto jid = 1;
+ required StorageInfoProto sInfo = 2;
+}
+
+message DoUpgradeResponseProto {
+}
+
+/**
+ * doFinalize()
+ */
+message DoFinalizeRequestProto {
+ required JournalIdProto jid = 1;
+}
+
+message DoFinalizeResponseProto {
+}
+
+/**
+ * canRollBack()
+ */
+message CanRollBackRequestProto {
+ required JournalIdProto jid = 1;
+ required StorageInfoProto storage = 2;
+ required StorageInfoProto prevStorage = 3;
+ required int32 targetLayoutVersion = 4;
+}
+
+message CanRollBackResponseProto {
+ required bool canRollBack = 1;
+}
+
+/**
+ * doRollback()
+ */
+message DoRollbackRequestProto {
+ required JournalIdProto jid = 1;
+}
+
+message DoRollbackResponseProto {
+}
+
/**
* getJournalState()
*/
@@ -250,6 +316,18 @@ service QJournalProtocolService {
rpc discardSegments(DiscardSegmentsRequestProto) returns (DiscardSegmentsResponseProto);
+ rpc getJournalCTime(GetJournalCTimeRequestProto) returns (GetJournalCTimeResponseProto);
+
+ rpc doPreUpgrade(DoPreUpgradeRequestProto) returns (DoPreUpgradeResponseProto);
+
+ rpc doUpgrade(DoUpgradeRequestProto) returns (DoUpgradeResponseProto);
+
+ rpc doFinalize(DoFinalizeRequestProto) returns (DoFinalizeResponseProto);
+
+ rpc canRollBack(CanRollBackRequestProto) returns (CanRollBackResponseProto);
+
+ rpc doRollback(DoRollbackRequestProto) returns (DoRollbackResponseProto);
+
rpc getJournalState(GetJournalStateRequestProto) returns (GetJournalStateResponseProto);
rpc newEpoch(NewEpochRequestProto) returns (NewEpochResponseProto);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm
index b02aaa9c7fd..31dccb29e41 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm
@@ -763,3 +763,49 @@ digest:hdfs-zkfcs:vlUvLnd8MlacsE80rDuu6ONESbM=:rwcda
Even if automatic failover is configured, you may initiate a manual failover
using the same <<>> command. It will perform a coordinated
failover.
+
+* HDFS Upgrade/Finalization/Rollback with HA Enabled
+
+ When moving between versions of HDFS, sometimes the newer software can simply
+ be installed and the cluster restarted. Sometimes, however, upgrading the
+ version of HDFS you're running may require changing on-disk data. In this case,
+ one must use the HDFS Upgrade/Finalize/Rollback facility after installing the
+ new software. This process is made more complex in an HA environment, since the
+ on-disk metadata that the NN relies upon is by definition distributed, both on
+ the two HA NNs in the pair, and on the JournalNodes in the case that QJM is
+ being used for the shared edits storage. This documentation section describes
+ the procedure to use the HDFS Upgrade/Finalize/Rollback facility in an HA setup.
+
+ <<To perform an HA upgrade>>, the operator must do the following:
+
+ [[1]] Shut down all of the NNs as normal, and install the newer software.
+
+ [[2]] Start one of the NNs with the <<<'-upgrade'>>> flag.
+
+ [[3]] On start, this NN will not enter the standby state as usual in an HA
+ setup. Rather, this NN will immediately enter the active state, perform an
+ upgrade of its local storage dirs, and also perform an upgrade of the shared
+ edit log.
+
+ [[4]] At this point the other NN in the HA pair will be out of sync with
+ the upgraded NN. In order to bring it back in sync and once again have a highly
+ available setup, you should re-bootstrap this NameNode by running the NN with
+ the <<<'-bootstrapStandby'>>> flag. It is an error to start this second NN with
+ the <<<'-upgrade'>>> flag.
+
+ Note that if at any time you want to restart the NameNodes before finalizing
+ or rolling back the upgrade, you should start the NNs as normal, i.e. without
+ any special startup flag.
+
+ <<To finalize an HA upgrade>>, the operator will use the <<<`hdfs dfsadmin
+ -finalizeUpgrade'>>> command while the NNs are running and one of them
+ is active. The active NN at the time this happens will perform the finalization
+ of the shared log, and the NN whose local storage directories contain the
+ previous FS state will delete its local state.
+
+ <<To perform a rollback>> of an upgrade, both NNs should first be shut down.
+ The operator should run the rollback command on the NN where they initiated
+ the upgrade procedure. This will perform the rollback on the local dirs there,
+ as well as on the shared edit log, whether NFS or the JNs. Afterward, this NN
+ should be started and the operator should run <<<`-bootstrapStandby'>>> on the
+ other NN to bring the two NNs in sync with this rolled-back file system state.
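
For completeness, the finalize step can also be driven from Java rather than the CLI. A minimal sketch, assuming the client configuration's fs.defaultFS points at the logical HA nameservice URI; note that a bare DistributedFileSystem.finalizeUpgrade() only reaches the NameNode the client is currently talking to, whereas the dfsadmin change earlier in this patch additionally fans the call out to every NN in the nameservice.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DistributedFileSystem;

    public class FinalizeHaUpgradeExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();        // picks up core-site/hdfs-site
        FileSystem fs = FileSystem.get(conf);            // resolves the logical HA URI
        ((DistributedFileSystem) fs).finalizeUpgrade();  // same RPC the CLI issues
      }
    }
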
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
index d26fa392138..6fe8ecbadca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
@@ -147,6 +147,7 @@ public class MiniDFSCluster {
private boolean enableManagedDfsDirsRedundancy = true;
private boolean manageDataDfsDirs = true;
private StartupOption option = null;
+ private StartupOption dnOption = null;
private String[] racks = null;
private String [] hosts = null;
private long [] simulatedCapacities = null;
@@ -242,6 +243,14 @@ public class MiniDFSCluster {
this.option = val;
return this;
}
+
+ /**
+ * Default: null
+ */
+ public Builder dnStartupOption(StartupOption val) {
+ this.dnOption = val;
+ return this;
+ }
/**
* Default: null
@@ -371,6 +380,7 @@ public class MiniDFSCluster {
builder.enableManagedDfsDirsRedundancy,
builder.manageDataDfsDirs,
builder.option,
+ builder.dnOption,
builder.racks,
builder.hosts,
builder.simulatedCapacities,
@@ -427,18 +437,24 @@ public class MiniDFSCluster {
/**
* Stores the information related to a namenode in the cluster
*/
- static class NameNodeInfo {
+ public static class NameNodeInfo {
final NameNode nameNode;
final Configuration conf;
final String nameserviceId;
final String nnId;
+ StartupOption startOpt;
NameNodeInfo(NameNode nn, String nameserviceId, String nnId,
- Configuration conf) {
+ StartupOption startOpt, Configuration conf) {
this.nameNode = nn;
this.nameserviceId = nameserviceId;
this.nnId = nnId;
+ this.startOpt = startOpt;
this.conf = conf;
}
+
+ public void setStartOpt(StartupOption startOpt) {
+ this.startOpt = startOpt;
+ }
}
/**
@@ -622,8 +638,8 @@ public class MiniDFSCluster {
long[] simulatedCapacities) throws IOException {
this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster
initMiniDFSCluster(conf, numDataNodes, StorageType.DEFAULT, format,
- manageNameDfsDirs, true, true, manageDataDfsDirs,
- operation, racks, hosts,
+ manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs,
+ operation, null, racks, hosts,
simulatedCapacities, null, true, false,
MiniDFSNNTopology.simpleSingleNN(nameNodePort, 0), true, false, false, null);
}
@@ -632,7 +648,8 @@ public class MiniDFSCluster {
Configuration conf,
int numDataNodes, StorageType storageType, boolean format, boolean manageNameDfsDirs,
boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
- boolean manageDataDfsDirs, StartupOption operation, String[] racks,
+ boolean manageDataDfsDirs, StartupOption startOpt,
+ StartupOption dnStartOpt, String[] racks,
String[] hosts, long[] simulatedCapacities, String clusterId,
boolean waitSafeMode, boolean setupHostsFile,
MiniDFSNNTopology nnTopology, boolean checkExitOnShutdown,
@@ -685,7 +702,7 @@ public class MiniDFSCluster {
createNameNodesAndSetConf(
nnTopology, manageNameDfsDirs, manageNameDfsSharedDirs,
enableManagedDfsDirsRedundancy,
- format, operation, clusterId, conf);
+ format, startOpt, clusterId, conf);
} catch (IOException ioe) {
LOG.error("IOE creating namenodes. Permissions dump:\n" +
createPermissionsDiagnosisString(data_dir));
@@ -698,13 +715,14 @@ public class MiniDFSCluster {
}
}
- if (operation == StartupOption.RECOVER) {
+ if (startOpt == StartupOption.RECOVER) {
return;
}
// Start the DataNodes
startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs,
- operation, racks, hosts, simulatedCapacities, setupHostsFile,
+ dnStartOpt != null ? dnStartOpt : startOpt,
+ racks, hosts, simulatedCapacities, setupHostsFile,
checkDataNodeAddrConfig, checkDataNodeHostConfig, dnConfOverlays);
waitClusterUp();
//make sure ProxyUsers uses the latest conf
@@ -783,6 +801,8 @@ public class MiniDFSCluster {
if (manageNameDfsSharedDirs) {
URI sharedEditsUri = getSharedEditsDir(nnCounter, nnCounter+nnIds.size()-1);
conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, sharedEditsUri.toString());
+ // Clean out the shared edits dir completely, including all subdirectories.
+ FileUtil.fullyDelete(new File(sharedEditsUri));
}
}
@@ -890,7 +910,8 @@ public class MiniDFSCluster {
URI srcDir = Lists.newArrayList(srcDirs).get(0);
FileSystem dstFS = FileSystem.getLocal(dstConf).getRaw();
for (URI dstDir : dstDirs) {
- Preconditions.checkArgument(!dstDir.equals(srcDir));
+ Preconditions.checkArgument(!dstDir.equals(srcDir),
+ "src and dst are the same: " + dstDir);
File dstDirF = new File(dstDir);
if (dstDirF.exists()) {
if (!FileUtil.fullyDelete(dstDirF)) {
@@ -924,6 +945,18 @@ public class MiniDFSCluster {
conf.set(key, "127.0.0.1:" + nnConf.getIpcPort());
}
+ private static String[] createArgs(StartupOption operation) {
+ if (operation == StartupOption.ROLLINGUPGRADE) {
+ return new String[]{operation.getName(),
+ operation.getRollingUpgradeStartupOption().name()};
+ }
+ String[] args = (operation == null ||
+ operation == StartupOption.FORMAT ||
+ operation == StartupOption.REGULAR) ?
+ new String[] {} : new String[] {operation.getName()};
+ return args;
+ }
+
private void createNameNode(int nnIndex, Configuration conf,
int numDataNodes, boolean format, StartupOption operation,
String clusterId, String nameserviceId,
@@ -938,10 +971,7 @@ public class MiniDFSCluster {
}
// Start the NameNode
- String[] args = (operation == null ||
- operation == StartupOption.FORMAT ||
- operation == StartupOption.REGULAR) ?
- new String[] {} : new String[] {operation.getName()};
+ String[] args = createArgs(operation);
NameNode nn = NameNode.createNameNode(args, conf);
if (operation == StartupOption.RECOVER) {
return;
@@ -963,7 +993,7 @@ public class MiniDFSCluster {
DFSUtil.setGenericConf(conf, nameserviceId, nnId,
DFS_NAMENODE_HTTP_ADDRESS_KEY);
nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId,
- new Configuration(conf));
+ operation, new Configuration(conf));
}
/**
@@ -1544,7 +1574,7 @@ public class MiniDFSCluster {
nn.stop();
nn.join();
Configuration conf = nameNodes[nnIndex].conf;
- nameNodes[nnIndex] = new NameNodeInfo(null, null, null, conf);
+ nameNodes[nnIndex] = new NameNodeInfo(null, null, null, null, conf);
}
}
@@ -1590,10 +1620,17 @@ public class MiniDFSCluster {
String... args) throws IOException {
String nameserviceId = nameNodes[nnIndex].nameserviceId;
String nnId = nameNodes[nnIndex].nnId;
+ StartupOption startOpt = nameNodes[nnIndex].startOpt;
Configuration conf = nameNodes[nnIndex].conf;
shutdownNameNode(nnIndex);
+ if (args.length != 0) {
+ startOpt = null;
+ } else {
+ args = createArgs(startOpt);
+ }
NameNode nn = NameNode.createNameNode(args, conf);
- nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId, conf);
+ nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId, startOpt,
+ conf);
if (waitActive) {
waitClusterUp();
LOG.info("Restarted the namenode");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java
index debd8e48391..7a541e6622c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.util.StringUtils;
import org.junit.After;
import org.junit.Test;
@@ -98,10 +99,10 @@ public class TestDFSRollback {
* Attempts to start a NameNode with the given operation. Starting
* the NameNode should throw an exception.
*/
- void startNameNodeShouldFail(StartupOption operation, String searchString) {
+ void startNameNodeShouldFail(String searchString) {
try {
+ NameNode.doRollback(conf, false);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
- .startupOption(operation)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
@@ -150,24 +151,19 @@ public class TestDFSRollback {
log("Normal NameNode rollback", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
- cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
- .format(false)
- .manageDataDfsDirs(false)
- .manageNameDfsDirs(false)
- .startupOption(StartupOption.ROLLBACK)
- .build();
+ NameNode.doRollback(conf, false);
checkResult(NAME_NODE, nameNodeDirs);
- cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("Normal DataNode rollback", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+ NameNode.doRollback(conf, false);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
- .startupOption(StartupOption.ROLLBACK)
+ .dnStartupOption(StartupOption.ROLLBACK)
.build();
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "previous");
@@ -180,11 +176,12 @@ public class TestDFSRollback {
log("Normal BlockPool rollback", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+ NameNode.doRollback(conf, false);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
- .startupOption(StartupOption.ROLLBACK)
+ .dnStartupOption(StartupOption.ROLLBACK)
.build();
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
UpgradeUtilities.createBlockPoolStorageDirs(dataNodeDirs, "current",
@@ -219,10 +216,10 @@ public class TestDFSRollback {
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
-
+
log("NameNode rollback without existing previous dir", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
- startNameNodeShouldFail(StartupOption.ROLLBACK,
+ startNameNodeShouldFail(
"None of the storage directories contain previous fs state");
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
@@ -239,15 +236,16 @@ public class TestDFSRollback {
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
-
+
log("DataNode rollback with future stored layout version in previous", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+ NameNode.doRollback(conf, false);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
- .startupOption(StartupOption.ROLLBACK)
+ .dnStartupOption(StartupOption.ROLLBACK)
.build();
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
baseDirs = UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "previous");
@@ -269,11 +267,12 @@ public class TestDFSRollback {
log("DataNode rollback with newer fsscTime in previous", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+ NameNode.doRollback(conf, false);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.format(false)
.manageDataDfsDirs(false)
.manageNameDfsDirs(false)
- .startupOption(StartupOption.ROLLBACK)
+ .dnStartupOption(StartupOption.ROLLBACK)
.build();
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
@@ -291,21 +290,19 @@ public class TestDFSRollback {
cluster.shutdown();
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
-
+
log("NameNode rollback with no edits file", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
deleteMatchingFiles(baseDirs, "edits.*");
- startNameNodeShouldFail(StartupOption.ROLLBACK,
- "Gap in transactions");
+ startNameNodeShouldFail("Gap in transactions");
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("NameNode rollback with no image file", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
deleteMatchingFiles(baseDirs, "fsimage_.*");
- startNameNodeShouldFail(StartupOption.ROLLBACK,
- "No valid image files found");
+ startNameNodeShouldFail("No valid image files found");
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
log("NameNode rollback with corrupt version file", numDirs);
@@ -317,8 +314,7 @@ public class TestDFSRollback {
"layoutVersion".getBytes(Charsets.UTF_8),
"xxxxxxxxxxxxx".getBytes(Charsets.UTF_8));
}
- startNameNodeShouldFail(StartupOption.ROLLBACK,
- "file VERSION has layoutVersion missing");
+ startNameNodeShouldFail("file VERSION has layoutVersion missing");
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
@@ -332,8 +328,7 @@ public class TestDFSRollback {
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs,
storageInfo, UpgradeUtilities.getCurrentBlockPoolID(cluster));
- startNameNodeShouldFail(StartupOption.ROLLBACK,
- "Cannot rollback to storage version 1 using this version");
+ startNameNodeShouldFail("Cannot rollback to storage version 1 using this version");
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
} // end numDir loop
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
index 16189af41a9..cb32de0fccb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Shell;
import org.junit.Assume;
import org.junit.Before;
@@ -758,4 +759,37 @@ public class TestDFSUtil {
assertEquals(4*24*60*60*1000l, DFSUtil.parseRelativeTime("4d"));
assertEquals(999*24*60*60*1000l, DFSUtil.parseRelativeTime("999d"));
}
+
+ @Test
+ public void testAssertAllResultsEqual() {
+ checkAllResults(new Long[]{}, true);
+ checkAllResults(new Long[]{1l}, true);
+ checkAllResults(new Long[]{1l, 1l}, true);
+ checkAllResults(new Long[]{1l, 1l, 1l}, true);
+ checkAllResults(new Long[]{new Long(1), new Long(1)}, true);
+ checkAllResults(new Long[]{null, null, null}, true);
+
+ checkAllResults(new Long[]{1l, 2l}, false);
+ checkAllResults(new Long[]{2l, 1l}, false);
+ checkAllResults(new Long[]{1l, 2l, 1l}, false);
+ checkAllResults(new Long[]{2l, 1l, 1l}, false);
+ checkAllResults(new Long[]{1l, 1l, 2l}, false);
+ checkAllResults(new Long[]{1l, null}, false);
+ checkAllResults(new Long[]{null, 1l}, false);
+ checkAllResults(new Long[]{1l, null, 1l}, false);
+ }
+
+ private static void checkAllResults(Long[] toCheck, boolean shouldSucceed) {
+ if (shouldSucceed) {
+ DFSUtil.assertAllResultsEqual(Arrays.asList(toCheck));
+ } else {
+ try {
+ DFSUtil.assertAllResultsEqual(Arrays.asList(toCheck));
+ fail("Should not have succeeded with input: " +
+ Arrays.toString(toCheck));
+ } catch (AssertionError ae) {
+ GenericTestUtils.assertExceptionContains("Not all elements match", ae);
+ }
+ }
+ }
}
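
The new test above fixes the contract of DFSUtil.assertAllResultsEqual: empty and single-element inputs pass, all-equal (including all-null) inputs pass, and any mismatch, including a null next to a non-null, must raise an AssertionError whose message contains "Not all elements match". A hedged sketch of an implementation that satisfies exactly those cases (the real DFSUtil method may differ in detail):

    import java.util.Arrays;
    import java.util.Collection;

    class AssertAllEqualSketch {
      static void assertAllResultsEqual(Collection<?> objects) {
        if (objects.size() <= 1) {
          return;                       // trivially equal
        }
        Object[] results = objects.toArray();
        for (int i = 1; i < results.length; i++) {
          Object prev = results[i - 1];
          Object curr = results[i];
          // Null-safe pairwise comparison against the previous element.
          boolean equal = (curr == null) ? (prev == null) : curr.equals(prev);
          if (!equal) {
            throw new AssertionError("Not all elements match in results: "
                + Arrays.toString(results));
          }
        }
      }
    }
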
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
index 7646588a994..aed81793acc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
@@ -204,7 +204,7 @@ public class TestRollingUpgrade {
.format(false)
.manageNameDfsDirs(false)
.build();
- DistributedFileSystem dfs2 = cluster2.getFileSystem();
+ final DistributedFileSystem dfs2 = cluster2.getFileSystem();
// Check that cluster2 sees the edits made on cluster1
Assert.assertTrue(dfs2.exists(foo));
@@ -243,7 +243,8 @@ public class TestRollingUpgrade {
Assert.assertEquals(info1.getStartTime(), finalize.getStartTime());
LOG.info("RESTART cluster 2 with regular startup option");
- cluster2.restartNameNode(StartupOption.REGULAR.getName());
+ cluster2.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+ cluster2.restartNameNode();
Assert.assertTrue(dfs2.exists(foo));
Assert.assertTrue(dfs2.exists(bar));
Assert.assertTrue(dfs2.exists(baz));
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java
index fab83b46357..a4f67e5f71c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java
@@ -167,8 +167,16 @@ public class MiniJournalCluster {
return new File(baseDir, "journalnode-" + idx).getAbsoluteFile();
}
+ public File getJournalDir(int idx, String jid) {
+ return new File(getStorageDir(idx), jid);
+ }
+
public File getCurrentDir(int idx, String jid) {
- return new File(new File(getStorageDir(idx), jid), "current");
+ return new File(getJournalDir(idx, jid), "current");
+ }
+
+ public File getPreviousDir(int idx, String jid) {
+ return new File(getJournalDir(idx, jid), "previous");
}
public JournalNode getJournalNode(int i) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java
index fb4721cdc94..30a5ba6064d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
@@ -47,6 +48,7 @@ public class MiniQJMHACluster {
public static class Builder {
private final Configuration conf;
+ private StartupOption startOpt = null;
private final MiniDFSCluster.Builder dfsBuilder;
public Builder(Configuration conf) {
@@ -61,6 +63,10 @@ public class MiniQJMHACluster {
public MiniQJMHACluster build() throws IOException {
return new MiniQJMHACluster(this);
}
+
+ public void startupOption(StartupOption startOpt) {
+ this.startOpt = startOpt;
+ }
}
public static MiniDFSNNTopology createDefaultTopology() {
@@ -95,6 +101,9 @@ public class MiniQJMHACluster {
Configuration confNN0 = cluster.getConfiguration(0);
NameNode.initializeSharedEdits(confNN0, true);
+ cluster.getNameNodeInfos()[0].setStartOpt(builder.startOpt);
+ cluster.getNameNodeInfos()[1].setStartOpt(builder.startOpt);
+
// restart the cluster
cluster.restartNameNodes();
}
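
The new startupOption() hook lets callers choose the option both NameNodes are restarted with during MiniQJMHACluster construction. A small hypothetical helper showing the intended call pattern:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;

    class QjmHaClusterBuildSketch {
      static MiniQJMHACluster build(Configuration conf, StartupOption nnStartOpt)
          throws Exception {
        MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.getDfsBuilder().numDataNodes(0);
        builder.startupOption(nnStartOpt);  // applied to both NNs before the final restart
        return builder.build();
      }
    }
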
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java
index c1244d21814..5bfe97544dc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java
@@ -17,21 +17,22 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
-import static org.junit.Assert.fail;
-import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
-import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
-import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.junit.Test;
public class TestGenericJournalConf {
@@ -197,13 +198,59 @@ public class TestGenericJournalConf {
return false;
}
+ @Override
+ public void doPreUpgrade() throws IOException {}
+
+ @Override
+ public void doUpgrade(Storage storage) throws IOException {}
+
+ @Override
+ public void doFinalize() throws IOException {}
+
+ @Override
+ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage, int targetLayoutVersion)
+ throws IOException {
+ return false;
+ }
+
+ @Override
+ public void doRollback() throws IOException {}
+
@Override
public void discardSegments(long startTxId) throws IOException {}
+
+ @Override
+ public long getJournalCTime() throws IOException {
+ return -1;
+ }
}
public static class BadConstructorJournalManager extends DummyJournalManager {
public BadConstructorJournalManager() {
super(null, null, null);
}
+
+ @Override
+ public void doPreUpgrade() throws IOException {}
+
+ @Override
+ public void doUpgrade(Storage storage) throws IOException {}
+
+ @Override
+ public void doFinalize() throws IOException {}
+
+ @Override
+ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage, int targetLayoutVersion)
+ throws IOException {
+ return false;
+ }
+
+ @Override
+ public void doRollback() throws IOException {}
+
+ @Override
+ public long getJournalCTime() throws IOException {
+ return -1;
+ }
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
index 502c9de4096..7abc5024a9b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
@@ -91,7 +91,7 @@ public class TestBootstrapStandby {
fail("Did not throw");
} catch (IOException ioe) {
GenericTestUtils.assertExceptionContains(
- "Cannot start an HA namenode with name dirs that need recovery",
+ "storage directory does not exist or is not accessible",
ioe);
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
index 4f213b24055..c3a86741caa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
@@ -1,89 +1,506 @@
/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hdfs.server.namenode.ha;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Collection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster.Builder;
+import org.apache.hadoop.hdfs.qjournal.server.Journal;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.tools.DFSAdmin;
+import org.apache.hadoop.hdfs.util.PersistentLongFile;
import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Before;
import org.junit.Test;
-import com.google.common.collect.Lists;
+import com.google.common.base.Joiner;
/**
* Tests for upgrading with HA enabled.
*/
public class TestDFSUpgradeWithHA {
-
- private static final Log LOG = LogFactory.getLog(TestDFSUpgradeWithHA.class);
+ private static final Log LOG = LogFactory.getLog(TestDFSUpgradeWithHA.class);
+
+ private Configuration conf;
+
+ @Before
+ public void createConfiguration() {
+ conf = new HdfsConfiguration();
+ // Turn off persistent IPC, so that the DFSClient can survive NN restart
+ conf.setInt(
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
+ 0);
+ }
+
+ private static void assertCTimesEqual(MiniDFSCluster cluster) {
+ long nn1CTime = cluster.getNamesystem(0).getFSImage().getStorage().getCTime();
+ long nn2CTime = cluster.getNamesystem(1).getFSImage().getStorage().getCTime();
+ assertEquals(nn1CTime, nn2CTime);
+ }
+
+ private static void checkClusterPreviousDirExistence(MiniDFSCluster cluster,
+ boolean shouldExist) {
+ for (int i = 0; i < 2; i++) {
+ checkNnPreviousDirExistence(cluster, i, shouldExist);
+ }
+ }
+
+ private static void checkNnPreviousDirExistence(MiniDFSCluster cluster,
+ int index, boolean shouldExist) {
+ Collection<URI> nameDirs = cluster.getNameDirs(index);
+ for (URI nnDir : nameDirs) {
+ checkPreviousDirExistence(new File(nnDir), shouldExist);
+ }
+ }
+
+ private static void checkJnPreviousDirExistence(MiniQJMHACluster jnCluster,
+ boolean shouldExist) throws IOException {
+ for (int i = 0; i < 3; i++) {
+ checkPreviousDirExistence(
+ jnCluster.getJournalCluster().getJournalDir(i, "ns1"), shouldExist);
+ }
+ if (shouldExist) {
+ assertEpochFilesCopied(jnCluster);
+ }
+ }
+
+ private static void assertEpochFilesCopied(MiniQJMHACluster jnCluster)
+ throws IOException {
+ for (int i = 0; i < 3; i++) {
+ File journalDir = jnCluster.getJournalCluster().getJournalDir(i, "ns1");
+ File currDir = new File(journalDir, "current");
+ File prevDir = new File(journalDir, "previous");
+ for (String fileName : new String[]{ Journal.LAST_PROMISED_FILENAME,
+ Journal.LAST_WRITER_EPOCH }) {
+ File prevFile = new File(prevDir, fileName);
+ // Possible the prev file doesn't exist, e.g. if there has never been a
+ // writer before the upgrade.
+ if (prevFile.exists()) {
+ PersistentLongFile prevLongFile = new PersistentLongFile(prevFile, -10);
+ PersistentLongFile currLongFile = new PersistentLongFile(new File(currDir,
+ fileName), -11);
+ assertTrue("Value in " + fileName + " has decreased on upgrade in "
+ + journalDir, prevLongFile.get() <= currLongFile.get());
+ }
+ }
+ }
+ }
+
+ private static void checkPreviousDirExistence(File rootDir,
+ boolean shouldExist) {
+ File previousDir = new File(rootDir, "previous");
+ if (shouldExist) {
+ assertTrue(previousDir + " does not exist", previousDir.exists());
+ } else {
+ assertFalse(previousDir + " does exist", previousDir.exists());
+ }
+ }
+
+ private void runFinalizeCommand(MiniDFSCluster cluster)
+ throws IOException {
+ HATestUtil.setFailoverConfigurations(cluster, conf);
+ new DFSAdmin(conf).finalizeUpgrade();
+ }
+
/**
- * Make sure that an HA NN refuses to start if given an upgrade-related
- * startup option.
+ * Ensure that an admin cannot finalize an HA upgrade without at least one NN
+ * being active.
*/
@Test
- public void testStartingWithUpgradeOptionsFails() throws IOException {
- for (StartupOption startOpt : Lists.newArrayList(new StartupOption[] {
- StartupOption.UPGRADE, StartupOption.FINALIZE,
- StartupOption.ROLLBACK })) {
- MiniDFSCluster cluster = null;
+ public void testCannotFinalizeIfNoActive() throws IOException,
+ URISyntaxException {
+ MiniDFSCluster cluster = null;
+ FileSystem fs = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
+ .numDataNodes(0)
+ .build();
+
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+ // No upgrade is in progress at the moment.
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+ checkPreviousDirExistence(sharedDir, false);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkPreviousDirExistence(sharedDir, true);
+
+ // NN0 should come up in the active state when given the -upgrade option,
+ // so no need to transition it to active.
+ assertTrue(fs.mkdirs(new Path("/foo2")));
+
+ // Restart NN0 without the -upgrade flag, to make sure that works.
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+ cluster.restartNameNode(0, false);
+
+ // Make sure we can still do FS ops after upgrading.
+ cluster.transitionToActive(0);
+ assertTrue(fs.mkdirs(new Path("/foo3")));
+
+ // Now bootstrap the standby with the upgraded info.
+ int rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ // Now restart NN1 and make sure that we can do ops against that as well.
+ cluster.restartNameNode(1);
+ cluster.transitionToStandby(0);
+ cluster.transitionToActive(1);
+ assertTrue(fs.mkdirs(new Path("/foo4")));
+
+ assertCTimesEqual(cluster);
+
+ // Now there's no active NN.
+ cluster.transitionToStandby(1);
+
try {
- cluster = new MiniDFSCluster.Builder(new Configuration())
- .nnTopology(MiniDFSNNTopology.simpleHATopology())
- .startupOption(startOpt)
- .numDataNodes(0)
- .build();
- fail("Should not have been able to start an HA NN in upgrade mode");
- } catch (IllegalArgumentException iae) {
+ runFinalizeCommand(cluster);
+ fail("Should not have been able to finalize upgrade with no NN active");
+ } catch (IOException ioe) {
GenericTestUtils.assertExceptionContains(
- "Cannot perform DFS upgrade with HA enabled.", iae);
- LOG.info("Got expected exception", iae);
- } finally {
- if (cluster != null) {
- cluster.shutdown();
- }
+ "Cannot finalize with no NameNode active", ioe);
+ }
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
+ /**
+ * Make sure that an HA NN with NFS-based HA can successfully start and
+ * upgrade.
+ */
+ @Test
+ public void testNfsUpgrade() throws IOException, URISyntaxException {
+ MiniDFSCluster cluster = null;
+ FileSystem fs = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
+ .numDataNodes(0)
+ .build();
+
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+ // No upgrade is in progress at the moment.
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+ checkPreviousDirExistence(sharedDir, false);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkPreviousDirExistence(sharedDir, true);
+
+ // NN0 should come up in the active state when given the -upgrade option,
+ // so no need to transition it to active.
+ assertTrue(fs.mkdirs(new Path("/foo2")));
+
+ // Restart NN0 without the -upgrade flag, to make sure that works.
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+ cluster.restartNameNode(0, false);
+
+ // Make sure we can still do FS ops after upgrading.
+ cluster.transitionToActive(0);
+ assertTrue(fs.mkdirs(new Path("/foo3")));
+
+ // Now bootstrap the standby with the upgraded info.
+ int rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ // Now restart NN1 and make sure that we can do ops against that as well.
+ cluster.restartNameNode(1);
+ cluster.transitionToStandby(0);
+ cluster.transitionToActive(1);
+ assertTrue(fs.mkdirs(new Path("/foo4")));
+
+ assertCTimesEqual(cluster);
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
+ /**
+ * Make sure that an HA NN can successfully upgrade when configured using
+ * JournalNodes.
+ */
+ @Test
+ public void testUpgradeWithJournalNodes() throws IOException,
+ URISyntaxException {
+ MiniQJMHACluster qjCluster = null;
+ FileSystem fs = null;
+ try {
+ Builder builder = new MiniQJMHACluster.Builder(conf);
+ builder.getDfsBuilder()
+ .numDataNodes(0);
+ qjCluster = builder.build();
+
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+ // No upgrade is in progress at the moment.
+ checkJnPreviousDirExistence(qjCluster, false);
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkJnPreviousDirExistence(qjCluster, true);
+
+ // NN0 should come up in the active state when given the -upgrade option,
+ // so no need to transition it to active.
+ assertTrue(fs.mkdirs(new Path("/foo2")));
+
+ // Restart NN0 without the -upgrade flag, to make sure that works.
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+ cluster.restartNameNode(0, false);
+
+ // Make sure we can still do FS ops after upgrading.
+ cluster.transitionToActive(0);
+ assertTrue(fs.mkdirs(new Path("/foo3")));
+
+ // Now bootstrap the standby with the upgraded info.
+ int rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ // Now restart NN1 and make sure that we can do ops against that as well.
+ cluster.restartNameNode(1);
+ cluster.transitionToStandby(0);
+ cluster.transitionToActive(1);
+ assertTrue(fs.mkdirs(new Path("/foo4")));
+
+ assertCTimesEqual(cluster);
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (qjCluster != null) {
+ qjCluster.shutdown();
+ }
+ }
+ }
+
+ @Test
+ public void testFinalizeWithJournalNodes() throws IOException,
+ URISyntaxException {
+ MiniQJMHACluster qjCluster = null;
+ FileSystem fs = null;
+ try {
+ Builder builder = new MiniQJMHACluster.Builder(conf);
+ builder.getDfsBuilder()
+ .numDataNodes(0);
+ qjCluster = builder.build();
+
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+ // No upgrade is in progress at the moment.
+ checkJnPreviousDirExistence(qjCluster, false);
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ assertTrue(fs.mkdirs(new Path("/foo2")));
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkJnPreviousDirExistence(qjCluster, true);
+
+ // Now bootstrap the standby with the upgraded info.
+ int rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ cluster.restartNameNode(1);
+
+ runFinalizeCommand(cluster);
+
+ checkClusterPreviousDirExistence(cluster, false);
+ checkJnPreviousDirExistence(qjCluster, false);
+ assertCTimesEqual(cluster);
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (qjCluster != null) {
+ qjCluster.shutdown();
}
}
}
/**
- * Make sure that an HA NN won't start if a previous upgrade was in progress.
+ * Make sure that even if the NN which initiated the upgrade is in the standby
+ * state that we're allowed to finalize.
*/
@Test
- public void testStartingWithUpgradeInProgressFails() throws Exception {
+ public void testFinalizeFromSecondNameNodeWithJournalNodes()
+ throws IOException, URISyntaxException {
+ MiniQJMHACluster qjCluster = null;
+ FileSystem fs = null;
+ try {
+ Builder builder = new MiniQJMHACluster.Builder(conf);
+ builder.getDfsBuilder()
+ .numDataNodes(0);
+ qjCluster = builder.build();
+
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+ // No upgrade is in progress at the moment.
+ checkJnPreviousDirExistence(qjCluster, false);
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkJnPreviousDirExistence(qjCluster, true);
+
+ // Now bootstrap the standby with the upgraded info.
+ int rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ cluster.restartNameNode(1);
+
+ // Make the second NN (not the one that initiated the upgrade) active when
+ // the finalize command is run.
+ cluster.transitionToStandby(0);
+ cluster.transitionToActive(1);
+
+ runFinalizeCommand(cluster);
+
+ checkClusterPreviousDirExistence(cluster, false);
+ checkJnPreviousDirExistence(qjCluster, false);
+ assertCTimesEqual(cluster);
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (qjCluster != null) {
+ qjCluster.shutdown();
+ }
+ }
+ }
+
+ /**
+ * Make sure that an HA NN will start if a previous upgrade was in progress.
+ */
+ @Test
+ public void testStartingWithUpgradeInProgressSucceeds() throws Exception {
MiniDFSCluster cluster = null;
try {
- cluster = new MiniDFSCluster.Builder(new Configuration())
+ cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology())
.numDataNodes(0)
.build();
-
+
// Simulate an upgrade having started.
for (int i = 0; i < 2; i++) {
for (URI uri : cluster.getNameDirs(i)) {
@@ -92,18 +509,226 @@ public class TestDFSUpgradeWithHA {
assertTrue(prevTmp.mkdirs());
}
}
-
+
cluster.restartNameNodes();
- fail("Should not have been able to start an HA NN with an in-progress upgrade");
- } catch (IOException ioe) {
- GenericTestUtils.assertExceptionContains(
- "Cannot start an HA namenode with name dirs that need recovery.",
- ioe);
- LOG.info("Got expected exception", ioe);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
+
+ /**
+ * Test rollback with NFS shared dir.
+ */
+ @Test
+ public void testRollbackWithNfs() throws Exception {
+ MiniDFSCluster cluster = null;
+ FileSystem fs = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
+ .numDataNodes(0)
+ .build();
+
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+ // No upgrade is in progress at the moment.
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+ checkPreviousDirExistence(sharedDir, false);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkPreviousDirExistence(sharedDir, true);
+
+ // NN0 should come up in the active state when given the -upgrade option,
+ // so no need to transition it to active.
+ assertTrue(fs.mkdirs(new Path("/foo2")));
+
+ // Now bootstrap the standby with the upgraded info.
+ int rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ cluster.restartNameNode(1);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkPreviousDirExistence(sharedDir, true);
+ assertCTimesEqual(cluster);
+
+ // Now shut down the cluster and do the rollback.
+ Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
+ cluster.shutdown();
+
+ conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
+ NameNode.doRollback(conf, false);
+
+ // The rollback operation should have rolled back the first NN's local
+ // dirs, and the shared dir, but not the other NN's dirs. Those have to be
+ // done by bootstrapping the standby.
+ checkNnPreviousDirExistence(cluster, 0, false);
+ checkPreviousDirExistence(sharedDir, false);
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+
+ @Test
+ public void testRollbackWithJournalNodes() throws IOException,
+ URISyntaxException {
+ MiniQJMHACluster qjCluster = null;
+ FileSystem fs = null;
+ try {
+ Builder builder = new MiniQJMHACluster.Builder(conf);
+ builder.getDfsBuilder()
+ .numDataNodes(0);
+ qjCluster = builder.build();
+
+ MiniDFSCluster cluster = qjCluster.getDfsCluster();
+
+ // No upgrade is in progress at the moment.
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+ checkJnPreviousDirExistence(qjCluster, false);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkJnPreviousDirExistence(qjCluster, true);
+
+ // NN0 should come up in the active state when given the -upgrade option,
+ // so no need to transition it to active.
+ assertTrue(fs.mkdirs(new Path("/foo2")));
+
+ // Now bootstrap the standby with the upgraded info.
+ int rc = BootstrapStandby.run(
+ new String[]{"-force"},
+ cluster.getConfiguration(1));
+ assertEquals(0, rc);
+
+ cluster.restartNameNode(1);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkJnPreviousDirExistence(qjCluster, true);
+ assertCTimesEqual(cluster);
+
+ // Shut down the NNs, but deliberately leave the JNs up and running.
+ Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
+ cluster.shutdown();
+
+ conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
+ NameNode.doRollback(conf, false);
+
+ // The rollback operation should have rolled back the first NN's local
+ // dirs, and the shared dir, but not the other NN's dirs. Those have to be
+ // done by bootstrapping the standby.
+ checkNnPreviousDirExistence(cluster, 0, false);
+ checkJnPreviousDirExistence(qjCluster, false);
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (qjCluster != null) {
+ qjCluster.shutdown();
+ }
+ }
+ }
+
+ /**
+ * Make sure that starting a second NN with the -upgrade flag fails if the
+ * other NN has already done that.
+ */
+ @Test
+ public void testCannotUpgradeSecondNameNode() throws IOException,
+ URISyntaxException {
+ MiniDFSCluster cluster = null;
+ FileSystem fs = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
+ .numDataNodes(0)
+ .build();
+
+ File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+
+ // No upgrade is in progress at the moment.
+ checkClusterPreviousDirExistence(cluster, false);
+ assertCTimesEqual(cluster);
+ checkPreviousDirExistence(sharedDir, false);
+
+ // Transition NN0 to active and do some FS ops.
+ cluster.transitionToActive(0);
+ fs = HATestUtil.configureFailoverFs(cluster, conf);
+ assertTrue(fs.mkdirs(new Path("/foo1")));
+
+ // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+ // flag.
+ cluster.shutdownNameNode(1);
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+ cluster.restartNameNode(0, false);
+
+ checkNnPreviousDirExistence(cluster, 0, true);
+ checkNnPreviousDirExistence(cluster, 1, false);
+ checkPreviousDirExistence(sharedDir, true);
+
+ // NN0 should come up in the active state when given the -upgrade option,
+ // so no need to transition it to active.
+ assertTrue(fs.mkdirs(new Path("/foo2")));
+
+ // Restart NN0 without the -upgrade flag, to make sure that works.
+ cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+ cluster.restartNameNode(0, false);
+
+ // Make sure we can still do FS ops after upgrading.
+ cluster.transitionToActive(0);
+ assertTrue(fs.mkdirs(new Path("/foo3")));
+
+ // Make sure that starting the second NN with the -upgrade flag fails.
+ cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE);
+ try {
+ cluster.restartNameNode(1, false);
+ fail("Should not have been able to start second NN with -upgrade");
+ } catch (IOException ioe) {
+ GenericTestUtils.assertExceptionContains(
+ "It looks like the shared log is already being upgraded", ioe);
+ }
+ } finally {
+ if (fs != null) {
+ fs.close();
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java
index b534c03aa09..272e5436434 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java
@@ -96,7 +96,7 @@ public class TestInitializeSharedEdits {
} catch (IOException ioe) {
LOG.info("Got expected exception", ioe);
GenericTestUtils.assertExceptionContains(
- "Cannot start an HA namenode with name dirs that need recovery", ioe);
+ "storage directory does not exist or is not accessible", ioe);
}
try {
cluster.restartNameNode(1, false);
@@ -104,7 +104,7 @@ public class TestInitializeSharedEdits {
} catch (IOException ioe) {
LOG.info("Got expected exception", ioe);
GenericTestUtils.assertExceptionContains(
- "Cannot start an HA namenode with name dirs that need recovery", ioe);
+ "storage directory does not exist or is not accessible", ioe);
}
}