diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 319654c2b9d..512ca207b50 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -727,6 +727,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       "dfs.edit.log.transfer.bandwidthPerSec";
   public static final long DFS_EDIT_LOG_TRANSFER_RATE_DEFAULT = 0; //no throttling
 
+  public static final String DFS_QJM_OPERATIONS_TIMEOUT =
+      "dfs.qjm.operations.timeout";
+  public static final long DFS_QJM_OPERATIONS_TIMEOUT_DEFAULT = 60000;
+
   // Datanode File IO Stats
   public static final String DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY =
       "dfs.datanode.enable.fileio.fault.injection";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
index 97c0050ddae..f66e2c0cdde 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
@@ -27,6 +27,7 @@
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.PriorityQueue;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
 import org.apache.commons.logging.Log;
@@ -76,18 +77,10 @@ public class QuorumJournalManager implements JournalManager {
   private final int newEpochTimeoutMs;
   private final int writeTxnsTimeoutMs;
 
-  // Since these don't occur during normal operation, we can
-  // use rather lengthy timeouts, and don't need to make them
-  // configurable.
-  private static final int FORMAT_TIMEOUT_MS = 60000;
-  private static final int HASDATA_TIMEOUT_MS = 60000;
-  private static final int CAN_ROLL_BACK_TIMEOUT_MS = 60000;
-  private static final int FINALIZE_TIMEOUT_MS = 60000;
-  private static final int PRE_UPGRADE_TIMEOUT_MS = 60000;
-  private static final int ROLL_BACK_TIMEOUT_MS = 60000;
-  private static final int DISCARD_SEGMENTS_TIMEOUT_MS = 60000;
-  private static final int UPGRADE_TIMEOUT_MS = 60000;
-  private static final int GET_JOURNAL_CTIME_TIMEOUT_MS = 60000;
+  // This timeout is used for calls that don't occur during normal operation
+  // e.g. format, upgrade operations and a few others. So we can use rather
+  // lengthy timeouts by default.
+  private final int timeoutMs;
 
   private final Configuration conf;
   private final URI uri;
@@ -141,6 +134,10 @@ public QuorumJournalManager(Configuration conf,
     this.writeTxnsTimeoutMs = conf.getInt(
         DFSConfigKeys.DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_KEY,
         DFSConfigKeys.DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_DEFAULT);
+    this.timeoutMs = (int) conf.getTimeDuration(DFSConfigKeys
+        .DFS_QJM_OPERATIONS_TIMEOUT,
+        DFSConfigKeys.DFS_QJM_OPERATIONS_TIMEOUT_DEFAULT, TimeUnit
+        .MILLISECONDS);
   }
 
   protected List<AsyncLogger> createLoggers(
@@ -201,7 +198,7 @@ Map<AsyncLogger, NewEpochResponseProto> createNewUniqueEpoch()
   public void format(NamespaceInfo nsInfo) throws IOException {
     QuorumCall<Void> call = loggers.format(nsInfo);
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, FORMAT_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "format");
     } catch (InterruptedException e) {
       throw new IOException("Interrupted waiting for format() response");
@@ -220,7 +217,7 @@ public boolean hasSomeData() throws IOException {
     QuorumCall<Boolean> call =
        loggers.isFormatted();
     try {
-      call.waitFor(loggers.size(), 0, 0, HASDATA_TIMEOUT_MS, "hasSomeData");
+      call.waitFor(loggers.size(), 0, 0, timeoutMs, "hasSomeData");
     } catch (InterruptedException e) {
       throw new IOException("Interrupted while determining if JNs have data");
     } catch (TimeoutException e) {
@@ -505,7 +502,7 @@ AsyncLoggerSet getLoggerSetForTests() {
   public void doPreUpgrade() throws IOException {
     QuorumCall<Void> call = loggers.doPreUpgrade();
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, PRE_UPGRADE_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doPreUpgrade");
 
       if (call.countExceptions() > 0) {
@@ -522,7 +519,7 @@ public void doPreUpgrade() throws IOException {
   public void doUpgrade(Storage storage) throws IOException {
     QuorumCall<Void> call = loggers.doUpgrade(storage);
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, UPGRADE_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doUpgrade");
 
       if (call.countExceptions() > 0) {
@@ -539,7 +536,7 @@ public void doUpgrade(Storage storage) throws IOException {
   public void doFinalize() throws IOException {
     QuorumCall<Void> call = loggers.doFinalize();
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, FINALIZE_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doFinalize");
 
       if (call.countExceptions() > 0) {
@@ -558,7 +555,7 @@ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
     QuorumCall<Boolean> call = loggers.canRollBack(storage,
         prevStorage, targetLayoutVersion);
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, CAN_ROLL_BACK_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "lockSharedStorage");
 
       if (call.countExceptions() > 0) {
@@ -591,7 +588,7 @@ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
   public void doRollback() throws IOException {
     QuorumCall<Void> call = loggers.doRollback();
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, ROLL_BACK_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doRollback");
 
       if (call.countExceptions() > 0) {
@@ -609,7 +606,7 @@ public void discardSegments(long startTxId) throws IOException {
     QuorumCall<Void> call = loggers.discardSegments(startTxId);
     try {
       call.waitFor(loggers.size(), loggers.size(), 0,
-          DISCARD_SEGMENTS_TIMEOUT_MS, "discardSegments");
+          timeoutMs, "discardSegments");
       if (call.countExceptions() > 0) {
         call.rethrowException(
             "Could not perform discardSegments of one or more JournalNodes");
@@ -628,7 +625,7 @@ public long getJournalCTime() throws IOException {
     QuorumCall<Long> call = loggers.getJournalCTime();
     try {
       call.waitFor(loggers.size(), loggers.size(), 0,
-          GET_JOURNAL_CTIME_TIMEOUT_MS, "getJournalCTime");
+          timeoutMs, "getJournalCTime");
       if (call.countExceptions() > 0) {
         call.rethrowException("Could not journal CTime for one "
             + "or more JournalNodes");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 7338963f19f..726fe8c4643 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4638,4 +4638,15 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.qjm.operations.timeout</name>
+  <value>60s</value>
+  <description>
+    Common key to set timeout for related operations in
+    QuorumJournalManager. This setting supports multiple time unit suffixes
+    as described in dfs.heartbeat.interval.
+    If no suffix is specified then milliseconds is assumed.
+  </description>
+</property>
+
 </configuration>