HBASE-14680 Two configs for snapshot timeout and better defaults (Heng Chen)
Conflicts: hbase-common/src/main/resources/hbase-default.xml hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java
This commit is contained in:
parent
53a8ce5fab
commit
87c97c231a
|
@ -1461,4 +1461,18 @@ possible configurations would overwhelm and obscure the important.
|
|||
0.x Abort only when this percent of handlers have died;
|
||||
1 Abort only all of the handers have died.</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.snapshot.master.timeout.millis</name>
|
||||
<value>300000</value>
|
||||
<description>
|
||||
Timeout for master for the snapshot procedure execution
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.snapshot.region.timeout</name>
|
||||
<value>300000</value>
|
||||
<description>
|
||||
Timeout for regionservers to keep threads in snapshot request pool waiting
|
||||
</description>
|
||||
</property>
|
||||
</configuration>
|
||||
|
|
|
@ -26,18 +26,16 @@ import java.util.concurrent.ThreadPoolExecutor;
|
|||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceStability;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
|
||||
import org.apache.hadoop.hbase.errorhandling.ForeignException;
|
||||
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener;
|
||||
import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector;
|
||||
import org.apache.hadoop.hbase.master.MasterServices;
|
||||
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
|
||||
import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
|
||||
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
|
||||
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
|
||||
|
@ -53,7 +51,6 @@ import org.apache.zookeeper.KeeperException;
|
|||
@InterfaceStability.Evolving
|
||||
public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
||||
private static final Log LOG = LogFactory.getLog(DisabledTableSnapshotHandler.class);
|
||||
private final TimeoutExceptionInjector timeoutInjector;
|
||||
|
||||
/**
|
||||
* @param snapshot descriptor of the snapshot to take
|
||||
|
@ -62,9 +59,6 @@ public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
|||
public DisabledTableSnapshotHandler(SnapshotDescription snapshot,
|
||||
final MasterServices masterServices) {
|
||||
super(snapshot, masterServices);
|
||||
|
||||
// setup the timer
|
||||
timeoutInjector = getMasterTimerAndBindToMonitor(snapshot, conf, monitor);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -78,8 +72,6 @@ public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
|||
public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations)
|
||||
throws IOException, KeeperException {
|
||||
try {
|
||||
timeoutInjector.start();
|
||||
|
||||
// 1. get all the regions hosting this table.
|
||||
|
||||
// extract each pair to separate lists
|
||||
|
@ -119,25 +111,6 @@ public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
|||
} finally {
|
||||
LOG.debug("Marking snapshot" + ClientSnapshotDescriptionUtils.toString(snapshot)
|
||||
+ " as finished.");
|
||||
|
||||
// 3. mark the timer as finished - even if we got an exception, we don't need to time the
|
||||
// operation any further
|
||||
timeoutInjector.complete();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a snapshot timer for the master which notifies the monitor when an error occurs
|
||||
* @param snapshot snapshot to monitor
|
||||
* @param conf configuration to use when getting the max snapshot life
|
||||
* @param monitor monitor to notify when the snapshot life expires
|
||||
* @return the timer to use update to signal the start and end of the snapshot
|
||||
*/
|
||||
private TimeoutExceptionInjector getMasterTimerAndBindToMonitor(SnapshotDescription snapshot,
|
||||
Configuration conf, ForeignExceptionListener monitor) {
|
||||
long maxTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(),
|
||||
SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
|
||||
return new TimeoutExceptionInjector(monitor, maxTime);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,15 +122,6 @@ public class SnapshotManager extends MasterProcedureManager implements Stoppable
|
|||
*/
|
||||
private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
|
||||
|
||||
/** By default, check to see if the snapshot is complete (ms) */
|
||||
private static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000;
|
||||
|
||||
/**
|
||||
* Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for
|
||||
* completion.
|
||||
*/
|
||||
private static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis";
|
||||
|
||||
/** Name of the operation to use in the controller */
|
||||
public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
|
||||
|
||||
|
@ -1078,7 +1069,10 @@ public class SnapshotManager extends MasterProcedureManager implements Stoppable
|
|||
// get the configuration for the coordinator
|
||||
Configuration conf = master.getConfiguration();
|
||||
long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
|
||||
long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
|
||||
long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY,
|
||||
SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT),
|
||||
conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS,
|
||||
SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME));
|
||||
int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
|
||||
|
||||
// setup the default procedure coordinator
|
||||
|
|
|
@ -88,8 +88,8 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager {
|
|||
|
||||
/** Conf key for max time to keep threads in snapshot request pool waiting */
|
||||
public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.region.timeout";
|
||||
/** Keep threads alive in request pool for max of 60 seconds */
|
||||
public static final long SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000;
|
||||
/** Keep threads alive in request pool for max of 300 seconds */
|
||||
public static final long SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 5 * 60000;
|
||||
|
||||
/** Conf key for millis between checks to see if snapshot completed or if there are errors*/
|
||||
public static final String SNAPSHOT_REQUEST_WAKE_MILLIS_KEY = "hbase.snapshot.region.wakefrequency";
|
||||
|
|
|
@ -105,10 +105,27 @@ public class SnapshotDescriptionUtils {
|
|||
/** Default value if no start time is specified */
|
||||
public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0;
|
||||
|
||||
|
||||
public static final String MASTER_SNAPSHOT_TIMEOUT_MILLIS = "hbase.snapshot.master.timeout.millis";
|
||||
|
||||
/** By default, wait 60 seconds for a snapshot to complete */
|
||||
public static final long DEFAULT_MAX_WAIT_TIME = 60000;
|
||||
/** By default, wait 300 seconds for a snapshot to complete */
|
||||
public static final long DEFAULT_MAX_WAIT_TIME = 60000 * 5 ;
|
||||
|
||||
|
||||
/**
|
||||
* By default, check to see if the snapshot is complete (ms)
|
||||
* @deprecated Use {@link #DEFAULT_MAX_WAIT_TIME} instead.
|
||||
* */
|
||||
@Deprecated
|
||||
public static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000 * 5;
|
||||
|
||||
/**
|
||||
* Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for
|
||||
* completion.
|
||||
* @deprecated Use {@link #MASTER_SNAPSHOT_TIMEOUT_MILLIS} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis";
|
||||
|
||||
private SnapshotDescriptionUtils() {
|
||||
// private constructor for utility class
|
||||
|
@ -128,7 +145,8 @@ public class SnapshotDescriptionUtils {
|
|||
default:
|
||||
confKey = MASTER_SNAPSHOT_TIMEOUT_MILLIS;
|
||||
}
|
||||
return conf.getLong(confKey, defaultMaxWaitTime);
|
||||
return Math.max(conf.getLong(confKey, defaultMaxWaitTime),
|
||||
conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, defaultMaxWaitTime));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue