HBASE-14680 Two configs for snapshot timeout and better defaults (Heng Chen)
Conflicts: hbase-common/src/main/resources/hbase-default.xml hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java
This commit is contained in:
parent
53a8ce5fab
commit
87c97c231a
|
@ -1461,4 +1461,18 @@ possible configurations would overwhelm and obscure the important.
|
||||||
0.x Abort only when this percent of handlers have died;
|
0.x Abort only when this percent of handlers have died;
|
||||||
1 Abort only when all of the handlers have died.</description>
|
1 Abort only when all of the handlers have died.</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hbase.snapshot.master.timeout.millis</name>
|
||||||
|
<value>300000</value>
|
||||||
|
<description>
|
||||||
|
Timeout, in milliseconds, for the master to wait for the snapshot procedure execution.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hbase.snapshot.region.timeout</name>
|
||||||
|
<value>300000</value>
|
||||||
|
<description>
|
||||||
|
Maximum time, in milliseconds, that regionservers keep threads in the snapshot request pool waiting.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
</configuration>
|
</configuration>
|
||||||
|
|
|
@ -26,18 +26,16 @@ import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hbase.classification.InterfaceStability;
|
import org.apache.hadoop.hbase.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.hbase.HRegionInfo;
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
|
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
|
||||||
import org.apache.hadoop.hbase.errorhandling.ForeignException;
|
import org.apache.hadoop.hbase.errorhandling.ForeignException;
|
||||||
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener;
|
|
||||||
import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector;
|
|
||||||
import org.apache.hadoop.hbase.master.MasterServices;
|
import org.apache.hadoop.hbase.master.MasterServices;
|
||||||
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
|
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
|
||||||
import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
|
import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
|
||||||
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
|
|
||||||
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
|
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
|
||||||
import org.apache.hadoop.hbase.util.FSUtils;
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
|
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
|
||||||
|
@ -53,7 +51,6 @@ import org.apache.zookeeper.KeeperException;
|
||||||
@InterfaceStability.Evolving
|
@InterfaceStability.Evolving
|
||||||
public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
||||||
private static final Log LOG = LogFactory.getLog(DisabledTableSnapshotHandler.class);
|
private static final Log LOG = LogFactory.getLog(DisabledTableSnapshotHandler.class);
|
||||||
private final TimeoutExceptionInjector timeoutInjector;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param snapshot descriptor of the snapshot to take
|
* @param snapshot descriptor of the snapshot to take
|
||||||
|
@ -62,9 +59,6 @@ public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
||||||
public DisabledTableSnapshotHandler(SnapshotDescription snapshot,
|
public DisabledTableSnapshotHandler(SnapshotDescription snapshot,
|
||||||
final MasterServices masterServices) {
|
final MasterServices masterServices) {
|
||||||
super(snapshot, masterServices);
|
super(snapshot, masterServices);
|
||||||
|
|
||||||
// setup the timer
|
|
||||||
timeoutInjector = getMasterTimerAndBindToMonitor(snapshot, conf, monitor);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -78,8 +72,6 @@ public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
||||||
public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations)
|
public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations)
|
||||||
throws IOException, KeeperException {
|
throws IOException, KeeperException {
|
||||||
try {
|
try {
|
||||||
timeoutInjector.start();
|
|
||||||
|
|
||||||
// 1. get all the regions hosting this table.
|
// 1. get all the regions hosting this table.
|
||||||
|
|
||||||
// extract each pair to separate lists
|
// extract each pair to separate lists
|
||||||
|
@ -119,25 +111,6 @@ public class DisabledTableSnapshotHandler extends TakeSnapshotHandler {
|
||||||
} finally {
|
} finally {
|
||||||
LOG.debug("Marking snapshot" + ClientSnapshotDescriptionUtils.toString(snapshot)
|
LOG.debug("Marking snapshot" + ClientSnapshotDescriptionUtils.toString(snapshot)
|
||||||
+ " as finished.");
|
+ " as finished.");
|
||||||
|
|
||||||
// 3. mark the timer as finished - even if we got an exception, we don't need to time the
|
|
||||||
// operation any further
|
|
||||||
timeoutInjector.complete();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a snapshot timer for the master which notifies the monitor when an error occurs
|
|
||||||
* @param snapshot snapshot to monitor
|
|
||||||
* @param conf configuration to use when getting the max snapshot life
|
|
||||||
* @param monitor monitor to notify when the snapshot life expires
|
|
||||||
* @return the timer to use to signal the start and end of the snapshot
|
|
||||||
*/
|
|
||||||
private TimeoutExceptionInjector getMasterTimerAndBindToMonitor(SnapshotDescription snapshot,
|
|
||||||
Configuration conf, ForeignExceptionListener monitor) {
|
|
||||||
long maxTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(),
|
|
||||||
SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
|
|
||||||
return new TimeoutExceptionInjector(monitor, maxTime);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,15 +122,6 @@ public class SnapshotManager extends MasterProcedureManager implements Stoppable
|
||||||
*/
|
*/
|
||||||
private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
|
private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
|
||||||
|
|
||||||
/** By default, check to see if the snapshot is complete (ms) */
|
|
||||||
private static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for
|
|
||||||
* completion.
|
|
||||||
*/
|
|
||||||
private static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis";
|
|
||||||
|
|
||||||
/** Name of the operation to use in the controller */
|
/** Name of the operation to use in the controller */
|
||||||
public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
|
public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
|
||||||
|
|
||||||
|
@ -1078,7 +1069,10 @@ public class SnapshotManager extends MasterProcedureManager implements Stoppable
|
||||||
// get the configuration for the coordinator
|
// get the configuration for the coordinator
|
||||||
Configuration conf = master.getConfiguration();
|
Configuration conf = master.getConfiguration();
|
||||||
long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
|
long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
|
||||||
long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT);
|
long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY,
|
||||||
|
SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT),
|
||||||
|
conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS,
|
||||||
|
SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME));
|
||||||
int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
|
int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
|
||||||
|
|
||||||
// setup the default procedure coordinator
|
// setup the default procedure coordinator
|
||||||
|
|
|
@ -88,8 +88,8 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager {
|
||||||
|
|
||||||
/** Conf key for max time to keep threads in snapshot request pool waiting */
|
/** Conf key for max time to keep threads in snapshot request pool waiting */
|
||||||
public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.region.timeout";
|
public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.region.timeout";
|
||||||
/** Keep threads alive in request pool for max of 60 seconds */
|
/** Keep threads alive in request pool for max of 300 seconds */
|
||||||
public static final long SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000;
|
public static final long SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 5 * 60000;
|
||||||
|
|
||||||
/** Conf key for millis between checks to see if snapshot completed or if there are errors*/
|
/** Conf key for millis between checks to see if snapshot completed or if there are errors*/
|
||||||
public static final String SNAPSHOT_REQUEST_WAKE_MILLIS_KEY = "hbase.snapshot.region.wakefrequency";
|
public static final String SNAPSHOT_REQUEST_WAKE_MILLIS_KEY = "hbase.snapshot.region.wakefrequency";
|
||||||
|
|
|
@ -105,10 +105,27 @@ public class SnapshotDescriptionUtils {
|
||||||
/** Default value if no start time is specified */
|
/** Default value if no start time is specified */
|
||||||
public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0;
|
public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0;
|
||||||
|
|
||||||
|
|
||||||
public static final String MASTER_SNAPSHOT_TIMEOUT_MILLIS = "hbase.snapshot.master.timeout.millis";
|
public static final String MASTER_SNAPSHOT_TIMEOUT_MILLIS = "hbase.snapshot.master.timeout.millis";
|
||||||
|
|
||||||
/** By default, wait 60 seconds for a snapshot to complete */
|
/** By default, wait 300 seconds for a snapshot to complete */
|
||||||
public static final long DEFAULT_MAX_WAIT_TIME = 60000;
|
public static final long DEFAULT_MAX_WAIT_TIME = 60000 * 5 ;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default time (ms) to wait for a snapshot to complete before injecting a timeout error
|
||||||
|
* @deprecated Use {@link #DEFAULT_MAX_WAIT_TIME} instead.
|
||||||
|
* */
|
||||||
|
@Deprecated
|
||||||
|
public static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000 * 5;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for
|
||||||
|
* completion.
|
||||||
|
* @deprecated Use {@link #MASTER_SNAPSHOT_TIMEOUT_MILLIS} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis";
|
||||||
|
|
||||||
private SnapshotDescriptionUtils() {
|
private SnapshotDescriptionUtils() {
|
||||||
// private constructor for utility class
|
// private constructor for utility class
|
||||||
|
@ -128,7 +145,8 @@ public class SnapshotDescriptionUtils {
|
||||||
default:
|
default:
|
||||||
confKey = MASTER_SNAPSHOT_TIMEOUT_MILLIS;
|
confKey = MASTER_SNAPSHOT_TIMEOUT_MILLIS;
|
||||||
}
|
}
|
||||||
return conf.getLong(confKey, defaultMaxWaitTime);
|
return Math.max(conf.getLong(confKey, defaultMaxWaitTime),
|
||||||
|
conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, defaultMaxWaitTime));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue