HBASE-5770 Add a clock skew warning threshold
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1325389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
05e3db77c5
commit
38eb4579e4
|
@ -98,6 +98,7 @@ public class ServerManager {
|
|||
private final DeadServer deadservers;
|
||||
|
||||
private final long maxSkew;
|
||||
private final long warningSkew;
|
||||
|
||||
/**
|
||||
* Set of region servers which are dead but not expired immediately. If one
|
||||
|
@ -124,6 +125,7 @@ public class ServerManager {
|
|||
this.services = services;
|
||||
Configuration c = master.getConfiguration();
|
||||
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
||||
warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
|
||||
this.deadservers = new DeadServer();
|
||||
this.connection = connect ? HConnectionManager.getConnection(c) : null;
|
||||
}
|
||||
|
@ -196,14 +198,14 @@ public class ServerManager {
|
|||
}
|
||||
|
||||
/**
|
||||
* Checks if the clock skew between the server and the master. If the clock
|
||||
* skew is too much it will throw an Exception.
|
||||
* Checks the clock skew between the server and the master. If the clock skew exceeds the
|
||||
* configured max, it will throw an exception; if it exceeds the configured warning threshold,
|
||||
* it will log a warning but start normally.
|
||||
* @param serverName Incoming server's name
|
||||
* @param serverCurrentTime
|
||||
* @throws ClockOutOfSyncException
|
||||
* @throws ClockOutOfSyncException if the skew exceeds the configured max value
|
||||
*/
|
||||
private void checkClockSkew(final ServerName serverName,
|
||||
final long serverCurrentTime)
|
||||
private void checkClockSkew(final ServerName serverName, final long serverCurrentTime)
|
||||
throws ClockOutOfSyncException {
|
||||
long skew = System.currentTimeMillis() - serverCurrentTime;
|
||||
if (skew > maxSkew) {
|
||||
|
@ -212,6 +214,11 @@ public class ServerManager {
|
|||
"Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms";
|
||||
LOG.warn(message);
|
||||
throw new ClockOutOfSyncException(message);
|
||||
} else if (skew > warningSkew){
|
||||
String message = "Reported time for server " + serverName + " is out of sync with master " +
|
||||
"by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " +
|
||||
"error threshold is " + maxSkew + "ms)";
|
||||
LOG.warn(message);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.net.InetAddress;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
@ -82,18 +84,25 @@ public class TestClockSkewDetection {
|
|||
InetAddress ia1 = InetAddress.getLocalHost();
|
||||
sm.regionServerStartup(ia1, 1234, -1, System.currentTimeMillis());
|
||||
|
||||
long maxSkew = 30000;
|
||||
final Configuration c = HBaseConfiguration.create();
|
||||
long maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
||||
long warningSkew = c.getLong("hbase.master.warningclockskew", 1000);
|
||||
|
||||
try {
|
||||
LOG.debug("regionServerStartup 2");
|
||||
InetAddress ia2 = InetAddress.getLocalHost();
|
||||
sm.regionServerStartup(ia2, 1235, -1, System.currentTimeMillis() - maxSkew * 2);
|
||||
Assert.assertTrue("HMaster should have thrown an ClockOutOfSyncException "
|
||||
+ "but didn't.", false);
|
||||
fail("HMaster should have thrown an ClockOutOfSyncException but didn't.");
|
||||
} catch(ClockOutOfSyncException e) {
|
||||
//we want an exception
|
||||
LOG.info("Recieved expected exception: "+e);
|
||||
}
|
||||
|
||||
// make sure values above warning threshold but below max threshold don't kill
|
||||
LOG.debug("regionServerStartup 3");
|
||||
InetAddress ia3 = InetAddress.getLocalHost();
|
||||
sm.regionServerStartup(ia3, 1236, -1, System.currentTimeMillis() - warningSkew * 2);
|
||||
|
||||
}
|
||||
|
||||
@org.junit.Rule
|
||||
|
|
Loading…
Reference in New Issue