HBASE-5770 Add a clock skew warning threshold

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1325389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2012-04-12 17:20:25 +00:00
parent 05e3db77c5
commit 38eb4579e4
2 changed files with 24 additions and 8 deletions

View File

@ -98,6 +98,7 @@ public class ServerManager {
private final DeadServer deadservers;
private final long maxSkew;
private final long warningSkew;
/**
* Set of region servers which are dead but not expired immediately. If one
@ -124,6 +125,7 @@ public class ServerManager {
this.services = services;
Configuration c = master.getConfiguration();
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
this.deadservers = new DeadServer();
this.connection = connect ? HConnectionManager.getConnection(c) : null;
}
@ -196,14 +198,14 @@ public class ServerManager {
}
/**
* Checks if the clock skew between the server and the master. If the clock
* skew is too much it will throw an Exception.
* Checks the clock skew between the server and the master. If the clock skew exceeds the
* configured max, it will throw an exception; if it exceeds the configured warning threshold,
* it will log a warning but start normally.
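* @param serverName Incoming server's name
* @param serverCurrentTime Current time reported by the server, in milliseconds; compared
* against the master's System.currentTimeMillis()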
* @throws ClockOutOfSyncException
* @throws ClockOutOfSyncException if the skew exceeds the configured max value
*/
private void checkClockSkew(final ServerName serverName,
final long serverCurrentTime)
private void checkClockSkew(final ServerName serverName, final long serverCurrentTime)
throws ClockOutOfSyncException {
long skew = System.currentTimeMillis() - serverCurrentTime;
if (skew > maxSkew) {
@ -212,6 +214,11 @@ public class ServerManager {
"Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms";
LOG.warn(message);
throw new ClockOutOfSyncException(message);
} else if (skew > warningSkew){
String message = "Reported time for server " + serverName + " is out of sync with master " +
"by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " +
"error threshold is " + maxSkew + "ms)";
LOG.warn(message);
}
}

View File

@ -19,6 +19,8 @@
*/
package org.apache.hadoop.hbase.master;
import static org.junit.Assert.fail;
import java.net.InetAddress;
import junit.framework.Assert;
@ -82,18 +84,25 @@ public class TestClockSkewDetection {
InetAddress ia1 = InetAddress.getLocalHost();
sm.regionServerStartup(ia1, 1234, -1, System.currentTimeMillis());
long maxSkew = 30000;
final Configuration c = HBaseConfiguration.create();
long maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
long warningSkew = c.getLong("hbase.master.warningclockskew", 1000);
try {
LOG.debug("regionServerStartup 2");
InetAddress ia2 = InetAddress.getLocalHost();
sm.regionServerStartup(ia2, 1235, -1, System.currentTimeMillis() - maxSkew * 2);
Assert.assertTrue("HMaster should have thrown an ClockOutOfSyncException "
+ "but didn't.", false);
fail("HMaster should have thrown an ClockOutOfSyncException but didn't.");
} catch(ClockOutOfSyncException e) {
//we want an exception
LOG.info("Recieved expected exception: "+e);
}
// make sure values above warning threshold but below max threshold don't kill
LOG.debug("regionServerStartup 3");
InetAddress ia3 = InetAddress.getLocalHost();
sm.regionServerStartup(ia3, 1236, -1, System.currentTimeMillis() - warningSkew * 2);
}
@org.junit.Rule