HBASE-5770 Add a clock skew warning threshold
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1325389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
05e3db77c5
commit
38eb4579e4
|
@ -98,6 +98,7 @@ public class ServerManager {
|
||||||
private final DeadServer deadservers;
|
private final DeadServer deadservers;
|
||||||
|
|
||||||
private final long maxSkew;
|
private final long maxSkew;
|
||||||
|
private final long warningSkew;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set of region servers which are dead but not expired immediately. If one
|
* Set of region servers which are dead but not expired immediately. If one
|
||||||
|
@ -124,6 +125,7 @@ public class ServerManager {
|
||||||
this.services = services;
|
this.services = services;
|
||||||
Configuration c = master.getConfiguration();
|
Configuration c = master.getConfiguration();
|
||||||
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
||||||
|
warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
|
||||||
this.deadservers = new DeadServer();
|
this.deadservers = new DeadServer();
|
||||||
this.connection = connect ? HConnectionManager.getConnection(c) : null;
|
this.connection = connect ? HConnectionManager.getConnection(c) : null;
|
||||||
}
|
}
|
||||||
|
@ -196,14 +198,14 @@ public class ServerManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks the clock skew between the server and the master. If the clock
|
* Checks the clock skew between the server and the master. If the clock skew exceeds the
|
||||||
* skew is too much it will throw an Exception.
|
* configured max, it will throw an exception; if it exceeds the configured warning threshold,
|
||||||
|
* it will log a warning but start normally.
|
||||||
* @param serverName Incoming server's name
|
* @param serverName Incoming server's name
|
||||||
* @param serverCurrentTime
|
* @param serverCurrentTime
|
||||||
* @throws ClockOutOfSyncException
|
* @throws ClockOutOfSyncException if the skew exceeds the configured max value
|
||||||
*/
|
*/
|
||||||
private void checkClockSkew(final ServerName serverName,
|
private void checkClockSkew(final ServerName serverName, final long serverCurrentTime)
|
||||||
final long serverCurrentTime)
|
|
||||||
throws ClockOutOfSyncException {
|
throws ClockOutOfSyncException {
|
||||||
long skew = System.currentTimeMillis() - serverCurrentTime;
|
long skew = System.currentTimeMillis() - serverCurrentTime;
|
||||||
if (skew > maxSkew) {
|
if (skew > maxSkew) {
|
||||||
|
@ -212,6 +214,11 @@ public class ServerManager {
|
||||||
"Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms";
|
"Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms";
|
||||||
LOG.warn(message);
|
LOG.warn(message);
|
||||||
throw new ClockOutOfSyncException(message);
|
throw new ClockOutOfSyncException(message);
|
||||||
|
} else if (skew > warningSkew){
|
||||||
|
String message = "Reported time for server " + serverName + " is out of sync with master " +
|
||||||
|
"by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " +
|
||||||
|
"error threshold is " + maxSkew + "ms)";
|
||||||
|
LOG.warn(message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.master;
|
package org.apache.hadoop.hbase.master;
|
||||||
|
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import java.net.InetAddress;
|
import java.net.InetAddress;
|
||||||
|
|
||||||
import junit.framework.Assert;
|
import junit.framework.Assert;
|
||||||
|
@ -82,18 +84,25 @@ public class TestClockSkewDetection {
|
||||||
InetAddress ia1 = InetAddress.getLocalHost();
|
InetAddress ia1 = InetAddress.getLocalHost();
|
||||||
sm.regionServerStartup(ia1, 1234, -1, System.currentTimeMillis());
|
sm.regionServerStartup(ia1, 1234, -1, System.currentTimeMillis());
|
||||||
|
|
||||||
long maxSkew = 30000;
|
final Configuration c = HBaseConfiguration.create();
|
||||||
|
long maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
||||||
|
long warningSkew = c.getLong("hbase.master.warningclockskew", 1000);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
LOG.debug("regionServerStartup 2");
|
LOG.debug("regionServerStartup 2");
|
||||||
InetAddress ia2 = InetAddress.getLocalHost();
|
InetAddress ia2 = InetAddress.getLocalHost();
|
||||||
sm.regionServerStartup(ia2, 1235, -1, System.currentTimeMillis() - maxSkew * 2);
|
sm.regionServerStartup(ia2, 1235, -1, System.currentTimeMillis() - maxSkew * 2);
|
||||||
Assert.assertTrue("HMaster should have thrown an ClockOutOfSyncException "
|
fail("HMaster should have thrown an ClockOutOfSyncException but didn't.");
|
||||||
+ "but didn't.", false);
|
|
||||||
} catch(ClockOutOfSyncException e) {
|
} catch(ClockOutOfSyncException e) {
|
||||||
//we want an exception
|
//we want an exception
|
||||||
LOG.info("Recieved expected exception: "+e);
|
LOG.info("Recieved expected exception: "+e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// make sure values above warning threshold but below max threshold don't kill
|
||||||
|
LOG.debug("regionServerStartup 3");
|
||||||
|
InetAddress ia3 = InetAddress.getLocalHost();
|
||||||
|
sm.regionServerStartup(ia3, 1236, -1, System.currentTimeMillis() - warningSkew * 2);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@org.junit.Rule
|
@org.junit.Rule
|
||||||
|
|
Loading…
Reference in New Issue