From 38eb4579e44fe15c46bdaa4a3a408277555fec64 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Thu, 12 Apr 2012 17:20:25 +0000 Subject: [PATCH] HBASE-5770 Add a clock skew warning threshold git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1325389 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop/hbase/master/ServerManager.java | 17 ++++++++++++----- .../hbase/master/TestClockSkewDetection.java | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index b5f4cb6f6e9..70901fe3905 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -98,6 +98,7 @@ public class ServerManager { private final DeadServer deadservers; private final long maxSkew; + private final long warningSkew; /** * Set of region servers which are dead but not expired immediately. If one @@ -124,6 +125,7 @@ public class ServerManager { this.services = services; Configuration c = master.getConfiguration(); maxSkew = c.getLong("hbase.master.maxclockskew", 30000); + warningSkew = c.getLong("hbase.master.warningclockskew", 10000); this.deadservers = new DeadServer(); this.connection = connect ? HConnectionManager.getConnection(c) : null; } @@ -196,14 +198,14 @@ public class ServerManager { } /** - * Checks if the clock skew between the server and the master. If the clock - * skew is too much it will throw an Exception. + * Checks the clock skew between the server and the master. If the clock skew exceeds the + * configured max, it will throw an exception; if it exceeds the configured warning threshold, + * it will log a warning but let the server start normally.
* @param serverName Incoming servers's name * @param serverCurrentTime - * @throws ClockOutOfSyncException + * @throws ClockOutOfSyncException if the skew exceeds the configured max value */ - private void checkClockSkew(final ServerName serverName, - final long serverCurrentTime) + private void checkClockSkew(final ServerName serverName, final long serverCurrentTime) throws ClockOutOfSyncException { long skew = System.currentTimeMillis() - serverCurrentTime; if (skew > maxSkew) { @@ -212,6 +214,11 @@ public class ServerManager { "Time difference of " + skew + "ms > max allowed of " + maxSkew + "ms"; LOG.warn(message); throw new ClockOutOfSyncException(message); + } else if (skew > warningSkew){ + String message = "Reported time for server " + serverName + " is out of sync with master " + + "by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " + + "error threshold is " + maxSkew + "ms)"; + LOG.warn(message); } } diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java b/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java index 977a5c7a756..7899bf09552 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java @@ -19,6 +19,8 @@ */ package org.apache.hadoop.hbase.master; +import static org.junit.Assert.fail; + import java.net.InetAddress; import junit.framework.Assert; @@ -82,18 +84,25 @@ public class TestClockSkewDetection { InetAddress ia1 = InetAddress.getLocalHost(); sm.regionServerStartup(ia1, 1234, -1, System.currentTimeMillis()); - long maxSkew = 30000; + final Configuration c = HBaseConfiguration.create(); + long maxSkew = c.getLong("hbase.master.maxclockskew", 30000); + long warningSkew = c.getLong("hbase.master.warningclockskew", 1000); try { LOG.debug("regionServerStartup 2"); InetAddress ia2 = InetAddress.getLocalHost(); sm.regionServerStartup(ia2, 1235, -1, System.currentTimeMillis() - 
maxSkew * 2); - Assert.assertTrue("HMaster should have thrown an ClockOutOfSyncException " - + "but didn't.", false); + fail("HMaster should have thrown an ClockOutOfSyncException but didn't."); } catch(ClockOutOfSyncException e) { //we want an exception LOG.info("Recieved expected exception: "+e); } + + // make sure values above warning threshold but below max threshold don't kill + LOG.debug("regionServerStartup 3"); + InetAddress ia3 = InetAddress.getLocalHost(); + sm.regionServerStartup(ia3, 1236, -1, System.currentTimeMillis() - warningSkew * 2); + } @org.junit.Rule