From 5a8da868334ebfcf41f8a38bb8809bc346396882 Mon Sep 17 00:00:00 2001 From: Jim Kellerman Date: Wed, 7 May 2008 22:08:21 +0000 Subject: [PATCH] HBASE-611 regionserver should do basic health check before reporting alls-well to the master git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@654301 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 ++ .../hbase/regionserver/HRegionServer.java | 23 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index b501e8ea5d8..6d6d4b2e8a5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -46,6 +46,8 @@ Hbase Change Log HBASE-47 Option to set TTL for columns in hbase (Andrew Purtell via Bryan Duxbury and Stack) HBASE-600 Filters have excessive DEBUG logging + HBASE-611 regionserver should do basic health check before reporting + alls-well to the master Release 0.1.1 - 04/11/2008 diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 3a4930db4c1..519db76040e 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -270,7 +270,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { init(reportForDuty(sleeper)); long lastMsg = 0; // Now ask master what it wants us to do and tell it what we have done - for (int tries = 0; !stopRequested.get();) { + for (int tries = 0; !stopRequested.get() && isHealthy();) { long now = System.currentTimeMillis(); if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) { // It has been way too long since we last reported to the master. 
@@ -576,7 +576,26 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
         serverInfo.getServerAddress().toString());
   }
 
-  /* Run some housekeeping tasks before we go into 'hibernation' sleeping at
+  /*
+   * Health check: true only when fsOk is set and every service thread below is alive.
+   */
+  private boolean isHealthy() {
+    if (!fsOk) {
+      // fsOk is false: report unhealthy (stop() is not called on this path)
+      return false;
+    }
+    // All five of leases, compactSplitThread, cacheFlusher, logRoller, workerThread must be alive
+    if (!(leases.isAlive() && compactSplitThread.isAlive() &&
+        cacheFlusher.isAlive() && logRoller.isAlive() &&
+        workerThread.isAlive())) {
+      // At least one is no longer alive - call stop() to shut down, then report unhealthy
+      stop();
+      return false;
+    }
+    return true;
+  }
+  /*
+   * Run some housekeeping tasks before we go into 'hibernation' sleeping at
    * the end of the main HRegionServer run loop.
    */
   private void housekeeping() {