HBASE-1198 OOME in IPC server does not trigger abort behavior

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@743660 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Kyle Purtell 2009-02-12 08:56:49 +00:00
parent 0e83a2224c
commit 3ca8865ad6
4 changed files with 87 additions and 19 deletions

View File

@ -21,6 +21,7 @@ Release 0.20.0 - Unreleased
name (Jonathan Gray via Andrew Purtell)
HBASE-1190 TableInputFormatBase with row filters scan too far (Dave
Latham via Andrew Purtell)
HBASE-1198 OOME in IPC server does not trigger abort behavior
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage

View File

@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.ipc;
/**
 * Callback contract that lets the RPC layer call out to its owner when it
 * hits an error condition.
 */
public interface HBaseRPCErrorHandler {
  /**
   * Reacts to a throwable that the RPC layer caught and that may be, or may
   * wrap, an {@link OutOfMemoryError}.
   *
   * @param e the throwable that was caught
   * @return {@code true} if the server should be shut down,
   *         {@code false} otherwise
   */
  boolean checkOOME(Throwable e);
}

View File

@ -152,6 +152,7 @@ public abstract class HBaseServer {
private Responder responder = null;
private int numConnections = 0;
private Handler[] handlers = null;
private HBaseRPCErrorHandler errorHandler = null;
/**
* A convenience method to bind to a given address and report
@ -313,6 +314,14 @@ public abstract class HBaseServer {
key = null;
}
} catch (OutOfMemoryError e) {
if (errorHandler != null) {
if (errorHandler.checkOOME(e)) {
LOG.info(getName() + ": exiting on OOME");
closeCurrentConnection(key, e);
cleanupConnections(true);
return;
}
} else {
// we can run out of memory if we have too many threads
// log the event and sleep for a minute and give
// some thread(s) a chance to finish
@ -320,6 +329,7 @@ public abstract class HBaseServer {
closeCurrentConnection(key, e);
cleanupConnections(true);
try { Thread.sleep(60000); } catch (Exception ie) {}
}
} catch (InterruptedException e) {
if (running) { // unexpected -- log it
LOG.info(getName() + " caught: " +
@ -501,6 +511,12 @@ public abstract class HBaseServer {
}
}
} catch (OutOfMemoryError e) {
if (errorHandler != null) {
if (errorHandler.checkOOME(e)) {
LOG.info(getName() + ": exiting on OOME");
return;
}
} else {
//
// we can run out of memory if we have too many threads
// log the event and sleep for a minute and give
@ -508,6 +524,7 @@ public abstract class HBaseServer {
//
LOG.warn("Out of Memory in server select", e);
try { Thread.sleep(60000); } catch (Exception ie) {}
}
} catch (Exception e) {
LOG.warn("Exception in Responder " +
StringUtils.stringifyException(e));
@ -926,6 +943,16 @@ public abstract class HBaseServer {
LOG.info(getName() + " caught: " +
StringUtils.stringifyException(e));
}
} catch (OutOfMemoryError e) {
if (errorHandler != null) {
if (errorHandler.checkOOME(e)) {
LOG.info(getName() + ": exiting on OOME");
return;
}
} else {
// rethrow if no handler
throw e;
}
} catch (Exception e) {
LOG.info(getName() + " caught: " +
StringUtils.stringifyException(e));
@ -1061,6 +1088,13 @@ public abstract class HBaseServer {
return callQueue.size();
}
/**
 * Installs the callback this server uses to call out of RPC when an error
 * condition occurs. The supplied reference is stored as-is.
 *
 * @param handler the handler implementation
 */
public void setErrorHandler(final HBaseRPCErrorHandler handler) {
  errorHandler = handler;
}
/**
* When the read or write buffer size is larger than this limit, i/o will be

View File

@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.ipc.HBaseRPC;
import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
import org.apache.hadoop.hbase.ipc.HBaseRPCProtocolVersion;
import org.apache.hadoop.hbase.ipc.HBaseServer;
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
@ -108,7 +109,7 @@ import org.apache.hadoop.util.StringUtils;
* HRegionServer makes a set of HRegions available to clients. It checks in with
* the HMaster. There are many HRegionServers in a single HBase deployment.
*/
public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErrorHandler, Runnable {
static final Log LOG = LogFactory.getLog(HRegionServer.class);
private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
@ -274,6 +275,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.server = HBaseRPC.getServer(this, address.getBindAddress(),
address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
false, conf);
this.server.setErrorHandler(this);
// Address is given a default IP for the moment. Will be changed after
// calling the master.
this.serverInfo = new HServerInfo(new HServerAddress(
@ -718,7 +720,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
* @param e
* @return True if we OOME'd and are aborting.
*/
private boolean checkOOME(final Throwable e) {
public boolean checkOOME(final Throwable e) {
boolean stop = false;
if (e instanceof OutOfMemoryError ||
(e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||