HBASE-1198 OOME in IPC server does not trigger abort behavior
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@743660 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0e83a2224c
commit
3ca8865ad6
|
@ -21,6 +21,7 @@ Release 0.20.0 - Unreleased
|
|||
name (Jonathan Gray via Andrew Purtell)
|
||||
HBASE-1190 TableInputFormatBase with row filters scan too far (Dave
|
||||
Latham via Andrew Purtell)
|
||||
HBASE-1198 OOME in IPC server does not trigger abort behavior
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.ipc;
|
||||
|
||||
/**
|
||||
* An interface for calling out of RPC for error conditions.
|
||||
*/
|
||||
public interface HBaseRPCErrorHandler {
|
||||
/**
|
||||
* Take actions on the event of an OutOfMemoryError.
|
||||
* @param e the throwable
|
||||
* @return if the server should be shut down
|
||||
*/
|
||||
public boolean checkOOME(final Throwable e) ;
|
||||
}
|
|
@ -152,6 +152,7 @@ public abstract class HBaseServer {
|
|||
private Responder responder = null;
|
||||
private int numConnections = 0;
|
||||
private Handler[] handlers = null;
|
||||
private HBaseRPCErrorHandler errorHandler = null;
|
||||
|
||||
/**
|
||||
* A convenience method to bind to a given address and report
|
||||
|
@ -313,13 +314,22 @@ public abstract class HBaseServer {
|
|||
key = null;
|
||||
}
|
||||
} catch (OutOfMemoryError e) {
|
||||
// we can run out of memory if we have too many threads
|
||||
// log the event and sleep for a minute and give
|
||||
// some thread(s) a chance to finish
|
||||
LOG.warn("Out of Memory in server select", e);
|
||||
closeCurrentConnection(key, e);
|
||||
cleanupConnections(true);
|
||||
try { Thread.sleep(60000); } catch (Exception ie) {}
|
||||
if (errorHandler != null) {
|
||||
if (errorHandler.checkOOME(e)) {
|
||||
LOG.info(getName() + ": exiting on OOME");
|
||||
closeCurrentConnection(key, e);
|
||||
cleanupConnections(true);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// we can run out of memory if we have too many threads
|
||||
// log the event and sleep for a minute and give
|
||||
// some thread(s) a chance to finish
|
||||
LOG.warn("Out of Memory in server select", e);
|
||||
closeCurrentConnection(key, e);
|
||||
cleanupConnections(true);
|
||||
try { Thread.sleep(60000); } catch (Exception ie) {}
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
if (running) { // unexpected -- log it
|
||||
LOG.info(getName() + " caught: " +
|
||||
|
@ -364,7 +374,7 @@ public abstract class HBaseServer {
|
|||
return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress();
|
||||
}
|
||||
|
||||
void doAccept(SelectionKey key) throws IOException, OutOfMemoryError {
|
||||
void doAccept(SelectionKey key) throws IOException, OutOfMemoryError {
|
||||
Connection c = null;
|
||||
ServerSocketChannel server = (ServerSocketChannel) key.channel();
|
||||
// accept up to 10 connections
|
||||
|
@ -501,13 +511,20 @@ public abstract class HBaseServer {
|
|||
}
|
||||
}
|
||||
} catch (OutOfMemoryError e) {
|
||||
//
|
||||
// we can run out of memory if we have too many threads
|
||||
// log the event and sleep for a minute and give
|
||||
// some thread(s) a chance to finish
|
||||
//
|
||||
LOG.warn("Out of Memory in server select", e);
|
||||
try { Thread.sleep(60000); } catch (Exception ie) {}
|
||||
if (errorHandler != null) {
|
||||
if (errorHandler.checkOOME(e)) {
|
||||
LOG.info(getName() + ": exiting on OOME");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
//
|
||||
// we can run out of memory if we have too many threads
|
||||
// log the event and sleep for a minute and give
|
||||
// some thread(s) a chance to finish
|
||||
//
|
||||
LOG.warn("Out of Memory in server select", e);
|
||||
try { Thread.sleep(60000); } catch (Exception ie) {}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Exception in Responder " +
|
||||
StringUtils.stringifyException(e));
|
||||
|
@ -926,6 +943,16 @@ public abstract class HBaseServer {
|
|||
LOG.info(getName() + " caught: " +
|
||||
StringUtils.stringifyException(e));
|
||||
}
|
||||
} catch (OutOfMemoryError e) {
|
||||
if (errorHandler != null) {
|
||||
if (errorHandler.checkOOME(e)) {
|
||||
LOG.info(getName() + ": exiting on OOME");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// rethrow if no handler
|
||||
throw e;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.info(getName() + " caught: " +
|
||||
StringUtils.stringifyException(e));
|
||||
|
@ -1060,8 +1087,15 @@ public abstract class HBaseServer {
|
|||
public int getCallQueueLen() {
|
||||
return callQueue.size();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Set the handler for calling out of RPC for error conditions.
|
||||
* @param handler the handler implementation
|
||||
*/
|
||||
public void setErrorHandler(HBaseRPCErrorHandler handler) {
|
||||
this.errorHandler = handler;
|
||||
}
|
||||
|
||||
/**
|
||||
* When the read or write buffer size is larger than this limit, i/o will be
|
||||
* done in chunks of this size. Most RPC requests and responses would be
|
||||
|
|
|
@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.io.Cell;
|
|||
import org.apache.hadoop.hbase.io.HbaseMapWritable;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.hbase.ipc.HBaseRPC;
|
||||
import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
|
||||
import org.apache.hadoop.hbase.ipc.HBaseRPCProtocolVersion;
|
||||
import org.apache.hadoop.hbase.ipc.HBaseServer;
|
||||
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
|
||||
|
@ -108,7 +109,7 @@ import org.apache.hadoop.util.StringUtils;
|
|||
* HRegionServer makes a set of HRegions available to clients. It checks in with
|
||||
* the HMaster. There are many HRegionServers in a single HBase deployment.
|
||||
*/
|
||||
public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||
public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErrorHandler, Runnable {
|
||||
static final Log LOG = LogFactory.getLog(HRegionServer.class);
|
||||
private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
|
||||
private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
|
||||
|
@ -274,6 +275,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
this.server = HBaseRPC.getServer(this, address.getBindAddress(),
|
||||
address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
|
||||
false, conf);
|
||||
this.server.setErrorHandler(this);
|
||||
// Address is givin a default IP for the moment. Will be changed after
|
||||
// calling the master.
|
||||
this.serverInfo = new HServerInfo(new HServerAddress(
|
||||
|
@ -718,7 +720,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
* @param e
|
||||
* @return True if we OOME'd and are aborting.
|
||||
*/
|
||||
private boolean checkOOME(final Throwable e) {
|
||||
public boolean checkOOME(final Throwable e) {
|
||||
boolean stop = false;
|
||||
if (e instanceof OutOfMemoryError ||
|
||||
(e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||
|
||||
|
|
Loading…
Reference in New Issue