HBASE-1198 OOME in IPC server does not trigger abort behavior
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@743660 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0e83a2224c
commit
3ca8865ad6
|
@ -21,6 +21,7 @@ Release 0.20.0 - Unreleased
|
||||||
name (Jonathan Gray via Andrew Purtell)
|
name (Jonathan Gray via Andrew Purtell)
|
||||||
HBASE-1190 TableInputFormatBase with row filters scan too far (Dave
|
HBASE-1190 TableInputFormatBase with row filters scan too far (Dave
|
||||||
Latham via Andrew Purtell)
|
Latham via Andrew Purtell)
|
||||||
|
HBASE-1198 OOME in IPC server does not trigger abort behavior
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
|
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.ipc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An interface for calling out of RPC for error conditions.
|
||||||
|
*/
|
||||||
|
public interface HBaseRPCErrorHandler {
|
||||||
|
/**
|
||||||
|
* Take actions on the event of an OutOfMemoryError.
|
||||||
|
* @param e the throwable
|
||||||
|
* @return if the server should be shut down
|
||||||
|
*/
|
||||||
|
public boolean checkOOME(final Throwable e) ;
|
||||||
|
}
|
|
@ -152,6 +152,7 @@ public abstract class HBaseServer {
|
||||||
private Responder responder = null;
|
private Responder responder = null;
|
||||||
private int numConnections = 0;
|
private int numConnections = 0;
|
||||||
private Handler[] handlers = null;
|
private Handler[] handlers = null;
|
||||||
|
private HBaseRPCErrorHandler errorHandler = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A convenience method to bind to a given address and report
|
* A convenience method to bind to a given address and report
|
||||||
|
@ -313,6 +314,14 @@ public abstract class HBaseServer {
|
||||||
key = null;
|
key = null;
|
||||||
}
|
}
|
||||||
} catch (OutOfMemoryError e) {
|
} catch (OutOfMemoryError e) {
|
||||||
|
if (errorHandler != null) {
|
||||||
|
if (errorHandler.checkOOME(e)) {
|
||||||
|
LOG.info(getName() + ": exiting on OOME");
|
||||||
|
closeCurrentConnection(key, e);
|
||||||
|
cleanupConnections(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
// we can run out of memory if we have too many threads
|
// we can run out of memory if we have too many threads
|
||||||
// log the event and sleep for a minute and give
|
// log the event and sleep for a minute and give
|
||||||
// some thread(s) a chance to finish
|
// some thread(s) a chance to finish
|
||||||
|
@ -320,6 +329,7 @@ public abstract class HBaseServer {
|
||||||
closeCurrentConnection(key, e);
|
closeCurrentConnection(key, e);
|
||||||
cleanupConnections(true);
|
cleanupConnections(true);
|
||||||
try { Thread.sleep(60000); } catch (Exception ie) {}
|
try { Thread.sleep(60000); } catch (Exception ie) {}
|
||||||
|
}
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
if (running) { // unexpected -- log it
|
if (running) { // unexpected -- log it
|
||||||
LOG.info(getName() + " caught: " +
|
LOG.info(getName() + " caught: " +
|
||||||
|
@ -501,6 +511,12 @@ public abstract class HBaseServer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (OutOfMemoryError e) {
|
} catch (OutOfMemoryError e) {
|
||||||
|
if (errorHandler != null) {
|
||||||
|
if (errorHandler.checkOOME(e)) {
|
||||||
|
LOG.info(getName() + ": exiting on OOME");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
//
|
//
|
||||||
// we can run out of memory if we have too many threads
|
// we can run out of memory if we have too many threads
|
||||||
// log the event and sleep for a minute and give
|
// log the event and sleep for a minute and give
|
||||||
|
@ -508,6 +524,7 @@ public abstract class HBaseServer {
|
||||||
//
|
//
|
||||||
LOG.warn("Out of Memory in server select", e);
|
LOG.warn("Out of Memory in server select", e);
|
||||||
try { Thread.sleep(60000); } catch (Exception ie) {}
|
try { Thread.sleep(60000); } catch (Exception ie) {}
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.warn("Exception in Responder " +
|
LOG.warn("Exception in Responder " +
|
||||||
StringUtils.stringifyException(e));
|
StringUtils.stringifyException(e));
|
||||||
|
@ -926,6 +943,16 @@ public abstract class HBaseServer {
|
||||||
LOG.info(getName() + " caught: " +
|
LOG.info(getName() + " caught: " +
|
||||||
StringUtils.stringifyException(e));
|
StringUtils.stringifyException(e));
|
||||||
}
|
}
|
||||||
|
} catch (OutOfMemoryError e) {
|
||||||
|
if (errorHandler != null) {
|
||||||
|
if (errorHandler.checkOOME(e)) {
|
||||||
|
LOG.info(getName() + ": exiting on OOME");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// rethrow if no handler
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.info(getName() + " caught: " +
|
LOG.info(getName() + " caught: " +
|
||||||
StringUtils.stringifyException(e));
|
StringUtils.stringifyException(e));
|
||||||
|
@ -1061,6 +1088,13 @@ public abstract class HBaseServer {
|
||||||
return callQueue.size();
|
return callQueue.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the handler for calling out of RPC for error conditions.
|
||||||
|
* @param handler the handler implementation
|
||||||
|
*/
|
||||||
|
public void setErrorHandler(HBaseRPCErrorHandler handler) {
|
||||||
|
this.errorHandler = handler;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When the read or write buffer size is larger than this limit, i/o will be
|
* When the read or write buffer size is larger than this limit, i/o will be
|
||||||
|
|
|
@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.io.Cell;
|
||||||
import org.apache.hadoop.hbase.io.HbaseMapWritable;
|
import org.apache.hadoop.hbase.io.HbaseMapWritable;
|
||||||
import org.apache.hadoop.hbase.io.RowResult;
|
import org.apache.hadoop.hbase.io.RowResult;
|
||||||
import org.apache.hadoop.hbase.ipc.HBaseRPC;
|
import org.apache.hadoop.hbase.ipc.HBaseRPC;
|
||||||
|
import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
|
||||||
import org.apache.hadoop.hbase.ipc.HBaseRPCProtocolVersion;
|
import org.apache.hadoop.hbase.ipc.HBaseRPCProtocolVersion;
|
||||||
import org.apache.hadoop.hbase.ipc.HBaseServer;
|
import org.apache.hadoop.hbase.ipc.HBaseServer;
|
||||||
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
|
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
|
||||||
|
@ -108,7 +109,7 @@ import org.apache.hadoop.util.StringUtils;
|
||||||
* HRegionServer makes a set of HRegions available to clients. It checks in with
|
* HRegionServer makes a set of HRegions available to clients. It checks in with
|
||||||
* the HMaster. There are many HRegionServers in a single HBase deployment.
|
* the HMaster. There are many HRegionServers in a single HBase deployment.
|
||||||
*/
|
*/
|
||||||
public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErrorHandler, Runnable {
|
||||||
static final Log LOG = LogFactory.getLog(HRegionServer.class);
|
static final Log LOG = LogFactory.getLog(HRegionServer.class);
|
||||||
private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
|
private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
|
||||||
private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
|
private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
|
||||||
|
@ -274,6 +275,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
this.server = HBaseRPC.getServer(this, address.getBindAddress(),
|
this.server = HBaseRPC.getServer(this, address.getBindAddress(),
|
||||||
address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
|
address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
|
||||||
false, conf);
|
false, conf);
|
||||||
|
this.server.setErrorHandler(this);
|
||||||
// Address is givin a default IP for the moment. Will be changed after
|
// Address is givin a default IP for the moment. Will be changed after
|
||||||
// calling the master.
|
// calling the master.
|
||||||
this.serverInfo = new HServerInfo(new HServerAddress(
|
this.serverInfo = new HServerInfo(new HServerAddress(
|
||||||
|
@ -718,7 +720,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
* @param e
|
* @param e
|
||||||
* @return True if we OOME'd and are aborting.
|
* @return True if we OOME'd and are aborting.
|
||||||
*/
|
*/
|
||||||
private boolean checkOOME(final Throwable e) {
|
public boolean checkOOME(final Throwable e) {
|
||||||
boolean stop = false;
|
boolean stop = false;
|
||||||
if (e instanceof OutOfMemoryError ||
|
if (e instanceof OutOfMemoryError ||
|
||||||
(e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||
|
(e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||
|
||||||
|
|
Loading…
Reference in New Issue