From 581421a6b2e3b2bd0c09276f604aec53e3910a42 Mon Sep 17 00:00:00 2001 From: Zhihong Yu Date: Sun, 25 Sep 2011 03:46:53 +0000 Subject: [PATCH] HBASE-4014 Coprocessors: Flag the presence of coprocessors in logged exceptions (Eugene Koontz) git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1175292 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 4 + .../hbase/coprocessor/CoprocessorHost.java | 74 ++++- .../apache/hadoop/hbase/master/HMaster.java | 18 ++ .../hbase/master/MasterCoprocessorHost.java | 201 +++++++++--- .../hbase/regionserver/HRegionServer.java | 8 +- .../regionserver/RegionCoprocessorHost.java | 285 ++++++++++++++---- src/main/resources/hbase-default.xml | 11 + ...stMasterCoprocessorExceptionWithAbort.java | 228 ++++++++++++++ ...tMasterCoprocessorExceptionWithRemove.java | 221 ++++++++++++++ ...onServerCoprocessorExceptionWithAbort.java | 121 ++++++++ ...nServerCoprocessorExceptionWithRemove.java | 141 +++++++++ 11 files changed, 1220 insertions(+), 92 deletions(-) create mode 100644 src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java create mode 100644 src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java create mode 100644 src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java create mode 100644 src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java diff --git a/CHANGES.txt b/CHANGES.txt index f47b7243025..a7b1abda179 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,10 @@ Release 0.93.0 - Unreleased (dhruba borthakur) HBASE-4461 Expose getRowOrBefore via Thrift (jgray) + BUGS + HBASE-4014 Coprocessors: Flag the presence of coprocessors in logged + exceptions (Eugene Koontz) + Release 0.92.0 - Unreleased INCOMPATIBLE CHANGES HBASE-2002 Coprocessors: Client side support; Support RPC interface diff --git a/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java b/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java index 4e492e14712..dbae4fdba0a 100644 --- a/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java +++ b/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Coprocessor; import org.apache.hadoop.hbase.CoprocessorEnvironment; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.client.*; @@ -35,6 +36,7 @@ import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.SortedCopyOnWriteSet; import org.apache.hadoop.hbase.util.VersionInfo; +import org.apache.hadoop.hbase.Server; import java.io.File; import java.io.IOException; @@ -71,6 +73,12 @@ public abstract class CoprocessorHost { pathPrefix = UUID.randomUUID().toString(); } + private static Set coprocessorNames = + Collections.synchronizedSet(new HashSet()); + public static Set getLoadedCoprocessors() { + return coprocessorNames; + } + /** * Load system coprocessors. Read the class names from configuration. * Called by constructor. 
@@ -156,7 +164,7 @@ public abstract class CoprocessorHost { // load the jar and get the implementation main class String cp = System.getProperty("java.class.path"); // NOTE: Path.toURL is deprecated (toURI instead) but the URLClassLoader - // unsuprisingly wants URLs, not URIs; so we will use the deprecated + // unsurprisingly wants URLs, not URIs; so we will use the deprecated // method which returns URLs for as long as it is available List paths = new ArrayList(); paths.add(new File(dst.toString()).getCanonicalFile().toURL()); @@ -213,6 +221,9 @@ public abstract class CoprocessorHost { if (env instanceof Environment) { ((Environment)env).startup(); } + // HBASE-4014: maintain list of loaded coprocessors for later crash analysis + // if server (master or regionserver) aborts. + coprocessorNames.add(implClass.getName()); return env; } @@ -576,4 +587,65 @@ public abstract class CoprocessorHost { return new HTableWrapper(tableName); } } + + protected void abortServer(final String service, + final Server server, + final CoprocessorEnvironment environment, + final Throwable e) { + String coprocessorName = (environment.getInstance()).toString(); + server.abort("Aborting service: " + service + " running on : " + + server.getServerName() + " because coprocessor: " + + coprocessorName + " threw an exception.", e); + } + + protected void abortServer(final CoprocessorEnvironment environment, + final Throwable e) { + String coprocessorName = (environment.getInstance()).toString(); + LOG.error("The coprocessor: " + coprocessorName + " threw an unexpected " + + "exception: " + e + ", but there's no specific implementation of " + + " abortServer() for this coprocessor's environment."); + } + + + /** + * This is used by coprocessor hooks which are declared to throw IOException + * (or its subtypes). For such hooks, we should handle throwable objects + * depending on the Throwable's type. Those which are instances of + * IOException should be passed on to the client. This is in conformance with + * the HBase idiom regarding IOException: that it represents a circumstance + * that should be passed along to the client for its own handling. For + * example, a coprocessor that implements access controls would throw a + * subclass of IOException, such as AccessDeniedException, in its preGet() + * method to prevent an unauthorized client's performing a Get on a particular + * table. + * @param env Coprocessor Environment + * @param e Throwable object thrown by coprocessor. + * @exception IOException Exception + */ + protected void handleCoprocessorThrowable(final CoprocessorEnvironment env, + final Throwable e) + throws IOException { + if (e instanceof IOException) { + throw (IOException)e; + } + // If we got here, e is not an IOException. A loaded coprocessor has a + // fatal bug, and the server (master or regionserver) should remove the + // faulty coprocessor from its set of active coprocessors. Setting + // 'hbase.coprocessor.abortonerror' to true will cause abortServer(), + // which may be useful in development and testing environments where + // 'failing fast' for error analysis is desired. + if (env.getConfiguration().getBoolean("hbase.coprocessor.abortonerror",false)) { + // server is configured to abort. 
+      abortServer(env, e);
+    } else {
+      LOG.error("Removing coprocessor '" + env.toString() + "' from " +
+          "environment because it threw: " + e,e);
+      coprocessors.remove(env);
+      throw new DoNotRetryIOException("Coprocessor: '" + env.toString() +
+          "' threw: '" + e + "' and has been removed " + "from the active " +
+          "coprocessor set.", e);
+    }
+  }
 }
+
+
diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 06bf814225c..270f3f362d4 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.MetaScanner;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
 import org.apache.hadoop.hbase.executor.ExecutorService;
 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
 import org.apache.hadoop.hbase.ipc.HBaseRPC;
@@ -1187,8 +1188,25 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
     return fileSystemManager.getClusterId();
   }
 
+  /**
+   * The set of loaded coprocessors is stored in a static set. Since it's
+   * statically allocated, it does not require that HMaster's cpHost be
+   * initialized prior to accessing it.
+   * @return a String representation of the set of names of the loaded
+   * coprocessors.
+   */
+  public static String getLoadedCoprocessors() {
+    return CoprocessorHost.getLoadedCoprocessors().toString();
+  }
+
   @Override
   public void abort(final String msg, final Throwable t) {
+    if (cpHost != null) {
+      // HBASE-4014: dump a list of loaded coprocessors.
+      LOG.fatal("Master server abort: loaded coprocessors are: " +
+          getLoadedCoprocessors());
+    }
+
     if (abortNow(msg, t)) {
       if (t != null) LOG.fatal(msg, t);
       else LOG.fatal(msg);
diff --git a/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java b/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
index 0c95017f047..4beafb21509 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hbase.master;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.coprocessor.*;
@@ -34,6 +36,8 @@ import java.io.IOException;
 public class MasterCoprocessorHost extends CoprocessorHost {
 
+  private static final Log LOG = LogFactory.getLog(MasterCoprocessorHost.class);
+
   /**
    * Coprocessor environment extension providing access to master related
    * services.
@@ -69,6 +73,11 @@ public class MasterCoprocessorHost masterServices); } + @Override + protected void abortServer(final CoprocessorEnvironment env, final Throwable e) { + abortServer("master", masterServices, env, e); + } + /* Implementation of hooks for invoking MasterObservers */ void preCreateTable(HTableDescriptor htd, HRegionInfo[] regions) throws IOException { @@ -76,7 +85,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preCreateTable(ctx, htd, regions); + try { + ((MasterObserver)env.getInstance()).preCreateTable(ctx, htd, regions); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -90,7 +103,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postCreateTable(ctx, htd, regions); + try { + ((MasterObserver)env.getInstance()).postCreateTable(ctx, htd, regions); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -103,7 +120,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preDeleteTable(ctx, tableName); + try { + ((MasterObserver)env.getInstance()).preDeleteTable(ctx, tableName); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -116,7 +137,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postDeleteTable(ctx, tableName); + try { + ((MasterObserver)env.getInstance()).postDeleteTable(ctx, tableName); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -130,7 +155,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preModifyTable(ctx, tableName, htd); + try { + ((MasterObserver)env.getInstance()).preModifyTable(ctx, tableName, + htd); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -144,7 +174,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postModifyTable(ctx, tableName, htd); + try { + ((MasterObserver)env.getInstance()).postModifyTable(ctx, tableName, + htd); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -159,7 +194,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preAddColumn(ctx, tableName, column); + try { + ((MasterObserver)env.getInstance()).preAddColumn(ctx, tableName, column); + } catch (Throwable e) { + handleCoprocessorThrowable(env, 
e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -175,7 +214,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postAddColumn(ctx, tableName, column); + try { + ((MasterObserver)env.getInstance()).postAddColumn(ctx, tableName, + column); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -190,8 +234,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preModifyColumn( + try { + ((MasterObserver)env.getInstance()).preModifyColumn( ctx, tableName, descriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -207,8 +255,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postModifyColumn( - ctx, tableName, descriptor); + try { + ((MasterObserver)env.getInstance()).postModifyColumn( + ctx, tableName, descriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -223,7 +275,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preDeleteColumn(ctx, tableName, c); + try { + ((MasterObserver)env.getInstance()).preDeleteColumn(ctx, tableName, c); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -239,7 +295,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postDeleteColumn(ctx, tableName, c); + try { + ((MasterObserver)env.getInstance()).postDeleteColumn(ctx, tableName, + c); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -252,7 +313,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preEnableTable(ctx, tableName); + try { + ((MasterObserver)env.getInstance()).preEnableTable(ctx, tableName); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -265,7 +330,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postEnableTable(ctx, tableName); + try { + ((MasterObserver)env.getInstance()).postEnableTable(ctx, tableName); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -278,7 +347,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = 
ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preDisableTable(ctx, tableName); + try { + ((MasterObserver)env.getInstance()).preDisableTable(ctx, tableName); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -291,7 +364,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postDisableTable(ctx, tableName); + try { + ((MasterObserver)env.getInstance()).postDisableTable(ctx, tableName); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -306,8 +383,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preMove( - ctx, region, srcServer, destServer); + try { + ((MasterObserver)env.getInstance()).preMove( + ctx, region, srcServer, destServer); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -323,8 +404,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postMove( - ctx, region, srcServer, destServer); + try { + ((MasterObserver)env.getInstance()).postMove( + ctx, region, srcServer, destServer); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -338,7 +423,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver) env.getInstance()).preAssign(ctx, regionInfo); + try { + ((MasterObserver) env.getInstance()).preAssign(ctx, regionInfo); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -353,7 +442,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver) env.getInstance()).postAssign(ctx, regionInfo); + try { + ((MasterObserver)env.getInstance()).postAssign(ctx, regionInfo); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -368,8 +461,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preUnassign( - ctx, regionInfo, force); + try { + ((MasterObserver)env.getInstance()).preUnassign( + ctx, regionInfo, force); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -385,8 +482,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postUnassign( - ctx, regionInfo, force); + try { + ((MasterObserver)env.getInstance()).postUnassign( + ctx, regionInfo, force); + } 
catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -400,7 +501,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preBalance(ctx); + try { + ((MasterObserver)env.getInstance()).preBalance(ctx); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -415,7 +520,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postBalance(ctx); + try { + ((MasterObserver)env.getInstance()).postBalance(ctx); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -429,8 +538,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - balance = ((MasterObserver)env.getInstance()).preBalanceSwitch( - ctx, balance); + try { + balance = ((MasterObserver)env.getInstance()).preBalanceSwitch( + ctx, balance); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -445,8 +558,12 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postBalanceSwitch( - ctx, oldValue, newValue); + try { + ((MasterObserver)env.getInstance()).postBalanceSwitch( + ctx, oldValue, newValue); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -459,7 +576,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preShutdown(ctx); + try { + ((MasterObserver)env.getInstance()).preShutdown(ctx); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -472,7 +593,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).preStopMaster(ctx); + try { + ((MasterObserver)env.getInstance()).preStopMaster(ctx); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -485,7 +610,11 @@ public class MasterCoprocessorHost for (MasterEnvironment env: coprocessors) { if (env.getInstance() instanceof MasterObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((MasterObserver)env.getInstance()).postStartMaster(ctx); + try { + ((MasterObserver)env.getInstance()).postStartMaster(ctx); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index f960a8bea33..0c06f4feb29 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ 
b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -92,6 +92,7 @@ import org.apache.hadoop.hbase.client.Row;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.coprocessor.Exec;
 import org.apache.hadoop.hbase.client.coprocessor.ExecResult;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
 import org.apache.hadoop.hbase.executor.ExecutorService;
 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
 import org.apache.hadoop.hbase.filter.BinaryComparator;
@@ -617,7 +618,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
     try {
       // Try and register with the Master; tell it we are here. Break if
-      // server is stopped or the clusterup flag is down of hdfs went wacky.
+      // server is stopped or the clusterup flag is down or hdfs went wacky.
       while (keepLooping()) {
         MapWritable w = reportForDuty();
         if (w == null) {
@@ -1506,6 +1507,11 @@
     }
     this.abortRequested = true;
     this.reservedSpace.clear();
+    // HBASE-4014: show list of coprocessors that were loaded to help debug
+    // regionserver crashes. Note that we're implicitly using
+    // java.util.HashSet's toString() method to print the coprocessor names.
+    LOG.fatal("RegionServer abort: loaded coprocessors are: " +
+        CoprocessorHost.getLoadedCoprocessors());
     if (this.metrics != null) {
       LOG.info("Dump of metrics: " + this.metrics);
     }
diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
index a6cf6a8068e..0ba7fb4289a 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.Coprocessor;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.client.*;
 import org.apache.hadoop.hbase.coprocessor.*;
@@ -196,6 +197,33 @@ public class RegionCoprocessorHost
       rsServices);
   }
 
+  @Override
+  protected void abortServer(final CoprocessorEnvironment env, final Throwable e) {
+    abortServer("regionserver", rsServices, env, e);
+  }
+
+  /**
+   * HBASE-4014: This is used by coprocessor hooks which are not declared to throw exceptions.
+   *
+   * For example, {@link
+   * org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost#preOpen()} and
+   * {@link org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost#postOpen()} are such hooks.
+   *
+   * See also {@link org.apache.hadoop.hbase.master.MasterCoprocessorHost#handleCoprocessorThrowable()}
+   * @param env The environment of the coprocessor that threw the exception.
+   * @param e The exception that was thrown.
+   */
+  private void handleCoprocessorThrowableNoRethrow(
+      final CoprocessorEnvironment env, final Throwable e) {
+    try {
+      handleCoprocessorThrowable(env,e);
+    } catch (IOException ioe) {
+      // We cannot throw exceptions from the caller hook, so ignore.
+      LOG.warn("handleCoprocessorThrowable() threw an IOException while attempting to handle Throwable " + e +
+          ". 
Ignoring.",e); + } + } + /** * Invoked before a region open */ @@ -204,7 +232,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preOpen(ctx); + try { + ((RegionObserver)env.getInstance()).preOpen(ctx); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } if (ctx.shouldComplete()) { break; } @@ -220,7 +252,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postOpen(ctx); + try { + ((RegionObserver)env.getInstance()).postOpen(ctx); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } if (ctx.shouldComplete()) { break; } @@ -237,7 +273,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preClose(ctx, abortRequested); + try { + ((RegionObserver)env.getInstance()).preClose(ctx, abortRequested); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } } } } @@ -251,7 +291,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postClose(ctx, abortRequested); + try { + ((RegionObserver)env.getInstance()).postClose(ctx, abortRequested); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } + } shutdown(env); } @@ -293,8 +338,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postCompactSelection( - ctx, store, selected); + try { + ((RegionObserver)env.getInstance()).postCompactSelection( + ctx, store, selected); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env,e); + } if (ctx.shouldComplete()) { break; } @@ -313,8 +362,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - scanner = ((RegionObserver)env.getInstance()).preCompact( - ctx, store, scanner); + try { + scanner = ((RegionObserver)env.getInstance()).preCompact( + ctx, store, scanner); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env,e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -334,7 +387,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postCompact(ctx, store, resultFile); + try { + ((RegionObserver)env.getInstance()).postCompact(ctx, store, resultFile); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } if (ctx.shouldComplete()) { break; } @@ -350,7 +407,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preFlush(ctx); + try { + 
((RegionObserver)env.getInstance()).preFlush(ctx); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } if (ctx.shouldComplete()) { break; } @@ -366,7 +427,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postFlush(ctx); + try { + ((RegionObserver)env.getInstance()).postFlush(ctx); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } if (ctx.shouldComplete()) { break; } @@ -382,7 +447,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preSplit(ctx); + try { + ((RegionObserver)env.getInstance()).preSplit(ctx); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } if (ctx.shouldComplete()) { break; } @@ -400,7 +469,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postSplit(ctx, l, r); + try { + ((RegionObserver)env.getInstance()).postSplit(ctx, l, r); + } catch (Throwable e) { + handleCoprocessorThrowableNoRethrow(env, e); + } if (ctx.shouldComplete()) { break; } @@ -424,8 +497,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preGetClosestRowBefore(ctx, row, family, - result); + try { + ((RegionObserver)env.getInstance()).preGetClosestRowBefore(ctx, row, + family, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -447,8 +524,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postGetClosestRowBefore(ctx, row, family, - result); + try { + ((RegionObserver)env.getInstance()).postGetClosestRowBefore(ctx, row, + family, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -468,7 +549,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preGet(ctx, get, results); + try { + ((RegionObserver)env.getInstance()).preGet(ctx, get, results); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -490,7 +575,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postGet(ctx, get, results); + try { + ((RegionObserver)env.getInstance()).postGet(ctx, get, results); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -511,7 +600,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { 
ctx = ObserverContext.createAndPrepare(env, ctx); - exists = ((RegionObserver)env.getInstance()).preExists(ctx, get, exists); + try { + exists = ((RegionObserver)env.getInstance()).preExists(ctx, get, exists); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -533,7 +626,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - exists = ((RegionObserver)env.getInstance()).postExists(ctx, get, exists); + try { + exists = ((RegionObserver)env.getInstance()).postExists(ctx, get, exists); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -555,7 +652,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).prePut(ctx, familyMap, writeToWAL); + try { + ((RegionObserver)env.getInstance()).prePut(ctx, familyMap, writeToWAL); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -576,7 +677,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postPut(ctx, familyMap, writeToWAL); + try { + ((RegionObserver)env.getInstance()).postPut(ctx, familyMap, writeToWAL); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -597,7 +702,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preDelete(ctx, familyMap, writeToWAL); + try { + ((RegionObserver)env.getInstance()).preDelete(ctx, familyMap, writeToWAL); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -618,7 +727,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postDelete(ctx, familyMap, writeToWAL); + try { + ((RegionObserver)env.getInstance()).postDelete(ctx, familyMap, writeToWAL); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -647,8 +760,14 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - result = ((RegionObserver)env.getInstance()).preCheckAndPut(ctx, row, family, - qualifier, compareOp, comparator, put, result); + try { + result = ((RegionObserver)env.getInstance()).preCheckAndPut(ctx, row, family, + qualifier, compareOp, comparator, put, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + + bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -676,8 +795,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - result = 
((RegionObserver)env.getInstance()).postCheckAndPut(ctx, row, - family, qualifier, compareOp, comparator, put, result); + try { + result = ((RegionObserver)env.getInstance()).postCheckAndPut(ctx, row, + family, qualifier, compareOp, comparator, put, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -707,8 +830,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - result = ((RegionObserver)env.getInstance()).preCheckAndDelete(ctx, row, - family, qualifier, compareOp, comparator, delete, result); + try { + result = ((RegionObserver)env.getInstance()).preCheckAndDelete(ctx, row, + family, qualifier, compareOp, comparator, delete, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -736,9 +863,13 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - result = ((RegionObserver)env.getInstance()) - .postCheckAndDelete(ctx, row, family, qualifier, compareOp, - comparator, delete, result); + try { + result = ((RegionObserver)env.getInstance()) + .postCheckAndDelete(ctx, row, family, qualifier, compareOp, + comparator, delete, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -765,8 +896,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - amount = ((RegionObserver)env.getInstance()).preIncrementColumnValue(ctx, - row, family, qualifier, amount, writeToWAL); + try { + amount = ((RegionObserver)env.getInstance()).preIncrementColumnValue(ctx, + row, family, qualifier, amount, writeToWAL); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -793,8 +928,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - result = ((RegionObserver)env.getInstance()).postIncrementColumnValue(ctx, - row, family, qualifier, amount, writeToWAL, result); + try { + result = ((RegionObserver)env.getInstance()).postIncrementColumnValue(ctx, + row, family, qualifier, amount, writeToWAL, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -817,7 +956,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preIncrement(ctx, increment, result); + try { + ((RegionObserver)env.getInstance()).preIncrement(ctx, increment, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -838,7 +981,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postIncrement(ctx, increment, result); + try { + 
((RegionObserver)env.getInstance()).postIncrement(ctx, increment, result); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -859,7 +1006,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - s = ((RegionObserver)env.getInstance()).preScannerOpen(ctx, scan, s); + try { + s = ((RegionObserver)env.getInstance()).preScannerOpen(ctx, scan, s); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -881,7 +1032,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - s = ((RegionObserver)env.getInstance()).postScannerOpen(ctx, scan, s); + try { + s = ((RegionObserver)env.getInstance()).postScannerOpen(ctx, scan, s); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -906,8 +1061,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - hasNext = ((RegionObserver)env.getInstance()).preScannerNext(ctx, s, results, - limit, hasNext); + try { + hasNext = ((RegionObserver)env.getInstance()).preScannerNext(ctx, s, results, + limit, hasNext); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -932,8 +1091,12 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - hasMore = ((RegionObserver)env.getInstance()).postScannerNext(ctx, s, - results, limit, hasMore); + try { + hasMore = ((RegionObserver)env.getInstance()).postScannerNext(ctx, s, + results, limit, hasMore); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -954,7 +1117,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preScannerClose(ctx, s); + try { + ((RegionObserver)env.getInstance()).preScannerClose(ctx, s); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= ctx.shouldBypass(); if (ctx.shouldComplete()) { break; @@ -974,7 +1141,11 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).postScannerClose(ctx, s); + try { + ((RegionObserver)env.getInstance()).postScannerClose(ctx, s); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } if (ctx.shouldComplete()) { break; } @@ -996,14 +1167,17 @@ public class RegionCoprocessorHost for (RegionEnvironment env: coprocessors) { if (env.getInstance() instanceof RegionObserver) { ctx = ObserverContext.createAndPrepare(env, ctx); - ((RegionObserver)env.getInstance()).preWALRestore(ctx, info, logKey, - logEdit); + try { + ((RegionObserver)env.getInstance()).preWALRestore(ctx, info, logKey, + logEdit); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } bypass |= 
ctx.shouldBypass();
         if (ctx.shouldComplete()) {
           break;
         }
       }
-      }
     return bypass;
   }
 
@@ -1020,13 +1194,16 @@ public class RegionCoprocessorHost
     for (RegionEnvironment env: coprocessors) {
       if (env.getInstance() instanceof RegionObserver) {
         ctx = ObserverContext.createAndPrepare(env, ctx);
-        ((RegionObserver)env.getInstance()).postWALRestore(ctx, info,
-            logKey, logEdit);
+        try {
+          ((RegionObserver)env.getInstance()).postWALRestore(ctx, info,
+              logKey, logEdit);
+        } catch (Throwable e) {
+          handleCoprocessorThrowable(env, e);
+        }
         if (ctx.shouldComplete()) {
           break;
         }
       }
-      }
     }
   }
diff --git a/src/main/resources/hbase-default.xml b/src/main/resources/hbase-default.xml
index 2c8f44b1fd2..a35e7c737a7 100644
--- a/src/main/resources/hbase-default.xml
+++ b/src/main/resources/hbase-default.xml
@@ -725,4 +725,15 @@
       version is X.X.X-SNAPSHOT"
     </description>
   </property>
+  <property>
+    <name>hbase.coprocessor.abortonerror</name>
+    <value>false</value>
+    <description>
+    Set to true to cause the hosting server (master or regionserver) to
+    abort if a coprocessor throws a Throwable object that is not IOException or
+    a subclass of IOException. Setting it to true might be useful in development
+    environments where one wants to terminate the server as soon as possible to
+    simplify coprocessor failure analysis.
+    </description>
+  </property>
 </configuration>
diff --git a/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java
new file mode 100644
index 00000000000..0bc89847ced
--- /dev/null
+++ b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithAbort.java
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Abortable;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Tests unhandled exceptions thrown by coprocessors running on master.
+ * Expected result is that the master will abort with an informative + * error message describing the set of its loaded coprocessors for crash diagnosis. + * (HBASE-4014). + */ +public class TestMasterCoprocessorExceptionWithAbort { + + public static class MasterTracker extends ZooKeeperNodeTracker { + public boolean masterZKNodeWasDeleted = false; + + public MasterTracker(ZooKeeperWatcher zkw, String masterNode, Abortable abortable) { + super(zkw, masterNode, abortable); + } + + @Override + public synchronized void nodeDeleted(String path) { + if (path.equals("/hbase/master")) { + masterZKNodeWasDeleted = true; + } + } + } + + public static class CreateTableThread extends Thread { + HBaseTestingUtility UTIL; + public CreateTableThread(HBaseTestingUtility UTIL) { + this.UTIL = UTIL; + } + + @Override + public void run() { + // create a table : master coprocessor will throw an exception and not + // catch it. + HTableDescriptor htd = new HTableDescriptor(TEST_TABLE); + htd.addFamily(new HColumnDescriptor(TEST_FAMILY)); + try { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + admin.createTable(htd); + fail("BuggyMasterObserver failed to throw an exception."); + } catch (IOException e) { + assertEquals("HBaseAdmin threw an interrupted IOException as expected.", + e.getClass().getName(), "java.io.InterruptedIOException"); + } + } + } + + public static class BuggyMasterObserver extends BaseMasterObserver { + private boolean preCreateTableCalled; + private boolean postCreateTableCalled; + private boolean startCalled; + private boolean postStartMasterCalled; + + @Override + public void postCreateTable(ObserverContext env, + HTableDescriptor desc, HRegionInfo[] regions) throws IOException { + // cause a NullPointerException and don't catch it: this will cause the + // master to abort(). + Integer i; + i = null; + i = i++; + } + + public boolean wasCreateTableCalled() { + return preCreateTableCalled && postCreateTableCalled; + } + + @Override + public void postStartMaster(ObserverContext ctx) + throws IOException { + postStartMasterCalled = true; + } + + public boolean wasStartMasterCalled() { + return postStartMasterCalled; + } + + @Override + public void start(CoprocessorEnvironment env) throws IOException { + startCalled = true; + } + + public boolean wasStarted() { + return startCalled; + } + } + + private static HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static byte[] TEST_TABLE = Bytes.toBytes("observed_table"); + private static byte[] TEST_FAMILY = Bytes.toBytes("fam1"); + + @BeforeClass + public static void setupBeforeClass() throws Exception { + Configuration conf = UTIL.getConfiguration(); + conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, + BuggyMasterObserver.class.getName()); + conf.set("hbase.coprocessor.abortonerror", "true"); + UTIL.startMiniCluster(2); + } + + @AfterClass + public static void teardownAfterClass() throws Exception { + UTIL.shutdownMiniCluster(); + } + + @Test(timeout=30000) + public void testExceptionFromCoprocessorWhenCreatingTable() + throws IOException { + MiniHBaseCluster cluster = UTIL.getHBaseCluster(); + + HMaster master = cluster.getMaster(); + MasterCoprocessorHost host = master.getCoprocessorHost(); + BuggyMasterObserver cp = (BuggyMasterObserver)host.findCoprocessor( + BuggyMasterObserver.class.getName()); + assertFalse("No table created yet", cp.wasCreateTableCalled()); + + // set a watch on the zookeeper /hbase/master node. If the master dies, + // the node will be deleted. 
+ ZooKeeperWatcher zkw = new ZooKeeperWatcher(UTIL.getConfiguration(), + "unittest", new Abortable() { + @Override + public void abort(String why, Throwable e) { + throw new RuntimeException("Fatal ZK error: " + why, e); + } + @Override + public boolean isAborted() { + return false; + } + }); + + MasterTracker masterTracker = new MasterTracker(zkw,"/hbase/master", + new Abortable() { + @Override + public void abort(String why, Throwable e) { + throw new RuntimeException("Fatal ZK master tracker error, why=", e); + } + @Override + public boolean isAborted() { + return false; + } + }); + + masterTracker.start(); + zkw.registerListener(masterTracker); + + // Test (part of the) output that should have be printed by master when it aborts: + // (namely the part that shows the set of loaded coprocessors). + // In this test, there is only a single coprocessor (BuggyMasterObserver). + assertTrue(master.getLoadedCoprocessors(). + equals("[" + + TestMasterCoprocessorExceptionWithAbort.BuggyMasterObserver.class.getName() + + "]")); + + CreateTableThread createTableThread = new CreateTableThread(UTIL); + + // Attempting to create a table (using createTableThread above) triggers an NPE in BuggyMasterObserver. + // Master will then abort and the /hbase/master zk node will be deleted. + createTableThread.start(); + + // Wait up to 30 seconds for master's /hbase/master zk node to go away after master aborts. + for (int i = 0; i < 30; i++) { + if (masterTracker.masterZKNodeWasDeleted == true) { + break; + } + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + fail("InterruptedException while waiting for master zk node to " + + "be deleted."); + } + } + + assertTrue("Master aborted on coprocessor exception, as expected.", + masterTracker.masterZKNodeWasDeleted); + + createTableThread.interrupt(); + try { + createTableThread.join(1000); + } catch (InterruptedException e) { + assertTrue("Ignoring InterruptedException while waiting for " + + " createTableThread.join().", true); + } + } + +} diff --git a/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java new file mode 100644 index 00000000000..1f9b77cf2b1 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterCoprocessorExceptionWithRemove.java @@ -0,0 +1,221 @@ +/* + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.coprocessor; + +import java.io.IOException; +import java.io.InterruptedIOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.CoprocessorEnvironment; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.MasterCoprocessorHost; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; + +/** + * Tests unhandled exceptions thrown by coprocessors running on master. + * Expected result is that the master will remove the buggy coprocessor from + * its set of coprocessors and throw a org.apache.hadoop.hbase.DoNotRetryIOException + * back to the client. + * (HBASE-4014). + */ +public class TestMasterCoprocessorExceptionWithRemove { + + public static class MasterTracker extends ZooKeeperNodeTracker { + public boolean masterZKNodeWasDeleted = false; + + public MasterTracker(ZooKeeperWatcher zkw, String masterNode, Abortable abortable) { + super(zkw, masterNode, abortable); + } + + @Override + public synchronized void nodeDeleted(String path) { + if (path.equals("/hbase/master")) { + masterZKNodeWasDeleted = true; + } + } + } + + public static class BuggyMasterObserver extends BaseMasterObserver { + private boolean preCreateTableCalled; + private boolean postCreateTableCalled; + private boolean startCalled; + private boolean postStartMasterCalled; + + @Override + public void postCreateTable(ObserverContext env, + HTableDescriptor desc, HRegionInfo[] regions) throws IOException { + // Cause a NullPointerException and don't catch it: this should cause the + // master to throw an o.apache.hadoop.hbase.DoNotRetryIOException to the + // client. 
+      Integer i;
+      i = null;
+      i = i++;
+    }
+
+    public boolean wasCreateTableCalled() {
+      return preCreateTableCalled && postCreateTableCalled;
+    }
+
+    @Override
+    public void postStartMaster(ObserverContext<MasterCoprocessorEnvironment> ctx)
+        throws IOException {
+      postStartMasterCalled = true;
+    }
+
+    public boolean wasStartMasterCalled() {
+      return postStartMasterCalled;
+    }
+
+    @Override
+    public void start(CoprocessorEnvironment env) throws IOException {
+      startCalled = true;
+    }
+
+    public boolean wasStarted() {
+      return startCalled;
+    }
+  }
+
+  private static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static byte[] TEST_TABLE1 = Bytes.toBytes("observed_table1");
+  private static byte[] TEST_FAMILY1 = Bytes.toBytes("fam1");
+
+  private static byte[] TEST_TABLE2 = Bytes.toBytes("table2");
+  private static byte[] TEST_FAMILY2 = Bytes.toBytes("fam2");
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    Configuration conf = UTIL.getConfiguration();
+    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        BuggyMasterObserver.class.getName());
+    UTIL.startMiniCluster(2);
+  }
+
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test(timeout=30000)
+  public void testExceptionFromCoprocessorWhenCreatingTable()
+      throws IOException {
+    MiniHBaseCluster cluster = UTIL.getHBaseCluster();
+
+    HMaster master = cluster.getMaster();
+    MasterCoprocessorHost host = master.getCoprocessorHost();
+    BuggyMasterObserver cp = (BuggyMasterObserver)host.findCoprocessor(
+        BuggyMasterObserver.class.getName());
+    assertFalse("No table created yet", cp.wasCreateTableCalled());
+
+    // Set a watch on the zookeeper /hbase/master node. If the master dies,
+    // the node will be deleted.
+    // Master should *NOT* die: we are testing that the default setting,
+    // hbase.coprocessor.abortonerror=false, is respected.
+    ZooKeeperWatcher zkw = new ZooKeeperWatcher(UTIL.getConfiguration(),
+      "unittest", new Abortable() {
+        @Override
+        public void abort(String why, Throwable e) {
+          throw new RuntimeException("Fatal ZK error: " + why, e);
+        }
+        @Override
+        public boolean isAborted() {
+          return false;
+        }
+      });
+
+    MasterTracker masterTracker = new MasterTracker(zkw, "/hbase/master",
+        new Abortable() {
+          @Override
+          public void abort(String why, Throwable e) {
+            throw new RuntimeException("Fatal Zookeeper tracker error, why=" + why, e);
+          }
+          @Override
+          public boolean isAborted() {
+            return false;
+          }
+        });
+
+    masterTracker.start();
+    zkw.registerListener(masterTracker);
+
+    // Check the (part of the) output that the master would print if it aborted
+    // (namely the part that shows the set of loaded coprocessors).
+    // In this test, there is only a single coprocessor (BuggyMasterObserver).
+    String coprocessorName =
+        BuggyMasterObserver.class.getName();
+    assertTrue(master.getLoadedCoprocessors().equals("[" + coprocessorName + "]"));
+
+    HTableDescriptor htd1 = new HTableDescriptor(TEST_TABLE1);
+    htd1.addFamily(new HColumnDescriptor(TEST_FAMILY1));
+
+    boolean threwDNRE = false;
+    try {
+      HBaseAdmin admin = UTIL.getHBaseAdmin();
+      admin.createTable(htd1);
+    } catch (IOException e) {
+      if (e.getClass().getName().equals("org.apache.hadoop.hbase.DoNotRetryIOException")) {
+        threwDNRE = true;
+      }
+    } finally {
+      assertTrue(threwDNRE);
+    }
+
+    // Wait a few seconds to make sure that the master hasn't aborted.
+    try {
+      Thread.sleep(3000);
+    } catch (InterruptedException e) {
+      fail("InterruptedException while sleeping.");
+    }
+
+    assertFalse("Master should not have aborted on the coprocessor's NPE.",
+        masterTracker.masterZKNodeWasDeleted);
+
+    String loadedCoprocessors = master.getLoadedCoprocessors();
+    assertTrue(loadedCoprocessors.equals("[" + coprocessorName + "]"));
+
+    // Verify that BuggyMasterObserver has been removed due to its misbehavior
+    // by creating another table: should not have a problem this time.
+    HTableDescriptor htd2 = new HTableDescriptor(TEST_TABLE2);
+    htd2.addFamily(new HColumnDescriptor(TEST_FAMILY2));
+    HBaseAdmin admin = UTIL.getHBaseAdmin();
+    try {
+      admin.createTable(htd2);
+    } catch (IOException e) {
+      fail("Failed to create table after buggy coprocessor removal: " + e);
+    }
+  }
+}
diff --git a/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java
new file mode 100644
index 00000000000..41ac81ba13f
--- /dev/null
+++ b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests unhandled exceptions thrown by coprocessors running on a regionserver.
+ * Expected result is that the regionserver will abort with an informative
+ * error message describing the set of its loaded coprocessors for crash
+ * diagnosis. (HBASE-4014).
+ */
+public class TestRegionServerCoprocessorExceptionWithAbort {
+  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    // Set the configuration to indicate which coprocessor should be loaded.
+    Configuration conf = TEST_UTIL.getConfiguration();
+    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
+        BuggyRegionObserver.class.getName());
+    conf.set("hbase.coprocessor.abortonerror", "true");
+    TEST_UTIL.startMiniCluster(2);
+  }
+
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test(timeout=30000)
+  public void testExceptionFromCoprocessorDuringPut()
+      throws IOException {
+    // When we try to write to TEST_TABLE, the buggy coprocessor will
+    // cause a NullPointerException, which will cause the regionserver (which
+    // hosts the region we attempted to write to) to abort.
+    byte[] TEST_TABLE = Bytes.toBytes("observed_table");
+    byte[] TEST_FAMILY = Bytes.toBytes("aaa");
+
+    HTable table = TEST_UTIL.createTable(TEST_TABLE, TEST_FAMILY);
+    TEST_UTIL.createMultiRegions(table, TEST_FAMILY);
+
+    // Note which regionserver will abort (after the put is attempted).
+    HRegionServer regionServer =
+        TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE);
+    try {
+      final byte[] ROW = Bytes.toBytes("aaa");
+      Put put = new Put(ROW);
+      put.add(TEST_FAMILY, ROW, ROW);
+      table.put(put);
+    } catch (IOException e) {
+      fail("put() failed: " + e);
+    }
+    // Wait up to 30 seconds for the regionserver to abort.
+    boolean regionServerAborted = false;
+    for (int i = 0; i < 30; i++) {
+      if (regionServer.isAborted()) {
+        regionServerAborted = true;
+        break;
+      }
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        fail("InterruptedException while waiting for regionserver " +
+            "to abort.");
+      }
+    }
+    assertTrue("RegionServer should have aborted on the coprocessor's exception.",
+        regionServerAborted);
+  }
+
+  public static class BuggyRegionObserver extends SimpleRegionObserver {
+    @Override
+    public void prePut(final ObserverContext<RegionCoprocessorEnvironment> c,
+                       final Map<byte[], List<KeyValue>> familyMap,
+                       final boolean writeToWAL) {
+      String tableName =
+          c.getEnvironment().getRegion().getRegionInfo().getTableNameAsString();
+      if (tableName.equals("observed_table")) {
+        Integer i = null;
+        i = i + 1;
+      }
+    }
+  }
+}
diff --git a/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java
new file mode 100644
index 00000000000..ea5e4593f66
--- /dev/null
+++ b/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithRemove.java
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Tests unhandled exceptions thrown by coprocessors running on a regionserver.
+ * Expected result is that the regionserver will remove the buggy coprocessor from
+ * its set of coprocessors and throw an org.apache.hadoop.hbase.DoNotRetryIOException
+ * back to the client.
+ * (HBASE-4014).
+ */
+public class TestRegionServerCoprocessorExceptionWithRemove {
+  public static class BuggyRegionObserver extends SimpleRegionObserver {
+    @Override
+    public void prePut(final ObserverContext<RegionCoprocessorEnvironment> c,
+                       final Map<byte[], List<KeyValue>> familyMap,
+                       final boolean writeToWAL) {
+      String tableName =
+          c.getEnvironment().getRegion().getRegionInfo().getTableNameAsString();
+      if (tableName.equals("observed_table")) {
+        Integer i = null;
+        i = i + 1;
+      }
+    }
+  }
+
+  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  private static ZooKeeperWatcher zkw = null;
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    // Set the configuration to indicate which coprocessor should be loaded.
+    Configuration conf = TEST_UTIL.getConfiguration();
+    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
+        BuggyRegionObserver.class.getName());
+    TEST_UTIL.startMiniCluster(2);
+  }
+
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test(timeout=30000)
+  public void testExceptionFromCoprocessorDuringPut()
+      throws IOException {
+    // When we try to write to TEST_TABLE, the buggy coprocessor will cause a
+    // NullPointerException in its prePut() hook. Because
+    // hbase.coprocessor.abortonerror is left at its default (false), the
+    // regionserver that hosts the region should remove the buggy coprocessor,
+    // survive, and report the failure to the client as a DoNotRetryIOException
+    // (which the client wraps in a RetriesExhaustedWithDetailsException).
+
+    byte[] TEST_TABLE = Bytes.toBytes("observed_table");
+    byte[] TEST_FAMILY = Bytes.toBytes("aaa");
+
+    HTable table = TEST_UTIL.createTable(TEST_TABLE, TEST_FAMILY);
+    TEST_UTIL.createMultiRegions(table, TEST_FAMILY);
+    // Note which regionserver should survive the buggy coprocessor's
+    // prePut().
+    HRegionServer regionServer =
+        TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE);
+
+    // Same logic as {@link TestMasterCoprocessorExceptionWithRemove},
+    // but the exception will be RetriesExhaustedWithDetailsException rather
+    // than DoNotRetryIOException. The latter exception is what the regionserver
+    // will have actually thrown, but the client will wrap this in a
+    // RetriesExhaustedWithDetailsException.
+    // We will verify that "DoNotRetryIOException" appears in the text of
+    // the exception's detailMessage.
+    boolean threwDNRE = false;
+    try {
+      final byte[] ROW = Bytes.toBytes("aaa");
+      Put put = new Put(ROW);
+      put.add(TEST_FAMILY, ROW, ROW);
+      table.put(put);
+    } catch (RetriesExhaustedWithDetailsException e) {
+      // Below, we could instead call:
+      // startsWith("Failed 1 action: DoNotRetryIOException.")
+      // But that might be too brittle if the client-side
+      // DoNotRetryIOException-handler changes its message.
+      assertTrue(e.getMessage().contains("DoNotRetryIOException"));
+      threwDNRE = true;
+    } finally {
+      assertTrue(threwDNRE);
+    }
+
+    // Give the regionserver a few seconds: the expected result is that it
+    // survives and does not abort.
+    for (int i = 0; i < 3; i++) {
+      assertFalse(regionServer.isAborted());
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        fail("InterruptedException while verifying that the regionserver " +
+            "did not abort.");
+      }
+    }
+  }
+}
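
Note: the *WithAbort tests above opt into the fail-fast behaviour introduced by this change through the "hbase.coprocessor.abortonerror" setting, which defaults to false (remove the faulty coprocessor and keep running). Below is a minimal sketch, not part of the patch, of how a test or tool might flip that switch programmatically; the class name AbortOnErrorExample is made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class AbortOnErrorExample {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Default is false: a misbehaving coprocessor is unloaded and the
    // master/regionserver keeps running.
    System.out.println("default abortonerror = " +
        conf.getBoolean("hbase.coprocessor.abortonerror", false));
    // Fail fast instead: the hosting server aborts and logs its set of
    // loaded coprocessors for crash analysis, as the *WithAbort tests expect.
    conf.setBoolean("hbase.coprocessor.abortonerror", true);
  }
}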
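
Note: a hedged sketch, also not part of the patch, of the client-side view that TestMasterCoprocessorExceptionWithRemove exercises: with abortonerror left at false, a master-side coprocessor failure surfaces to the caller of HBaseAdmin.createTable() as a DoNotRetryIOException (region-side failures arrive wrapped in a RetriesExhaustedWithDetailsException instead, as in TestRegionServerCoprocessorExceptionWithRemove). The table and family names here are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CoprocessorFailureClientExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);
    HTableDescriptor htd = new HTableDescriptor("example_table");
    htd.addFamily(new HColumnDescriptor("fam"));
    try {
      admin.createTable(htd);
    } catch (DoNotRetryIOException e) {
      // A coprocessor hook failed with an unexpected (non-IOException) error;
      // the master removed it and reported the failure here without retrying.
      System.err.println("createTable rejected by a coprocessor: " + e.getMessage());
    }
  }
}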