diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java index 198bcddb0af..0279a6038de 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java @@ -213,6 +213,7 @@ public class Procedure implements Callable, ForeignExceptionListener { sendGlobalBarrierReached(); // wait for all members to report barrier release + LOG.debug("Waiting for all members to 'release'"); waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released"); // make sure we didn't get an error during in barrier execution and release diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java index d55fc851f32..b7e0c04ee89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java @@ -174,6 +174,7 @@ public class ProcedureCoordinator { // kick off the procedure's execution in a separate thread try { if (this.procedures.putIfAbsent(procName, proc) == null) { + LOG.debug("Submitting procedure " + procName); this.pool.submit(proc); return true; } else { @@ -200,11 +201,16 @@ public class ProcedureCoordinator { void rpcConnectionFailure(final String message, final IOException cause) { Collection toNotify = procedures.values(); + boolean isTraceEnabled = LOG.isTraceEnabled(); + LOG.debug("received connection failure: " + message, cause); for (Procedure proc : toNotify) { if (proc == null) { continue; } // notify the elements, if they aren't null + if (isTraceEnabled) { + LOG.trace("connection failure - notify procedure: " + proc.getName()); + } proc.receive(new ForeignException(proc.getName(), cause)); } } @@ -215,6 +221,7 @@ public class ProcedureCoordinator { * @param reason serialized information about the abort */ public void abortProcedure(String procName, ForeignException reason) { + LOG.debug("abort procedure " + procName, reason); // if we know about the Procedure, notify it Procedure proc = procedures.get(procName); if (proc == null) { @@ -272,7 +279,9 @@ public class ProcedureCoordinator { LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'"); return; } - + if (LOG.isTraceEnabled()) { + LOG.trace("Member '"+ member +"' acquired procedure '"+ procName +"'"); + } proc.barrierAcquiredByMember(member); } @@ -289,6 +298,9 @@ public class ProcedureCoordinator { LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'"); return; } + if (LOG.isTraceEnabled()) { + LOG.trace("Member '"+ member +"' released procedure '"+ procName +"'"); + } proc.barrierReleasedByMember(member, dataFromMember); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java index 1f220221d12..485821e645c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java @@ -176,9 +176,12 @@ public class ProcedureMember implements Closeable { public void receivedReachedGlobalBarrier(String procName) { Subprocedure subproc = subprocs.get(procName); if (subproc == null) { - LOG.warn("Unexpected reached glabal barrier message for Sub-Procedure '" + procName + "'"); + LOG.warn("Unexpected reached globa barrier message for Sub-Procedure '" + procName + "'"); return; } + if (LOG.isTraceEnabled()) { + LOG.trace("reached global barrier message for Sub-Procedure '" + procName + "'"); + } subproc.receiveReachedGlobalBarrier(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java index fedd5956b22..085d642c9ec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java @@ -84,8 +84,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { // If we get an abort node watch triggered here, we'll go complete creating the acquired // znode but then handle the acquire znode and bail out } catch (KeeperException e) { - LOG.error("Failed to watch abort", e); - throw new IOException("Failed while watching abort node:" + abortNode, e); + String msg = "Failed while watching abort node:" + abortNode; + LOG.error(msg, e); + throw new IOException(msg, e); } // create the acquire barrier @@ -104,7 +105,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { } } } catch (KeeperException e) { - throw new IOException("Failed while creating acquire node:" + acquire, e); + String msg = "Failed while creating acquire node:" + acquire; + LOG.error(msg, e); + throw new IOException(msg, e); } } @@ -124,9 +127,10 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { // ProtobufUtil.isPBMagicPrefix will check null if (dataFromMember != null && dataFromMember.length > 0) { if (!ProtobufUtil.isPBMagicPrefix(dataFromMember)) { - throw new IOException( - "Failed to get data from finished node or data is illegally formatted: " - + znode); + String msg = + "Failed to get data from finished node or data is illegally formatted: " + znode; + LOG.error(msg); + throw new IOException(msg); } else { dataFromMember = Arrays.copyOfRange(dataFromMember, ProtobufUtil.lengthOfPBMagic(), dataFromMember.length); @@ -138,9 +142,13 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { } } } catch (KeeperException e) { - throw new IOException("Failed while creating reached node:" + reachedNode, e); + String msg = "Failed while creating reached node:" + reachedNode; + LOG.error(msg, e); + throw new IOException(msg, e); } catch (InterruptedException e) { - throw new InterruptedIOException("Interrupted while creating reached node:" + reachedNode); + String msg = "Interrupted while creating reached node:" + reachedNode; + LOG.error(msg, e); + throw new InterruptedIOException(msg); } } @@ -162,7 +170,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { // children trickling in stillGettingNotifications = true; } catch (KeeperException e) { - throw new IOException("Failed to complete reset procedure " + procName, e); + String msg = "Failed to complete reset procedure " + procName; + LOG.error(msg, e); + throw new IOException(msg, e); } } while (stillGettingNotifications); } @@ -282,7 +292,8 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { LOG.warn("Got an error notification for op:" + abortNode + " but we can't read the information. Killing the procedure."); // we got a remote exception, but we can't describe it - ee = new ForeignException(coordName, "Data in abort node is illegally formatted. ignoring content."); + ee = new ForeignException(coordName, + "Data in abort node is illegally formatted. ignoring content."); } else { data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java index 81712185ff1..56983ff23ff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java @@ -275,6 +275,10 @@ public abstract class ZKProcedureUtil // harder to figure out how to keep an procedure going and the subject of HBASE-5487. ZKUtil.deleteChildrenRecursivelyMultiOrSequential(watcher, true, acquiredZnode, reachedZnode, abortZnode); + + if (LOG.isTraceEnabled()) { + logZKTree(this.baseZNode); + } } public void clearZNodes(String procedureName) throws KeeperException { @@ -291,5 +295,9 @@ public abstract class ZKProcedureUtil ZKUtil.deleteNodeRecursivelyMultiOrSequential(watcher, true, acquiredBarrierNode, reachedBarrierNode, abortZNode); + + if (LOG.isTraceEnabled()) { + logZKTree(this.baseZNode); + } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java index 998c1fb3d7d..f0836018245 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java @@ -139,6 +139,7 @@ public class FlushSnapshotSubprocedure extends Subprocedure { try { taskManager.waitForOutstandingTasks(); } catch (InterruptedException e) { + LOG.error("got interrupted exception for " + getMemberName()); throw new ForeignException(getMemberName(), e); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java index d7f77fb4baa..e08cf0eafff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java @@ -175,7 +175,7 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager { // will hang and fail. LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table " - + snapshot.getTable()); + + snapshot.getTable() + " type " + snapshot.getType()); ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName()); Configuration conf = rss.getConfiguration(); long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY,