HBASE-14547 Add more debug/trace to zk-procedure

This commit is contained in:
Matteo Bertozzi 2015-10-02 15:37:42 -07:00
parent b9bfd6f7e3
commit 86b505c224
7 changed files with 49 additions and 13 deletions

View File

@ -213,6 +213,7 @@ public class Procedure implements Callable<Void>, ForeignExceptionListener {
sendGlobalBarrierReached(); sendGlobalBarrierReached();
// wait for all members to report barrier release // wait for all members to report barrier release
LOG.debug("Waiting for all members to 'release'");
waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released"); waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released");
// make sure we didn't get an error during in barrier execution and release // make sure we didn't get an error during in barrier execution and release

View File

@ -174,6 +174,7 @@ public class ProcedureCoordinator {
// kick off the procedure's execution in a separate thread // kick off the procedure's execution in a separate thread
try { try {
if (this.procedures.putIfAbsent(procName, proc) == null) { if (this.procedures.putIfAbsent(procName, proc) == null) {
LOG.debug("Submitting procedure " + procName);
this.pool.submit(proc); this.pool.submit(proc);
return true; return true;
} else { } else {
@ -200,11 +201,16 @@ public class ProcedureCoordinator {
void rpcConnectionFailure(final String message, final IOException cause) { void rpcConnectionFailure(final String message, final IOException cause) {
Collection<Procedure> toNotify = procedures.values(); Collection<Procedure> toNotify = procedures.values();
boolean isTraceEnabled = LOG.isTraceEnabled();
LOG.debug("received connection failure: " + message, cause);
for (Procedure proc : toNotify) { for (Procedure proc : toNotify) {
if (proc == null) { if (proc == null) {
continue; continue;
} }
// notify the elements, if they aren't null // notify the elements, if they aren't null
if (isTraceEnabled) {
LOG.trace("connection failure - notify procedure: " + proc.getName());
}
proc.receive(new ForeignException(proc.getName(), cause)); proc.receive(new ForeignException(proc.getName(), cause));
} }
} }
@ -215,6 +221,7 @@ public class ProcedureCoordinator {
* @param reason serialized information about the abort * @param reason serialized information about the abort
*/ */
public void abortProcedure(String procName, ForeignException reason) { public void abortProcedure(String procName, ForeignException reason) {
LOG.debug("abort procedure " + procName, reason);
// if we know about the Procedure, notify it // if we know about the Procedure, notify it
Procedure proc = procedures.get(procName); Procedure proc = procedures.get(procName);
if (proc == null) { if (proc == null) {
@ -272,7 +279,9 @@ public class ProcedureCoordinator {
LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'"); LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'");
return; return;
} }
if (LOG.isTraceEnabled()) {
LOG.trace("Member '"+ member +"' acquired procedure '"+ procName +"'");
}
proc.barrierAcquiredByMember(member); proc.barrierAcquiredByMember(member);
} }
@ -289,6 +298,9 @@ public class ProcedureCoordinator {
LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'"); LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'");
return; return;
} }
if (LOG.isTraceEnabled()) {
LOG.trace("Member '"+ member +"' released procedure '"+ procName +"'");
}
proc.barrierReleasedByMember(member, dataFromMember); proc.barrierReleasedByMember(member, dataFromMember);
} }

View File

@ -176,9 +176,12 @@ public class ProcedureMember implements Closeable {
public void receivedReachedGlobalBarrier(String procName) { public void receivedReachedGlobalBarrier(String procName) {
Subprocedure subproc = subprocs.get(procName); Subprocedure subproc = subprocs.get(procName);
if (subproc == null) { if (subproc == null) {
LOG.warn("Unexpected reached glabal barrier message for Sub-Procedure '" + procName + "'"); LOG.warn("Unexpected reached globa barrier message for Sub-Procedure '" + procName + "'");
return; return;
} }
if (LOG.isTraceEnabled()) {
LOG.trace("reached global barrier message for Sub-Procedure '" + procName + "'");
}
subproc.receiveReachedGlobalBarrier(); subproc.receiveReachedGlobalBarrier();
} }

View File

@ -84,8 +84,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
// If we get an abort node watch triggered here, we'll go complete creating the acquired // If we get an abort node watch triggered here, we'll go complete creating the acquired
// znode but then handle the acquire znode and bail out // znode but then handle the acquire znode and bail out
} catch (KeeperException e) { } catch (KeeperException e) {
LOG.error("Failed to watch abort", e); String msg = "Failed while watching abort node:" + abortNode;
throw new IOException("Failed while watching abort node:" + abortNode, e); LOG.error(msg, e);
throw new IOException(msg, e);
} }
// create the acquire barrier // create the acquire barrier
@ -104,7 +105,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
} }
} }
} catch (KeeperException e) { } catch (KeeperException e) {
throw new IOException("Failed while creating acquire node:" + acquire, e); String msg = "Failed while creating acquire node:" + acquire;
LOG.error(msg, e);
throw new IOException(msg, e);
} }
} }
@ -124,9 +127,10 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
// ProtobufUtil.isPBMagicPrefix will check null // ProtobufUtil.isPBMagicPrefix will check null
if (dataFromMember != null && dataFromMember.length > 0) { if (dataFromMember != null && dataFromMember.length > 0) {
if (!ProtobufUtil.isPBMagicPrefix(dataFromMember)) { if (!ProtobufUtil.isPBMagicPrefix(dataFromMember)) {
throw new IOException( String msg =
"Failed to get data from finished node or data is illegally formatted: " "Failed to get data from finished node or data is illegally formatted: " + znode;
+ znode); LOG.error(msg);
throw new IOException(msg);
} else { } else {
dataFromMember = Arrays.copyOfRange(dataFromMember, ProtobufUtil.lengthOfPBMagic(), dataFromMember = Arrays.copyOfRange(dataFromMember, ProtobufUtil.lengthOfPBMagic(),
dataFromMember.length); dataFromMember.length);
@ -138,9 +142,13 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
} }
} }
} catch (KeeperException e) { } catch (KeeperException e) {
throw new IOException("Failed while creating reached node:" + reachedNode, e); String msg = "Failed while creating reached node:" + reachedNode;
LOG.error(msg, e);
throw new IOException(msg, e);
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw new InterruptedIOException("Interrupted while creating reached node:" + reachedNode); String msg = "Interrupted while creating reached node:" + reachedNode;
LOG.error(msg, e);
throw new InterruptedIOException(msg);
} }
} }
@ -162,7 +170,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
// children trickling in // children trickling in
stillGettingNotifications = true; stillGettingNotifications = true;
} catch (KeeperException e) { } catch (KeeperException e) {
throw new IOException("Failed to complete reset procedure " + procName, e); String msg = "Failed to complete reset procedure " + procName;
LOG.error(msg, e);
throw new IOException(msg, e);
} }
} while (stillGettingNotifications); } while (stillGettingNotifications);
} }
@ -282,7 +292,8 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
LOG.warn("Got an error notification for op:" + abortNode LOG.warn("Got an error notification for op:" + abortNode
+ " but we can't read the information. Killing the procedure."); + " but we can't read the information. Killing the procedure.");
// we got a remote exception, but we can't describe it // we got a remote exception, but we can't describe it
ee = new ForeignException(coordName, "Data in abort node is illegally formatted. ignoring content."); ee = new ForeignException(coordName,
"Data in abort node is illegally formatted. ignoring content.");
} else { } else {
data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);

View File

@ -275,6 +275,10 @@ public abstract class ZKProcedureUtil
// harder to figure out how to keep an procedure going and the subject of HBASE-5487. // harder to figure out how to keep an procedure going and the subject of HBASE-5487.
ZKUtil.deleteChildrenRecursivelyMultiOrSequential(watcher, true, acquiredZnode, reachedZnode, ZKUtil.deleteChildrenRecursivelyMultiOrSequential(watcher, true, acquiredZnode, reachedZnode,
abortZnode); abortZnode);
if (LOG.isTraceEnabled()) {
logZKTree(this.baseZNode);
}
} }
public void clearZNodes(String procedureName) throws KeeperException { public void clearZNodes(String procedureName) throws KeeperException {
@ -291,5 +295,9 @@ public abstract class ZKProcedureUtil
ZKUtil.deleteNodeRecursivelyMultiOrSequential(watcher, true, acquiredBarrierNode, ZKUtil.deleteNodeRecursivelyMultiOrSequential(watcher, true, acquiredBarrierNode,
reachedBarrierNode, abortZNode); reachedBarrierNode, abortZNode);
if (LOG.isTraceEnabled()) {
logZKTree(this.baseZNode);
}
} }
} }

View File

@ -139,6 +139,7 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
try { try {
taskManager.waitForOutstandingTasks(); taskManager.waitForOutstandingTasks();
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.error("got interrupted exception for " + getMemberName());
throw new ForeignException(getMemberName(), e); throw new ForeignException(getMemberName(), e);
} }
} }

View File

@ -175,7 +175,7 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager {
// will hang and fail. // will hang and fail.
LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table " LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table "
+ snapshot.getTable()); + snapshot.getTable() + " type " + snapshot.getType());
ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName()); ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName());
Configuration conf = rss.getConfiguration(); Configuration conf = rss.getConfiguration();
long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY,