HBASE-14547 Add more debug/trace to zk-procedure
This commit is contained in:
parent
b9bfd6f7e3
commit
86b505c224
|
@ -213,6 +213,7 @@ public class Procedure implements Callable<Void>, ForeignExceptionListener {
|
||||||
sendGlobalBarrierReached();
|
sendGlobalBarrierReached();
|
||||||
|
|
||||||
// wait for all members to report barrier release
|
// wait for all members to report barrier release
|
||||||
|
LOG.debug("Waiting for all members to 'release'");
|
||||||
waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released");
|
waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released");
|
||||||
|
|
||||||
// make sure we didn't get an error during in barrier execution and release
|
// make sure we didn't get an error during in barrier execution and release
|
||||||
|
|
|
@ -174,6 +174,7 @@ public class ProcedureCoordinator {
|
||||||
// kick off the procedure's execution in a separate thread
|
// kick off the procedure's execution in a separate thread
|
||||||
try {
|
try {
|
||||||
if (this.procedures.putIfAbsent(procName, proc) == null) {
|
if (this.procedures.putIfAbsent(procName, proc) == null) {
|
||||||
|
LOG.debug("Submitting procedure " + procName);
|
||||||
this.pool.submit(proc);
|
this.pool.submit(proc);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -200,11 +201,16 @@ public class ProcedureCoordinator {
|
||||||
void rpcConnectionFailure(final String message, final IOException cause) {
|
void rpcConnectionFailure(final String message, final IOException cause) {
|
||||||
Collection<Procedure> toNotify = procedures.values();
|
Collection<Procedure> toNotify = procedures.values();
|
||||||
|
|
||||||
|
boolean isTraceEnabled = LOG.isTraceEnabled();
|
||||||
|
LOG.debug("received connection failure: " + message, cause);
|
||||||
for (Procedure proc : toNotify) {
|
for (Procedure proc : toNotify) {
|
||||||
if (proc == null) {
|
if (proc == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// notify the elements, if they aren't null
|
// notify the elements, if they aren't null
|
||||||
|
if (isTraceEnabled) {
|
||||||
|
LOG.trace("connection failure - notify procedure: " + proc.getName());
|
||||||
|
}
|
||||||
proc.receive(new ForeignException(proc.getName(), cause));
|
proc.receive(new ForeignException(proc.getName(), cause));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -215,6 +221,7 @@ public class ProcedureCoordinator {
|
||||||
* @param reason serialized information about the abort
|
* @param reason serialized information about the abort
|
||||||
*/
|
*/
|
||||||
public void abortProcedure(String procName, ForeignException reason) {
|
public void abortProcedure(String procName, ForeignException reason) {
|
||||||
|
LOG.debug("abort procedure " + procName, reason);
|
||||||
// if we know about the Procedure, notify it
|
// if we know about the Procedure, notify it
|
||||||
Procedure proc = procedures.get(procName);
|
Procedure proc = procedures.get(procName);
|
||||||
if (proc == null) {
|
if (proc == null) {
|
||||||
|
@ -272,7 +279,9 @@ public class ProcedureCoordinator {
|
||||||
LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'");
|
LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Member '"+ member +"' acquired procedure '"+ procName +"'");
|
||||||
|
}
|
||||||
proc.barrierAcquiredByMember(member);
|
proc.barrierAcquiredByMember(member);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -289,6 +298,9 @@ public class ProcedureCoordinator {
|
||||||
LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'");
|
LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Member '"+ member +"' released procedure '"+ procName +"'");
|
||||||
|
}
|
||||||
proc.barrierReleasedByMember(member, dataFromMember);
|
proc.barrierReleasedByMember(member, dataFromMember);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -176,9 +176,12 @@ public class ProcedureMember implements Closeable {
|
||||||
public void receivedReachedGlobalBarrier(String procName) {
|
public void receivedReachedGlobalBarrier(String procName) {
|
||||||
Subprocedure subproc = subprocs.get(procName);
|
Subprocedure subproc = subprocs.get(procName);
|
||||||
if (subproc == null) {
|
if (subproc == null) {
|
||||||
LOG.warn("Unexpected reached glabal barrier message for Sub-Procedure '" + procName + "'");
|
LOG.warn("Unexpected reached globa barrier message for Sub-Procedure '" + procName + "'");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("reached global barrier message for Sub-Procedure '" + procName + "'");
|
||||||
|
}
|
||||||
subproc.receiveReachedGlobalBarrier();
|
subproc.receiveReachedGlobalBarrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -84,8 +84,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
|
||||||
// If we get an abort node watch triggered here, we'll go complete creating the acquired
|
// If we get an abort node watch triggered here, we'll go complete creating the acquired
|
||||||
// znode but then handle the acquire znode and bail out
|
// znode but then handle the acquire znode and bail out
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.error("Failed to watch abort", e);
|
String msg = "Failed while watching abort node:" + abortNode;
|
||||||
throw new IOException("Failed while watching abort node:" + abortNode, e);
|
LOG.error(msg, e);
|
||||||
|
throw new IOException(msg, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the acquire barrier
|
// create the acquire barrier
|
||||||
|
@ -104,7 +105,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
throw new IOException("Failed while creating acquire node:" + acquire, e);
|
String msg = "Failed while creating acquire node:" + acquire;
|
||||||
|
LOG.error(msg, e);
|
||||||
|
throw new IOException(msg, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,9 +127,10 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
|
||||||
// ProtobufUtil.isPBMagicPrefix will check null
|
// ProtobufUtil.isPBMagicPrefix will check null
|
||||||
if (dataFromMember != null && dataFromMember.length > 0) {
|
if (dataFromMember != null && dataFromMember.length > 0) {
|
||||||
if (!ProtobufUtil.isPBMagicPrefix(dataFromMember)) {
|
if (!ProtobufUtil.isPBMagicPrefix(dataFromMember)) {
|
||||||
throw new IOException(
|
String msg =
|
||||||
"Failed to get data from finished node or data is illegally formatted: "
|
"Failed to get data from finished node or data is illegally formatted: " + znode;
|
||||||
+ znode);
|
LOG.error(msg);
|
||||||
|
throw new IOException(msg);
|
||||||
} else {
|
} else {
|
||||||
dataFromMember = Arrays.copyOfRange(dataFromMember, ProtobufUtil.lengthOfPBMagic(),
|
dataFromMember = Arrays.copyOfRange(dataFromMember, ProtobufUtil.lengthOfPBMagic(),
|
||||||
dataFromMember.length);
|
dataFromMember.length);
|
||||||
|
@ -138,9 +142,13 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
throw new IOException("Failed while creating reached node:" + reachedNode, e);
|
String msg = "Failed while creating reached node:" + reachedNode;
|
||||||
|
LOG.error(msg, e);
|
||||||
|
throw new IOException(msg, e);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
throw new InterruptedIOException("Interrupted while creating reached node:" + reachedNode);
|
String msg = "Interrupted while creating reached node:" + reachedNode;
|
||||||
|
LOG.error(msg, e);
|
||||||
|
throw new InterruptedIOException(msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,7 +170,9 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
|
||||||
// children trickling in
|
// children trickling in
|
||||||
stillGettingNotifications = true;
|
stillGettingNotifications = true;
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
throw new IOException("Failed to complete reset procedure " + procName, e);
|
String msg = "Failed to complete reset procedure " + procName;
|
||||||
|
LOG.error(msg, e);
|
||||||
|
throw new IOException(msg, e);
|
||||||
}
|
}
|
||||||
} while (stillGettingNotifications);
|
} while (stillGettingNotifications);
|
||||||
}
|
}
|
||||||
|
@ -282,7 +292,8 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
|
||||||
LOG.warn("Got an error notification for op:" + abortNode
|
LOG.warn("Got an error notification for op:" + abortNode
|
||||||
+ " but we can't read the information. Killing the procedure.");
|
+ " but we can't read the information. Killing the procedure.");
|
||||||
// we got a remote exception, but we can't describe it
|
// we got a remote exception, but we can't describe it
|
||||||
ee = new ForeignException(coordName, "Data in abort node is illegally formatted. ignoring content.");
|
ee = new ForeignException(coordName,
|
||||||
|
"Data in abort node is illegally formatted. ignoring content.");
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);
|
data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);
|
||||||
|
|
|
@ -275,6 +275,10 @@ public abstract class ZKProcedureUtil
|
||||||
// harder to figure out how to keep an procedure going and the subject of HBASE-5487.
|
// harder to figure out how to keep an procedure going and the subject of HBASE-5487.
|
||||||
ZKUtil.deleteChildrenRecursivelyMultiOrSequential(watcher, true, acquiredZnode, reachedZnode,
|
ZKUtil.deleteChildrenRecursivelyMultiOrSequential(watcher, true, acquiredZnode, reachedZnode,
|
||||||
abortZnode);
|
abortZnode);
|
||||||
|
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
logZKTree(this.baseZNode);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clearZNodes(String procedureName) throws KeeperException {
|
public void clearZNodes(String procedureName) throws KeeperException {
|
||||||
|
@ -291,5 +295,9 @@ public abstract class ZKProcedureUtil
|
||||||
|
|
||||||
ZKUtil.deleteNodeRecursivelyMultiOrSequential(watcher, true, acquiredBarrierNode,
|
ZKUtil.deleteNodeRecursivelyMultiOrSequential(watcher, true, acquiredBarrierNode,
|
||||||
reachedBarrierNode, abortZNode);
|
reachedBarrierNode, abortZNode);
|
||||||
|
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
logZKTree(this.baseZNode);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,6 +139,7 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
|
||||||
try {
|
try {
|
||||||
taskManager.waitForOutstandingTasks();
|
taskManager.waitForOutstandingTasks();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
LOG.error("got interrupted exception for " + getMemberName());
|
||||||
throw new ForeignException(getMemberName(), e);
|
throw new ForeignException(getMemberName(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -175,7 +175,7 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager {
|
||||||
// will hang and fail.
|
// will hang and fail.
|
||||||
|
|
||||||
LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table "
|
LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table "
|
||||||
+ snapshot.getTable());
|
+ snapshot.getTable() + " type " + snapshot.getType());
|
||||||
ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName());
|
ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(snapshot.getName());
|
||||||
Configuration conf = rss.getConfiguration();
|
Configuration conf = rss.getConfiguration();
|
||||||
long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY,
|
long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY,
|
||||||
|
|
Loading…
Reference in New Issue