HBASE-13885 ZK watches leaks during snapshots.

This commit is contained in:
Lars Hofhansl 2015-06-16 12:21:49 -07:00
parent f2f19ddb5c
commit 7f6f199cf7
3 changed files with 13 additions and 2 deletions

View File

@ -275,7 +275,10 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs {
ForeignException ee = null;
try {
byte[] data = ZKUtil.getData(zkProc.getWatcher(), abortNode);
if (!ProtobufUtil.isPBMagicPrefix(data)) {
if (data == null || data.length == 0) {
// ignore
return;
} else if (!ProtobufUtil.isPBMagicPrefix(data)) {
LOG.warn("Got an error notification for op:" + abortNode
+ " but we can't read the information. Killing the procedure.");
// we got a remote exception, but we can't describe it

View File

@ -318,7 +318,10 @@ public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs {
// figure out the data we need to pass
ForeignException ee;
try {
if (!ProtobufUtil.isPBMagicPrefix(data)) {
if (data == null || data.length == 0) {
// ignore
return;
} else if (!ProtobufUtil.isPBMagicPrefix(data)) {
String msg = "Illegally formatted data in abort node for proc " + opName
+ ". Killing the procedure.";
LOG.error(msg);

View File

@ -283,6 +283,11 @@ public abstract class ZKProcedureUtil
// TODO This is potentially racy since not atomic. update when we support zk that has multi
LOG.info("Clearing all znodes for procedure " + procedureName + "including nodes "
+ acquiredZnode + " " + reachedZnode + " " + abortZnode);
// Make sure we trigger the watches on these nodes by creating them. (HBASE-13885)
ZKUtil.createAndFailSilent(watcher, getAcquiredBarrierNode(procedureName));
ZKUtil.createAndFailSilent(watcher, getAbortZNode(procedureName));
ZKUtil.deleteNodeRecursively(watcher, getAcquiredBarrierNode(procedureName));
ZKUtil.deleteNodeRecursively(watcher, getReachedBarrierNode(procedureName));
ZKUtil.deleteNodeRecursively(watcher, getAbortZNode(procedureName));