HBASE-7523 Snapshot attempt with the name of a previously taken snapshot fails sometimes

git-svn-id: https://svn.apache.org/repos/asf/hbase/branches/hbase-7290@1445837 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Hsieh 2013-02-13 18:47:13 +00:00
parent 232fa82451
commit 43ddbac484
5 changed files with 19 additions and 10 deletions

View File

@ -74,7 +74,7 @@ public class EnabledTableSnapshotHandler extends TakeSnapshotHandler {
Procedure proc = coordinator.startProcedure(this.monitor, this.snapshot.getName(),
this.snapshot.toByteArray(), Lists.newArrayList(regionServers));
if (proc == null) {
String msg = "Failed to submit distribute procedure for snapshot '"
String msg = "Failed to submit distributed procedure for snapshot '"
+ snapshot.getName() + "'";
LOG.error(msg);
throw new HBaseSnapshotException(msg);

View File

@ -310,7 +310,8 @@ public class Procedure implements Callable<Void>, ForeignExceptionListener {
}
if (removed) {
LOG.debug("Member: '" + member + "' released barrier for procedure'" + procName
+ "', counting down latch");
+ "', counting down latch. Waiting for " + releasedBarrierLatch.getCount()
+ " more");
} else {
LOG.warn("Member: '" + member + "' released barrier for procedure'" + procName
+ "', but we weren't waiting on it to release!");

View File

@ -113,9 +113,16 @@ public class ProcedureCoordinator {
// make sure we aren't already running a procedure of that name
synchronized (procedures) {
if (procedures.get(procName) != null) {
Procedure oldProc = procedures.get(procName);
if (oldProc != null) {
// procedures are always eventually completed on both successful and failed execution
if (oldProc.completedLatch.getCount() != 0) {
LOG.warn("Procedure " + procName + " currently running. Rejecting new request");
return false;
}
LOG.debug("Procedure " + procName + " was in running list but was completed. Accepting new attempt.");
procedures.remove(procName);
}
}
// kick off the procedure's execution in a separate thread
@ -128,6 +135,8 @@ public class ProcedureCoordinator {
}
return true;
} catch (RejectedExecutionException e) {
LOG.warn("Procedure " + procName + " rejected by execution pool. Propagating error and " +
"cancelling operation.", e);
// the thread pool is full and we can't run the procedure
proc.receive(new ForeignException(procName, e));

View File

@ -185,7 +185,6 @@ abstract public class Subprocedure implements Callable<Void> {
// make sure we didn't get an external exception
rethrowException();
LOG.debug("Subprocedure '" + barrierName + "' locally completed");
} catch (Exception e) {
String msg = null;
if (e instanceof InterruptedException) {

View File

@ -87,7 +87,7 @@ public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs {
String parent = ZKUtil.getParent(path);
// if it's the end barrier, the procedure can be completed
if (parent.equals(this.reachedZnode)) {
recievedReachedGlobalBarrier(path);
receivedReachedGlobalBarrier(path);
return;
} else if (parent.equals(this.abortZnode)) {
abort(path);
@ -104,10 +104,10 @@ public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs {
public void nodeChildrenChanged(String path) {
LOG.info("Received children changed event:" + path);
if (path.equals(this.acquiredZnode)) {
LOG.info("Recieved start event.");
LOG.info("Received start event.");
waitForNewProcedures();
} else if (path.equals(this.abortZnode)) {
LOG.info("Recieved abort event.");
LOG.info("Received abort event.");
watchForAbortedProcedures();
}
}
@ -134,7 +134,7 @@ public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs {
* Pass along the procedure global barrier notification to any listeners
* @param path full znode path that caused the notification
*/
private void recievedReachedGlobalBarrier(String path) {
private void receivedReachedGlobalBarrier(String path) {
LOG.debug("Recieved reached global barrier:" + path);
String procName = ZKUtil.getNodeName(path);
this.member.receivedReachedGlobalBarrier(procName);
@ -244,7 +244,7 @@ public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs {
String reachedBarrier = zkController.getReachedBarrierNode(procName);
LOG.debug("Watch for global barrier reached:" + reachedBarrier);
if (ZKUtil.watchAndCheckExists(zkController.getWatcher(), reachedBarrier)) {
recievedReachedGlobalBarrier(reachedBarrier);
receivedReachedGlobalBarrier(reachedBarrier);
}
} catch (KeeperException e) {
member.controllerConnectionFailure("Failed to acquire barrier for procedure: "