From f6ccae350203ec35f3b7acd75431a5abd7e9f5a8 Mon Sep 17 00:00:00 2001 From: Matteo Bertozzi Date: Thu, 1 Sep 2016 21:06:32 -0700 Subject: [PATCH] HBASE-16507 Procedure v2 - Force DDL operation to always roll forward --- .../procedure2/StateMachineProcedure.java | 31 +- .../procedure/AddColumnFamilyProcedure.java | 69 ++-- .../procedure/CloneSnapshotProcedure.java | 73 ++-- .../procedure/CreateNamespaceProcedure.java | 108 +----- .../procedure/CreateTableProcedure.java | 75 ++-- .../DeleteColumnFamilyProcedure.java | 90 ++--- .../procedure/DeleteNamespaceProcedure.java | 69 ++-- .../procedure/DeleteTableProcedure.java | 23 +- .../procedure/DisableTableProcedure.java | 51 +-- .../procedure/EnableTableProcedure.java | 75 ++-- .../ModifyColumnFamilyProcedure.java | 66 ++-- .../procedure/ModifyNamespaceProcedure.java | 83 ++--- .../procedure/ModifyTableProcedure.java | 91 +---- .../procedure/RestoreSnapshotProcedure.java | 38 +- .../procedure/TruncateTableProcedure.java | 17 +- .../MasterProcedureTestingUtility.java | 326 ++++++++---------- .../TestAddColumnFamilyProcedure.java | 11 +- .../procedure/TestCloneSnapshotProcedure.java | 21 +- .../TestCreateNamespaceProcedure.java | 14 +- .../procedure/TestCreateTableProcedure.java | 68 +--- .../TestDeleteColumnFamilyProcedure.java | 50 +-- .../TestDeleteNamespaceProcedure.java | 15 +- .../procedure/TestDeleteTableProcedure.java | 3 +- .../procedure/TestDisableTableProcedure.java | 6 +- .../TestDispatchMergingRegionsProcedure.java | 14 +- .../procedure/TestEnableTableProcedure.java | 14 +- .../TestModifyColumnFamilyProcedure.java | 18 +- .../TestModifyNamespaceProcedure.java | 15 +- .../procedure/TestModifyTableProcedure.java | 66 +--- .../master/procedure/TestProcedureAdmin.java | 7 +- .../TestRestoreSnapshotProcedure.java | 6 +- .../procedure/TestTruncateTableProcedure.java | 5 +- 32 files changed, 533 insertions(+), 1085 deletions(-) diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java index a403193c3de..7eb6465ed44 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java @@ -21,9 +21,12 @@ package org.apache.hadoop.hbase.procedure2; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.ArrayList; import java.util.Arrays; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos.StateMachineProcedureData; @@ -43,6 +46,10 @@ import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos.StateMachinePr @InterfaceStability.Evolving public abstract class StateMachineProcedure extends Procedure { + private static final Log LOG = LogFactory.getLog(StateMachineProcedure.class); + + private final AtomicBoolean aborted = new AtomicBoolean(false); + private Flow stateFlow = Flow.HAS_MORE_STATE; private int stateCount = 0; private int[] states = null; @@ -96,6 +103,9 @@ public abstract class StateMachineProcedure * @param state the state enum object */ protected void setNextState(final TState state) { + if (aborted.get() && isRollbackSupported(getCurrentState())) { + setAbortFailure(getClass().getSimpleName(), "abort requested"); + } setNextState(getStateId(state)); } @@ -129,7 +139,7 @@ public abstract class StateMachineProcedure throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { updateTimestamp(); try { - if (!hasMoreState()) return null; + if (!hasMoreState() || isFailed()) return null; TState state = getCurrentState(); if (stateCount == 0) { @@ -162,6 +172,25 @@ public abstract class StateMachineProcedure } } + @Override + protected boolean abort(final TEnvironment env) { + final TState state = getCurrentState(); + if (isRollbackSupported(state)) { + LOG.debug("abort requested for " + getClass().getSimpleName() + " state=" + state); + aborted.set(true); + return true; + } + return false; + } + + /** + * Used by the default implementation of abort() to know if the current state can be aborted + * and rollback can be triggered. + */ + protected boolean isRollbackSupported(final TState state) { + return false; + } + @Override protected boolean isYieldAfterExecutionStep(final TEnvironment env) { return isYieldBeforeExecuteFromState(env, getCurrentState()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java index 7c0b6917404..237db1af6fe 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -49,8 +48,6 @@ public class AddColumnFamilyProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(AddColumnFamilyProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private TableName tableName; private HTableDescriptor unmodifiedHTableDescriptor; private HColumnDescriptor cfDescriptor; @@ -118,10 +115,12 @@ public class AddColumnFamilyProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Error trying to add the column family" + getColumnFamilyName() + " to the table " - + tableName + " (in state=" + state + ")", e); - - setFailure("master-add-columnfamily", e); + if (isRollbackSupported(state)) { + setFailure("master-add-columnfamily", e); + } else { + LOG.warn("Retriable error trying to add the column family " + getColumnFamilyName() + + " to the table " + tableName + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -129,33 +128,26 @@ public class AddColumnFamilyProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final AddColumnFamilyState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == AddColumnFamilyState.ADD_COLUMN_FAMILY_PREPARE || + state == AddColumnFamilyState.ADD_COLUMN_FAMILY_PRE_OPERATION) { + // nothing to rollback, pre is just table-state checks. + // We can fail if the table does not exist or is not disabled. + // TODO: coprocessor rollback semantic is still undefined. + return; } - try { - switch (state) { - case ADD_COLUMN_FAMILY_REOPEN_ALL_REGIONS: - break; // Nothing to undo. - case ADD_COLUMN_FAMILY_POST_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo? - break; - case ADD_COLUMN_FAMILY_UPDATE_TABLE_DESCRIPTOR: - restoreTableDescriptor(env); - break; - case ADD_COLUMN_FAMILY_PRE_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo? - break; + + // The procedure doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final AddColumnFamilyState state) { + switch (state) { case ADD_COLUMN_FAMILY_PREPARE: - break; // nothing to do + case ADD_COLUMN_FAMILY_PRE_OPERATION: + return true; default: - throw new UnsupportedOperationException(this + " unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step " + state + " for adding the column family" - + getColumnFamilyName() + " to the table " + tableName, e); - throw e; + return false; } } @@ -179,21 +171,6 @@ public class AddColumnFamilyProcedure return AddColumnFamilyState.ADD_COLUMN_FAMILY_PREPARE; } - @Override - protected void setNextState(AddColumnFamilyState state) { - if (aborted.get()) { - setAbortFailure("add-columnfamily", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (env.waitInitialized(this)) return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java index 1212072c303..331faed0325 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CloneSnapshotProcedure.java @@ -27,7 +27,6 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -72,8 +71,6 @@ public class CloneSnapshotProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(CloneSnapshotProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private User user; private HTableDescriptor hTableDescriptor; private SnapshotDescription snapshot; @@ -162,8 +159,12 @@ public class CloneSnapshotProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (IOException e) { - LOG.error("Error trying to create table=" + getTableName() + " state=" + state, e); - setFailure("master-create-table", e); + if (isRollbackSupported(state)) { + setFailure("master-clone-snapshot", e); + } else { + LOG.warn("Retriable error trying to clone snapshot=" + snapshot.getName() + + " to table=" + getTableName() + " state=" + state, e); + } } return Flow.HAS_MORE_STATE; } @@ -171,38 +172,23 @@ public class CloneSnapshotProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final CloneSnapshotState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == CloneSnapshotState.CLONE_SNAPSHOT_PRE_OPERATION) { + DeleteTableProcedure.deleteTableStates(env, getTableName()); + // TODO-MAYBE: call the deleteTable coprocessor event? + return; } - try { - switch (state) { - case CLONE_SNAPSHOT_POST_OPERATION: - // TODO-MAYBE: call the deleteTable coprocessor event? - break; - case CLONE_SNAPSHOT_UPDATE_DESC_CACHE: - DeleteTableProcedure.deleteTableDescriptorCache(env, getTableName()); - break; - case CLONE_SNAPSHOT_ASSIGN_REGIONS: - DeleteTableProcedure.deleteAssignmentState(env, getTableName()); - break; - case CLONE_SNAPSHOT_ADD_TO_META: - DeleteTableProcedure.deleteFromMeta(env, getTableName(), newRegions); - break; - case CLONE_SNAPSHOT_WRITE_FS_LAYOUT: - DeleteTableProcedure.deleteFromFs(env, getTableName(), newRegions, false); - break; - case CLONE_SNAPSHOT_PRE_OPERATION: - DeleteTableProcedure.deleteTableStates(env, getTableName()); - // TODO-MAYBE: call the deleteTable coprocessor event? - break; - default: - throw new UnsupportedOperationException("unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step=" + state + " table=" + getTableName(), e); - throw e; + + // The procedure doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final CloneSnapshotState state) { + switch (state) { + case CLONE_SNAPSHOT_PRE_OPERATION: + return true; + default: + return false; } } @@ -221,15 +207,6 @@ public class CloneSnapshotProcedure return CloneSnapshotState.CLONE_SNAPSHOT_PRE_OPERATION; } - @Override - protected void setNextState(final CloneSnapshotState state) { - if (aborted.get()) { - setAbortFailure("clone-snapshot", "abort requested"); - } else { - super.setNextState(state); - } - } - @Override public TableName getTableName() { return hTableDescriptor.getTableName(); @@ -240,12 +217,6 @@ public class CloneSnapshotProcedure return TableOperationType.CREATE; // Clone is creating a table } - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override public void toStringClassDetails(StringBuilder sb) { sb.append(getClass().getSimpleName()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateNamespaceProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateNamespaceProcedure.java index 7c8bf9cc8be..19375281f70 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateNamespaceProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateNamespaceProcedure.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.master.procedure; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -46,8 +45,6 @@ public class CreateNamespaceProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(CreateNamespaceProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private NamespaceDescriptor nsDescriptor; private Boolean traceEnabled; @@ -94,10 +91,12 @@ public class CreateNamespaceProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Error trying to create the namespace" + nsDescriptor.getName() - + " (in state=" + state + ")", e); - - setFailure("master-create-namespace", e); + if (isRollbackSupported(state)) { + setFailure("master-create-namespace", e); + } else { + LOG.warn("Retriable error trying to create namespace=" + nsDescriptor.getName() + + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -105,34 +104,22 @@ public class CreateNamespaceProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final CreateNamespaceState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == CreateNamespaceState.CREATE_NAMESPACE_PREPARE) { + // nothing to rollback, pre-create is just state checks. + // TODO: coprocessor rollback semantic is still undefined. + return; } - try { - switch (state) { - case CREATE_NAMESPACE_SET_NAMESPACE_QUOTA: - rollbackSetNamespaceQuota(env); - break; - case CREATE_NAMESPACE_UPDATE_ZK: - rollbackZKNamespaceManagerChange(env); - break; - case CREATE_NAMESPACE_INSERT_INTO_NS_TABLE: - rollbackInsertIntoNSTable(env); - break; - case CREATE_NAMESPACE_CREATE_DIRECTORY: - rollbackCreateDirectory(env); - break; + // The procedure doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final CreateNamespaceState state) { + switch (state) { case CREATE_NAMESPACE_PREPARE: - break; // nothing to do + return true; default: - throw new UnsupportedOperationException(this + " unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step " + state + " for creating the namespace " - + nsDescriptor.getName(), e); - throw e; + return false; } } @@ -151,21 +138,6 @@ public class CreateNamespaceProcedure return CreateNamespaceState.CREATE_NAMESPACE_PREPARE; } - @Override - protected void setNextState(CreateNamespaceState state) { - if (aborted.get()) { - setAbortFailure("create-namespace", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override public void serializeStateData(final OutputStream stream) throws IOException { super.serializeStateData(stream); @@ -256,20 +228,6 @@ public class CreateNamespaceProcedure FSUtils.getNamespaceDir(mfs.getRootDir(), nsDescriptor.getName())); } - /** - * undo create directory - * @param env MasterProcedureEnv - * @throws IOException - */ - private void rollbackCreateDirectory(final MasterProcedureEnv env) throws IOException { - try { - DeleteNamespaceProcedure.deleteDirectory(env, nsDescriptor.getName()); - } catch (Exception e) { - // Ignore exception - LOG.debug("Rollback of createDirectory throws exception: " + e); - } - } - /** * Insert the row into ns table * @param env MasterProcedureEnv @@ -282,20 +240,6 @@ public class CreateNamespaceProcedure getTableNamespaceManager(env).insertIntoNSTable(nsDescriptor); } - /** - * Undo the insert. - * @param env MasterProcedureEnv - * @throws IOException - */ - private void rollbackInsertIntoNSTable(final MasterProcedureEnv env) throws IOException { - try { - DeleteNamespaceProcedure.deleteFromNSTable(env, nsDescriptor.getName()); - } catch (Exception e) { - // Ignore exception - LOG.debug("Rollback of insertIntoNSTable throws exception: " + e); - } - } - /** * Update ZooKeeper. * @param env MasterProcedureEnv @@ -308,20 +252,6 @@ public class CreateNamespaceProcedure getTableNamespaceManager(env).updateZKNamespaceManager(nsDescriptor); } - /** - * rollback ZooKeeper update. - * @param env MasterProcedureEnv - * @throws IOException - */ - private void rollbackZKNamespaceManagerChange(final MasterProcedureEnv env) throws IOException { - try { - DeleteNamespaceProcedure.removeFromZKNamespaceManager(env, nsDescriptor.getName()); - } catch (Exception e) { - // Ignore exception - LOG.debug("Rollback of updateZKNamespaceManager throws exception: " + e); - } - } - /** * Set quota for the namespace * @param env MasterProcedureEnv diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java index ae8d2fb1048..f77c2490c85 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -61,8 +60,6 @@ public class CreateTableProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(CreateTableProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - // used for compatibility with old clients private final ProcedurePrepareLatch syncLatch; @@ -137,8 +134,11 @@ public class CreateTableProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (IOException e) { - LOG.error("Error trying to create table=" + getTableName() + " state=" + state, e); - setFailure("master-create-table", e); + if (isRollbackSupported(state)) { + setFailure("master-create-table", e); + } else { + LOG.warn("Retriable error trying to create table=" + getTableName() + " state=" + state, e); + } } return Flow.HAS_MORE_STATE; } @@ -146,38 +146,26 @@ public class CreateTableProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final CreateTableState state) throws IOException { - if (LOG.isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == CreateTableState.CREATE_TABLE_PRE_OPERATION) { + // nothing to rollback, pre-create is just table-state checks. + // We can fail if the table does exist or the descriptor is malformed. + // TODO: coprocessor rollback semantic is still undefined. + DeleteTableProcedure.deleteTableStates(env, getTableName()); + ProcedurePrepareLatch.releaseLatch(syncLatch, this); + return; } - try { - switch (state) { - case CREATE_TABLE_POST_OPERATION: - break; - case CREATE_TABLE_UPDATE_DESC_CACHE: - DeleteTableProcedure.deleteTableDescriptorCache(env, getTableName()); - break; - case CREATE_TABLE_ASSIGN_REGIONS: - DeleteTableProcedure.deleteAssignmentState(env, getTableName()); - break; - case CREATE_TABLE_ADD_TO_META: - DeleteTableProcedure.deleteFromMeta(env, getTableName(), newRegions); - break; - case CREATE_TABLE_WRITE_FS_LAYOUT: - DeleteTableProcedure.deleteFromFs(env, getTableName(), newRegions, false); - break; - case CREATE_TABLE_PRE_OPERATION: - DeleteTableProcedure.deleteTableStates(env, getTableName()); - // TODO-MAYBE: call the deleteTable coprocessor event? - ProcedurePrepareLatch.releaseLatch(syncLatch, this); - break; - default: - throw new UnsupportedOperationException("unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step=" + state + " table=" + getTableName(), e); - throw e; + + // The procedure doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final CreateTableState state) { + switch (state) { + case CREATE_TABLE_PRE_OPERATION: + return true; + default: + return false; } } @@ -196,15 +184,6 @@ public class CreateTableProcedure return CreateTableState.CREATE_TABLE_PRE_OPERATION; } - @Override - protected void setNextState(final CreateTableState state) { - if (aborted.get()) { - setAbortFailure("create-table", "abort requested"); - } else { - super.setNextState(state); - } - } - @Override public TableName getTableName() { return hTableDescriptor.getTableName(); @@ -215,12 +194,6 @@ public class CreateTableProcedure return TableOperationType.CREATE; } - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override public void toStringClassDetails(StringBuilder sb) { sb.append(getClass().getSimpleName()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java index 9df9c0dc0ab..00a1c70e112 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java @@ -23,7 +23,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.security.PrivilegedExceptionAction; import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -51,8 +50,6 @@ public class DeleteColumnFamilyProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(DeleteColumnFamilyProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private HTableDescriptor unmodifiedHTableDescriptor; private TableName tableName; private byte [] familyName; @@ -125,14 +122,11 @@ public class DeleteColumnFamilyProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - if (!isRollbackSupported(state)) { - // We reach a state that cannot be rolled back. We just need to keep retry. - LOG.warn("Error trying to delete the column family " + getColumnFamilyName() - + " from table " + tableName + "(in state=" + state + ")", e); + if (isRollbackSupported(state)) { + setFailure("master-delete-columnfamily", e); } else { - LOG.error("Error trying to delete the column family " + getColumnFamilyName() - + " from table " + tableName + "(in state=" + state + ")", e); - setFailure("master-delete-column-family", e); + LOG.warn("Retriable error trying to delete the column family " + getColumnFamilyName() + + " from table " + tableName + " (in state=" + state + ")", e); } } return Flow.HAS_MORE_STATE; @@ -141,39 +135,26 @@ public class DeleteColumnFamilyProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final DeleteColumnFamilyState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == DeleteColumnFamilyState.DELETE_COLUMN_FAMILY_PREPARE || + state == DeleteColumnFamilyState.DELETE_COLUMN_FAMILY_PRE_OPERATION) { + // nothing to rollback, pre is just table-state checks. + // We can fail if the table does not exist or is not disabled. + // TODO: coprocessor rollback semantic is still undefined. + return; } - try { - switch (state) { - case DELETE_COLUMN_FAMILY_REOPEN_ALL_REGIONS: - break; // Nothing to undo. - case DELETE_COLUMN_FAMILY_POST_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo? - break; - case DELETE_COLUMN_FAMILY_DELETE_FS_LAYOUT: - // Once we reach to this state - we could NOT rollback - as it is tricky to undelete - // the deleted files. We are not suppose to reach here, throw exception so that we know - // there is a code bug to investigate. - throw new UnsupportedOperationException(this + " rollback of state=" + state - + " is unsupported."); - case DELETE_COLUMN_FAMILY_UPDATE_TABLE_DESCRIPTOR: - restoreTableDescriptor(env); - break; + + // The procedure doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final DeleteColumnFamilyState state) { + switch (state) { case DELETE_COLUMN_FAMILY_PRE_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo? - break; case DELETE_COLUMN_FAMILY_PREPARE: - break; // nothing to do + return true; default: - throw new UnsupportedOperationException(this + " unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step " + state + " for deleting the column family" - + getColumnFamilyName() + " to the table " + tableName, e); - throw e; + return false; } } @@ -197,21 +178,6 @@ public class DeleteColumnFamilyProcedure return DeleteColumnFamilyState.DELETE_COLUMN_FAMILY_PREPARE; } - @Override - protected void setNextState(DeleteColumnFamilyState state) { - if (aborted.get() && isRollbackSupported(state)) { - setAbortFailure("delete-columnfamily", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (env.waitInitialized(this)) return false; @@ -429,22 +395,6 @@ public class DeleteColumnFamilyProcedure } } - /* - * Check whether we are in the state that can be rollback - */ - private boolean isRollbackSupported(final DeleteColumnFamilyState state) { - switch (state) { - case DELETE_COLUMN_FAMILY_REOPEN_ALL_REGIONS: - case DELETE_COLUMN_FAMILY_POST_OPERATION: - case DELETE_COLUMN_FAMILY_DELETE_FS_LAYOUT: - // It is not safe to rollback if we reach to these states. - return false; - default: - break; - } - return true; - } - private List getRegionInfoList(final MasterProcedureEnv env) throws IOException { if (regionInfoList == null) { regionInfoList = ProcedureSyncWait.getRegionsFromMeta(env, getTableName()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteNamespaceProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteNamespaceProcedure.java index 3f4de12d3e4..8333919dd3e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteNamespaceProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteNamespaceProcedure.java @@ -22,7 +22,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -52,8 +51,6 @@ public class DeleteNamespaceProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(DeleteNamespaceProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private NamespaceDescriptor nsDescriptor; private String namespaceName; private Boolean traceEnabled; @@ -76,6 +73,7 @@ public class DeleteNamespaceProcedure if (isTraceEnabled()) { LOG.trace(this + " execute state=" + state); } + LOG.info(this + " execute state=" + state); try { switch (state) { @@ -102,10 +100,12 @@ public class DeleteNamespaceProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Error trying to delete the namespace " + namespaceName - + " (in state=" + state + ")", e); - - setFailure("master-delete-namespace", e); + if (isRollbackSupported(state)) { + setFailure("master-delete-namespace", e); + } else { + LOG.warn("Retriable error trying to delete namespace " + namespaceName + + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -113,34 +113,24 @@ public class DeleteNamespaceProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final DeleteNamespaceState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == DeleteNamespaceState.DELETE_NAMESPACE_PREPARE) { + // nothing to rollback, pre is just table-state checks. + // We can fail if the table does not exist or is not disabled. + // TODO: coprocessor rollback semantic is still undefined. + return; } - try { - switch (state) { - case DELETE_NAMESPACE_REMOVE_NAMESPACE_QUOTA: - rollbacRemoveNamespaceQuota(env); - break; - case DELETE_NAMESPACE_DELETE_DIRECTORIES: - rollbackDeleteDirectory(env); - break; - case DELETE_NAMESPACE_REMOVE_FROM_ZK: - undoRemoveFromZKNamespaceManager(env); - break; - case DELETE_NAMESPACE_DELETE_FROM_NS_TABLE: - undoDeleteFromNSTable(env); - break; + + // The procedure doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final DeleteNamespaceState state) { + switch (state) { case DELETE_NAMESPACE_PREPARE: - break; // nothing to do + return true; default: - throw new UnsupportedOperationException(this + " unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step " + state + " for deleting the namespace " - + namespaceName, e); - throw e; + return false; } } @@ -159,21 +149,6 @@ public class DeleteNamespaceProcedure return DeleteNamespaceState.DELETE_NAMESPACE_PREPARE; } - @Override - protected void setNextState(DeleteNamespaceState state) { - if (aborted.get()) { - setAbortFailure("delete-namespace", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override public void serializeStateData(final OutputStream stream) throws IOException { super.serializeStateData(stream); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java index 64bdccc9280..d3b2aa54b8c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java @@ -148,7 +148,11 @@ public class DeleteTableProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (HBaseException|IOException e) { - LOG.warn("Retriable error trying to delete table=" + getTableName() + " state=" + state, e); + if (isRollbackSupported(state)) { + setFailure("master-delete-table", e); + } else { + LOG.warn("Retriable error trying to delete table=" + getTableName() + " state=" + state, e); + } } return Flow.HAS_MORE_STATE; } @@ -158,6 +162,7 @@ public class DeleteTableProcedure if (state == DeleteTableState.DELETE_TABLE_PRE_OPERATION) { // nothing to rollback, pre-delete is just table-state checks. // We can fail if the table does not exist or is not disabled. + // TODO: coprocessor rollback semantic is still undefined. ProcedurePrepareLatch.releaseLatch(syncLatch, this); return; } @@ -166,6 +171,16 @@ public class DeleteTableProcedure throw new UnsupportedOperationException("unhandled state=" + state); } + @Override + protected boolean isRollbackSupported(final DeleteTableState state) { + switch (state) { + case DELETE_TABLE_PRE_OPERATION: + return true; + default: + return false; + } + } + @Override protected DeleteTableState getState(final int stateId) { return DeleteTableState.valueOf(stateId); @@ -191,12 +206,6 @@ public class DeleteTableProcedure return TableOperationType.DELETE; } - @Override - public boolean abort(final MasterProcedureEnv env) { - // TODO: We may be able to abort if the procedure is not started yet. - return false; - } - @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (env.waitInitialized(this)) return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java index e2d7d163364..c13f3225685 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java @@ -24,7 +24,6 @@ import java.io.OutputStream; import java.security.PrivilegedExceptionAction; import java.util.List; import java.util.concurrent.ExecutorService; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -57,8 +56,6 @@ public class DisableTableProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(DisableTableProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - // This is for back compatible with 1.0 asynchronized operations. private final ProcedurePrepareLatch syncLatch; @@ -157,7 +154,12 @@ public class DisableTableProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Retriable error trying to disable table=" + tableName + " state=" + state, e); + if (isRollbackSupported(state)) { + setFailure("master-disable-table", e); + } else { + LOG.warn("Retriable error trying to disable table=" + tableName + + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -165,17 +167,33 @@ public class DisableTableProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final DisableTableState state) throws IOException { - if (state == DisableTableState.DISABLE_TABLE_PREPARE) { - // nothing to rollback, prepare-disable is just table-state checks. - // We can fail if the table does not exist or is not disabled. - ProcedurePrepareLatch.releaseLatch(syncLatch, this); - return; + // nothing to rollback, prepare-disable is just table-state checks. + // We can fail if the table does not exist or is not disabled. + switch (state) { + case DISABLE_TABLE_PRE_OPERATION: + return; + case DISABLE_TABLE_PREPARE: + ProcedurePrepareLatch.releaseLatch(syncLatch, this); + return; + default: + break; } // The delete doesn't have a rollback. The execution will succeed, at some point. throw new UnsupportedOperationException("unhandled state=" + state); } + @Override + protected boolean isRollbackSupported(final DisableTableState state) { + switch (state) { + case DISABLE_TABLE_PREPARE: + case DISABLE_TABLE_PRE_OPERATION: + return true; + default: + return false; + } + } + @Override protected DisableTableState getState(final int stateId) { return DisableTableState.valueOf(stateId); @@ -191,21 +209,6 @@ public class DisableTableProcedure return DisableTableState.DISABLE_TABLE_PREPARE; } - @Override - protected void setNextState(final DisableTableState state) { - if (aborted.get()) { - setAbortFailure("disable-table", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (env.waitInitialized(this)) return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java index 1817a16dfac..55a71036f2c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java @@ -26,7 +26,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -60,8 +59,6 @@ public class EnableTableProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(EnableTableProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - // This is for back compatible with 1.0 asynchronized operations. private final ProcedurePrepareLatch syncLatch; @@ -150,8 +147,12 @@ public class EnableTableProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (IOException e) { - LOG.error("Error trying to enable table=" + tableName + " state=" + state, e); - setFailure("master-enable-table", e); + if (isRollbackSupported(state)) { + setFailure("master-enable-table", e); + } else { + LOG.warn("Retriable error trying to enable table=" + tableName + + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -159,39 +160,30 @@ public class EnableTableProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final EnableTableState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); - } - try { - switch (state) { - case ENABLE_TABLE_POST_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.preDisable())? - break; - case ENABLE_TABLE_SET_ENABLED_TABLE_STATE: - DisableTableProcedure.setTableStateToDisabling(env, tableName); - break; - case ENABLE_TABLE_MARK_REGIONS_ONLINE: - markRegionsOfflineDuringRecovery(env); - break; - case ENABLE_TABLE_SET_ENABLING_TABLE_STATE: - DisableTableProcedure.setTableStateToDisabled(env, tableName); - break; + // nothing to rollback, prepare-disable is just table-state checks. + // We can fail if the table does not exist or is not disabled. + switch (state) { case ENABLE_TABLE_PRE_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.postDisable())? - break; + return; case ENABLE_TABLE_PREPARE: - // Nothing to undo for this state. - // We do need to count down the latch count so that we don't stuck. ProcedurePrepareLatch.releaseLatch(syncLatch, this); - break; + return; default: - throw new UnsupportedOperationException("unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e); - throw e; + break; + } + + // The delete doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final EnableTableState state) { + switch (state) { + case ENABLE_TABLE_PREPARE: + case ENABLE_TABLE_PRE_OPERATION: + return true; + default: + return false; } } @@ -210,21 +202,6 @@ public class EnableTableProcedure return EnableTableState.ENABLE_TABLE_PREPARE; } - @Override - protected void setNextState(final EnableTableState state) { - if (aborted.get()) { - setAbortFailure("Enable-table", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (env.waitInitialized(this)) return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java index b06ffe87052..2c7badf0826 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java @@ -23,7 +23,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.security.PrivilegedExceptionAction; import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -50,8 +49,6 @@ public class ModifyColumnFamilyProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(ModifyColumnFamilyProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private TableName tableName; private HTableDescriptor unmodifiedHTableDescriptor; private HColumnDescriptor cfDescriptor; @@ -116,10 +113,12 @@ public class ModifyColumnFamilyProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Error trying to modify the column family " + getColumnFamilyName() - + " of the table " + tableName + "(in state=" + state + ")", e); - - setFailure("master-modify-columnfamily", e); + if (isRollbackSupported(state)) { + setFailure("master-modify-columnfamily", e); + } else { + LOG.warn("Retriable error trying to disable table=" + tableName + + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -127,33 +126,25 @@ public class ModifyColumnFamilyProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final ModifyColumnFamilyState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == ModifyColumnFamilyState.MODIFY_COLUMN_FAMILY_PREPARE || + state == ModifyColumnFamilyState.MODIFY_COLUMN_FAMILY_PRE_OPERATION) { + // nothing to rollback, pre-modify is just checks. + // TODO: coprocessor rollback semantic is still undefined. + return; } - try { - switch (state) { - case MODIFY_COLUMN_FAMILY_REOPEN_ALL_REGIONS: - break; // Nothing to undo. - case MODIFY_COLUMN_FAMILY_POST_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo? - break; - case MODIFY_COLUMN_FAMILY_UPDATE_TABLE_DESCRIPTOR: - restoreTableDescriptor(env); - break; + + // The delete doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final ModifyColumnFamilyState state) { + switch (state) { case MODIFY_COLUMN_FAMILY_PRE_OPERATION: - // TODO-MAYBE: call the coprocessor event to undo? - break; case MODIFY_COLUMN_FAMILY_PREPARE: - break; // nothing to do + return true; default: - throw new UnsupportedOperationException(this + " unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step " + state + " for adding the column family" - + getColumnFamilyName() + " to the table " + tableName, e); - throw e; + return false; } } @@ -177,21 +168,6 @@ public class ModifyColumnFamilyProcedure return ModifyColumnFamilyState.MODIFY_COLUMN_FAMILY_PREPARE; } - @Override - protected void setNextState(ModifyColumnFamilyState state) { - if (aborted.get()) { - setAbortFailure("modify-columnfamily", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (env.waitInitialized(this)) return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyNamespaceProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyNamespaceProcedure.java index a16c8d15a26..197290c936b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyNamespaceProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyNamespaceProcedure.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.master.procedure; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -44,8 +43,6 @@ public class ModifyNamespaceProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(ModifyNamespaceProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private NamespaceDescriptor oldNsDescriptor; private NamespaceDescriptor newNsDescriptor; private Boolean traceEnabled; @@ -87,10 +84,12 @@ public class ModifyNamespaceProcedure throw new UnsupportedOperationException(this + " unhandled state=" + state); } } catch (IOException e) { - LOG.warn("Error trying to modify the namespace" + newNsDescriptor.getName() - + " (in state=" + state + ")", e); - - setFailure("master-modify-namespace", e); + if (isRollbackSupported(state)) { + setFailure("master-modify-namespace", e); + } else { + LOG.warn("Retriable error trying to modify namespace=" + newNsDescriptor.getName() + + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -98,28 +97,23 @@ public class ModifyNamespaceProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final ModifyNamespaceState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == ModifyNamespaceState.MODIFY_NAMESPACE_PREPARE) { + // nothing to rollback, pre-modify is just checks. + // TODO: coprocessor rollback semantic is still undefined. + return; } - try { - switch (state) { - case MODIFY_NAMESPACE_UPDATE_ZK: - rollbackZKNamespaceManagerChange(env); - break; - case MODIFY_NAMESPACE_UPDATE_NS_TABLE: - rollbackUpdateInNSTable(env); - break; + + // The procedure doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final ModifyNamespaceState state) { + switch (state) { case MODIFY_NAMESPACE_PREPARE: - break; // nothing to do + return true; default: - throw new UnsupportedOperationException(this + " unhandled state=" + state); - } - } catch (IOException e) { - // This will be retried. Unless there is a bug in the code, - // this should be just a "temporary error" (e.g. network down) - LOG.warn("Failed rollback attempt step " + state + " for creating the namespace " - + newNsDescriptor.getName(), e); - throw e; + return false; } } @@ -138,21 +132,6 @@ public class ModifyNamespaceProcedure return ModifyNamespaceState.MODIFY_NAMESPACE_PREPARE; } - @Override - protected void setNextState(ModifyNamespaceState state) { - if (aborted.get()) { - setAbortFailure("modify-namespace", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override public void serializeStateData(final OutputStream stream) throws IOException { super.serializeStateData(stream); @@ -238,17 +217,6 @@ public class ModifyNamespaceProcedure getTableNamespaceManager(env).insertIntoNSTable(newNsDescriptor); } - /** - * rollback the row into namespace table - * @param env MasterProcedureEnv - * @throws IOException - */ - private void rollbackUpdateInNSTable(final MasterProcedureEnv env) throws IOException { - if (oldNsDescriptor != null) { - getTableNamespaceManager(env).insertIntoNSTable(oldNsDescriptor); - } - } - /** * Update ZooKeeper. * @param env MasterProcedureEnv @@ -258,17 +226,6 @@ public class ModifyNamespaceProcedure getTableNamespaceManager(env).updateZKNamespaceManager(newNsDescriptor); } - /** - * Update ZooKeeper during undo. - * @param env MasterProcedureEnv - * @throws IOException - */ - private void rollbackZKNamespaceManagerChange(final MasterProcedureEnv env) throws IOException { - if (oldNsDescriptor != null) { - getTableNamespaceManager(env).updateZKNamespaceManager(oldNsDescriptor); - } - } - private TableNamespaceManager getTableNamespaceManager(final MasterProcedureEnv env) { return env.getMasterServices().getClusterSchema().getTableNamespaceManager(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java index 64c26b9c561..83017bfec06 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java @@ -25,7 +25,6 @@ import java.security.PrivilegedExceptionAction; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -58,8 +57,6 @@ public class ModifyTableProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(ModifyTableProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private HTableDescriptor unmodifiedHTableDescriptor = null; private HTableDescriptor modifiedHTableDescriptor; private User user; @@ -140,12 +137,11 @@ public class ModifyTableProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (IOException e) { - if (!isRollbackSupported(state)) { - // We reach a state that cannot be rolled back. We just need to keep retry. - LOG.warn("Error trying to modify table=" + getTableName() + " state=" + state, e); - } else { - LOG.error("Error trying to modify table=" + getTableName() + " state=" + state, e); + if (isRollbackSupported(state)) { setFailure("master-modify-table", e); + } else { + LOG.warn("Retriable error trying to modify table=" + getTableName() + + " (in state=" + state + ")", e); } } return Flow.HAS_MORE_STATE; @@ -154,41 +150,25 @@ public class ModifyTableProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final ModifyTableState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); + if (state == ModifyTableState.MODIFY_TABLE_PREPARE || + state == ModifyTableState.MODIFY_TABLE_PRE_OPERATION) { + // nothing to rollback, pre-modify is just checks. + // TODO: coprocessor rollback semantic is still undefined. + return; } - try { - switch (state) { - case MODIFY_TABLE_REOPEN_ALL_REGIONS: - break; // Nothing to undo. - case MODIFY_TABLE_POST_OPERATION: - // TODO-MAYBE: call the coprocessor event to un-modify? - break; - case MODIFY_TABLE_DELETE_FS_LAYOUT: - // Once we reach to this state - we could NOT rollback - as it is tricky to undelete - // the deleted files. We are not suppose to reach here, throw exception so that we know - // there is a code bug to investigate. - assert deleteColumnFamilyInModify; - throw new UnsupportedOperationException(this + " rollback of state=" + state - + " is unsupported."); - case MODIFY_TABLE_REMOVE_REPLICA_COLUMN: - // Undo the replica column update. - updateReplicaColumnsIfNeeded(env, modifiedHTableDescriptor, unmodifiedHTableDescriptor); - break; - case MODIFY_TABLE_UPDATE_TABLE_DESCRIPTOR: - restoreTableDescriptor(env); - break; + + // The delete doesn't have a rollback. The execution will succeed, at some point. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected boolean isRollbackSupported(final ModifyTableState state) { + switch (state) { case MODIFY_TABLE_PRE_OPERATION: - // TODO-MAYBE: call the coprocessor event to un-modify? - break; case MODIFY_TABLE_PREPARE: - break; // Nothing to undo. + return true; default: - throw new UnsupportedOperationException("unhandled state=" + state); - } - } catch (IOException e) { - LOG.warn("Fail trying to rollback modify table=" + getTableName() + " state=" + state, e); - throw e; + return false; } } @@ -212,21 +192,6 @@ public class ModifyTableProcedure return ModifyTableState.MODIFY_TABLE_PREPARE; } - @Override - protected void setNextState(final ModifyTableState state) { - if (aborted.get() && isRollbackSupported(state)) { - setAbortFailure("modify-table", "abort requested"); - } else { - super.setNextState(state); - } - } - - @Override - public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; - } - @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (env.waitInitialized(this)) return false; @@ -493,24 +458,6 @@ public class ModifyTableProcedure } } - /* - * Check whether we are in the state that can be rollback - */ - private boolean isRollbackSupported(final ModifyTableState state) { - if (deleteColumnFamilyInModify) { - switch (state) { - case MODIFY_TABLE_DELETE_FS_LAYOUT: - case MODIFY_TABLE_POST_OPERATION: - case MODIFY_TABLE_REOPEN_ALL_REGIONS: - // It is not safe to rollback if we reach to these states. - return false; - default: - break; - } - } - return true; - } - private List getRegionInfoList(final MasterProcedureEnv env) throws IOException { if (regionInfoList == null) { regionInfoList = ProcedureSyncWait.getRegionsFromMeta(env, getTableName()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java index 6ac6fe77c3e..7b058d2b4bb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java @@ -26,7 +26,6 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -66,8 +65,6 @@ public class RestoreSnapshotProcedure implements TableProcedureInterface { private static final Log LOG = LogFactory.getLog(RestoreSnapshotProcedure.class); - private final AtomicBoolean aborted = new AtomicBoolean(false); - private HTableDescriptor modifiedHTableDescriptor; private List regionsToRestore = null; private List regionsToRemove = null; @@ -155,8 +152,12 @@ public class RestoreSnapshotProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (IOException e) { - LOG.error("Error trying to restore snapshot=" + getTableName() + " state=" + state, e); - setFailure("master-restore-snapshot", e); + if (isRollbackSupported(state)) { + setFailure("master-restore-snapshot", e); + } else { + LOG.warn("Retriable error trying to restore snapshot=" + snapshot.getName() + + " to table=" + getTableName() + " (in state=" + state + ")", e); + } } return Flow.HAS_MORE_STATE; } @@ -164,10 +165,6 @@ public class RestoreSnapshotProcedure @Override protected void rollbackState(final MasterProcedureEnv env, final RestoreSnapshotState state) throws IOException { - if (isTraceEnabled()) { - LOG.trace(this + " rollback state=" + state); - } - if (state == RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION) { // nothing to rollback return; @@ -177,6 +174,16 @@ public class RestoreSnapshotProcedure throw new UnsupportedOperationException("unhandled state=" + state); } + @Override + protected boolean isRollbackSupported(final RestoreSnapshotState state) { + switch (state) { + case RESTORE_SNAPSHOT_PRE_OPERATION: + return true; + default: + return false; + } + } + @Override protected RestoreSnapshotState getState(final int stateId) { return RestoreSnapshotState.valueOf(stateId); @@ -192,15 +199,6 @@ public class RestoreSnapshotProcedure return RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION; } - @Override - protected void setNextState(final RestoreSnapshotState state) { - if (aborted.get()) { - setAbortFailure("create-table", "abort requested"); - } else { - super.setNextState(state); - } - } - @Override public TableName getTableName() { return modifiedHTableDescriptor.getTableName(); @@ -213,8 +211,8 @@ public class RestoreSnapshotProcedure @Override public boolean abort(final MasterProcedureEnv env) { - aborted.set(true); - return true; + // TODO: We may be able to abort if the procedure is not started yet. + return false; } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java index 75606c2eb07..4b0674089f7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java @@ -142,7 +142,11 @@ public class TruncateTableProcedure throw new UnsupportedOperationException("unhandled state=" + state); } } catch (HBaseException|IOException e) { - LOG.warn("Retriable error trying to truncate table=" + getTableName() + " state=" + state, e); + if (isRollbackSupported(state)) { + setFailure("master-truncate-table", e); + } else { + LOG.warn("Retriable error trying to truncate table=" + getTableName() + " state=" + state, e); + } } return Flow.HAS_MORE_STATE; } @@ -152,6 +156,7 @@ public class TruncateTableProcedure if (state == TruncateTableState.TRUNCATE_TABLE_PRE_OPERATION) { // nothing to rollback, pre-truncate is just table-state checks. // We can fail if the table does not exist or is not disabled. + // TODO: coprocessor rollback semantic is still undefined. return; } @@ -164,6 +169,16 @@ public class TruncateTableProcedure ProcedurePrepareLatch.releaseLatch(syncLatch, this); } + @Override + protected boolean isRollbackSupported(final TruncateTableState state) { + switch (state) { + case TRUNCATE_TABLE_PRE_OPERATION: + return true; + default: + return false; + } + } + @Override protected TruncateTableState getState(final int stateId) { return TruncateTableState.valueOf(stateId); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java index 2d054d8fee2..e9e8bdc5a2b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.TableStateManager; +import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.util.ModifyRegionUtils; @@ -189,180 +190,6 @@ public class MasterProcedureTestingUtility { assertTrue(tsm.getTableState(tableName).equals(TableState.State.DISABLED)); } - /** - * Run through all procedure flow states TWICE while also restarting procedure executor at each - * step; i.e force a reread of procedure store. - * - *

It does - *

  1. Execute step N - kill the executor before store update - *
  2. Restart executor/store - *
  3. Execute step N - and then save to store - *
- * - *

This is a good test for finding state that needs persisting and steps that are not - * idempotent. Use this version of the test when a procedure executes all flow steps from start to - * finish. - * @see #testRecoveryAndDoubleExecution(ProcedureExecutor, long) - */ - public static void testRecoveryAndDoubleExecution( - final ProcedureExecutor procExec, final long procId, - final int numSteps, final TState[] states) throws Exception { - ProcedureTestingUtility.waitProcedure(procExec, procId); - assertEquals(false, procExec.isRunning()); - for (int i = 0; i < numSteps; ++i) { - LOG.info("Restart "+ i +" exec state: " + states[i]); - ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); - ProcedureTestingUtility.restart(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } - assertEquals(true, procExec.isRunning()); - ProcedureTestingUtility.assertProcNotFailed(procExec, procId); - } - - /** - * Run through all procedure flow states TWICE while also restarting procedure executor at each - * step; i.e force a reread of procedure store. - * - *

It does - *

  1. Execute step N - kill the executor before store update - *
  2. Restart executor/store - *
  3. Execute step N - and then save to store - *
- * - *

This is a good test for finding state that needs persisting and steps that are not - * idempotent. Use this version of the test when the order in which flow steps are executed is - * not start to finish; where the procedure may vary the flow steps dependent on circumstance - * found. - * @see #testRecoveryAndDoubleExecution(ProcedureExecutor, long, int, Object[]) - */ - public static void testRecoveryAndDoubleExecution( - final ProcedureExecutor procExec, final long procId) - throws Exception { - ProcedureTestingUtility.waitProcedure(procExec, procId); - assertEquals(false, procExec.isRunning()); - while (!procExec.isFinished(procId)) { - ProcedureTestingUtility.restart(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } - assertEquals(true, procExec.isRunning()); - ProcedureTestingUtility.assertProcNotFailed(procExec, procId); - } - - public static void testRollbackAndDoubleExecution( - final ProcedureExecutor procExec, final long procId, - final int lastStep, final TState[] states) throws Exception { - ProcedureTestingUtility.waitProcedure(procExec, procId); - - // Restart the executor and execute the step twice - // execute step N - kill before store update - // restart executor/store - // execute step N - save on store - for (int i = 0; i < lastStep; ++i) { - LOG.info("Restart "+ i +" exec state: " + states[i]); - ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); - ProcedureTestingUtility.restart(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } - - // Restart the executor and rollback the step twice - // rollback step N - kill before store update - // restart executor/store - // rollback step N - save on store - InjectAbortOnLoadListener abortListener = new InjectAbortOnLoadListener(procExec); - procExec.registerListener(abortListener); - try { - for (int i = lastStep + 1; i >= 0; --i) { - LOG.info("Restart " + i +" rollback state: "+ states[i]); - ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); - ProcedureTestingUtility.restart(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } - } finally { - assertTrue(procExec.unregisterListener(abortListener)); - } - - ProcedureTestingUtility.assertIsAbortException(procExec.getResult(procId)); - } - - public static void testRollbackAndDoubleExecutionAfterPONR( - final ProcedureExecutor procExec, final long procId, - final int lastStep, final TState[] states) throws Exception { - ProcedureTestingUtility.waitProcedure(procExec, procId); - - // Restart the executor and execute the step twice - // execute step N - kill before store update - // restart executor/store - // execute step N - save on store - for (int i = 0; i < lastStep; ++i) { - LOG.info("Restart "+ i +" exec state: " + states[i]); - ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); - ProcedureTestingUtility.restart(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } - - // try to inject the abort - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); - InjectAbortOnLoadListener abortListener = new InjectAbortOnLoadListener(procExec); - procExec.registerListener(abortListener); - try { - ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); - ProcedureTestingUtility.restart(procExec); - LOG.info("Restart and execute"); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } finally { - assertTrue(procExec.unregisterListener(abortListener)); - } - - assertEquals(true, procExec.isRunning()); - ProcedureTestingUtility.assertProcNotFailed(procExec, procId); - } - - public static void testRollbackRetriableFailure( - final ProcedureExecutor procExec, final long procId, - final int lastStep, final TState[] states) throws Exception { - ProcedureTestingUtility.waitProcedure(procExec, procId); - - // Restart the executor and execute the step twice - // execute step N - kill before store update - // restart executor/store - // execute step N - save on store - for (int i = 0; i < lastStep; ++i) { - LOG.info("Restart "+ i +" exec state: " + states[i]); - ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); - ProcedureTestingUtility.restart(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } - - // execute the rollback - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); - InjectAbortOnLoadListener abortListener = new InjectAbortOnLoadListener(procExec); - procExec.registerListener(abortListener); - try { - ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); - ProcedureTestingUtility.restart(procExec); - LOG.info("Restart and rollback"); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } finally { - assertTrue(procExec.unregisterListener(abortListener)); - } - - ProcedureTestingUtility.assertIsAbortException(procExec.getResult(procId)); - } - - public static void testRestartWithAbort(ProcedureExecutor procExec, - long procId) throws Exception { - InjectAbortOnLoadListener abortListener = new InjectAbortOnLoadListener(procExec); - abortListener.addProcId(procId); - procExec.registerListener(abortListener); - try { - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); - ProcedureTestingUtility.restart(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } finally { - assertTrue(procExec.unregisterListener(abortListener)); - } - } - public static void validateColumnFamilyAddition(final HMaster master, final TableName tableName, final String family) throws IOException { HTableDescriptor htd = master.getTableDescriptors().get(tableName); @@ -442,6 +269,157 @@ public class MasterProcedureTestingUtility { return master.getClusterConnection().getNonceGenerator().newNonce(); } + /** + * Run through all procedure flow states TWICE while also restarting procedure executor at each + * step; i.e force a reread of procedure store. + * + *

It does + *

  1. Execute step N - kill the executor before store update + *
  2. Restart executor/store + *
  3. Execute step N - and then save to store + *
+ * + *

This is a good test for finding state that needs persisting and steps that are not + * idempotent. Use this version of the test when a procedure executes all flow steps from start to + * finish. + * @see #testRecoveryAndDoubleExecution(ProcedureExecutor, long) + */ + public static void testRecoveryAndDoubleExecution( + final ProcedureExecutor procExec, final long procId, + final int numSteps) throws Exception { + testRecoveryAndDoubleExecution(procExec, procId, numSteps, true); + ProcedureTestingUtility.assertProcNotFailed(procExec, procId); + } + + private static void testRecoveryAndDoubleExecution( + final ProcedureExecutor procExec, final long procId, + final int numSteps, final boolean expectExecRunning) throws Exception { + final Procedure proc = procExec.getProcedure(procId); + ProcedureTestingUtility.waitProcedure(procExec, procId); + assertEquals(false, procExec.isRunning()); + + // Restart the executor and execute the step twice + // execute step N - kill before store update + // restart executor/store + // execute step N - save on store + for (int i = 0; i < numSteps; ++i) { + LOG.info("Restart " + i + " exec state: " + proc); + ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); + ProcedureTestingUtility.restart(procExec); + ProcedureTestingUtility.waitProcedure(procExec, procId); + } + + assertEquals(expectExecRunning, procExec.isRunning()); + } + + /** + * Run through all procedure flow states TWICE while also restarting + * procedure executor at each step; i.e force a reread of procedure store. + * + *

It does + *

  1. Execute step N - kill the executor before store update + *
  2. Restart executor/store + *
  3. Execute step N - and then save to store + *
+ * + *

This is a good test for finding state that needs persisting and steps that are not + * idempotent. Use this version of the test when the order in which flow steps are executed is + * not start to finish; where the procedure may vary the flow steps dependent on circumstance + * found. + * @see #testRecoveryAndDoubleExecution(ProcedureExecutor, long, int) + */ + public static void testRecoveryAndDoubleExecution( + final ProcedureExecutor procExec, final long procId) throws Exception { + final Procedure proc = procExec.getProcedure(procId); + ProcedureTestingUtility.waitProcedure(procExec, procId); + assertEquals(false, procExec.isRunning()); + for (int i = 0; !procExec.isFinished(procId); ++i) { + LOG.info("Restart " + i + " exec state: " + proc); + ProcedureTestingUtility.restart(procExec); + ProcedureTestingUtility.waitProcedure(procExec, procId); + } + assertEquals(true, procExec.isRunning()); + ProcedureTestingUtility.assertProcNotFailed(procExec, procId); + } + + /** + * Execute the procedure up to "lastStep" and then the ProcedureExecutor + * is restarted and an abort() is injected. + * If the procedure implement abort() this should result in rollback being triggered. + * Each rollback step is called twice, by restarting the executor after every step. + * At the end of this call the procedure should be finished and rolledback. + * This method assert on the procedure being terminated with an AbortException. + */ + public static void testRollbackAndDoubleExecution( + final ProcedureExecutor procExec, final long procId, + final int lastStep) throws Exception { + final Procedure proc = procExec.getProcedure(procId); + + // Execute up to last step + testRecoveryAndDoubleExecution(procExec, procId, lastStep, false); + + // Restart the executor and rollback the step twice + // rollback step N - kill before store update + // restart executor/store + // rollback step N - save on store + InjectAbortOnLoadListener abortListener = new InjectAbortOnLoadListener(procExec); + procExec.registerListener(abortListener); + try { + for (int i = 0; !procExec.isFinished(procId); ++i) { + LOG.info("Restart " + i + " rollback state: " + proc); + ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); + ProcedureTestingUtility.restart(procExec); + ProcedureTestingUtility.waitProcedure(procExec, procId); + } + } finally { + assertTrue(procExec.unregisterListener(abortListener)); + } + + assertEquals(true, procExec.isRunning()); + ProcedureTestingUtility.assertIsAbortException(procExec.getResult(procId)); + } + + /** + * Execute the procedure up to "lastStep" and then the ProcedureExecutor + * is restarted and an abort() is injected. + * If the procedure implement abort() this should result in rollback being triggered. + * At the end of this call the procedure should be finished and rolledback. + * This method assert on the procedure being terminated with an AbortException. + */ + public static void testRollbackRetriableFailure( + final ProcedureExecutor procExec, final long procId, + final int lastStep) throws Exception { + // Execute up to last step + testRecoveryAndDoubleExecution(procExec, procId, lastStep, false); + + // execute the rollback + testRestartWithAbort(procExec, procId); + + assertEquals(true, procExec.isRunning()); + ProcedureTestingUtility.assertIsAbortException(procExec.getResult(procId)); + } + + /** + * Restart the ProcedureExecutor and inject an abort to the specified procedure. + * If the procedure implement abort() this should result in rollback being triggered. + * At the end of this call the procedure should be finished and rolledback, if abort is implemnted + */ + public static void testRestartWithAbort(ProcedureExecutor procExec, + long procId) throws Exception { + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); + InjectAbortOnLoadListener abortListener = new InjectAbortOnLoadListener(procExec); + abortListener.addProcId(procId); + procExec.registerListener(abortListener); + try { + ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); + LOG.info("Restart and rollback procId=" + procId); + ProcedureTestingUtility.restart(procExec); + ProcedureTestingUtility.waitProcedure(procExec, procId); + } finally { + assertTrue(procExec.unregisterListener(abortListener)); + } + } + public static class InjectAbortOnLoadListener implements ProcedureExecutor.ProcedureExecutorListener { private final ProcedureExecutor procExec; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestAddColumnFamilyProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestAddColumnFamilyProcedure.java index a98d4681390..384f45af396 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestAddColumnFamilyProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestAddColumnFamilyProcedure.java @@ -228,8 +228,7 @@ public class TestAddColumnFamilyProcedure { // Restart the executor and execute the step twice int numberOfSteps = AddColumnFamilyState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps, - AddColumnFamilyState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateColumnFamilyAddition(UTIL.getHBaseCluster().getMaster(), tableName, cf4); @@ -255,8 +254,7 @@ public class TestAddColumnFamilyProcedure { // Restart the executor and execute the step twice int numberOfSteps = AddColumnFamilyState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps, - AddColumnFamilyState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateColumnFamilyAddition(UTIL.getHBaseCluster().getMaster(), tableName, cf5); @@ -280,9 +278,8 @@ public class TestAddColumnFamilyProcedure { nonceGroup, nonce); - int numberOfSteps = AddColumnFamilyState.values().length - 2; // failing in the middle of proc - MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps, - AddColumnFamilyState.values()); + int numberOfSteps = 1; // failing at "pre operations" + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateColumnFamilyDeletion(UTIL.getHBaseCluster().getMaster(), tableName, cf6); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCloneSnapshotProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCloneSnapshotProcedure.java index 090a00b4d24..c0851a283ba 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCloneSnapshotProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCloneSnapshotProcedure.java @@ -93,6 +93,12 @@ public class TestCloneSnapshotProcedure { @After public void tearDown() throws Exception { resetProcExecutorTestingKillFlag(); + TableName[] tables = UTIL.getHBaseAdmin().listTableNames(); + for (int i = 0; i < tables.length; ++i) { + UTIL.deleteTable(tables[i]); + } + SnapshotTestingUtils.deleteAllSnapshots(UTIL.getHBaseAdmin()); + snapshot = null; } private void resetProcExecutorTestingKillFlag() { @@ -211,11 +217,7 @@ public class TestCloneSnapshotProcedure { // Restart the executor and execute the step twice int numberOfSteps = CloneSnapshotState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - CloneSnapshotState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateTableIsEnabled( UTIL.getHBaseCluster().getMaster(), @@ -238,16 +240,11 @@ public class TestCloneSnapshotProcedure { long procId = procExec.submitProcedure( new CloneSnapshotProcedure(procExec.getEnvironment(), htd, snapshotDesc), nonceGroup, nonce); - int numberOfSteps = CloneSnapshotState.values().length - 2; // failing in the middle of proc - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - CloneSnapshotState.values()); + int numberOfSteps = 0; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateTableDeletion( UTIL.getHBaseCluster().getMaster(), clonedTableName); - } private ProcedureExecutor getMasterProcedureExecutor() { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateNamespaceProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateNamespaceProcedure.java index c01755f719e..cc7ed0fc362 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateNamespaceProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateNamespaceProcedure.java @@ -239,11 +239,7 @@ public class TestCreateNamespaceProcedure { // Restart the executor and execute the step twice int numberOfSteps = CreateNamespaceState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - CreateNamespaceState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); // Validate the creation of namespace ProcedureTestingUtility.assertProcNotFailed(procExec, procId); @@ -265,12 +261,8 @@ public class TestCreateNamespaceProcedure { nonceGroup, nonce); - int numberOfSteps = CreateNamespaceState.values().length - 2; // failing in the middle of proc - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - CreateNamespaceState.values()); + int numberOfSteps = 0; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); // Validate the non-existence of namespace try { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java index 5cec469da1a..858d5ad063e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hbase.master.procedure; -import java.io.IOException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -209,8 +207,7 @@ public class TestCreateTableProcedure { // Restart the executor and execute the step twice // NOTE: the 6 (number of CreateTableState steps) is hardcoded, // so you have to look at this test at least once when you add a new step. - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, procId, 6, CreateTableState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 6); MasterProcedureTestingUtility.validateTableCreation( UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2"); @@ -230,66 +227,10 @@ public class TestCreateTableProcedure { testRollbackAndDoubleExecution(htd); } - @Test(timeout=90000) - public void testRollbackRetriableFailure() throws Exception { - final TableName tableName = TableName.valueOf("testRollbackRetriableFailure"); - - // create the table - final ProcedureExecutor procExec = getMasterProcedureExecutor(); - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); - - // Start the Create procedure && kill the executor - final byte[][] splitKeys = new byte[][] { - Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c") - }; - HTableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, "f1", "f2"); - HRegionInfo[] regions = ModifyRegionUtils.createHRegionInfos(htd, splitKeys); - long procId = procExec.submitProcedure( - new FaultyCreateTableProcedure(procExec.getEnvironment(), htd, regions), nonceGroup, nonce); - - // NOTE: the 4 (number of CreateTableState steps) is hardcoded, - // so you have to look at this test at least once when you add a new step. - MasterProcedureTestingUtility.testRollbackRetriableFailure( - procExec, procId, 4, CreateTableState.values()); - - MasterProcedureTestingUtility.validateTableDeletion( - UTIL.getHBaseCluster().getMaster(), tableName); - - // are we able to create the table after a rollback? - resetProcExecutorTestingKillFlag(); - testSimpleCreate(tableName, splitKeys); - } - private ProcedureExecutor getMasterProcedureExecutor() { return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); } - public static class FaultyCreateTableProcedure extends CreateTableProcedure { - private int retries = 0; - - public FaultyCreateTableProcedure() { - // Required by the Procedure framework to create the procedure on replay - } - - public FaultyCreateTableProcedure(final MasterProcedureEnv env, - final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions) - throws IOException { - super(env, hTableDescriptor, newRegions); - } - - @Override - protected void rollbackState(final MasterProcedureEnv env, final CreateTableState state) - throws IOException { - if (retries++ < 3) { - LOG.info("inject rollback failure state=" + state); - throw new IOException("injected failure number " + retries); - } else { - super.rollbackState(env, state); - retries = 0; - } - } - } - private void testRollbackAndDoubleExecution(HTableDescriptor htd) throws Exception { // create the table final ProcedureExecutor procExec = getMasterProcedureExecutor(); @@ -304,10 +245,9 @@ public class TestCreateTableProcedure { long procId = procExec.submitProcedure( new CreateTableProcedure(procExec.getEnvironment(), htd, regions), nonceGroup, nonce); - // NOTE: the 4 (number of CreateTableState steps) is hardcoded, - // so you have to look at this test at least once when you add a new step. - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, procId, 4, CreateTableState.values()); + int numberOfSteps = 0; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); + TableName tableName = htd.getTableName(); MasterProcedureTestingUtility.validateTableDeletion( UTIL.getHBaseCluster().getMaster(), tableName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteColumnFamilyProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteColumnFamilyProcedure.java index 39802745016..bbae80eaf5f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteColumnFamilyProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteColumnFamilyProcedure.java @@ -249,8 +249,7 @@ public class TestDeleteColumnFamilyProcedure { // Restart the executor and execute the step twice int numberOfSteps = DeleteColumnFamilyState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps, - DeleteColumnFamilyState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateColumnFamilyDeletion(UTIL.getHBaseCluster().getMaster(), tableName, cf4); @@ -276,8 +275,7 @@ public class TestDeleteColumnFamilyProcedure { // Restart the executor and execute the step twice int numberOfSteps = DeleteColumnFamilyState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps, - DeleteColumnFamilyState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateColumnFamilyDeletion(UTIL.getHBaseCluster().getMaster(), tableName, cf5); @@ -302,53 +300,13 @@ public class TestDeleteColumnFamilyProcedure { nonceGroup, nonce); - // Failing before DELETE_COLUMN_FAMILY_DELETE_FS_LAYOUT we should trigger the rollback - // NOTE: the 1 (number before DELETE_COLUMN_FAMILY_DELETE_FS_LAYOUT step) is hardcoded, - // so you have to look at this test at least once when you add a new step. - int numberOfSteps = 1; - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - DeleteColumnFamilyState.values()); + int numberOfSteps = 1; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateTableCreation( UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2", "f3", cf5); } - @Test(timeout = 60000) - public void testRollbackAndDoubleExecutionAfterPONR() throws Exception { - final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecutionAfterPONR"); - final String cf5 = "cf5"; - - final ProcedureExecutor procExec = getMasterProcedureExecutor(); - - // create the table - HRegionInfo[] regions = MasterProcedureTestingUtility.createTable( - procExec, tableName, null, "f1", "f2", "f3", cf5); - ProcedureTestingUtility.waitNoProcedureRunning(procExec); - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); - - // Start the Delete procedure && kill the executor - long procId = procExec.submitProcedure( - new DeleteColumnFamilyProcedure(procExec.getEnvironment(), tableName, cf5.getBytes()), - nonceGroup, - nonce); - - // Failing after DELETE_COLUMN_FAMILY_DELETE_FS_LAYOUT we should not trigger the rollback. - // NOTE: the 4 (number of DELETE_COLUMN_FAMILY_DELETE_FS_LAYOUT + 1 step) is hardcoded, - // so you have to look at this test at least once when you add a new step. - int numberOfSteps = 4; - MasterProcedureTestingUtility.testRollbackAndDoubleExecutionAfterPONR( - procExec, - procId, - numberOfSteps, - DeleteColumnFamilyState.values()); - - MasterProcedureTestingUtility.validateColumnFamilyDeletion( - UTIL.getHBaseCluster().getMaster(), tableName, cf5); - } - private ProcedureExecutor getMasterProcedureExecutor() { return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteNamespaceProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteNamespaceProcedure.java index 4c5f87bbe99..642b15dcbc0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteNamespaceProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteNamespaceProcedure.java @@ -215,11 +215,7 @@ public class TestDeleteNamespaceProcedure { // Restart the executor and execute the step twice int numberOfSteps = DeleteNamespaceState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - DeleteNamespaceState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); // Validate the deletion of namespace ProcedureTestingUtility.assertProcNotFailed(procExec, procId); @@ -237,17 +233,14 @@ public class TestDeleteNamespaceProcedure { ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); // Start the DeleteNamespace procedure && kill the executor + LOG.info("SUBMIT DELTET"); long procId = procExec.submitProcedure( new DeleteNamespaceProcedure(procExec.getEnvironment(), namespaceName), nonceGroup, nonce); - int numberOfSteps = DeleteNamespaceState.values().length - 2; // failing in the middle of proc - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - DeleteNamespaceState.values()); + int numberOfSteps = 0; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); // Validate the namespace still exists NamespaceDescriptor createdNsDescriptor= diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedure.java index 7eb12cd6bf1..c5f57fa9171 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedure.java @@ -229,8 +229,7 @@ public class TestDeleteTableProcedure { // Restart the executor and execute the step twice // NOTE: the 6 (number of DeleteTableState steps) is hardcoded, // so you have to look at this test at least once when you add a new step. - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, procId, 6, DeleteTableState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 6); MasterProcedureTestingUtility.validateTableDeletion( UTIL.getHBaseCluster().getMaster(), tableName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDisableTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDisableTableProcedure.java index eb58cd5a2a9..6ff6a16c5c1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDisableTableProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDisableTableProcedure.java @@ -196,11 +196,7 @@ public class TestDisableTableProcedure { // Restart the executor and execute the step twice int numberOfSteps = DisableTableState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - DisableTableState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateTableIsDisabled(UTIL.getHBaseCluster().getMaster(), tableName); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDispatchMergingRegionsProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDispatchMergingRegionsProcedure.java index 17d1e757c72..69c56f763e5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDispatchMergingRegionsProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDispatchMergingRegionsProcedure.java @@ -32,8 +32,6 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; -import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos; -import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CloneSnapshotState; import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DispatchMergingRegionsState; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; @@ -245,11 +243,7 @@ public class TestDispatchMergingRegionsProcedure { // Restart the executor and execute the step twice int numberOfSteps = DispatchMergingRegionsState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - DispatchMergingRegionsState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); ProcedureTestingUtility.assertProcNotFailed(procExec, procId); assertEquals(2, admin.getTableRegions(tableName).size()); @@ -283,11 +277,7 @@ public class TestDispatchMergingRegionsProcedure { procExec.getEnvironment(), tableName, regionsToMerge, true)); int numberOfSteps = DispatchMergingRegionsState.values().length - 3; - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - DispatchMergingRegionsState.values()); + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); } private ProcedureExecutor getMasterProcedureExecutor() { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java index 5c2aa29e4fa..e0709e44206 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEnableTableProcedure.java @@ -185,11 +185,7 @@ public class TestEnableTableProcedure { // Restart the executor and execute the step twice int numberOfSteps = EnableTableState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - EnableTableState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateTableIsEnabled(UTIL.getHBaseCluster().getMaster(), tableName); } @@ -211,12 +207,8 @@ public class TestEnableTableProcedure { long procId = procExec.submitProcedure( new EnableTableProcedure(procExec.getEnvironment(), tableName, false), nonceGroup, nonce); - int numberOfSteps = EnableTableState.values().length - 2; // failing in the middle of proc - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - EnableTableState.values()); + int numberOfSteps = 1; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateTableIsDisabled(UTIL.getHBaseCluster().getMaster(), tableName); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyColumnFamilyProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyColumnFamilyProcedure.java index e9834593e82..27bf097bade 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyColumnFamilyProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyColumnFamilyProcedure.java @@ -178,11 +178,7 @@ public class TestModifyColumnFamilyProcedure { // Restart the executor and execute the step twice int numberOfSteps = ModifyColumnFamilyState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - ModifyColumnFamilyState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateColumnFamilyModification(UTIL.getHBaseCluster() .getMaster(), tableName, cf3, columnDescriptor); @@ -212,8 +208,7 @@ public class TestModifyColumnFamilyProcedure { // Restart the executor and execute the step twice int numberOfSteps = ModifyColumnFamilyState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps, - ModifyColumnFamilyState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); MasterProcedureTestingUtility.validateColumnFamilyModification(UTIL.getHBaseCluster() .getMaster(), tableName, cf4, columnDescriptor); @@ -241,13 +236,8 @@ public class TestModifyColumnFamilyProcedure { nonceGroup, nonce); - // Failing in the middle of proc - int numberOfSteps = ModifyColumnFamilyState.values().length - 2; - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - ModifyColumnFamilyState.values()); + int numberOfSteps = 1; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); } private ProcedureExecutor getMasterProcedureExecutor() { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyNamespaceProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyNamespaceProcedure.java index 9208df7478b..d6a758adcf3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyNamespaceProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyNamespaceProcedure.java @@ -230,11 +230,7 @@ public class TestModifyNamespaceProcedure { // Restart the executor and execute the step twice int numberOfSteps = ModifyNamespaceState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - ModifyNamespaceState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); ProcedureTestingUtility.assertProcNotFailed(procExec, procId); // Validate @@ -264,13 +260,8 @@ public class TestModifyNamespaceProcedure { nonceGroup, nonce); - // Failing in the middle of proc - int numberOfSteps = ModifyNamespaceState.values().length - 2; - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - ModifyNamespaceState.values()); + int numberOfSteps = 0; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); // Validate NamespaceDescriptor currentNsDescriptor = diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyTableProcedure.java index ab86eda391a..b19f6b8b13e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyTableProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyTableProcedure.java @@ -256,11 +256,7 @@ public class TestModifyTableProcedure { // Restart the executor and execute the step twice int numberOfSteps = ModifyTableState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - ModifyTableState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); // Validate descriptor HTableDescriptor currentHtd = UTIL.getHBaseAdmin().getTableDescriptor(tableName); @@ -298,8 +294,7 @@ public class TestModifyTableProcedure { // Restart the executor and execute the step twice int numberOfSteps = ModifyTableState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps, - ModifyTableState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); // Validate descriptor HTableDescriptor currentHtd = UTIL.getHBaseAdmin().getTableDescriptor(tableName); @@ -334,13 +329,8 @@ public class TestModifyTableProcedure { long procId = procExec.submitProcedure( new ModifyTableProcedure(procExec.getEnvironment(), htd), nonceGroup, nonce); - // Restart the executor and rollback the step twice - int numberOfSteps = ModifyTableState.values().length - 4; // failing in the middle of proc - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - ModifyTableState.values()); + int numberOfSteps = 1; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); // cf2 should not be present MasterProcedureTestingUtility.validateTableCreation(UTIL.getHBaseCluster().getMaster(), @@ -372,58 +362,14 @@ public class TestModifyTableProcedure { new ModifyTableProcedure(procExec.getEnvironment(), htd), nonceGroup, nonce); // Restart the executor and rollback the step twice - int numberOfSteps = ModifyTableState.values().length - 4; // failing in the middle of proc - MasterProcedureTestingUtility.testRollbackAndDoubleExecution( - procExec, - procId, - numberOfSteps, - ModifyTableState.values()); + int numberOfSteps = 1; // failing at pre operation + MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, numberOfSteps); // cf2 should not be present MasterProcedureTestingUtility.validateTableCreation(UTIL.getHBaseCluster().getMaster(), tableName, regions, "cf1"); } - @Test(timeout = 60000) - public void testRollbackAndDoubleExecutionAfterPONR() throws Exception { - final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecutionAfterPONR"); - final String familyToAddName = "cf2"; - final String familyToRemove = "cf1"; - final ProcedureExecutor procExec = getMasterProcedureExecutor(); - - // create the table - HRegionInfo[] regions = MasterProcedureTestingUtility.createTable( - procExec, tableName, null, familyToRemove); - UTIL.getHBaseAdmin().disableTable(tableName); - - ProcedureTestingUtility.waitNoProcedureRunning(procExec); - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); - - HTableDescriptor htd = new HTableDescriptor(UTIL.getHBaseAdmin().getTableDescriptor(tableName)); - htd.setCompactionEnabled(!htd.isCompactionEnabled()); - htd.addFamily(new HColumnDescriptor(familyToAddName)); - htd.removeFamily(familyToRemove.getBytes()); - htd.setRegionReplication(3); - - // Start the Modify procedure && kill the executor - long procId = procExec.submitProcedure( - new ModifyTableProcedure(procExec.getEnvironment(), htd), nonceGroup, nonce); - - // Failing after MODIFY_TABLE_DELETE_FS_LAYOUT we should not trigger the rollback. - // NOTE: the 5 (number of MODIFY_TABLE_DELETE_FS_LAYOUT + 1 step) is hardcoded, - // so you have to look at this test at least once when you add a new step. - int numberOfSteps = 5; - MasterProcedureTestingUtility.testRollbackAndDoubleExecutionAfterPONR( - procExec, - procId, - numberOfSteps, - ModifyTableState.values()); - - // "cf2" should be added and "cf1" should be removed - MasterProcedureTestingUtility.validateTableCreation(UTIL.getHBaseCluster().getMaster(), - tableName, regions, false, familyToAddName); - } - private ProcedureExecutor getMasterProcedureExecutor() { return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java index 31190c1bad9..ae6d27ff09d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java @@ -131,8 +131,13 @@ public class TestProcedureAdmin { // Submit an un-abortable procedure long procId = procExec.submitProcedure( new DeleteTableProcedure(procExec.getEnvironment(), tableName), nonceGroup, nonce); - // Wait for one step to complete + // Wait for a couple of steps to complete (first step "prepare" is abortable) ProcedureTestingUtility.waitProcedure(procExec, procId); + for (int i = 0; i < 2; ++i) { + ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); + ProcedureTestingUtility.restart(procExec); + ProcedureTestingUtility.waitProcedure(procExec, procId); + } boolean abortResult = procExec.abort(procId, true); assertFalse(abortResult); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java index 733dcb9e781..4e6303a50ab 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java @@ -261,11 +261,7 @@ public class TestRestoreSnapshotProcedure { // Restart the executor and execute the step twice int numberOfSteps = RestoreSnapshotState.values().length; - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, - procId, - numberOfSteps, - RestoreSnapshotState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, numberOfSteps); resetProcExecutorTestingKillFlag(); validateSnapshotRestore(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java index 6490a929a7f..a7bfe182442 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java @@ -86,6 +86,8 @@ public class TestTruncateTableProcedure { @After public void tearDown() throws Exception { + final ProcedureExecutor procExec = getMasterProcedureExecutor(); + assertTrue("expected executor to be running", procExec.isRunning()); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false); for (HTableDescriptor htd: UTIL.getHBaseAdmin().listTables()) { LOG.info("Tear down, remove table=" + htd.getTableName()); @@ -223,8 +225,7 @@ public class TestTruncateTableProcedure { // Restart the executor and execute the step twice // NOTE: the 7 (number of TruncateTableState steps) is hardcoded, // so you have to look at this test at least once when you add a new step. - MasterProcedureTestingUtility.testRecoveryAndDoubleExecution( - procExec, procId, 7, TruncateTableState.values()); + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 7); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); UTIL.waitUntilAllRegionsAssigned(tableName);