From 2c076a30e5b269113a6ef526d458aafae6fcf339 Mon Sep 17 00:00:00 2001 From: Enis Soztutar Date: Fri, 10 Jul 2015 13:36:45 -0700 Subject: [PATCH] HBASE-13993 WALProcedureStore fencing is not effective if new WAL rolls --- .../store/wal/WALProcedureStore.java | 17 +++++- .../procedure2/ProcedureTestingUtility.java | 4 ++ .../TestMasterFailoverWithProcedures.java | 52 ++++++++++++++----- .../TestWALProcedureStoreOnHDFS.java | 15 ++++++ 4 files changed, 74 insertions(+), 14 deletions(-) diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java index b0e2258e11a..04715dee617 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/WALProcedureStore.java @@ -650,7 +650,22 @@ public class WALProcedureStore extends ProcedureStoreBase { } protected boolean rollWriter() throws IOException { - return rollWriter(flushLogId + 1); + // Create new state-log + if (!rollWriter(flushLogId + 1)) { + LOG.warn("someone else has already created log " + flushLogId); + return false; + } + + // We have the lease on the log, + // but we should check if someone else has created new files + if (getMaxLogId(getLogFiles()) > flushLogId) { + LOG.warn("Someone else created new logs. Expected maxLogId < " + flushLogId); + logs.getLast().removeFile(); + return false; + } + + // We have the lease on the log + return true; } private boolean rollWriter(final long logId) throws IOException { diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java index 1534a5fbd7a..34774ed8c28 100644 --- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java +++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java @@ -188,6 +188,10 @@ public class ProcedureTestingUtility { public static class TestProcedure extends Procedure { public TestProcedure() {} + public TestProcedure(long procId) { + this(procId, 0); + } + public TestProcedure(long procId, long parentId) { setProcId(procId); if (parentId > 0) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java index 429c83b02b2..6e967e1e027 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java @@ -35,9 +35,9 @@ import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure; import org.apache.hadoop.hbase.procedure2.store.ProcedureStore; import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; -import org.apache.hadoop.hbase.procedure2.store.wal.TestWALProcedureStore.TestSequentialProcedure; import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState; import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState; import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState; @@ -165,13 +165,32 @@ public class TestMasterFailoverWithProcedures { backupStore3Abort.await(); } - @Test(timeout=60000) + /** + * Tests proper fencing in case the current WAL store is fenced + */ + @Test + public void testWALfencingWithoutWALRolling() throws IOException { + testWALfencing(false); + } + + /** + * Tests proper fencing in case the current WAL store does not receive writes until after the + * new WAL does a couple of WAL rolls. + */ + @Test public void testWALfencingWithWALRolling() throws IOException { + testWALfencing(true); + } + + public void testWALfencing(boolean walRolls) throws IOException { final ProcedureStore procStore = getMasterProcedureExecutor().getStore(); assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore); HMaster firstMaster = UTIL.getHBaseCluster().getMaster(); + // cause WAL rolling after a delete in WAL: + firstMaster.getConfiguration().setLong("hbase.procedure.store.wal.roll.threshold", 1); + HMaster backupMaster3 = Mockito.mock(HMaster.class); Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration(); Mockito.doReturn(true).when(backupMaster3).isActiveMaster(); @@ -185,20 +204,27 @@ public class TestMasterFailoverWithProcedures { procStore2.start(1); procStore2.recoverLease(); - LOG.info("Inserting into second WALProcedureStore"); - // insert something to the second store then delete it, causing a WAL roll - Procedure proc2 = new TestSequentialProcedure(); - procStore2.insert(proc2, null); - procStore2.rollWriterOrDie(); + // before writing back to the WAL store, optionally do a couple of WAL rolls (which causes + // to delete the old WAL files). + if (walRolls) { + LOG.info("Inserting into second WALProcedureStore, causing WAL rolls"); + for (int i = 0; i < 512; i++) { + // insert something to the second store then delete it, causing a WAL roll(s) + Procedure proc2 = new TestProcedure(i); + procStore2.insert(proc2, null); + procStore2.delete(proc2.getProcId()); // delete the procedure so that the WAL is removed later + } + } + // Now, insert something to the first store, should fail. + // If the store does a WAL roll and continue with another logId without checking higher logIds + // it will incorrectly succeed. LOG.info("Inserting into first WALProcedureStore"); - // insert something to the first store - proc2 = new TestSequentialProcedure(); try { - procStore.insert(proc2, null); - fail("expected RuntimeException 'sync aborted'"); - } catch (RuntimeException e) { - LOG.info("got " + e.getMessage()); + procStore.insert(new TestProcedure(11), null); + fail("Inserting into Procedure Store should have failed"); + } catch (Exception ex) { + LOG.info("Received expected exception", ex); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java index 89ad5d61c76..8dc52857bf6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestWALProcedureStoreOnHDFS.java @@ -187,6 +187,7 @@ public class TestWALProcedureStoreOnHDFS { UTIL.getDFSCluster().restartDataNode(dnCount); for (long i = 2; i < 100; ++i) { store.insert(new TestProcedure(i, -1), null); + waitForNumReplicas(3); Thread.sleep(100); if ((i % 30) == 0) { LOG.info("Restart Data Node"); @@ -195,4 +196,18 @@ public class TestWALProcedureStoreOnHDFS { } assertTrue(store.isRunning()); } + + public void waitForNumReplicas(int numReplicas) throws Exception { + while (UTIL.getDFSCluster().getDataNodes().size() < numReplicas) { + Thread.sleep(100); + } + + for (int i = 0; i < numReplicas; ++i) { + for (DataNode dn: UTIL.getDFSCluster().getDataNodes()) { + while (!dn.isDatanodeFullyStarted()) { + Thread.sleep(100); + } + } + } + } }