HBASE-18543 [AMv2] Fixed and re-enabled TestMasterFailover

* testSimpleMasterFailover - fixed and verified.
* testPendingOpenOrCloseWhenMasterFailover - removed; its logic is based on old code and is no longer relevant. TestServerCrashProcedure covers assignments with a crashing master and crashing region servers.
* testMetaInTransitionWhenMasterFailover - verified that it is fixed by the patch for HBASE-18511.

Signed-off-by: Michael Stack <stack@apache.org>
parent 6752eba68f
commit 5847c901a7
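For context on what the re-enabled testSimpleMasterFailover exercises (per its javadoc below: start several masters, kill the active one, and confirm a backup takes over), here is a minimal, standalone sketch of that flow against the HBaseTestingUtility / MiniHBaseCluster calls that appear throughout this test. It is illustrative only, not code from this commit; the class name, the master/region-server counts, and the main() wrapper are assumptions.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;

public class MasterFailoverSketch {
  public static void main(String[] args) throws Exception {
    // Start a mini cluster with three masters and three region servers (illustrative counts).
    Configuration conf = HBaseConfiguration.create();
    HBaseTestingUtility testUtil = new HBaseTestingUtility(conf);
    testUtil.startMiniCluster(3, 3);
    MiniHBaseCluster cluster = testUtil.getHBaseCluster();

    // Wait for an active master, then find which master thread is the active one.
    cluster.waitForActiveAndReadyMaster();
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    int activeIndex = -1;
    for (int i = 0; i < masterThreads.size(); i++) {
      if (masterThreads.get(i).getMaster().isActiveMaster()) {
        activeIndex = i;
      }
    }

    // Abort the active master and wait for its thread to exit.
    cluster.abortMaster(activeIndex);
    cluster.waitOnMaster(activeIndex);

    // A backup should take over; this call blocks until a master is active and initialized.
    cluster.waitForActiveAndReadyMaster();
    HMaster newActive = cluster.getMaster();
    System.out.println("New active master: " + newActive.getServerName());

    testUtil.shutdownMiniCluster();
  }
}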
@@ -1,4 +1,4 @@
-/**
+/*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
@@ -28,23 +28,13 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -53,32 +43,15 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.testclassification.FlakeyTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;

@Category({FlakeyTests.class, LargeTests.class})
@Ignore // Needs to be rewritten for AMv2. Uses tricks not ordained when up on AMv2.
public class TestMasterFailover {
  private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);

  HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
      final HTableDescriptor htd)
  throws IOException {
    HRegion r = HBaseTestingUtility.createRegionAndWAL(hri, rootdir, c, htd);
    // The above call to create a region will create an wal file. Each
    // log file create will also create a running thread to do syncing. We need
    // to close out this log else we will have a running thread trying to sync
    // the file system continuously which is ugly when dfs is taken away at the
    // end of the test.
    HBaseTestingUtility.closeRegionAndWAL(r);
    return r;
  }

  // TODO: Next test to add is with testing permutations of the RIT or the RS
  // killed are hosting ROOT and hbase:meta regions.

@@ -92,7 +65,6 @@ public class TestMasterFailover {
   * Starts with three masters. Kills a backup master. Then kills the active
   * master. Ensures the final master becomes active and we can still contact
   * the cluster.
   * @throws Exception
   */
  @Test (timeout=240000)
  public void testSimpleMasterFailover() throws Exception {
@@ -157,7 +129,7 @@ public class TestMasterFailover {
    assertEquals(2, masterThreads.size());
    int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
    LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
-    assertEquals(4, rsCount);
+    assertEquals(3, rsCount);

    // Check that ClusterStatus reports the correct active and backup masters
    assertNotNull(active);
@@ -190,142 +162,12 @@ public class TestMasterFailover {
    int rss = status.getServersSize();
    LOG.info("Active master " + mastername.getServerName() + " managing " +
      rss + " region servers");
-    assertEquals(4, rss);
+    assertEquals(3, rss);

    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

-  /**
-   * Test region in pending_open/close when master failover
-   */
-  @Test (timeout=180000)
-  public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
-    final int NUM_MASTERS = 1;
-    final int NUM_RS = 1;
-
-    // Create config to use for this cluster
-    Configuration conf = HBaseConfiguration.create();
-
-    // Start the cluster
-    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
-    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
-    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
-    log("Cluster started");
-
-    // get all the master threads
-    List<MasterThread> masterThreads = cluster.getMasterThreads();
-    assertEquals(1, masterThreads.size());
-
-    // only one master thread, let's wait for it to be initialized
-    assertTrue(cluster.waitForActiveAndReadyMaster());
-    HMaster master = masterThreads.get(0).getMaster();
-    assertTrue(master.isActiveMaster());
-    assertTrue(master.isInitialized());
-
-    // Create a table with a region online
-    Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
-    onlineTable.close();
-    // Create a table in META, so it has a region offline
-    HTableDescriptor offlineTable = new HTableDescriptor(
-      TableName.valueOf(Bytes.toBytes("offlineTable")));
-    offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
-
-    FileSystem filesystem = FileSystem.get(conf);
-    Path rootdir = FSUtils.getRootDir(conf);
-    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
-    fstd.createTableDescriptor(offlineTable);
-
-    HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
-    createRegion(hriOffline, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
-
-    log("Regions in hbase:meta and namespace have been created");
-
-    // at this point we only expect 3 regions to be assigned out
-    // (catalogs and namespace, + 1 online region)
-    assertEquals(3, cluster.countServedRegions());
-    HRegionInfo hriOnline = null;
-    try (RegionLocator locator =
-      TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
-      hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
-    }
-    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
-    RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
-
-    // Put the online region in pending_close. It is actually already opened.
-    // This is to simulate that the region close RPC is not sent out before failover
-    RegionState oldState = regionStates.getRegionState(hriOnline);
-    RegionState newState = new RegionState(hriOnline, State.CLOSING, oldState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
-    // Put the offline region in pending_open. It is actually not opened yet.
-    // This is to simulate that the region open RPC is not sent out before failover
-    oldState = new RegionState(hriOffline, State.OFFLINE);
-    newState = new RegionState(hriOffline, State.OPENING, newState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
-    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
-    createRegion(failedClose, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
-
-    oldState = new RegionState(failedClose, State.CLOSING);
-    newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
-    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
-    createRegion(failedOpen, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
-
-    // Simulate a region transitioning to failed open when the region server reports the
-    // transition as FAILED_OPEN
-    oldState = new RegionState(failedOpen, State.OPENING);
-    newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
-    HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
-    LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName());
-    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
-    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
-
-    // Simulate a region transitioning to failed open when the master couldn't find a plan for
-    // the region
-    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
-    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
-    stateStore.updateRegionState(HConstants.NO_SEQNUM, -1, newState, oldState);
-
-    // Stop the master
-    log("Aborting master");
-    cluster.abortMaster(0);
-    cluster.waitOnMaster(0);
-    log("Master has aborted");
-
-    // Start up a new master
-    log("Starting up a new master");
-    master = cluster.startMaster().getMaster();
-    log("Waiting for master to be ready");
-    cluster.waitForActiveAndReadyMaster();
-    log("Master is ready");
-
-    // Wait till no region in transition any more
-    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
-
-    // Get new region states since master restarted
-    regionStates = master.getAssignmentManager().getRegionStates();
-
-    // Both pending_open (RPC sent/not yet) regions should be online
-    assertTrue(regionStates.isRegionOnline(hriOffline));
-    assertTrue(regionStates.isRegionOnline(hriOnline));
-    assertTrue(regionStates.isRegionOnline(failedClose));
-    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
-    assertTrue(regionStates.isRegionOnline(failedOpen));
-
-    log("Done with verification, shutting down cluster");
-
-    // Done, shutdown the cluster
-    TEST_UTIL.shutdownMiniCluster();
-  }
-
  /**
   * Test meta in transition when master failover
   */
@@ -361,9 +203,9 @@ public class TestMasterFailover {
    // meta should remain where it was
    RegionState metaState =
      MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
      metaState.getServerName(), rs.getServerName());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
      metaState.getState(), State.OPEN);

    // Start up a new master
@@ -376,9 +218,9 @@ public class TestMasterFailover {
    // ensure meta is still deployed on RS
    metaState =
      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
      metaState.getServerName(), rs.getServerName());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
      metaState.getState(), State.OPEN);

    // Update meta state as OPENING, then kill master
@@ -408,9 +250,9 @@ public class TestMasterFailover {

    metaState =
      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
      metaState.getServerName(), rs.getServerName());
-    assertEquals("hbase:meta should be onlined on RS",
+    assertEquals("hbase:meta should be online on RS",
      metaState.getState(), State.OPEN);

    // Update meta state as CLOSING, then kill master
@@ -431,6 +273,7 @@ public class TestMasterFailover {
    // Start up a new master
    log("Starting up a new master");
    activeMaster = cluster.startMaster().getMaster();
+    assertNotNull(activeMaster);
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");