HBASE-19335 Fix waitUntilAllRegionsAssigned(). Ignore assignments to killed servers and when region state != OPEN.
Update timeouts for TestRegionObserverInterface. Reason: There are ~10 tests there, each with a 5 min individual timeout, which is too much. The test class is labelled MediumTests, so let's use that with our standard CategoryBasedTimeout. 3 min per test function should be enough even on slower Apache machines.
This commit is contained in:
parent
95e4f059a3
commit
0384561423
|
@ -20,6 +20,7 @@ package org.apache.hadoop.hbase;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
@ -50,6 +51,12 @@ public class DistributedHBaseCluster extends HBaseCluster {
|
||||||
private final Connection connection;
|
private final Connection connection;
|
||||||
|
|
||||||
private ClusterManager clusterManager;
|
private ClusterManager clusterManager;
|
||||||
|
/**
|
||||||
|
* List of RegionServers killed so far. ServerName also comprises startCode of a server,
|
||||||
|
* so any restarted instances of the same server will have different ServerName and will not
|
||||||
|
* coincide with past dead ones. So there's no need to clean up this list.
|
||||||
|
*/
|
||||||
|
private Set<ServerName> killedRegionServers = new HashSet<>();
|
||||||
|
|
||||||
public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
|
public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -113,10 +120,16 @@ public class DistributedHBaseCluster extends HBaseCluster {
|
||||||
@Override
|
@Override
|
||||||
public void killRegionServer(ServerName serverName) throws IOException {
|
public void killRegionServer(ServerName serverName) throws IOException {
|
||||||
LOG.info("Aborting RS: " + serverName.getServerName());
|
LOG.info("Aborting RS: " + serverName.getServerName());
|
||||||
|
killedRegionServers.add(serverName);
|
||||||
clusterManager.kill(ServiceType.HBASE_REGIONSERVER,
|
clusterManager.kill(ServiceType.HBASE_REGIONSERVER,
|
||||||
serverName.getHostname(), serverName.getPort());
|
serverName.getHostname(), serverName.getPort());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isKilledRS(ServerName serverName) {
|
||||||
|
return killedRegionServers.contains(serverName);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stopRegionServer(ServerName serverName) throws IOException {
|
public void stopRegionServer(ServerName serverName) throws IOException {
|
||||||
LOG.info("Stopping RS: " + serverName.getServerName());
|
LOG.info("Stopping RS: " + serverName.getServerName());
|
||||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
|
import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -317,12 +318,14 @@ public class RegionStateStore {
|
||||||
* @param r Result to pull the region state from
|
* @param r Result to pull the region state from
|
||||||
* @return the region state, or null if unknown.
|
* @return the region state, or null if unknown.
|
||||||
*/
|
*/
|
||||||
protected State getRegionState(final Result r, int replicaId) {
|
@VisibleForTesting
|
||||||
|
public static State getRegionState(final Result r, int replicaId) {
|
||||||
Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(replicaId));
|
Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(replicaId));
|
||||||
if (cell == null || cell.getValueLength() == 0) {
|
if (cell == null || cell.getValueLength() == 0) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return State.valueOf(Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
|
return State.valueOf(Bytes.toString(cell.getValueArray(), cell.getValueOffset(),
|
||||||
|
cell.getValueLength()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static byte[] getStateColumn(int replicaId) {
|
private static byte[] getStateColumn(int replicaId) {
|
||||||
|
|
|
@ -128,6 +128,15 @@ public abstract class HBaseCluster implements Closeable, Configurable {
|
||||||
*/
|
*/
|
||||||
public abstract void killRegionServer(ServerName serverName) throws IOException;
|
public abstract void killRegionServer(ServerName serverName) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Keeping track of killed servers and being able to check if a particular server was killed makes
|
||||||
|
* it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
|
||||||
|
* example of such case is - killing servers and waiting for all regions of a particular table
|
||||||
|
* to be assigned. We can check for server column in META table and that its value is not one
|
||||||
|
* of the killed servers.
|
||||||
|
*/
|
||||||
|
public abstract boolean isKilledRS(ServerName serverName);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stops the given region server, by attempting a gradual stop.
|
* Stops the given region server, by attempting a gradual stop.
|
||||||
* @return whether the operation finished with success
|
* @return whether the operation finished with success
|
||||||
|
|
|
@ -67,6 +67,8 @@ import org.apache.hadoop.hbase.Waiter.Predicate;
|
||||||
import org.apache.hadoop.hbase.client.ImmutableHRegionInfo;
|
import org.apache.hadoop.hbase.client.ImmutableHRegionInfo;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||||
|
import org.apache.hadoop.hbase.master.RegionState;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
|
||||||
import org.apache.hadoop.hbase.trace.TraceUtil;
|
import org.apache.hadoop.hbase.trace.TraceUtil;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
|
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
@ -3418,8 +3420,27 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
|
||||||
byte[] b = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
|
byte[] b = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
|
||||||
HRegionInfo info = HRegionInfo.parseFromOrNull(b);
|
HRegionInfo info = HRegionInfo.parseFromOrNull(b);
|
||||||
if (info != null && info.getTable().equals(tableName)) {
|
if (info != null && info.getTable().equals(tableName)) {
|
||||||
b = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
|
// Get server hosting this region from catalog family. Return false if no server
|
||||||
allRegionsAssigned &= (b != null);
|
// hosting this region, or if the server hosting this region was recently killed
|
||||||
|
// (for fault tolerance testing).
|
||||||
|
byte[] server =
|
||||||
|
r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
|
||||||
|
if (server == null) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
byte[] startCode =
|
||||||
|
r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
|
||||||
|
ServerName serverName =
|
||||||
|
ServerName.valueOf(Bytes.toString(server).replaceFirst(":", ",") + "," +
|
||||||
|
Bytes.toLong(startCode));
|
||||||
|
if (getHBaseCluster().isKilledRS(serverName)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (RegionStateStore.getRegionState(r, info.getReplicaId())
|
||||||
|
!= RegionState.State.OPEN) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -21,8 +21,10 @@ package org.apache.hadoop.hbase;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.security.PrivilegedAction;
|
import java.security.PrivilegedAction;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
@ -108,6 +110,12 @@ public class MiniHBaseCluster extends HBaseCluster {
|
||||||
public static class MiniHBaseClusterRegionServer extends HRegionServer {
|
public static class MiniHBaseClusterRegionServer extends HRegionServer {
|
||||||
private Thread shutdownThread = null;
|
private Thread shutdownThread = null;
|
||||||
private User user = null;
|
private User user = null;
|
||||||
|
/**
|
||||||
|
* List of RegionServers killed so far. ServerName also comprises startCode of a server,
|
||||||
|
* so any restarted instances of the same server will have different ServerName and will not
|
||||||
|
* coincide with past dead ones. So there's no need to clean up this list.
|
||||||
|
*/
|
||||||
|
static Set<ServerName> killedServers = new HashSet<>();
|
||||||
|
|
||||||
public MiniHBaseClusterRegionServer(Configuration conf)
|
public MiniHBaseClusterRegionServer(Configuration conf)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
|
@ -156,7 +164,8 @@ public class MiniHBaseCluster extends HBaseCluster {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void kill() {
|
protected void kill() {
|
||||||
|
killedServers.add(getServerName());
|
||||||
super.kill();
|
super.kill();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -249,6 +258,11 @@ public class MiniHBaseCluster extends HBaseCluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isKilledRS(ServerName serverName) {
|
||||||
|
return MiniHBaseClusterRegionServer.killedServers.contains(serverName);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stopRegionServer(ServerName serverName) throws IOException {
|
public void stopRegionServer(ServerName serverName) throws IOException {
|
||||||
stopRegionServer(getRegionServerIndex(serverName));
|
stopRegionServer(getRegionServerIndex(serverName));
|
||||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hbase.CategoryBasedTimeout;
|
||||||
import org.apache.hadoop.hbase.Cell;
|
import org.apache.hadoop.hbase.Cell;
|
||||||
import org.apache.hadoop.hbase.CellUtil;
|
import org.apache.hadoop.hbase.CellUtil;
|
||||||
import org.apache.hadoop.hbase.Coprocessor;
|
import org.apache.hadoop.hbase.Coprocessor;
|
||||||
|
@ -88,12 +89,16 @@ import org.junit.Rule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.experimental.categories.Category;
|
import org.junit.experimental.categories.Category;
|
||||||
import org.junit.rules.TestName;
|
import org.junit.rules.TestName;
|
||||||
|
import org.junit.rules.TestRule;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
||||||
|
|
||||||
@Category({ CoprocessorTests.class, MediumTests.class })
|
@Category({ CoprocessorTests.class, MediumTests.class })
|
||||||
public class TestRegionObserverInterface {
|
public class TestRegionObserverInterface {
|
||||||
private static final Log LOG = LogFactory.getLog(TestRegionObserverInterface.class);
|
private static final Log LOG = LogFactory.getLog(TestRegionObserverInterface.class);
|
||||||
|
@Rule
|
||||||
|
public final TestRule timeout = CategoryBasedTimeout.builder().
|
||||||
|
withTimeout(this.getClass()).withLookingForStuckThread(true).build();
|
||||||
|
|
||||||
public static final TableName TEST_TABLE = TableName.valueOf("TestTable");
|
public static final TableName TEST_TABLE = TableName.valueOf("TestTable");
|
||||||
public final static byte[] A = Bytes.toBytes("a");
|
public final static byte[] A = Bytes.toBytes("a");
|
||||||
|
@ -124,7 +129,7 @@ public class TestRegionObserverInterface {
|
||||||
util.shutdownMiniCluster();
|
util.shutdownMiniCluster();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testRegionObserver() throws IOException {
|
public void testRegionObserver() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
// recreate table every time in order to reset the status of the
|
// recreate table every time in order to reset the status of the
|
||||||
|
@ -184,7 +189,7 @@ public class TestRegionObserverInterface {
|
||||||
tableName, new Integer[] { 1, 1, 1, 1 });
|
tableName, new Integer[] { 1, 1, 1, 1 });
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testRowMutation() throws IOException {
|
public void testRowMutation() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||||
|
@ -216,7 +221,7 @@ public class TestRegionObserverInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testIncrementHook() throws IOException {
|
public void testIncrementHook() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||||
|
@ -239,7 +244,7 @@ public class TestRegionObserverInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testCheckAndPutHooks() throws IOException {
|
public void testCheckAndPutHooks() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
try (Table table = util.createTable(tableName, new byte[][] { A, B, C })) {
|
try (Table table = util.createTable(tableName, new byte[][] { A, B, C })) {
|
||||||
|
@ -260,7 +265,7 @@ public class TestRegionObserverInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testCheckAndDeleteHooks() throws IOException {
|
public void testCheckAndDeleteHooks() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||||
|
@ -285,7 +290,7 @@ public class TestRegionObserverInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testAppendHook() throws IOException {
|
public void testAppendHook() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||||
|
@ -308,7 +313,7 @@ public class TestRegionObserverInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
// HBase-3583
|
// HBase-3583
|
||||||
public void testHBase3583() throws IOException {
|
public void testHBase3583() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
|
@ -351,7 +356,7 @@ public class TestRegionObserverInterface {
|
||||||
table.close();
|
table.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testHBASE14489() throws IOException {
|
public void testHBASE14489() throws IOException {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
Table table = util.createTable(tableName, new byte[][] { A });
|
Table table = util.createTable(tableName, new byte[][] { A });
|
||||||
|
@ -476,7 +481,7 @@ public class TestRegionObserverInterface {
|
||||||
* Tests overriding compaction handling via coprocessor hooks
|
* Tests overriding compaction handling via coprocessor hooks
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testCompactionOverride() throws Exception {
|
public void testCompactionOverride() throws Exception {
|
||||||
final TableName compactTable = TableName.valueOf(name.getMethodName());
|
final TableName compactTable = TableName.valueOf(name.getMethodName());
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
|
@ -546,7 +551,7 @@ public class TestRegionObserverInterface {
|
||||||
table.close();
|
table.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void bulkLoadHFileTest() throws Exception {
|
public void bulkLoadHFileTest() throws Exception {
|
||||||
final String testName = TestRegionObserverInterface.class.getName() + "." + name.getMethodName();
|
final String testName = TestRegionObserverInterface.class.getName() + "." + name.getMethodName();
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
|
@ -575,7 +580,7 @@ public class TestRegionObserverInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testRecovery() throws Exception {
|
public void testRecovery() throws Exception {
|
||||||
LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
|
LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
|
||||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||||
|
@ -625,7 +630,7 @@ public class TestRegionObserverInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 300000)
|
@Test
|
||||||
public void testPreWALRestoreSkip() throws Exception {
|
public void testPreWALRestoreSkip() throws Exception {
|
||||||
LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
|
LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
|
||||||
TableName tableName = TableName.valueOf(SimpleRegionObserver.TABLE_SKIPPED);
|
TableName tableName = TableName.valueOf(SimpleRegionObserver.TABLE_SKIPPED);
|
||||||
|
|
Loading…
Reference in New Issue