HBASE-19335 Fix waitUntilAllRegionsAssigned(). Ignore assignments to killed servers and when region state != OPEN.
Update timeouts for TestRegionObserverInterface. Reason: There are ~10 tests there, each with 5 min individual timeout. Too much. The test class is labelled MediumTests, let's used that with our standard CategoryBasedTimeout. 3 min per test function should be enough even on slower Apache machines.
This commit is contained in:
parent
95e4f059a3
commit
0384561423
|
@ -20,6 +20,7 @@ package org.apache.hadoop.hbase;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
@ -50,6 +51,12 @@ public class DistributedHBaseCluster extends HBaseCluster {
|
|||
private final Connection connection;
|
||||
|
||||
private ClusterManager clusterManager;
|
||||
/**
|
||||
* List of RegionServers killed so far. ServerName also comprises startCode of a server,
|
||||
* so any restarted instances of the same server will have different ServerName and will not
|
||||
* coincide with past dead ones. So there's no need to cleanup this list.
|
||||
*/
|
||||
private Set<ServerName> killedRegionServers = new HashSet<>();
|
||||
|
||||
public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
|
||||
throws IOException {
|
||||
|
@ -113,10 +120,16 @@ public class DistributedHBaseCluster extends HBaseCluster {
|
|||
@Override
|
||||
public void killRegionServer(ServerName serverName) throws IOException {
|
||||
LOG.info("Aborting RS: " + serverName.getServerName());
|
||||
killedRegionServers.add(serverName);
|
||||
clusterManager.kill(ServiceType.HBASE_REGIONSERVER,
|
||||
serverName.getHostname(), serverName.getPort());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isKilledRS(ServerName serverName) {
|
||||
return killedRegionServers.contains(serverName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stopRegionServer(ServerName serverName) throws IOException {
|
||||
LOG.info("Stopping RS: " + serverName.getServerName());
|
||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
|
|||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
||||
import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
|
@ -317,12 +318,14 @@ public class RegionStateStore {
|
|||
* @param r Result to pull the region state from
|
||||
* @return the region state, or null if unknown.
|
||||
*/
|
||||
protected State getRegionState(final Result r, int replicaId) {
|
||||
@VisibleForTesting
|
||||
public static State getRegionState(final Result r, int replicaId) {
|
||||
Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(replicaId));
|
||||
if (cell == null || cell.getValueLength() == 0) {
|
||||
return null;
|
||||
}
|
||||
return State.valueOf(Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
|
||||
return State.valueOf(Bytes.toString(cell.getValueArray(), cell.getValueOffset(),
|
||||
cell.getValueLength()));
|
||||
}
|
||||
|
||||
private static byte[] getStateColumn(int replicaId) {
|
||||
|
|
|
@ -128,6 +128,15 @@ public abstract class HBaseCluster implements Closeable, Configurable {
|
|||
*/
|
||||
public abstract void killRegionServer(ServerName serverName) throws IOException;
|
||||
|
||||
/**
|
||||
* Keeping track of killed servers and being able to check if a particular server was killed makes
|
||||
* it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
|
||||
* example of such case is - killing servers and waiting for all regions of a particular table
|
||||
* to be assigned. We can check for server column in META table and that its value is not one
|
||||
* of the killed servers.
|
||||
*/
|
||||
public abstract boolean isKilledRS(ServerName serverName);
|
||||
|
||||
/**
|
||||
* Stops the given region server, by attempting a gradual stop.
|
||||
* @return whether the operation finished with success
|
||||
|
|
|
@ -67,6 +67,8 @@ import org.apache.hadoop.hbase.Waiter.Predicate;
|
|||
import org.apache.hadoop.hbase.client.ImmutableHRegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
|
||||
import org.apache.hadoop.hbase.trace.TraceUtil;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
@ -3418,8 +3420,27 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
|
|||
byte[] b = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
|
||||
HRegionInfo info = HRegionInfo.parseFromOrNull(b);
|
||||
if (info != null && info.getTable().equals(tableName)) {
|
||||
b = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
|
||||
allRegionsAssigned &= (b != null);
|
||||
// Get server hosting this region from catalog family. Return false if no server
|
||||
// hosting this region, or if the server hosting this region was recently killed
|
||||
// (for fault tolerance testing).
|
||||
byte[] server =
|
||||
r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
|
||||
if (server == null) {
|
||||
return false;
|
||||
} else {
|
||||
byte[] startCode =
|
||||
r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
|
||||
ServerName serverName =
|
||||
ServerName.valueOf(Bytes.toString(server).replaceFirst(":", ",") + "," +
|
||||
Bytes.toLong(startCode));
|
||||
if (getHBaseCluster().isKilledRS(serverName)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (RegionStateStore.getRegionState(r, info.getReplicaId())
|
||||
!= RegionState.State.OPEN) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
|
|
|
@ -21,8 +21,10 @@ package org.apache.hadoop.hbase;
|
|||
import java.io.IOException;
|
||||
import java.security.PrivilegedAction;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
import java.util.Set;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
@ -108,6 +110,12 @@ public class MiniHBaseCluster extends HBaseCluster {
|
|||
public static class MiniHBaseClusterRegionServer extends HRegionServer {
|
||||
private Thread shutdownThread = null;
|
||||
private User user = null;
|
||||
/**
|
||||
* List of RegionServers killed so far. ServerName also comprises startCode of a server,
|
||||
* so any restarted instances of the same server will have different ServerName and will not
|
||||
* coincide with past dead ones. So there's no need to cleanup this list.
|
||||
*/
|
||||
static Set<ServerName> killedServers = new HashSet<>();
|
||||
|
||||
public MiniHBaseClusterRegionServer(Configuration conf)
|
||||
throws IOException, InterruptedException {
|
||||
|
@ -156,7 +164,8 @@ public class MiniHBaseCluster extends HBaseCluster {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void kill() {
|
||||
protected void kill() {
|
||||
killedServers.add(getServerName());
|
||||
super.kill();
|
||||
}
|
||||
|
||||
|
@ -249,6 +258,11 @@ public class MiniHBaseCluster extends HBaseCluster {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isKilledRS(ServerName serverName) {
|
||||
return MiniHBaseClusterRegionServer.killedServers.contains(serverName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stopRegionServer(ServerName serverName) throws IOException {
|
||||
stopRegionServer(getRegionServerIndex(serverName));
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory;
|
|||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.CategoryBasedTimeout;
|
||||
import org.apache.hadoop.hbase.Cell;
|
||||
import org.apache.hadoop.hbase.CellUtil;
|
||||
import org.apache.hadoop.hbase.Coprocessor;
|
||||
|
@ -88,12 +89,16 @@ import org.junit.Rule;
|
|||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.rules.TestName;
|
||||
import org.junit.rules.TestRule;
|
||||
|
||||
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
||||
|
||||
@Category({ CoprocessorTests.class, MediumTests.class })
|
||||
public class TestRegionObserverInterface {
|
||||
private static final Log LOG = LogFactory.getLog(TestRegionObserverInterface.class);
|
||||
@Rule
|
||||
public final TestRule timeout = CategoryBasedTimeout.builder().
|
||||
withTimeout(this.getClass()).withLookingForStuckThread(true).build();
|
||||
|
||||
public static final TableName TEST_TABLE = TableName.valueOf("TestTable");
|
||||
public final static byte[] A = Bytes.toBytes("a");
|
||||
|
@ -124,7 +129,7 @@ public class TestRegionObserverInterface {
|
|||
util.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testRegionObserver() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
// recreate table every time in order to reset the status of the
|
||||
|
@ -184,7 +189,7 @@ public class TestRegionObserverInterface {
|
|||
tableName, new Integer[] { 1, 1, 1, 1 });
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testRowMutation() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||
|
@ -216,7 +221,7 @@ public class TestRegionObserverInterface {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testIncrementHook() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||
|
@ -239,7 +244,7 @@ public class TestRegionObserverInterface {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testCheckAndPutHooks() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
try (Table table = util.createTable(tableName, new byte[][] { A, B, C })) {
|
||||
|
@ -260,7 +265,7 @@ public class TestRegionObserverInterface {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testCheckAndDeleteHooks() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||
|
@ -285,7 +290,7 @@ public class TestRegionObserverInterface {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testAppendHook() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
Table table = util.createTable(tableName, new byte[][] { A, B, C });
|
||||
|
@ -308,7 +313,7 @@ public class TestRegionObserverInterface {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
// HBase-3583
|
||||
public void testHBase3583() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
|
@ -351,7 +356,7 @@ public class TestRegionObserverInterface {
|
|||
table.close();
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testHBASE14489() throws IOException {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
Table table = util.createTable(tableName, new byte[][] { A });
|
||||
|
@ -476,7 +481,7 @@ public class TestRegionObserverInterface {
|
|||
* Tests overriding compaction handling via coprocessor hooks
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testCompactionOverride() throws Exception {
|
||||
final TableName compactTable = TableName.valueOf(name.getMethodName());
|
||||
Admin admin = util.getAdmin();
|
||||
|
@ -546,7 +551,7 @@ public class TestRegionObserverInterface {
|
|||
table.close();
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void bulkLoadHFileTest() throws Exception {
|
||||
final String testName = TestRegionObserverInterface.class.getName() + "." + name.getMethodName();
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
|
@ -575,7 +580,7 @@ public class TestRegionObserverInterface {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testRecovery() throws Exception {
|
||||
LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
|
||||
final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
|
||||
|
@ -625,7 +630,7 @@ public class TestRegionObserverInterface {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 300000)
|
||||
@Test
|
||||
public void testPreWALRestoreSkip() throws Exception {
|
||||
LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
|
||||
TableName tableName = TableName.valueOf(SimpleRegionObserver.TABLE_SKIPPED);
|
||||
|
|
Loading…
Reference in New Issue