HBASE-20104 Fix infinite loop of RIT when creating table on a rsgroup that has no online servers

Signed-off-by: tedyu <yuzhihong@gmail.com>
This commit is contained in:
haxiaolin 2018-03-01 15:58:20 +08:00 committed by tedyu
parent c4401b6073
commit 1fbe4deb01
3 changed files with 97 additions and 17 deletions

View File

@ -34,6 +34,7 @@ import java.util.Set;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
@ -361,20 +362,34 @@ public class RSGroupAdminEndpoint extends RSGroupAdminService
}
/**
 * Places the table described by {@code desc} into the RSGroup resolved for it by
 * {@code preGetRSGroupInfoOfTable}, unless that group already contains the table.
 *
 * @param desc descriptor of the table to place
 * @throws IOException propagated from resolving the group or from moving the table
 */
void assignTableToGroup(HTableDescriptor desc) throws IOException {
  final RSGroupInfo targetGroup = preGetRSGroupInfoOfTable(desc);
  if (targetGroup.containsTable(desc.getTableName())) {
    // The resolved group already lists this table; nothing to move.
    return;
  }
  groupAdminServer.moveTables(Sets.newHashSet(desc.getTableName()), targetGroup.getName());
}
/**
 * Reports whether the RSGroup resolved for the given table contains at least one server from
 * the list returned by the server manager's {@code createDestinationServersList()}.
 *
 * @param desc descriptor of the table whose group is inspected
 * @return {@code true} as soon as one listed server's address is a member of the group,
 *         {@code false} if none is
 * @throws IOException propagated from resolving the table's group
 */
public boolean rsgroupHasOnlineServers(HTableDescriptor desc) throws IOException {
  final RSGroupInfo group = preGetRSGroupInfoOfTable(desc);
  for (ServerName candidate : master.getServerManager().createDestinationServersList()) {
    if (!group.getServers().contains(candidate.getAddress())) {
      // Not a member of this group; keep looking.
      continue;
    }
    return true;
  }
  return false;
}
// Looks up the RSGroup for the namespace of the given table: reads the namespace's
// RSGroupInfo.NAMESPACE_DESC_PROP_GROUP configuration value, falls back to
// RSGroupInfo.DEFAULT_GROUP when that value is null, and throws ConstraintException when
// groupAdminServer has no group of that name.
// NOTE(review): this span appears to interleave pre-change and post-change lines of the
// commit (two alternative starts for the namespace lookup, two null checks, and a moveTables
// branch whose opening brace is never closed) -- confirm against the committed file before
// treating it as a single compilable method.
public RSGroupInfo preGetRSGroupInfoOfTable(HTableDescriptor desc) throws IOException{
String groupName =
master.getNamespaceDescriptor(desc.getTableName().getNamespaceAsString())
master.getTableNamespaceManager().get(desc.getTableName().getNamespaceAsString())
.getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP);
if (groupName == null) {
// No group configured on the namespace: use the default group.
groupName = RSGroupInfo.DEFAULT_GROUP;
}
RSGroupInfo rsGroupInfo = groupAdminServer.getRSGroupInfo(groupName);
if (rsGroupInfo == null) {
throw new ConstraintException("Default RSGroup (" + groupName + ") for this table's "
+ "namespace does not exist.");
}
if (!rsGroupInfo.containsTable(desc.getTableName())) {
groupAdminServer.moveTables(Sets.newHashSet(desc.getTableName()), groupName);
// NOTE(review): the local below is named exactly like its type (RSGroupInfo); legal Java but
// easy to misread.
RSGroupInfo RSGroupInfo = groupAdminServer.getRSGroupInfo(groupName);
if (RSGroupInfo == null) {
throw new ConstraintException("RSGroup " + groupName + " does not exist.");
}
return RSGroupInfo;
}
/////////////////////////////////////////////////////////////////////////////
@ -408,7 +423,10 @@ public class RSGroupAdminEndpoint extends RSGroupAdminService
public void preCreateTableHandler(ObserverContext<MasterCoprocessorEnvironment> ctx,
    HTableDescriptor desc,
    HRegionInfo[] regions) throws IOException {
  // Rejects creation of a non-system table when rsgroupHasOnlineServers(desc) is false,
  // i.e. the table's rsgroup has no online server to host its regions.
  if (desc.getTableName().isSystemTable()) {
    // System tables are exempt from the online-server check.
    return;
  }
  if (rsgroupHasOnlineServers(desc)) {
    return;
  }
  final String tableNameText = desc.getTableName().getNameAsString();
  throw new HBaseIOException("No online servers in the rsgroup, which table "
      + tableNameText + " belongs to");
}
@Override

View File

@ -34,7 +34,6 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.Waiter.Predicate;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.net.Address;
@ -61,7 +60,7 @@ import static org.junit.Assert.fail;
@Category({MediumTests.class})
public class TestRSGroups extends TestRSGroupsBase {
protected static final Log LOG = LogFactory.getLog(TestRSGroups.class);
private static HMaster master;
private static boolean init = false;
private static RSGroupAdminEndpoint RSGroupAdminEndpoint;

View File

@ -26,9 +26,14 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.security.SecureRandom;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@ -58,14 +63,9 @@ import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetServerInfoRequest;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
public abstract class TestRSGroupsBase {
protected static final Log LOG = LogFactory.getLog(TestRSGroupsBase.class);
@ -79,6 +79,7 @@ public abstract class TestRSGroupsBase {
protected static HBaseAdmin admin;
protected static HBaseCluster cluster;
protected static RSGroupAdmin rsGroupAdmin;
protected static HMaster master;
public final static long WAIT_TIMEOUT = 60000*5;
public final static int NUM_SLAVES_BASE = 4; //number of slaves for the smallest cluster
@ -965,4 +966,66 @@ public abstract class TestRSGroupsBase {
assertFalse(newGroupServers.contains(targetServer.getAddress()));
assertEquals(2, newGroupServers.size());
}
/**
 * Verifies that creating a table fails while the rsgroup configured on its namespace has no
 * online server, and succeeds once an online server from the default group is moved into it.
 *
 * @throws Exception if cluster setup, the waits, or the final table creation fail
 */
@Test
public void testCreateWhenRsgroupNoOnlineServers() throws Exception {
  LOG.info("testCreateWhenRsgroupNoOnlineServers");
  String testRSGroupName = "appInfo";

  // make rsgroup has only one server and stop this server
  final RSGroupInfo appInfo = addGroup(rsGroupAdmin, testRSGroupName, 1);
  Iterator<Address> iterator = appInfo.getServers().iterator();
  ServerName targetServer = ServerName.parseServerName(iterator.next().toString());
  AdminProtos.AdminService.BlockingInterface targetRS =
      ((ClusterConnection) admin.getConnection()).getAdmin(targetServer);
  // Re-read the server name from the region server itself so it can be matched against the
  // keys of the master's online-server map below.
  targetServer = ProtobufUtil.toServerName(targetRS.getServerInfo(null,
      GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName());
  assertEquals(1, rsGroupAdmin.getRSGroupInfo(testRSGroupName).getServers().size());
  assertTrue(master.getServerManager().getOnlineServers().containsKey(targetServer));

  try {
    targetRS.stopServer(null,
        AdminProtos.StopServerRequest.newBuilder().setReason("Die").build());
  } catch (Exception e) {
    // The stop request is best-effort; whether the server actually went down is checked by
    // the wait below. Log instead of silently dropping the failure.
    LOG.debug("stopServer call on " + targetServer + " threw", e);
  }

  // Wait until dead-server processing has finished and at least one dead server is reported.
  TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
    @Override
    public boolean evaluate() throws Exception {
      return !master.getServerManager().areDeadServersInProgress()
          && cluster.getClusterStatus().getDeadServerNames().size() > 0;
    }
  });
  assertFalse(master.getServerManager().getOnlineServers().containsKey(targetServer));

  // test create table when rsgroup has no online servers
  final TableName tableName = TableName.valueOf(tablePrefix + "_ns", "_testCreate");
  admin.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
  final HTableDescriptor desc = new HTableDescriptor(tableName);
  desc.addFamily(new HColumnDescriptor("f"));
  try {
    admin.createTable(desc);
    fail("Shouldn't create table successfully!");
  } catch (Exception e) {
    // Expected path: creation must be rejected while the group has no online server.
    LOG.debug("create table error", e);
  }

  // add another online server to rsgroup, and test create table
  RSGroupInfo defaultInfo = rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP);
  Set<Address> set = new HashSet<Address>();
  for (ServerName sn : master.getServerManager().getOnlineServersList()) {
    if (defaultInfo.getServers().contains(sn.getAddress())) {
      // One online server from the default group is enough.
      set.add(sn.getAddress());
      break;
    }
  }
  rsGroupAdmin.moveServers(set, testRSGroupName);
  assertEquals(2, rsGroupAdmin.getRSGroupInfo(testRSGroupName).getServers().size());

  admin.createTable(desc);
  // wait for created table to be assigned
  TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
    @Override
    public boolean evaluate() throws Exception {
      return getTableRegionMap().get(desc.getTableName()) != null;
    }
  });
}
}