HBASE-21704 The implementation of DistributedHBaseCluster.getServerHoldingRegion is incorrect

This commit is contained in:
Duo Zhang 2019-01-11 17:45:12 +08:00 committed by zhangduo
parent d04282627f
commit d7db78b74c
2 changed files with 20 additions and 30 deletions

View File

@@ -31,14 +31,13 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Threads;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ServerInfo;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;
@@ -281,20 +280,16 @@ public class DistributedHBaseCluster extends HBaseCluster {
@Override @Override
public ServerName getServerHoldingRegion(TableName tn, byte[] regionName) throws IOException { public ServerName getServerHoldingRegion(TableName tn, byte[] regionName) throws IOException {
byte[] startKey = RegionInfo.getStartKey(regionName);
HRegionLocation regionLoc = null; HRegionLocation regionLoc = null;
try (RegionLocator locator = connection.getRegionLocator(tn)) { try (RegionLocator locator = connection.getRegionLocator(tn)) {
regionLoc = locator.getRegionLocation(regionName, true); regionLoc = locator.getRegionLocation(startKey, true);
} }
if (regionLoc == null) { if (regionLoc == null) {
LOG.warn("Cannot find region server holding region " + Bytes.toString(regionName) + LOG.warn("Cannot find region server holding region " + Bytes.toStringBinary(regionName));
", start key [" + Bytes.toString(HRegionInfo.getStartKey(regionName)) + "]");
return null; return null;
} }
return regionLoc.getServerName();
AdminProtos.AdminService.BlockingInterface client =
((ClusterConnection)this.connection).getAdmin(regionLoc.getServerName());
ServerInfo info = ProtobufUtil.getServerInfo(null, client);
return ProtobufUtil.toServerName(info.getServerName());
} }
@Override @Override
@@ -335,17 +330,15 @@ public class DistributedHBaseCluster extends HBaseCluster {
//check whether current master has changed //check whether current master has changed
final ServerName initMaster = initial.getMasterName(); final ServerName initMaster = initial.getMasterName();
if (!ServerName.isSameAddress(initMaster, current.getMasterName())) { if (!ServerName.isSameAddress(initMaster, current.getMasterName())) {
LOG.info("Restoring cluster - Initial active master : " LOG.info("Restoring cluster - Initial active master : " + initMaster.getAddress() +
+ initMaster.getHostAndPort() " has changed to : " + current.getMasterName().getAddress());
+ " has changed to : "
+ current.getMasterName().getHostAndPort());
// If initial master is stopped, start it, before restoring the state. // If initial master is stopped, start it, before restoring the state.
// It will come up as a backup master, if there is already an active master. // It will come up as a backup master, if there is already an active master.
try { try {
if (!clusterManager.isRunning(ServiceType.HBASE_MASTER, if (!clusterManager.isRunning(ServiceType.HBASE_MASTER,
initMaster.getHostname(), initMaster.getPort())) { initMaster.getHostname(), initMaster.getPort())) {
LOG.info("Restoring cluster - starting initial active master at:" LOG.info("Restoring cluster - starting initial active master at:"
+ initMaster.getHostAndPort()); + initMaster.getAddress());
startMaster(initMaster.getHostname(), initMaster.getPort()); startMaster(initMaster.getHostname(), initMaster.getPort());
} }
@@ -376,7 +369,7 @@ public class DistributedHBaseCluster extends HBaseCluster {
backup.getHostname(), backup.getHostname(),
backup.getPort())) { backup.getPort())) {
LOG.info("Restoring cluster - starting initial backup master: " LOG.info("Restoring cluster - starting initial backup master: "
+ backup.getHostAndPort()); + backup.getAddress());
startMaster(backup.getHostname(), backup.getPort()); startMaster(backup.getHostname(), backup.getPort());
} }
} catch (IOException ex) { } catch (IOException ex) {
@@ -400,7 +393,7 @@ public class DistributedHBaseCluster extends HBaseCluster {
for (ServerName sn:toStart) { for (ServerName sn:toStart) {
try { try {
if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, sn.getHostname(), sn.getPort())) { if(!clusterManager.isRunning(ServiceType.HBASE_MASTER, sn.getHostname(), sn.getPort())) {
LOG.info("Restoring cluster - starting initial backup master: " + sn.getHostAndPort()); LOG.info("Restoring cluster - starting initial backup master: " + sn.getAddress());
startMaster(sn.getHostname(), sn.getPort()); startMaster(sn.getHostname(), sn.getPort());
} }
} catch (IOException ex) { } catch (IOException ex) {
@@ -411,7 +404,7 @@ public class DistributedHBaseCluster extends HBaseCluster {
for (ServerName sn:toKill) { for (ServerName sn:toKill) {
try { try {
if(clusterManager.isRunning(ServiceType.HBASE_MASTER, sn.getHostname(), sn.getPort())) { if(clusterManager.isRunning(ServiceType.HBASE_MASTER, sn.getHostname(), sn.getPort())) {
LOG.info("Restoring cluster - stopping backup master: " + sn.getHostAndPort()); LOG.info("Restoring cluster - stopping backup master: " + sn.getAddress());
stopMaster(sn); stopMaster(sn);
} }
} catch (IOException ex) { } catch (IOException ex) {
@@ -461,11 +454,9 @@ public class DistributedHBaseCluster extends HBaseCluster {
for(ServerName sn:toStart) { for(ServerName sn:toStart) {
try { try {
if (!clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, if (!clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, sn.getHostname(),
sn.getHostname(), sn.getPort()) && master.getPort() != sn.getPort()) {
sn.getPort()) LOG.info("Restoring cluster - starting initial region server: " + sn.getAddress());
&& master.getPort() != sn.getPort()) {
LOG.info("Restoring cluster - starting initial region server: " + sn.getHostAndPort());
startRegionServer(sn.getHostname(), sn.getPort()); startRegionServer(sn.getHostname(), sn.getPort());
} }
} catch (IOException ex) { } catch (IOException ex) {
@@ -475,11 +466,9 @@ public class DistributedHBaseCluster extends HBaseCluster {
for(ServerName sn:toKill) { for(ServerName sn:toKill) {
try { try {
if (clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, if (clusterManager.isRunning(ServiceType.HBASE_REGIONSERVER, sn.getHostname(),
sn.getHostname(), sn.getPort()) && master.getPort() != sn.getPort()) {
sn.getPort()) LOG.info("Restoring cluster - stopping initial region server: " + sn.getAddress());
&& master.getPort() != sn.getPort()){
LOG.info("Restoring cluster - stopping initial region server: " + sn.getHostAndPort());
stopRegionServer(sn); stopRegionServer(sn);
} }
} catch (IOException ex) { } catch (IOException ex) {

View File

@@ -21,6 +21,7 @@ import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Threads;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger; import org.slf4j.Logger;
@@ -336,7 +337,7 @@ public abstract class HBaseCluster implements Closeable, Configurable {
*/ */
public ServerName getServerHoldingMeta() throws IOException { public ServerName getServerHoldingMeta() throws IOException {
return getServerHoldingRegion(TableName.META_TABLE_NAME, return getServerHoldingRegion(TableName.META_TABLE_NAME,
HRegionInfo.FIRST_META_REGIONINFO.getRegionName()); RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName());
} }
/** /**
@@ -346,7 +347,7 @@ public abstract class HBaseCluster implements Closeable, Configurable {
* @return ServerName that hosts the region or null * @return ServerName that hosts the region or null
*/ */
public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName) public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
throws IOException; throws IOException;
/** /**
* @return whether we are interacting with a distributed cluster as opposed to an * @return whether we are interacting with a distributed cluster as opposed to an