HBASE-11574. Addendum that should fix a unit test (TestMetaWithReplicas#testChangingReplicaCount) that fails once in a while
commit 7c8aa2e963
parent 15a4738470
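In outline, the addendum replaces the fire-and-forget serverManager.sendRegionClose() call that HMaster issued when dropping excess hbase:meta replicas with a new static helper, ServerManager.closeRegionSilentlyAndWait(connection, server, region, timeout), which sends the close RPC directly to the hosting region server and then polls until the region is no longer served there (or the timeout expires); HBaseFsckRepair now delegates to the same helper. A minimal usage sketch of the new helper follows; the CloseReplicaExample class and its connection/server/region parameters are hypothetical placeholders, not part of the commit:

import java.io.IOException;

import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.master.ServerManager;

// Hypothetical caller illustrating the helper added to ServerManager in this commit.
public class CloseReplicaExample {
  public static void closeAndWait(ClusterConnection connection, ServerName server,
      HRegionInfo region) throws IOException, InterruptedException {
    // Issues the close RPC to the hosting region server, then polls (once a second)
    // until the region is no longer served there or 30 seconds have elapsed.
    ServerManager.closeRegionSilentlyAndWait(connection, server, region, 30000);
  }
}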
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -109,6 +109,7 @@ import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
@@ -124,6 +125,7 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CompressionTest;
 import org.apache.hadoop.hbase.util.EncryptionTest;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HBaseFsckRepair;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.Pair;
@@ -799,7 +801,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
         int replicaId = zooKeeper.getMetaReplicaIdFromZnode(metaReplicaZnode);
         if (replicaId >= numMetaReplicasConfigured) {
           RegionState r = MetaTableLocator.getMetaRegionState(zkw, replicaId);
-          serverManager.sendRegionClose(r.getServerName(), r.getRegion());
+          LOG.info("Closing excess replica of meta region " + r.getRegion());
+          // send a close and wait for a max of 30 seconds
+          ServerManager.closeRegionSilentlyAndWait(getConnection(), r.getServerName(),
+              r.getRegion(), 30000);
           ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(replicaId));
         }
       }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -39,6 +39,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ClockOutOfSyncException;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.RegionLoad;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerLoad;
@@ -767,6 +768,35 @@ public class ServerManager {
     return sendRegionClose(server, region, null);
   }
 
+  /**
+   * Contacts a region server and waits up to timeout ms
+   * to close the region. This bypasses the active hmaster.
+   */
+  public static void closeRegionSilentlyAndWait(ClusterConnection connection,
+    ServerName server, HRegionInfo region, long timeout) throws IOException, InterruptedException {
+    AdminService.BlockingInterface rs = connection.getAdmin(server);
+    try {
+      ProtobufUtil.closeRegion(rs, server, region.getRegionName());
+    } catch (IOException e) {
+      LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
+    }
+    long expiration = timeout + System.currentTimeMillis();
+    while (System.currentTimeMillis() < expiration) {
+      try {
+        HRegionInfo rsRegion =
+          ProtobufUtil.getRegionInfo(rs, region.getRegionName());
+        if (rsRegion == null) return;
+      } catch (IOException ioe) {
+        if (ioe instanceof NotServingRegionException) // no need to retry again
+          return;
+        LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(), ioe);
+      }
+      Thread.sleep(1000);
+    }
+    throw new IOException("Region " + region + " failed to close within"
+        + " timeout " + timeout);
+  }
+
   /**
    * Sends an MERGE REGIONS RPC to the specified server to merge the specified
    * regions.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.HConnection;
@@ -38,8 +39,7 @@ import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.RegionState;
-import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
+import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.zookeeper.KeeperException;
 
@@ -153,29 +153,10 @@ public class HBaseFsckRepair {
   @SuppressWarnings("deprecation")
   public static void closeRegionSilentlyAndWait(HConnection connection,
       ServerName server, HRegionInfo region) throws IOException, InterruptedException {
-    AdminService.BlockingInterface rs = connection.getAdmin(server);
-    try {
-      ProtobufUtil.closeRegion(rs, server, region.getRegionName());
-    } catch (IOException e) {
-      LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
-    }
     long timeout = connection.getConfiguration()
       .getLong("hbase.hbck.close.timeout", 120000);
-    long expiration = timeout + System.currentTimeMillis();
-    while (System.currentTimeMillis() < expiration) {
-      try {
-        HRegionInfo rsRegion =
-          ProtobufUtil.getRegionInfo(rs, region.getRegionName());
-        if (rsRegion == null) return;
-      } catch (IOException ioe) {
-        if (ioe instanceof NotServingRegionException) // no need to retry again
-          return;
-        LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(), ioe);
-      }
-      Thread.sleep(1000);
-    }
-    throw new IOException("Region " + region + " failed to close within"
-        + " timeout " + timeout);
+    ServerManager.closeRegionSilentlyAndWait((ClusterConnection)connection, server,
+      region, timeout);
   }
 
   /**