HBASE-11574. Addendum that should fix a unit test (TestMetaWithReplicas#testChangingReplicaCount) that fails once in a while
This commit is contained in:
parent
15a4738470
commit
7c8aa2e963
|
@ -109,6 +109,7 @@ import org.apache.hadoop.hbase.monitoring.MonitoredTask;
|
|||
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
|
||||
import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
|
||||
import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
|
||||
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
|
||||
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
|
||||
import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
|
||||
|
@ -124,6 +125,7 @@ import org.apache.hadoop.hbase.util.Bytes;
|
|||
import org.apache.hadoop.hbase.util.CompressionTest;
|
||||
import org.apache.hadoop.hbase.util.EncryptionTest;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.HBaseFsckRepair;
|
||||
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||
import org.apache.hadoop.hbase.util.HasThread;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
|
@ -799,7 +801,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
int replicaId = zooKeeper.getMetaReplicaIdFromZnode(metaReplicaZnode);
|
||||
if (replicaId >= numMetaReplicasConfigured) {
|
||||
RegionState r = MetaTableLocator.getMetaRegionState(zkw, replicaId);
|
||||
serverManager.sendRegionClose(r.getServerName(), r.getRegion());
|
||||
LOG.info("Closing excess replica of meta region " + r.getRegion());
|
||||
// send a close and wait for a max of 30 seconds
|
||||
ServerManager.closeRegionSilentlyAndWait(getConnection(), r.getServerName(),
|
||||
r.getRegion(), 30000);
|
||||
ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(replicaId));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.commons.logging.LogFactory;
|
|||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.ClockOutOfSyncException;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.NotServingRegionException;
|
||||
import org.apache.hadoop.hbase.RegionLoad;
|
||||
import org.apache.hadoop.hbase.Server;
|
||||
import org.apache.hadoop.hbase.ServerLoad;
|
||||
|
@ -767,6 +768,35 @@ public class ServerManager {
|
|||
return sendRegionClose(server, region, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Contacts a region server and waits up to timeout ms
|
||||
* to close the region. This bypasses the active hmaster.
|
||||
*/
|
||||
public static void closeRegionSilentlyAndWait(ClusterConnection connection,
|
||||
ServerName server, HRegionInfo region, long timeout) throws IOException, InterruptedException {
|
||||
AdminService.BlockingInterface rs = connection.getAdmin(server);
|
||||
try {
|
||||
ProtobufUtil.closeRegion(rs, server, region.getRegionName());
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
|
||||
}
|
||||
long expiration = timeout + System.currentTimeMillis();
|
||||
while (System.currentTimeMillis() < expiration) {
|
||||
try {
|
||||
HRegionInfo rsRegion =
|
||||
ProtobufUtil.getRegionInfo(rs, region.getRegionName());
|
||||
if (rsRegion == null) return;
|
||||
} catch (IOException ioe) {
|
||||
if (ioe instanceof NotServingRegionException) // no need to retry again
|
||||
return;
|
||||
LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(), ioe);
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
throw new IOException("Region " + region + " failed to close within"
|
||||
+ " timeout " + timeout);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a MERGE REGIONS RPC to the specified server to merge the specified
|
||||
* regions.
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.TableName;
|
|||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.ClusterConnection;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.hbase.client.HConnection;
|
||||
|
@ -38,8 +39,7 @@ import org.apache.hadoop.hbase.client.HTable;
|
|||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
|
||||
import org.apache.hadoop.hbase.master.ServerManager;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
||||
|
@ -153,29 +153,10 @@ public class HBaseFsckRepair {
|
|||
@SuppressWarnings("deprecation")
|
||||
public static void closeRegionSilentlyAndWait(HConnection connection,
|
||||
ServerName server, HRegionInfo region) throws IOException, InterruptedException {
|
||||
AdminService.BlockingInterface rs = connection.getAdmin(server);
|
||||
try {
|
||||
ProtobufUtil.closeRegion(rs, server, region.getRegionName());
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
|
||||
}
|
||||
long timeout = connection.getConfiguration()
|
||||
.getLong("hbase.hbck.close.timeout", 120000);
|
||||
long expiration = timeout + System.currentTimeMillis();
|
||||
while (System.currentTimeMillis() < expiration) {
|
||||
try {
|
||||
HRegionInfo rsRegion =
|
||||
ProtobufUtil.getRegionInfo(rs, region.getRegionName());
|
||||
if (rsRegion == null) return;
|
||||
} catch (IOException ioe) {
|
||||
if (ioe instanceof NotServingRegionException) // no need to retry again
|
||||
return;
|
||||
LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(), ioe);
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
throw new IOException("Region " + region + " failed to close within"
|
||||
+ " timeout " + timeout);
|
||||
ServerManager.closeRegionSilentlyAndWait((ClusterConnection)connection, server,
|
||||
region, timeout);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue