HBASE-7250 create integration test for balancing regions and killing region servers - 2
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1420002 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bd5a2f972f
commit
fd5c422536
|
@ -142,6 +142,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
protected HBaseCluster cluster;
|
protected HBaseCluster cluster;
|
||||||
protected ClusterStatus initialStatus;
|
protected ClusterStatus initialStatus;
|
||||||
protected ServerName[] initialServers;
|
protected ServerName[] initialServers;
|
||||||
|
protected Random random = new Random();
|
||||||
|
|
||||||
void init(ActionContext context) throws Exception {
|
void init(ActionContext context) throws Exception {
|
||||||
this.context = context;
|
this.context = context;
|
||||||
|
@ -151,7 +152,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
|
initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void perform() throws Exception { };
|
protected void perform() throws Exception { };
|
||||||
|
|
||||||
// TODO: perhaps these methods should be elsewhere?
|
// TODO: perhaps these methods should be elsewhere?
|
||||||
/** Returns current region servers */
|
/** Returns current region servers */
|
||||||
|
@ -189,6 +190,40 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
LOG.info("Started region server:" + server + ". Reported num of rs:"
|
LOG.info("Started region server:" + server + ". Reported num of rs:"
|
||||||
+ cluster.getClusterStatus().getServersSize());
|
+ cluster.getClusterStatus().getServersSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void unbalanceRegions(ClusterStatus clusterStatus,
|
||||||
|
List<ServerName> fromServers, List<ServerName> toServers,
|
||||||
|
double fractionOfRegions) throws Exception {
|
||||||
|
List<byte[]> victimRegions = new LinkedList<byte[]>();
|
||||||
|
for (ServerName server : fromServers) {
|
||||||
|
ServerLoad serverLoad = clusterStatus.getLoad(server);
|
||||||
|
// Ugh.
|
||||||
|
List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet());
|
||||||
|
int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
|
||||||
|
LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName());
|
||||||
|
for (int i = 0; i < victimRegionCount; ++i) {
|
||||||
|
int victimIx = random.nextInt(regions.size());
|
||||||
|
String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
|
||||||
|
victimRegions.add(Bytes.toBytes(regionId));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size()
|
||||||
|
+ " servers to " + toServers.size() + " different servers");
|
||||||
|
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
|
||||||
|
for (byte[] victimRegion : victimRegions) {
|
||||||
|
int targetIx = random.nextInt(toServers.size());
|
||||||
|
admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void forceBalancer() throws Exception {
|
||||||
|
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
|
||||||
|
boolean result = admin.balancer();
|
||||||
|
if (!result) {
|
||||||
|
LOG.error("Balancer didn't succeed");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class RestartActionBase extends Action {
|
private static class RestartActionBase extends Action {
|
||||||
|
@ -221,7 +256,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart active master");
|
LOG.info("Performing action: Restart active master");
|
||||||
|
|
||||||
ServerName master = cluster.getClusterStatus().getMaster();
|
ServerName master = cluster.getClusterStatus().getMaster();
|
||||||
|
@ -235,7 +270,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart random region server");
|
LOG.info("Performing action: Restart random region server");
|
||||||
ServerName server = selectRandomItem(getCurrentServers());
|
ServerName server = selectRandomItem(getCurrentServers());
|
||||||
|
|
||||||
|
@ -248,7 +283,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart region server holding META");
|
LOG.info("Performing action: Restart region server holding META");
|
||||||
ServerName server = cluster.getServerHoldingMeta();
|
ServerName server = cluster.getServerHoldingMeta();
|
||||||
if (server == null) {
|
if (server == null) {
|
||||||
|
@ -264,7 +299,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart region server holding ROOT");
|
LOG.info("Performing action: Restart region server holding ROOT");
|
||||||
ServerName server = cluster.getServerHoldingMeta();
|
ServerName server = cluster.getServerHoldingMeta();
|
||||||
if (server == null) {
|
if (server == null) {
|
||||||
|
@ -287,7 +322,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
|
LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
|
||||||
(int)(ratio * 100)));
|
(int)(ratio * 100)));
|
||||||
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
|
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
|
||||||
|
@ -329,10 +364,9 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
|
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
|
||||||
(int)(ratio * 100)));
|
(int)(ratio * 100)));
|
||||||
Random random = new Random();
|
|
||||||
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
|
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
|
||||||
|
|
||||||
Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
|
Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
|
||||||
|
@ -365,7 +399,6 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
public static class UnbalanceRegionsAction extends Action {
|
public static class UnbalanceRegionsAction extends Action {
|
||||||
private double fractionOfRegions;
|
private double fractionOfRegions;
|
||||||
private double fractionOfServers;
|
private double fractionOfServers;
|
||||||
private Random random = new Random();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unbalances the regions on the cluster by choosing "target" servers, and moving
|
* Unbalances the regions on the cluster by choosing "target" servers, and moving
|
||||||
|
@ -379,51 +412,25 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info("Unbalancing regions");
|
LOG.info("Unbalancing regions");
|
||||||
ClusterStatus status = this.cluster.getClusterStatus();
|
ClusterStatus status = this.cluster.getClusterStatus();
|
||||||
List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
|
List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
|
||||||
int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());
|
int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());
|
||||||
List<byte[]> targetServers = new ArrayList<byte[]>(targetServerCount);
|
List<ServerName> targetServers = new ArrayList<ServerName>(targetServerCount);
|
||||||
for (int i = 0; i < targetServerCount; ++i) {
|
for (int i = 0; i < targetServerCount; ++i) {
|
||||||
int victimIx = random.nextInt(victimServers.size());
|
int victimIx = random.nextInt(victimServers.size());
|
||||||
String serverName = victimServers.remove(victimIx).getServerName();
|
targetServers.add(victimServers.remove(victimIx));
|
||||||
targetServers.add(Bytes.toBytes(serverName));
|
|
||||||
}
|
|
||||||
|
|
||||||
List<byte[]> victimRegions = new LinkedList<byte[]>();
|
|
||||||
for (ServerName server : victimServers) {
|
|
||||||
ServerLoad serverLoad = status.getLoad(server);
|
|
||||||
// Ugh.
|
|
||||||
List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet());
|
|
||||||
int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
|
|
||||||
LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName());
|
|
||||||
for (int i = 0; i < victimRegionCount; ++i) {
|
|
||||||
int victimIx = random.nextInt(regions.size());
|
|
||||||
String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
|
|
||||||
victimRegions.add(Bytes.toBytes(regionId));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG.info("Moving " + victimRegions.size() + " regions from " + victimServers.size()
|
|
||||||
+ " servers to " + targetServers.size() + " different servers");
|
|
||||||
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
|
|
||||||
for (byte[] victimRegion : victimRegions) {
|
|
||||||
int targetIx = random.nextInt(targetServers.size());
|
|
||||||
admin.move(victimRegion, targetServers.get(targetIx));
|
|
||||||
}
|
}
|
||||||
|
unbalanceRegions(status, victimServers, targetServers, fractionOfRegions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class ForceBalancerAction extends Action {
|
public static class ForceBalancerAction extends Action {
|
||||||
@Override
|
@Override
|
||||||
void perform() throws Exception {
|
protected void perform() throws Exception {
|
||||||
LOG.info("Balancing regions");
|
LOG.info("Balancing regions");
|
||||||
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
|
forceBalancer();
|
||||||
boolean result = admin.balancer();
|
|
||||||
if (!result) {
|
|
||||||
LOG.error("Balancer didn't succeed");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue