HBASE-7250 create integration test for balancing regions and killing region servers - 2

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1420002 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2012-12-11 05:45:53 +00:00
parent bd5a2f972f
commit fd5c422536
1 changed files with 48 additions and 41 deletions

View File

@ -142,6 +142,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
protected HBaseCluster cluster; protected HBaseCluster cluster;
protected ClusterStatus initialStatus; protected ClusterStatus initialStatus;
protected ServerName[] initialServers; protected ServerName[] initialServers;
protected Random random = new Random();
void init(ActionContext context) throws Exception { void init(ActionContext context) throws Exception {
this.context = context; this.context = context;
@ -151,7 +152,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
initialServers = regionServers.toArray(new ServerName[regionServers.size()]); initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
} }
void perform() throws Exception { }; protected void perform() throws Exception { };
// TODO: perhaps these methods should be elsewhere? // TODO: perhaps these methods should be elsewhere?
/** Returns current region servers */ /** Returns current region servers */
@ -189,6 +190,40 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
LOG.info("Started region server:" + server + ". Reported num of rs:" LOG.info("Started region server:" + server + ". Reported num of rs:"
+ cluster.getClusterStatus().getServersSize()); + cluster.getClusterStatus().getServersSize());
} }
/**
 * Moves a randomly chosen share of the regions hosted on each of {@code fromServers}
 * onto randomly chosen servers from {@code toServers}.
 *
 * @param clusterStatus status snapshot used to look up the regions loaded on each source server
 * @param fromServers servers to take regions away from
 * @param toServers candidate destination servers; if empty, nothing is moved
 * @param fractionOfRegions share of each source server's regions to move; the per-server
 *   count is rounded up and then limited to the range [0, number of regions on that server]
 * @throws Exception if obtaining the admin handle or a move request fails
 */
protected void unbalanceRegions(ClusterStatus clusterStatus,
    List<ServerName> fromServers, List<ServerName> toServers,
    double fractionOfRegions) throws Exception {
  List<byte[]> victimRegions = new LinkedList<byte[]>();
  for (ServerName server : fromServers) {
    ServerLoad serverLoad = clusterStatus.getLoad(server);
    // Copy the region-name keys into an indexable list so that picking and removing
    // a random element is done by position.
    List<byte[]> regions = new ArrayList<byte[]>(serverLoad.getRegionsLoad().keySet());
    int requestedCount = (int)Math.ceil(fractionOfRegions * regions.size());
    // Clamp: a fraction above 1 would otherwise drain the list and make
    // random.nextInt(0) throw; a negative fraction simply selects nothing.
    int victimRegionCount = Math.max(0, Math.min(regions.size(), requestedCount));
    LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName());
    for (int i = 0; i < victimRegionCount; ++i) {
      int victimIx = random.nextInt(regions.size());
      String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
      victimRegions.add(Bytes.toBytes(regionId));
    }
  }

  if (toServers.isEmpty()) {
    // random.nextInt(0) would throw below; with no destination there is nothing to do.
    LOG.warn("No target servers to move " + victimRegions.size() + " regions to; skipping");
    return;
  }

  LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size()
    + " servers to " + toServers.size() + " different servers");

  // Encode each destination server name once instead of once per moved region.
  List<byte[]> targetNames = new ArrayList<byte[]>(toServers.size());
  for (ServerName target : toServers) {
    targetNames.add(Bytes.toBytes(target.getServerName()));
  }

  HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
  for (byte[] victimRegion : victimRegions) {
    int targetIx = random.nextInt(targetNames.size());
    admin.move(victimRegion, targetNames.get(targetIx));
  }
}
/**
 * Asks the cluster's admin interface to run the balancer and logs an error
 * when the call reports {@code false}.
 *
 * @throws Exception if obtaining the admin handle or the balancer request fails
 */
protected void forceBalancer() throws Exception {
  boolean balanced =
    this.context.getHaseIntegrationTestingUtility().getHBaseAdmin().balancer();
  if (balanced) {
    return;
  }
  LOG.error("Balancer didn't succeed");
}
} }
private static class RestartActionBase extends Action { private static class RestartActionBase extends Action {
@ -221,7 +256,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
super(sleepTime); super(sleepTime);
} }
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info("Performing action: Restart active master"); LOG.info("Performing action: Restart active master");
ServerName master = cluster.getClusterStatus().getMaster(); ServerName master = cluster.getClusterStatus().getMaster();
@ -235,7 +270,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
} }
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info("Performing action: Restart random region server"); LOG.info("Performing action: Restart random region server");
ServerName server = selectRandomItem(getCurrentServers()); ServerName server = selectRandomItem(getCurrentServers());
@ -248,7 +283,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
super(sleepTime); super(sleepTime);
} }
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info("Performing action: Restart region server holding META"); LOG.info("Performing action: Restart region server holding META");
ServerName server = cluster.getServerHoldingMeta(); ServerName server = cluster.getServerHoldingMeta();
if (server == null) { if (server == null) {
@ -264,7 +299,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
super(sleepTime); super(sleepTime);
} }
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info("Performing action: Restart region server holding ROOT"); LOG.info("Performing action: Restart region server holding ROOT");
ServerName server = cluster.getServerHoldingMeta(); ServerName server = cluster.getServerHoldingMeta();
if (server == null) { if (server == null) {
@ -287,7 +322,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
} }
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info(String.format("Performing action: Batch restarting %d%% of region servers", LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
(int)(ratio * 100))); (int)(ratio * 100)));
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio); List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
@ -329,10 +364,9 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
} }
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
(int)(ratio * 100))); (int)(ratio * 100)));
Random random = new Random();
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio); List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers); Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
@ -365,7 +399,6 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
public static class UnbalanceRegionsAction extends Action { public static class UnbalanceRegionsAction extends Action {
private double fractionOfRegions; private double fractionOfRegions;
private double fractionOfServers; private double fractionOfServers;
private Random random = new Random();
/** /**
* Unbalances the regions on the cluster by choosing "target" servers, and moving * Unbalances the regions on the cluster by choosing "target" servers, and moving
@ -379,51 +412,25 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
} }
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info("Unbalancing regions"); LOG.info("Unbalancing regions");
ClusterStatus status = this.cluster.getClusterStatus(); ClusterStatus status = this.cluster.getClusterStatus();
List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers()); List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size()); int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());
List<byte[]> targetServers = new ArrayList<byte[]>(targetServerCount); List<ServerName> targetServers = new ArrayList<ServerName>(targetServerCount);
for (int i = 0; i < targetServerCount; ++i) { for (int i = 0; i < targetServerCount; ++i) {
int victimIx = random.nextInt(victimServers.size()); int victimIx = random.nextInt(victimServers.size());
String serverName = victimServers.remove(victimIx).getServerName(); targetServers.add(victimServers.remove(victimIx));
targetServers.add(Bytes.toBytes(serverName));
}
List<byte[]> victimRegions = new LinkedList<byte[]>();
for (ServerName server : victimServers) {
ServerLoad serverLoad = status.getLoad(server);
// Ugh.
List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet());
int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName());
for (int i = 0; i < victimRegionCount; ++i) {
int victimIx = random.nextInt(regions.size());
String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
victimRegions.add(Bytes.toBytes(regionId));
}
}
LOG.info("Moving " + victimRegions.size() + " regions from " + victimServers.size()
+ " servers to " + targetServers.size() + " different servers");
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
for (byte[] victimRegion : victimRegions) {
int targetIx = random.nextInt(targetServers.size());
admin.move(victimRegion, targetServers.get(targetIx));
} }
unbalanceRegions(status, victimServers, targetServers, fractionOfRegions);
} }
} }
public static class ForceBalancerAction extends Action { public static class ForceBalancerAction extends Action {
@Override @Override
void perform() throws Exception { protected void perform() throws Exception {
LOG.info("Balancing regions"); LOG.info("Balancing regions");
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); forceBalancer();
boolean result = admin.balancer();
if (!result) {
LOG.error("Balancer didn't succeed");
}
} }
} }