HBASE-4402 Retaining locality after restart broken

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1179809 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-10-06 20:10:08 +00:00
parent da04d73e70
commit c15feb5fbf
4 changed files with 84 additions and 16 deletions

View File

@ -341,6 +341,7 @@ Release 0.92.0 - Unreleased
(Kay Kay)
HBASE-4481 TestMergeTool failed in 0.92 build 20
HBASE-4386 Fix a potential NPE in TaskMonitor (todd)
HBASE-4402 Retaining locality after restart broken
TESTS
HBASE-4450 test for number of blocks read: to serve as baseline for expected

View File

@ -620,7 +620,7 @@ public class AssignmentManager extends ZooKeeperListener {
(System.currentTimeMillis() - 15000);
LOG.debug("Handling transition=" + data.getEventType() +
", server=" + data.getOrigin() + ", region=" +
prettyPrintedRegionName +
(prettyPrintedRegionName == null? "null": prettyPrintedRegionName) +
(lateEvent? ", which is more than 15 seconds late" : ""));
RegionState regionState = regionsInTransition.get(encodedName);
switch (data.getEventType()) {

View File

@ -31,6 +31,7 @@ import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
@ -46,7 +47,10 @@ import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
import com.google.common.base.Joiner;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.MinMaxPriorityQueue;
import com.google.common.collect.Sets;
/**
* Makes decisions about the placement and movement of Regions across
@ -576,20 +580,71 @@ public class DefaultLoadBalancer implements LoadBalancer {
*/
public Map<ServerName, List<HRegionInfo>> retainAssignment(
Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
// Group all of the old assignments by their hostname.
// We can't group directly by ServerName since the servers all have
// new start-codes.
// Group the servers by their hostname. It's possible we have multiple
// servers on the same host on different ports.
ArrayListMultimap<String, ServerName> serversByHostname =
ArrayListMultimap.create();
for (ServerName server : servers) {
serversByHostname.put(server.getHostname(), server);
}
// Now come up with new assignments
Map<ServerName, List<HRegionInfo>> assignments =
new TreeMap<ServerName, List<HRegionInfo>>();
for (ServerName server : servers) {
assignments.put(server, new ArrayList<HRegionInfo>());
}
for (Map.Entry<HRegionInfo, ServerName> region : regions.entrySet()) {
ServerName sn = region.getValue();
if (sn != null && servers.contains(sn)) {
assignments.get(sn).add(region.getKey());
// Collection of the hostnames that used to have regions
// assigned, but for which we no longer have any RS running
// after the cluster restart.
Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
int numRandomAssignments = 0;
int numRetainedAssigments = 0;
for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
HRegionInfo region = entry.getKey();
ServerName oldServerName = entry.getValue();
List<ServerName> localServers = new ArrayList<ServerName>();
if (oldServerName != null) {
localServers = serversByHostname.get(oldServerName.getHostname());
}
if (localServers.isEmpty()) {
// No servers on the new cluster match up with this hostname,
// assign randomly.
ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
assignments.get(randomServer).add(region);
numRandomAssignments++;
if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
} else if (localServers.size() == 1) {
// the usual case - one new server on same host
assignments.get(localServers.get(0)).add(region);
numRetainedAssigments++;
} else {
int size = assignments.size();
assignments.get(servers.get(RANDOM.nextInt(size))).add(region.getKey());
// multiple new servers in the cluster on this same host
int size = localServers.size();
ServerName target = localServers.get(RANDOM.nextInt(size));
assignments.get(target).add(region);
numRetainedAssigments++;
}
}
String randomAssignMsg = "";
if (numRandomAssignments > 0) {
randomAssignMsg = numRandomAssignments + " regions were assigned " +
"to random hosts, since the old hosts for these regions are no " +
"longer present in the cluster. These hosts were:\n " +
Joiner.on("\n ").join(oldHostsNoLongerPresent);
}
LOG.info("Reassigned " + regions.size() + " regions. " +
numRetainedAssigments + " retained the pre-restart assignment. " +
randomAssignMsg);
return assignments;
}

View File

@ -277,7 +277,12 @@ public class TestDefaultLoadBalancer {
Map<HRegionInfo, ServerName> existing =
new TreeMap<HRegionInfo, ServerName>();
for (int i = 0; i < regions.size(); i++) {
existing.put(regions.get(i), servers.get(i % servers.size()).getServerName());
ServerName sn = servers.get(i % servers.size()).getServerName();
// The old server would have had same host and port, but different
// start code!
ServerName snWithOldStartCode =
new ServerName(sn.getHostname(), sn.getPort(), sn.getStartcode() - 10);
existing.put(regions.get(i), snWithOldStartCode);
}
List<ServerName> listOfServerNames = getListOfServerNames(servers);
Map<ServerName, List<HRegionInfo>> assignment =
@ -296,9 +301,9 @@ public class TestDefaultLoadBalancer {
// Remove two of the servers that were previously there
List<ServerAndLoad> servers3 =
new ArrayList<ServerAndLoad>(servers);
servers3.remove(servers3.size()-1);
servers3.remove(servers3.size()-2);
listOfServerNames = getListOfServerNames(servers2);
servers3.remove(0);
servers3.remove(0);
listOfServerNames = getListOfServerNames(servers3);
assignment = loadBalancer.retainAssignment(existing, listOfServerNames);
assertRetainedAssignment(existing, listOfServerNames, assignment);
}
@ -338,13 +343,20 @@ public class TestDefaultLoadBalancer {
assertEquals(existing.size(), assignedRegions.size());
// Verify condition 2, if server had existing assignment, must have same
Set<ServerName> onlineAddresses = new TreeSet<ServerName>();
for (ServerName s : servers) onlineAddresses.add(s);
Set<String> onlineHostNames = new TreeSet<String>();
for (ServerName s : servers) {
onlineHostNames.add(s.getHostname());
}
for (Map.Entry<ServerName, List<HRegionInfo>> a : assignment.entrySet()) {
ServerName assignedTo = a.getKey();
for (HRegionInfo r : a.getValue()) {
ServerName address = existing.get(r);
if (address != null && onlineAddresses.contains(address)) {
assertTrue(a.getKey().equals(address));
if (address != null && onlineHostNames.contains(address.getHostname())) {
// this region was prevously assigned somewhere, and that
// host is still around, then it should be re-assigned on the
// same host
assertEquals(address.getHostname(), assignedTo.getHostname());
}
}
}
@ -470,7 +482,7 @@ public class TestDefaultLoadBalancer {
ServerName sn = this.serverQueue.poll();
return new ServerAndLoad(sn, numRegionsPerServer);
}
String host = "127.0.0.1";
String host = "server" + rand.nextInt(100000);
int port = rand.nextInt(60000);
long startCode = rand.nextLong();
ServerName sn = new ServerName(host, port, startCode);