HBASE-6012 Handling RegionOpeningState for bulk assign (Chunhui)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1349377 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2012-06-12 14:47:59 +00:00
parent 6610703576
commit b8d3979505
7 changed files with 134 additions and 55 deletions

View File

@ -1475,7 +1475,20 @@ public class AssignmentManager extends ZooKeeperListener {
getLong("hbase.regionserver.rpc.startup.waittime", 60000); getLong("hbase.regionserver.rpc.startup.waittime", 60000);
while (!this.master.isStopped()) { while (!this.master.isStopped()) {
try { try {
this.serverManager.sendRegionOpen(destination, regions); List<RegionOpeningState> regionOpeningStateList = this.serverManager
.sendRegionOpen(destination, regions);
if (regionOpeningStateList == null) {
// Failed getting RPC connection to this server
return false;
}
for (int i = 0; i < regionOpeningStateList.size(); i++) {
if (regionOpeningStateList.get(i) == RegionOpeningState.ALREADY_OPENED) {
processAlreadyOpenedRegion(regions.get(i), destination);
} else if (regionOpeningStateList.get(i) == RegionOpeningState.FAILED_OPENING) {
// Failed opening this region, reassign it
assign(regions.get(i), true, true);
}
}
break; break;
} catch (RemoteException e) { } catch (RemoteException e) {
IOException decodedException = e.unwrapRemoteException(); IOException decodedException = e.unwrapRemoteException();
@ -1534,6 +1547,9 @@ public class AssignmentManager extends ZooKeeperListener {
failedPlans.put(e.getKey(), e.getValue()); failedPlans.put(e.getKey(), e.getValue());
} }
} catch (Throwable t) { } catch (Throwable t) {
LOG.warn("Failed bulking assigning " + e.getValue().size()
+ " region(s) to " + e.getKey().getServerName()
+ ", and continue to bulk assign others", t);
failedPlans.put(e.getKey(), e.getValue()); failedPlans.put(e.getKey(), e.getValue());
} }
} }
@ -1545,7 +1561,9 @@ public class AssignmentManager extends ZooKeeperListener {
+ " regions to server " + e.getKey() + ", reassigning them"); + " regions to server " + e.getKey() + ", reassigning them");
reassigningRegions.addAll(e.getValue()); reassigningRegions.addAll(e.getValue());
} }
assign(reassigningRegions, servers); for (HRegionInfo region : reassigningRegions) {
assign(region, true, true);
}
} }
} }
@ -1723,31 +1741,10 @@ public class AssignmentManager extends ZooKeeperListener {
RegionOpeningState regionOpenState = serverManager.sendRegionOpen(plan RegionOpeningState regionOpenState = serverManager.sendRegionOpen(plan
.getDestination(), state.getRegion(), versionOfOfflineNode); .getDestination(), state.getRegion(), versionOfOfflineNode);
if (regionOpenState == RegionOpeningState.ALREADY_OPENED) { if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
// Remove region from in-memory transition and unassigned node from ZK processAlreadyOpenedRegion(state.getRegion(), plan.getDestination());
// While trying to enable the table the regions of the table were } else if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
// already enabled. // Failed opening this region
LOG.debug("ALREADY_OPENED region " + state.getRegion().getRegionNameAsString() + throw new Exception("Get regionOpeningState=" + regionOpenState);
" to " + plan.getDestination().toString());
String encodedRegionName = state.getRegion()
.getEncodedName();
try {
ZKAssign.deleteOfflineNode(master.getZooKeeper(), encodedRegionName);
} catch (KeeperException.NoNodeException e) {
if(LOG.isDebugEnabled()){
LOG.debug("The unassigned node "+encodedRegionName+" doesnot exist.");
}
} catch (KeeperException e) {
master.abort(
"Error deleting OFFLINED node in ZK for transition ZK node ("
+ encodedRegionName + ")", e);
}
// no lock concurrent ok -> sequentially consistent
this.regionsInTransition.remove(plan.getRegionInfo().getEncodedName());
synchronized (this.regions) {
this.regions.put(plan.getRegionInfo(), plan.getDestination());
addToServers(plan.getDestination(), plan.getRegionInfo());
}
} }
break; break;
} catch (Throwable t) { } catch (Throwable t) {
@ -1779,6 +1776,36 @@ public class AssignmentManager extends ZooKeeperListener {
} }
} }
private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
// Remove region from in-memory transition and unassigned node from ZK
// While trying to enable the table the regions of the table were
// already enabled.
LOG.debug("ALREADY_OPENED region " + region.getRegionNameAsString()
+ " to " + sn);
String encodedRegionName = region.getEncodedName();
try {
ZKAssign.deleteOfflineNode(master.getZooKeeper(), encodedRegionName);
} catch (KeeperException.NoNodeException e) {
if (LOG.isDebugEnabled()) {
LOG.debug("The unassigned node " + encodedRegionName
+ " doesnot exist.");
}
} catch (KeeperException e) {
master.abort(
"Error deleting OFFLINED node in ZK for transition ZK node ("
+ encodedRegionName + ")", e);
}
// no lock concurrent ok -> sequentially consistent
this.regionsInTransition.remove(region.getEncodedName());
synchronized (this.regions) {
this.regions.put(region, sn);
addToServers(sn, region);
}
}
private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) { private boolean isDisabledorDisablingRegionInRIT(final HRegionInfo region) {
String tableName = region.getTableNameAsString(); String tableName = region.getTableNameAsString();
boolean disabled = this.zkTable.isDisabledTable(tableName); boolean disabled = this.zkTable.isDisabledTable(tableName);

View File

@ -511,16 +511,20 @@ public class ServerManager {
* <p> * <p>
* @param server server to open a region * @param server server to open a region
* @param regions regions to open * @param regions regions to open
* @return a list of region opening states
*/ */
public void sendRegionOpen(ServerName server, List<HRegionInfo> regions) public List<RegionOpeningState> sendRegionOpen(ServerName server,
List<HRegionInfo> regions)
throws IOException { throws IOException {
AdminProtocol admin = getServerConnection(server); AdminProtocol admin = getServerConnection(server);
if (admin == null) { if (admin == null) {
LOG.warn("Attempting to send OPEN RPC to server " + server.toString() + LOG.warn("Attempting to send OPEN RPC to server " + server.toString() +
" failed because no RPC connection found to this server"); " failed because no RPC connection found to this server");
return; return null;
} }
ProtobufUtil.openRegion(admin, regions);
OpenRegionResponse response = ProtobufUtil.openRegion(admin, regions);
return ResponseConverter.getRegionOpeningStateList(response);
} }
/** /**

View File

@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
/** /**
@ -305,6 +306,16 @@ public class ServerShutdownHandler extends EventHandler {
+ " because it has been opened in " + " because it has been opened in "
+ addressFromAM.getServerName()); + addressFromAM.getServerName());
} else { } else {
if (rit != null) {
//clean zk node
try{
LOG.info("Reassigning region with rs =" + rit + " and deleting zk node if exists");
ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), e.getKey());
}catch (KeeperException ke) {
this.server.abort("Unexpected ZK exception deleting unassigned node " + e.getKey(), ke);
return;
}
}
toAssignRegions.add(e.getKey()); toAssignRegions.add(e.getKey());
} }
} else if (rit != null && (rit.isSplitting() || rit.isSplit())) { } else if (rit != null && (rit.isSplitting() || rit.isSplit())) {

View File

@ -78,6 +78,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetServerInfoRespo
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileRequest; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileResponse; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetStoreFileResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryRequest; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.SplitRegionRequest; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.SplitRegionRequest;
@ -1286,7 +1287,6 @@ public final class ProtobufUtil {
/** /**
* A helper to open a region using admin protocol. * A helper to open a region using admin protocol.
*
* @param admin * @param admin
* @param region * @param region
* @throws IOException * @throws IOException
@ -1304,17 +1304,18 @@ public final class ProtobufUtil {
/** /**
* A helper to open a list of regions using admin protocol. * A helper to open a list of regions using admin protocol.
* *
* @param admin * @param admin
* @param regions * @param regions
* @return OpenRegionResponse
* @throws IOException * @throws IOException
*/ */
public static void openRegion(final AdminProtocol admin, public static OpenRegionResponse openRegion(final AdminProtocol admin,
final List<HRegionInfo> regions) throws IOException { final List<HRegionInfo> regions) throws IOException {
OpenRegionRequest request = OpenRegionRequest request =
RequestConverter.buildOpenRegionRequest(regions); RequestConverter.buildOpenRegionRequest(regions);
try { try {
admin.openRegion(null, request); return admin.openRegion(null, request);
} catch (ServiceException se) { } catch (ServiceException se) {
throw getRemoteException(se); throw getRemoteException(se);
} }

View File

@ -156,6 +156,23 @@ public final class ResponseConverter {
proto.getOpeningState(0).name()); proto.getOpeningState(0).name());
} }
/**
* Get a list of region opening state from a OpenRegionResponse
*
* @param proto the OpenRegionResponse
* @return the list of region opening state
*/
public static List<RegionOpeningState> getRegionOpeningStateList(
final OpenRegionResponse proto) {
if (proto == null) return null;
List<RegionOpeningState> regionOpeningStates = new ArrayList<RegionOpeningState>();
for (int i = 0; i < proto.getOpeningStateCount(); i++) {
regionOpeningStates.add(RegionOpeningState.valueOf(
proto.getOpeningState(i).name()));
}
return regionOpeningStates;
}
/** /**
* Check if the region is closed from a CloseRegionResponse * Check if the region is closed from a CloseRegionResponse
* *

View File

@ -3363,56 +3363,74 @@ public class HRegionServer implements ClientProtocol,
} }
try { try {
checkOpen(); checkOpen();
requestCount.incrementAndGet(); } catch (IOException ie) {
OpenRegionResponse.Builder builder = OpenRegionResponse.newBuilder(); throw new ServiceException(ie);
Map<String, HTableDescriptor> htds = }
new HashMap<String, HTableDescriptor>(request.getRegionList().size()); requestCount.incrementAndGet();
for (RegionInfo regionInfo: request.getRegionList()) { OpenRegionResponse.Builder builder = OpenRegionResponse.newBuilder();
HRegionInfo region = HRegionInfo.convert(regionInfo); Map<String, HTableDescriptor> htds = new HashMap<String, HTableDescriptor>(
request.getRegionList().size());
boolean isBulkAssign = request.getRegionList().size() > 1;
for (RegionInfo regionInfo : request.getRegionList()) {
HRegionInfo region = HRegionInfo.convert(regionInfo);
try {
checkIfRegionInTransition(region, OPEN); checkIfRegionInTransition(region, OPEN);
HRegion onlineRegion = getFromOnlineRegions(region.getEncodedName()); HRegion onlineRegion = getFromOnlineRegions(region.getEncodedName());
if (null != onlineRegion) { if (null != onlineRegion) {
// See HBASE-5094. Cross check with META if still this RS is owning the // See HBASE-5094. Cross check with META if still this RS is owning
// region. // the region.
Pair<HRegionInfo, ServerName> p = MetaReader.getRegion( Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(
this.catalogTracker, region.getRegionName()); this.catalogTracker, region.getRegionName());
if (this.getServerName().equals(p.getSecond())) { if (this.getServerName().equals(p.getSecond())) {
LOG.warn("Attempted open of " + region.getEncodedName() LOG.warn("Attempted open of " + region.getEncodedName()
+ " but already online on this server"); + " but already online on this server");
builder.addOpeningState(RegionOpeningState.ALREADY_OPENED); builder.addOpeningState(RegionOpeningState.ALREADY_OPENED);
continue; continue;
} else { } else {
LOG.warn("The region " + region.getEncodedName() LOG.warn("The region " + region.getEncodedName()
+ " is online on this server but META does not have this server."); + " is online on this server but META does not have this server.");
removeFromOnlineRegions(region.getEncodedName(), null); removeFromOnlineRegions(region.getEncodedName(), null);
} }
} }
LOG.info("Received request to open region: " LOG.info("Received request to open region: " + region.getRegionNameAsString() + " on "
+ region.getRegionNameAsString() + " on "+this.serverNameFromMasterPOV); + this.serverNameFromMasterPOV);
HTableDescriptor htd = htds.get(region.getTableNameAsString()); HTableDescriptor htd = htds.get(region.getTableNameAsString());
if (htd == null) { if (htd == null) {
htd = this.tableDescriptors.get(region.getTableName()); htd = this.tableDescriptors.get(region.getTableName());
htds.put(region.getTableNameAsString(), htd); htds.put(region.getTableNameAsString(), htd);
} }
this.regionsInTransitionInRS.putIfAbsent(region.getEncodedNameAsBytes(), true); this.regionsInTransitionInRS.putIfAbsent(
region.getEncodedNameAsBytes(), true);
// Need to pass the expected version in the constructor. // Need to pass the expected version in the constructor.
if (region.isRootRegion()) { if (region.isRootRegion()) {
this.service.submit(new OpenRootHandler(this, this, region, htd, this.service.submit(new OpenRootHandler(this, this, region, htd,
versionOfOfflineNode)); versionOfOfflineNode));
} else if (region.isMetaRegion()) { } else if (region.isMetaRegion()) {
this.service.submit(new OpenMetaHandler(this, this, region, htd, this.service.submit(new OpenMetaHandler(this, this, region, htd,
versionOfOfflineNode)); versionOfOfflineNode));
} else { } else {
this.service.submit(new OpenRegionHandler(this, this, region, htd, this.service.submit(new OpenRegionHandler(this, this, region, htd,
versionOfOfflineNode)); versionOfOfflineNode));
} }
builder.addOpeningState(RegionOpeningState.OPENED); builder.addOpeningState(RegionOpeningState.OPENED);
} catch (RegionAlreadyInTransitionException rie) {
LOG.warn("Region is already in transition", rie);
if (isBulkAssign) {
builder.addOpeningState(RegionOpeningState.OPENED);
} else {
throw new ServiceException(rie);
}
} catch (IOException ie) {
LOG.warn("Failed opening region " + region.getRegionNameAsString(), ie);
if (isBulkAssign) {
builder.addOpeningState(RegionOpeningState.FAILED_OPENING);
} else {
throw new ServiceException(ie);
}
} }
return builder.build();
} catch (IOException ie) {
throw new ServiceException(ie);
} }
return builder.build();
} }
/** /**

View File

@ -580,6 +580,7 @@ public class TestAssignmentManager {
MasterServices services = Mockito.mock(MasterServices.class); MasterServices services = Mockito.mock(MasterServices.class);
Mockito.when(services.getAssignmentManager()).thenReturn(am); Mockito.when(services.getAssignmentManager()).thenReturn(am);
Mockito.when(services.getServerManager()).thenReturn(this.serverManager); Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
ServerShutdownHandler handler = new ServerShutdownHandler(this.server, ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
services, deadServers, SERVERNAME_A, false); services, deadServers, SERVERNAME_A, false);
handler.process(); handler.process();