HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1231302 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2012-01-13 21:17:30 +00:00
parent 8d07130c67
commit ee03d1626d
4 changed files with 56 additions and 10 deletions

View File

@ -478,6 +478,7 @@ Release 0.92.0 - Unreleased
HBASE-5137 MasterFileSystem.splitLog() should abort even if waitOnSafeMode() throws IOException(Ted)
HBASE-5121 MajorCompaction may affect scan's correctness (chunhui shen and Lars H)
HBASE-5143 Fix config typo in pluggable load balancer factory (Harsh J)
HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang)
TESTS
HBASE-4450 test for number of blocks read: to serve as baseline for expected

View File

@ -25,6 +25,7 @@ import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -68,11 +69,13 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
import org.apache.hadoop.hbase.ipc.ProtocolSignature;
import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator;
import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
@ -528,6 +531,10 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
this.balancer.setClusterStatus(getClusterStatus());
this.balancer.setMasterServices(this);
// Fixing up missing daughters if any
status.setStatus("Fixing up missing daughters");
fixupDaughters(status);
// Start balancer and meta catalog janitor after meta and regions have
// been assigned.
status.setStatus("Starting balancer and catalog janitor");
@ -622,6 +629,39 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
return assigned;
}
/**
 * Scans the .META. catalog for split parents that are flagged both offline
 * and split, then passes each such parent row to
 * {@code ServerShutdownHandler.fixupDaughters} and logs the total number of
 * fixups when that total is non-zero.
 * @param status task monitor supplied by the caller; this method does not
 * read or update it
 * @throws IOException if the catalog scan or a daughter fixup fails
 */
void fixupDaughters(final MonitoredTask status) throws IOException {
  final Map<HRegionInfo, Result> offlineSplitParents =
    new HashMap<HRegionInfo, Result>();
  // Collector that remembers the catalog row of every offline split parent.
  MetaReader.Visitor collector = new MetaReader.Visitor() {
    @Override
    public boolean visit(Result row) throws IOException {
      if (row != null && !row.isEmpty()) {
        HRegionInfo parent = MetaReader.parseHRegionInfoFromCatalogResult(
          row, HConstants.REGIONINFO_QUALIFIER);
        if (parent != null && parent.isOffline() && parent.isSplit()) {
          offlineSplitParents.put(parent, row);
        }
      }
      // Always true: the scan must continue through the whole catalog.
      return true;
    }
  };
  // Walk all of .META. with the collector.
  MetaReader.fullScan(this.catalogTracker, collector);
  // Only the stored catalog rows are needed to repair the daughters.
  int repaired = 0;
  for (Result parentRow : offlineSplitParents.values()) {
    repaired += ServerShutdownHandler.fixupDaughters(
      parentRow, assignmentManager, catalogTracker);
  }
  if (repaired != 0) {
    LOG.info("Scanned the catalog and fixed up " + repaired +
      " missing daughter region(s)");
  }
}
/**
* Expire a server if we find it is one of the online servers set.
* @param sn ServerName to check.

View File

@ -342,31 +342,34 @@ public class ServerShutdownHandler extends EventHandler {
* Check that daughter regions are up in .META. and if not, add them.
* @param hris All regions for this server in meta.
* @param result The contents of the parent row in .META.
* @return the number of daughters missing and fixed
* @throws IOException
*/
static void fixupDaughters(final Result result,
public static int fixupDaughters(final Result result,
final AssignmentManager assignmentManager,
final CatalogTracker catalogTracker)
throws IOException {
fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
catalogTracker);
fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
catalogTracker);
int fixedA = fixupDaughter(result, HConstants.SPLITA_QUALIFIER,
assignmentManager, catalogTracker);
int fixedB = fixupDaughter(result, HConstants.SPLITB_QUALIFIER,
assignmentManager, catalogTracker);
return fixedA + fixedB;
}
/**
* Check individual daughter is up in .META.; fixup if it's not.
* @param result The contents of the parent row in .META.
* @param qualifier Which daughter to check for.
* @return 1 if the daughter is missing and fixed. Otherwise 0
* @throws IOException
*/
static void fixupDaughter(final Result result, final byte [] qualifier,
static int fixupDaughter(final Result result, final byte [] qualifier,
final AssignmentManager assignmentManager,
final CatalogTracker catalogTracker)
throws IOException {
HRegionInfo daughter =
MetaReader.parseHRegionInfoFromCatalogResult(result, qualifier);
if (daughter == null) return;
if (daughter == null) return 0;
if (isDaughterMissing(catalogTracker, daughter)) {
LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
MetaEditor.addDaughter(catalogTracker, daughter, null);
@ -377,9 +380,11 @@ public class ServerShutdownHandler extends EventHandler {
// And assign it.
assignmentManager.assign(daughter, true);
return 1;
} else {
LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
}
return 0;
}
/**

View File

@ -68,7 +68,7 @@ class SplitRequest implements Runnable {
} catch (Exception e) {
try {
LOG.info("Running rollback/cleanup of failed split of " +
parent.getRegionNameAsString() + "; " + e.getMessage());
parent.getRegionNameAsString() + "; " + e.getMessage(), e);
if (st.rollback(this.server, this.server)) {
LOG.info("Successful rollback of failed split of " +
parent.getRegionNameAsString());