HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1231302 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8d07130c67
commit
ee03d1626d
|
@ -478,6 +478,7 @@ Release 0.92.0 - Unreleased
|
|||
HBASE-5137 MasterFileSystem.splitLog() should abort even if waitOnSafeMode() throws IOException(Ted)
|
||||
HBASE-5121 MajorCompaction may affect scan's correctness (chunhui shen and Lars H)
|
||||
HBASE-5143 Fix config typo in pluggable load balancer factory (Harsh J)
|
||||
HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang)
|
||||
|
||||
TESTS
|
||||
HBASE-4450 test for number of blocks read: to serve as baseline for expected
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.lang.reflect.InvocationTargetException;
|
|||
import java.net.InetAddress;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -68,11 +69,13 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface;
|
|||
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
|
||||
import org.apache.hadoop.hbase.ipc.ProtocolSignature;
|
||||
import org.apache.hadoop.hbase.ipc.RpcServer;
|
||||
import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator;
|
||||
import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
|
||||
|
@ -528,6 +531,10 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
this.balancer.setClusterStatus(getClusterStatus());
|
||||
this.balancer.setMasterServices(this);
|
||||
|
||||
// Fixing up missing daughters if any
|
||||
status.setStatus("Fixing up missing daughters");
|
||||
fixupDaughters(status);
|
||||
|
||||
// Start balancer and meta catalog janitor after meta and regions have
|
||||
// been assigned.
|
||||
status.setStatus("Starting balancer and catalog janitor");
|
||||
|
@ -622,6 +629,39 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
return assigned;
|
||||
}
|
||||
|
||||
void fixupDaughters(final MonitoredTask status) throws IOException {
|
||||
final Map<HRegionInfo, Result> offlineSplitParents =
|
||||
new HashMap<HRegionInfo, Result>();
|
||||
// This visitor collects offline split parents in the .META. table
|
||||
MetaReader.Visitor visitor = new MetaReader.Visitor() {
|
||||
@Override
|
||||
public boolean visit(Result r) throws IOException {
|
||||
if (r == null || r.isEmpty()) return true;
|
||||
HRegionInfo info =
|
||||
MetaReader.parseHRegionInfoFromCatalogResult(
|
||||
r, HConstants.REGIONINFO_QUALIFIER);
|
||||
if (info == null) return true; // Keep scanning
|
||||
if (info.isOffline() && info.isSplit()) {
|
||||
offlineSplitParents.put(info, r);
|
||||
}
|
||||
// Returning true means "keep scanning"
|
||||
return true;
|
||||
}
|
||||
};
|
||||
// Run full scan of .META. catalog table passing in our custom visitor
|
||||
MetaReader.fullScan(this.catalogTracker, visitor);
|
||||
// Now work on our list of found parents. See if any we can clean up.
|
||||
int fixups = 0;
|
||||
for (Map.Entry<HRegionInfo, Result> e : offlineSplitParents.entrySet()) {
|
||||
fixups += ServerShutdownHandler.fixupDaughters(
|
||||
e.getValue(), assignmentManager, catalogTracker);
|
||||
}
|
||||
if (fixups != 0) {
|
||||
LOG.info("Scanned the catalog and fixed up " + fixups +
|
||||
" missing daughter region(s)");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expire a server if we find it is one of the online servers set.
|
||||
* @param sn ServerName to check.
|
||||
|
|
|
@ -342,31 +342,34 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
* Check that daughter regions are up in .META. and if not, add them.
|
||||
* @param hris All regions for this server in meta.
|
||||
* @param result The contents of the parent row in .META.
|
||||
* @return the number of daughters missing and fixed
|
||||
* @throws IOException
|
||||
*/
|
||||
static void fixupDaughters(final Result result,
|
||||
public static int fixupDaughters(final Result result,
|
||||
final AssignmentManager assignmentManager,
|
||||
final CatalogTracker catalogTracker)
|
||||
throws IOException {
|
||||
fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
|
||||
catalogTracker);
|
||||
fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
|
||||
catalogTracker);
|
||||
int fixedA = fixupDaughter(result, HConstants.SPLITA_QUALIFIER,
|
||||
assignmentManager, catalogTracker);
|
||||
int fixedB = fixupDaughter(result, HConstants.SPLITB_QUALIFIER,
|
||||
assignmentManager, catalogTracker);
|
||||
return fixedA + fixedB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check individual daughter is up in .META.; fixup if it's not.
|
||||
* @param result The contents of the parent row in .META.
|
||||
* @param qualifier Which daughter to check for.
|
||||
* @return 1 if the daughter is missing and fixed. Otherwise 0
|
||||
* @throws IOException
|
||||
*/
|
||||
static void fixupDaughter(final Result result, final byte [] qualifier,
|
||||
static int fixupDaughter(final Result result, final byte [] qualifier,
|
||||
final AssignmentManager assignmentManager,
|
||||
final CatalogTracker catalogTracker)
|
||||
throws IOException {
|
||||
HRegionInfo daughter =
|
||||
MetaReader.parseHRegionInfoFromCatalogResult(result, qualifier);
|
||||
if (daughter == null) return;
|
||||
if (daughter == null) return 0;
|
||||
if (isDaughterMissing(catalogTracker, daughter)) {
|
||||
LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
|
||||
MetaEditor.addDaughter(catalogTracker, daughter, null);
|
||||
|
@ -377,9 +380,11 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
|
||||
// And assign it.
|
||||
assignmentManager.assign(daughter, true);
|
||||
return 1;
|
||||
} else {
|
||||
LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -68,7 +68,7 @@ class SplitRequest implements Runnable {
|
|||
} catch (Exception e) {
|
||||
try {
|
||||
LOG.info("Running rollback/cleanup of failed split of " +
|
||||
parent.getRegionNameAsString() + "; " + e.getMessage());
|
||||
parent.getRegionNameAsString() + "; " + e.getMessage(), e);
|
||||
if (st.rollback(this.server, this.server)) {
|
||||
LOG.info("Successful rollback of failed split of " +
|
||||
parent.getRegionNameAsString());
|
||||
|
|
Loading…
Reference in New Issue