HBASE-5360 [uberhbck] Add options for how to handle offline split parents.
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1351169 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e796407da8
commit
8ee16d9aad
|
@ -73,6 +73,7 @@ import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
|
||||||
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
|
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
|
||||||
import org.apache.hadoop.hbase.client.Put;
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
import org.apache.hadoop.hbase.client.Result;
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
|
import org.apache.hadoop.hbase.client.RowMutations;
|
||||||
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
|
||||||
import org.apache.hadoop.hbase.io.hfile.HFile;
|
import org.apache.hadoop.hbase.io.hfile.HFile;
|
||||||
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
||||||
|
@ -172,6 +173,7 @@ public class HBaseFsck {
|
||||||
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
|
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
|
||||||
private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
|
private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
|
||||||
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
|
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
|
||||||
|
private boolean fixSplitParents = false; // fix lingering split parents
|
||||||
|
|
||||||
// limit checking/fixes to listed tables, if empty attempt to check/fix all
|
// limit checking/fixes to listed tables, if empty attempt to check/fix all
|
||||||
// -ROOT- and .META. are always checked
|
// -ROOT- and .META. are always checked
|
||||||
|
@ -1181,6 +1183,29 @@ public class HBaseFsck {
|
||||||
LOG.info("Deleted " + hi.metaEntry.getRegionNameAsString() + " from META" );
|
LOG.info("Deleted " + hi.metaEntry.getRegionNameAsString() + " from META" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset the split parent region info in meta table
|
||||||
|
*/
|
||||||
|
private void resetSplitParent(HbckInfo hi) throws IOException {
|
||||||
|
RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
|
||||||
|
Delete d = new Delete(hi.metaEntry.getRegionName());
|
||||||
|
d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
|
||||||
|
d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
|
||||||
|
mutations.add(d);
|
||||||
|
|
||||||
|
Put p = new Put(hi.metaEntry.getRegionName());
|
||||||
|
HRegionInfo hri = new HRegionInfo(hi.metaEntry);
|
||||||
|
hri.setOffline(false);
|
||||||
|
hri.setSplit(false);
|
||||||
|
p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||||
|
Writables.getBytes(hri));
|
||||||
|
mutations.add(p);
|
||||||
|
|
||||||
|
meta.mutateRow(mutations);
|
||||||
|
meta.flushCommits();
|
||||||
|
LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This backwards-compatibility wrapper for permanently offlining a region
|
* This backwards-compatibility wrapper for permanently offlining a region
|
||||||
* that should not be alive. If the region server does not support the
|
* that should not be alive. If the region server does not support the
|
||||||
|
@ -1320,9 +1345,6 @@ public class HBaseFsck {
|
||||||
}
|
}
|
||||||
if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
|
if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
|
||||||
return;
|
return;
|
||||||
} else if (inMeta && inHdfs && !isDeployed && splitParent) {
|
|
||||||
LOG.warn("Region " + descriptiveName + " is a split parent in META and in HDFS");
|
|
||||||
return;
|
|
||||||
} else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
|
} else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
|
||||||
LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
|
LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
|
||||||
"tabled that is not deployed");
|
"tabled that is not deployed");
|
||||||
|
@ -1379,6 +1401,14 @@ public class HBaseFsck {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========== Cases where the region is in META =============
|
// ========== Cases where the region is in META =============
|
||||||
|
} else if (inMeta && inHdfs && !isDeployed && splitParent) {
|
||||||
|
errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
|
||||||
|
+ descriptiveName + " is a split parent in META, in HDFS, "
|
||||||
|
+ "and not deployed on any region server. This could be transient.");
|
||||||
|
if (shouldFixSplitParents()) {
|
||||||
|
setShouldRerun();
|
||||||
|
resetSplitParent(hbi);
|
||||||
|
}
|
||||||
} else if (inMeta && !inHdfs && !isDeployed) {
|
} else if (inMeta && !inHdfs && !isDeployed) {
|
||||||
errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
|
errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
|
||||||
+ descriptiveName + " found in META, but not in HDFS "
|
+ descriptiveName + " found in META, but not in HDFS "
|
||||||
|
@ -2505,7 +2535,7 @@ public class HBaseFsck {
|
||||||
MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
|
MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
|
||||||
FIRST_REGION_STARTKEY_NOT_EMPTY, DUPE_STARTKEYS,
|
FIRST_REGION_STARTKEY_NOT_EMPTY, DUPE_STARTKEYS,
|
||||||
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
|
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
|
||||||
ORPHAN_HDFS_REGION
|
ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT
|
||||||
}
|
}
|
||||||
public void clear();
|
public void clear();
|
||||||
public void report(String message);
|
public void report(String message);
|
||||||
|
@ -2908,6 +2938,14 @@ public class HBaseFsck {
|
||||||
return sidelineBigOverlaps;
|
return sidelineBigOverlaps;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setFixSplitParents(boolean shouldFix) {
|
||||||
|
fixSplitParents = shouldFix;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean shouldFixSplitParents() {
|
||||||
|
return fixSplitParents;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param mm maximum number of regions to merge into a single region.
|
* @param mm maximum number of regions to merge into a single region.
|
||||||
*/
|
*/
|
||||||
|
@ -2972,6 +3010,7 @@ public class HBaseFsck {
|
||||||
System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
|
System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
|
||||||
System.err.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
|
System.err.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
|
||||||
System.err.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
|
System.err.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
|
||||||
|
System.err.println(" -fixSplitParents Try to force offline split parents to be online.");
|
||||||
System.err.println("");
|
System.err.println("");
|
||||||
System.err.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
|
System.err.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
|
||||||
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
|
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
|
||||||
|
@ -3046,6 +3085,8 @@ public class HBaseFsck {
|
||||||
fsck.setFixVersionFile(true);
|
fsck.setFixVersionFile(true);
|
||||||
} else if (cmd.equals("-sidelineBigOverlaps")) {
|
} else if (cmd.equals("-sidelineBigOverlaps")) {
|
||||||
fsck.setSidelineBigOverlaps(true);
|
fsck.setSidelineBigOverlaps(true);
|
||||||
|
} else if (cmd.equals("-fixSplitParents")) {
|
||||||
|
fsck.setFixSplitParents(true);
|
||||||
} else if (cmd.equals("-repair")) {
|
} else if (cmd.equals("-repair")) {
|
||||||
// this attempts to merge overlapping hdfs regions, needs testing
|
// this attempts to merge overlapping hdfs regions, needs testing
|
||||||
// under load
|
// under load
|
||||||
|
@ -3056,6 +3097,7 @@ public class HBaseFsck {
|
||||||
fsck.setFixHdfsOverlaps(true);
|
fsck.setFixHdfsOverlaps(true);
|
||||||
fsck.setFixVersionFile(true);
|
fsck.setFixVersionFile(true);
|
||||||
fsck.setSidelineBigOverlaps(true);
|
fsck.setSidelineBigOverlaps(true);
|
||||||
|
fsck.setFixSplitParents(false);
|
||||||
} else if (cmd.equals("-repairHoles")) {
|
} else if (cmd.equals("-repairHoles")) {
|
||||||
// this will make all missing hdfs regions available but may lose data
|
// this will make all missing hdfs regions available but may lose data
|
||||||
fsck.setFixHdfsHoles(true);
|
fsck.setFixHdfsHoles(true);
|
||||||
|
@ -3064,6 +3106,7 @@ public class HBaseFsck {
|
||||||
fsck.setFixAssignments(true);
|
fsck.setFixAssignments(true);
|
||||||
fsck.setFixHdfsOverlaps(false);
|
fsck.setFixHdfsOverlaps(false);
|
||||||
fsck.setSidelineBigOverlaps(false);
|
fsck.setSidelineBigOverlaps(false);
|
||||||
|
fsck.setFixSplitParents(false);
|
||||||
} else if (cmd.equals("-maxOverlapsToSideline")) {
|
} else if (cmd.equals("-maxOverlapsToSideline")) {
|
||||||
if (i == args.length - 1) {
|
if (i == args.length - 1) {
|
||||||
System.err.println("-maxOverlapsToSideline needs a numeric value argument.");
|
System.err.println("-maxOverlapsToSideline needs a numeric value argument.");
|
||||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.HRegionInfo;
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.HRegionLocation;
|
||||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||||
import org.apache.hadoop.hbase.MediumTests;
|
import org.apache.hadoop.hbase.MediumTests;
|
||||||
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||||
|
@ -52,6 +53,7 @@ import org.apache.hadoop.hbase.RegionTransition;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.client.AdminProtocol;
|
import org.apache.hadoop.hbase.client.AdminProtocol;
|
||||||
import org.apache.hadoop.hbase.client.Delete;
|
import org.apache.hadoop.hbase.client.Delete;
|
||||||
|
import org.apache.hadoop.hbase.client.Get;
|
||||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||||
import org.apache.hadoop.hbase.client.HConnection;
|
import org.apache.hadoop.hbase.client.HConnection;
|
||||||
import org.apache.hadoop.hbase.client.HTable;
|
import org.apache.hadoop.hbase.client.HTable;
|
||||||
|
@ -1012,6 +1014,85 @@ public class TestHBaseFsck {
|
||||||
deleteTable(table2);
|
deleteTable(table2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* A split parent in meta, in hdfs, and not deployed
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testLingeringSplitParent() throws Exception {
|
||||||
|
String table = "testLingeringSplitParent";
|
||||||
|
try {
|
||||||
|
setupTable(table);
|
||||||
|
assertEquals(ROWKEYS.length, countRows());
|
||||||
|
|
||||||
|
// make sure data in regions, if in hlog only there is no data loss
|
||||||
|
TEST_UTIL.getHBaseAdmin().flush(table);
|
||||||
|
HRegionLocation location = tbl.getRegionLocation("B");
|
||||||
|
|
||||||
|
// Delete one region from meta, but not hdfs, unassign it.
|
||||||
|
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
|
||||||
|
Bytes.toBytes("C"), true, true, false);
|
||||||
|
|
||||||
|
// Create a new meta entry to fake it as a split parent.
|
||||||
|
HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
|
||||||
|
HRegionInfo hri = location.getRegionInfo();
|
||||||
|
|
||||||
|
HRegionInfo a = new HRegionInfo(tbl.getTableName(),
|
||||||
|
Bytes.toBytes("B"), Bytes.toBytes("BM"));
|
||||||
|
HRegionInfo b = new HRegionInfo(tbl.getTableName(),
|
||||||
|
Bytes.toBytes("BM"), Bytes.toBytes("C"));
|
||||||
|
Put p = new Put(hri.getRegionName());
|
||||||
|
hri.setOffline(true);
|
||||||
|
hri.setSplit(true);
|
||||||
|
p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||||
|
Writables.getBytes(hri));
|
||||||
|
p.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER,
|
||||||
|
Writables.getBytes(a));
|
||||||
|
p.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER,
|
||||||
|
Writables.getBytes(b));
|
||||||
|
meta.put(p);
|
||||||
|
meta.flushCommits();
|
||||||
|
TEST_UTIL.getHBaseAdmin().flush(HConstants.META_TABLE_NAME);
|
||||||
|
|
||||||
|
HBaseFsck hbck = doFsck(conf, false);
|
||||||
|
assertErrors(hbck, new ERROR_CODE[] {
|
||||||
|
ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||||
|
|
||||||
|
// regular repair cannot fix lingering split parent
|
||||||
|
hbck = doFsck(conf, true);
|
||||||
|
assertErrors(hbck, new ERROR_CODE[] {
|
||||||
|
ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||||
|
assertFalse(hbck.shouldRerun());
|
||||||
|
hbck = doFsck(conf, false);
|
||||||
|
assertErrors(hbck, new ERROR_CODE[] {
|
||||||
|
ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||||
|
|
||||||
|
// fix lingering split parent
|
||||||
|
hbck = new HBaseFsck(conf);
|
||||||
|
hbck.connect();
|
||||||
|
hbck.setDisplayFullReport(); // i.e. -details
|
||||||
|
hbck.setTimeLag(0);
|
||||||
|
hbck.setFixSplitParents(true);
|
||||||
|
hbck.onlineHbck();
|
||||||
|
assertTrue(hbck.shouldRerun());
|
||||||
|
|
||||||
|
Get get = new Get(hri.getRegionName());
|
||||||
|
Result result = meta.get(get);
|
||||||
|
assertTrue(result.getColumn(HConstants.CATALOG_FAMILY,
|
||||||
|
HConstants.SPLITA_QUALIFIER).isEmpty());
|
||||||
|
assertTrue(result.getColumn(HConstants.CATALOG_FAMILY,
|
||||||
|
HConstants.SPLITB_QUALIFIER).isEmpty());
|
||||||
|
TEST_UTIL.getHBaseAdmin().flush(HConstants.META_TABLE_NAME);
|
||||||
|
|
||||||
|
// fix other issues
|
||||||
|
doFsck(conf, true);
|
||||||
|
|
||||||
|
// check that all are fixed
|
||||||
|
assertNoErrors(doFsck(conf, false));
|
||||||
|
assertEquals(ROWKEYS.length, countRows());
|
||||||
|
} finally {
|
||||||
|
deleteTable(table);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@org.junit.Rule
|
@org.junit.Rule
|
||||||
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
||||||
|
|
Loading…
Reference in New Issue