HBASE-7777. HBCK check for lingering split parents should check for child regions

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1444311 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Enis Soztutar 2013-02-09 01:44:36 +00:00
parent e3b688719c
commit b947807291
3 changed files with 288 additions and 131 deletions

View File

@ -241,7 +241,7 @@ public class HBaseFsck extends Configured implements Tool {
* When initially looking at HDFS, we attempt to find any orphaned data.
*/
private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
/**
@ -400,7 +400,7 @@ public class HBaseFsck extends Configured implements Tool {
if (!checkMetaOnly) {
reportTablesInFlux();
}
// get regions according to what is online on each RegionServer
loadDeployedRegions();
@ -798,19 +798,21 @@ public class HBaseFsck extends Configured implements Tool {
if (!orphanTableDirs.containsKey(tableName)) {
LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
//should only report once for each table
errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
"Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
Set<String> columns = new HashSet<String>();
orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
}
}
}
modTInfo.addRegionInfo(hbi);
if (!hbi.isSkipChecks()) {
modTInfo.addRegionInfo(hbi);
}
}
return tablesInfo;
}
/**
* To get the column family list according to the column family dirs
* @param columns
@ -828,7 +830,7 @@ public class HBaseFsck extends Configured implements Tool {
}
return columns;
}
/**
* To fabricate a .tableinfo file with following contents<br>
* 1. the correct tablename <br>
@ -846,7 +848,7 @@ public class HBaseFsck extends Configured implements Tool {
FSTableDescriptors.createTableDescriptor(htd, getConf(), true);
return true;
}
/**
* To fix orphan table by creating a .tableinfo file under tableDir <br>
* 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
@ -1653,6 +1655,18 @@ public class HBaseFsck extends Configured implements Tool {
// ========== Cases where the region is in META =============
} else if (inMeta && inHdfs && !isDeployed && splitParent) {
// check whether this is an actual error, or just transient state where parent
// is not cleaned
if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
// check that split daughters are there
HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
if (infoA != null && infoB != null) {
// we already processed or will process daughters. Move on, nothing to see here.
hbi.setSkipChecks(true);
return;
}
}
errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
+ descriptiveName + " is a split parent in META, in HDFS, "
+ "and not deployed on any region server. This could be transient.");
@ -1783,7 +1797,9 @@ public class HBaseFsck extends Configured implements Tool {
modTInfo.addServer(server);
}
modTInfo.addRegionInfo(hbi);
if (!hbi.isSkipChecks()) {
modTInfo.addRegionInfo(hbi);
}
tablesInfo.put(tableName, modTInfo);
}
@ -2547,7 +2563,8 @@ public class HBaseFsck extends Configured implements Tool {
|| hri.isMetaRegion() || hri.isRootRegion())) {
return true;
}
MetaEntry m = new MetaEntry(hri, sn, ts);
PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
HbckInfo hbInfo = new HbckInfo(m);
HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
if (previous != null) {
@ -2586,11 +2603,19 @@ public class HBaseFsck extends Configured implements Tool {
static class MetaEntry extends HRegionInfo {
ServerName regionServer; // server hosting this region
long modTime; // timestamp of most recent modification metadata
HRegionInfo splitA, splitB; //split daughters
/**
 * Creates a META entry that carries no split-daughter information.
 * Delegates to the five-argument constructor with both daughters set to {@code null}.
 *
 * @param rinfo region info this entry is copied from
 * @param regionServer server hosting this region
 * @param modTime timestamp of most recent modification metadata
 */
public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
this(rinfo, regionServer, modTime, null, null);
}
/**
 * Creates a META entry, optionally recording the split daughters of the region.
 *
 * @param rinfo region info passed to the {@code HRegionInfo} super constructor
 * @param regionServer server hosting this region
 * @param modTime timestamp of most recent modification metadata
 * @param splitA first split daughter; may be {@code null} (the three-argument
 *          constructor passes {@code null})
 * @param splitB second split daughter; may be {@code null}
 */
public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
HRegionInfo splitA, HRegionInfo splitB) {
super(rinfo);
this.regionServer = regionServer;
this.modTime = modTime;
this.splitA = splitA;
this.splitB = splitB;
}
public boolean equals(Object o) {
@ -2639,6 +2664,7 @@ public class HBaseFsck extends Configured implements Tool {
private HdfsEntry hdfsEntry = null; // info in HDFS
private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
private boolean skipChecks = false; // whether to skip further checks to this region info.
HbckInfo(MetaEntry metaEntry) {
this.metaEntry = metaEntry;
@ -2756,6 +2782,14 @@ public class HBaseFsck extends Configured implements Tool {
}
return hdfsEntry.hri;
}
/**
 * Sets the flag recording whether further checks should be skipped for this region info.
 *
 * @param skipChecks new value of the flag
 */
public void setSkipChecks(boolean skipChecks) {
this.skipChecks = skipChecks;
}
/**
 * @return the current value of the skip-checks flag for this region info
 */
public boolean isSkipChecks() {
return skipChecks;
}
}
final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
@ -3232,15 +3266,15 @@ public class HBaseFsck extends Configured implements Tool {
/**
 * @return the current value of the {@code fixHdfsHoles} flag
 */
boolean shouldFixHdfsHoles() {
return fixHdfsHoles;
}
/**
 * Sets the {@code fixTableOrphans} flag.
 *
 * @param shouldFix new value of the flag
 */
public void setFixTableOrphans(boolean shouldFix) {
fixTableOrphans = shouldFix;
}
/**
 * @return the current value of the {@code fixTableOrphans} flag
 */
boolean shouldFixTableOrphans() {
return fixTableOrphans;
}
/**
 * Sets the {@code fixHdfsOverlaps} flag.
 *
 * @param shouldFix new value of the flag
 */
public void setFixHdfsOverlaps(boolean shouldFix) {
fixHdfsOverlaps = shouldFix;
}

View File

@ -207,7 +207,6 @@ public class TestEndToEndSplitTransaction {
HTable table;
byte[] tableName, family;
HBaseAdmin admin;
HTable metaTable;
HRegionServer rs;
RegionSplitter(HTable table) throws IOException {
@ -216,7 +215,6 @@ public class TestEndToEndSplitTransaction {
this.family = table.getTableDescriptor().getFamiliesKeys().iterator().next();
admin = TEST_UTIL.getHBaseAdmin();
rs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
metaTable = new HTable(conf, HConstants.META_TABLE_NAME);
}
public void run() {
@ -247,14 +245,14 @@ public class TestEndToEndSplitTransaction {
addData(start);
addData(mid);
flushAndBlockUntilDone(region.getRegionName());
compactAndBlockUntilDone(region.getRegionName());
flushAndBlockUntilDone(admin, rs, region.getRegionName());
compactAndBlockUntilDone(admin, rs, region.getRegionName());
log("Initiating region split for:" + region.getRegionNameAsString());
try {
admin.split(region.getRegionName(), splitPoint);
//wait until the split is complete
blockUntilRegionSplit(50000, region.getRegionName(), true);
blockUntilRegionSplit(conf, 50000, region.getRegionName(), true);
} catch (NotServingRegionException ex) {
//ignore
@ -262,10 +260,6 @@ public class TestEndToEndSplitTransaction {
}
} catch (Throwable ex) {
this.ex = ex;
} finally {
if (metaTable != null) {
IOUtils.closeQuietly(metaTable);
}
}
}
@ -278,106 +272,6 @@ public class TestEndToEndSplitTransaction {
}
table.flushCommits();
}
/**
 * Requests a flush of the given region through {@code admin} and then polls the
 * region server's flush queue until it reports no pending entries.
 *
 * @param regionName name of the region to flush
 * @throws IOException if the flush request fails
 * @throws InterruptedException if interrupted while polling
 */
void flushAndBlockUntilDone(byte[] regionName) throws IOException, InterruptedException {
  final String printableName = Bytes.toStringBinary(regionName);
  log("flushing region: " + printableName);
  admin.flush(regionName);
  log("blocking until flush is complete: " + printableName);
  // Fixed pause before the first poll -- presumably so the request has time to be queued.
  Threads.sleepWithoutInterrupt(500);
  while (true) {
    if (rs.cacheFlusher.getFlushQueueSize() <= 0) {
      return;
    }
    Threads.sleep(50);
  }
}
/**
 * Requests a major compaction of the given region through {@code admin} and then
 * polls the region server's compaction queue until it reports no pending entries.
 *
 * @param regionName name of the region to compact
 * @throws IOException if the compaction request fails
 * @throws InterruptedException if interrupted while polling
 */
void compactAndBlockUntilDone(byte[] regionName) throws IOException,
    InterruptedException {
  final String printableName = Bytes.toStringBinary(regionName);
  log("Compacting region: " + printableName);
  admin.majorCompact(regionName);
  log("blocking until compaction is complete: " + printableName);
  // Fixed pause before the first poll -- presumably so the request has time to be queued.
  Threads.sleepWithoutInterrupt(500);
  while (true) {
    if (rs.compactSplitThread.getCompactionQueueSize() <= 0) {
      return;
    }
    Threads.sleep(50);
  }
}
/**
 * Blocks until the region split is complete in META and, if requested, until the
 * region server has opened the daughters.
 *
 * <p>The parent row is polled until it is marked as a split parent, the row lookup
 * yields nothing, or {@code timeout} elapses. When {@code waitForDaughters} is set,
 * the remaining time budget is then spent waiting for each daughter to show up in
 * META and to be opened by the region server.
 *
 * @param timeout overall time budget in milliseconds
 * @param regionName name of the parent region being split
 * @param waitForDaughters whether to also wait for both daughters
 * @throws IOException if a META lookup fails, or if {@code waitForDaughters} is set
 *           but no daughters were discovered (previously this surfaced as a
 *           {@code NullPointerException})
 * @throws InterruptedException if interrupted while waiting
 */
void blockUntilRegionSplit(long timeout, final byte[] regionName, boolean waitForDaughters)
    throws IOException, InterruptedException {
  long start = System.currentTimeMillis();
  log("blocking until region is split:" + Bytes.toStringBinary(regionName));
  HRegionInfo daughterA = null, daughterB = null;

  while (System.currentTimeMillis() - start < timeout) {
    Result result = getRegionRow(regionName);
    if (result == null) {
      break;
    }
    HRegionInfo region = HRegionInfo.getHRegionInfo(result);
    if (region == null) {
      // The row carries no region info; treat it like a missing row instead of
      // dereferencing null below.
      break;
    }
    if (region.isSplitParent()) {
      log("found parent region: " + region.toString());
      PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result);
      daughterA = pair.getFirst();
      daughterB = pair.getSecond();
      break;
    }
    sleep(100);
  }

  //if we are here, this means the region split is complete or timed out
  if (waitForDaughters) {
    if (daughterA == null || daughterB == null) {
      // Timed out, the parent row vanished, or META holds no daughter info:
      // fail with a descriptive error rather than an NPE.
      throw new IOException("Split daughters of region " + Bytes.toStringBinary(regionName)
          + " were not found in META within " + timeout + " ms");
    }
    long rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsInMeta(rem, daughterA.getRegionName());

    rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsInMeta(rem, daughterB.getRegionName());

    rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsOpenedByRS(rem, daughterA.getRegionName());

    rem = timeout - (System.currentTimeMillis() - start);
    blockUntilRegionIsOpenedByRS(rem, daughterB.getRegionName());
  }
}
/**
 * Fetches the row keyed by {@code regionName} from {@code metaTable}.
 *
 * @param regionName row key to look up
 * @return whatever {@code metaTable.get} returns for that key
 * @throws IOException if the lookup fails
 */
Result getRegionRow(byte[] regionName) throws IOException {
  return metaTable.get(new Get(regionName));
}
/**
 * Polls META until the given region has a row whose region info is present and not
 * offline, or until {@code timeout} milliseconds have elapsed. Returns silently in
 * both cases.
 *
 * @param timeout time budget in milliseconds; a non-positive value skips polling
 * @param regionName name of the region to look for
 * @throws IOException if a META lookup fails
 * @throws InterruptedException if interrupted while waiting
 */
void blockUntilRegionIsInMeta(long timeout, byte[] regionName)
    throws IOException, InterruptedException {
  log("blocking until region is in META: " + Bytes.toStringBinary(regionName));
  final long begin = System.currentTimeMillis();
  while (System.currentTimeMillis() - begin < timeout) {
    Result row = getRegionRow(regionName);
    HRegionInfo info = (row == null) ? null : HRegionInfo.getHRegionInfo(row);
    if (info != null && !info.isOffline()) {
      log("found region in META: " + Bytes.toStringBinary(regionName));
      return;
    }
    sleep(10);
  }
}
/**
 * Polls the region server's online regions for {@code tableName} until one of them
 * has the given region name, or until {@code timeout} milliseconds have elapsed.
 * Returns silently in both cases.
 *
 * @param timeout time budget in milliseconds; a non-positive value skips polling
 * @param regionName name of the region expected to come online
 * @throws IOException declared for callers; propagated from the region server lookup
 * @throws InterruptedException if interrupted while waiting
 */
void blockUntilRegionIsOpenedByRS(long timeout, byte[] regionName)
    throws IOException, InterruptedException {
  log("blocking until region is opened by region server: " + Bytes.toStringBinary(regionName));
  final long begin = System.currentTimeMillis();
  while (System.currentTimeMillis() - begin < timeout) {
    List<HRegion> online = rs.getOnlineRegions(tableName);
    for (HRegion candidate : online) {
      boolean sameName = Bytes.compareTo(candidate.getRegionName(), regionName) == 0;
      if (sameName) {
        log("found region open in RS: " + Bytes.toStringBinary(regionName));
        return;
      }
    }
    sleep(10);
  }
}
}
/**
@ -484,5 +378,118 @@ public class TestEndToEndSplitTransaction {
LOG.info(msg);
}
/* some utility methods for split tests */
/**
 * Requests a flush of the given region and then polls the region server's flush
 * queue until it reports no pending entries.
 *
 * @param admin admin handle used to request the flush
 * @param rs region server whose flush queue is polled
 * @param regionName name of the region to flush
 * @throws IOException if the flush request fails
 * @throws InterruptedException if interrupted while polling
 */
public static void flushAndBlockUntilDone(HBaseAdmin admin, HRegionServer rs, byte[] regionName)
    throws IOException, InterruptedException {
  final String printableName = Bytes.toStringBinary(regionName);
  log("flushing region: " + printableName);
  admin.flush(regionName);
  log("blocking until flush is complete: " + printableName);
  // Fixed pause before the first poll -- presumably so the request has time to be queued.
  Threads.sleepWithoutInterrupt(500);
  while (true) {
    if (rs.cacheFlusher.getFlushQueueSize() <= 0) {
      return;
    }
    Threads.sleep(50);
  }
}
/**
 * Requests a major compaction of the given region and then polls the region
 * server's compaction queue until it reports no pending entries.
 *
 * @param admin admin handle used to request the compaction
 * @param rs region server whose compaction queue is polled
 * @param regionName name of the region to compact
 * @throws IOException if the compaction request fails
 * @throws InterruptedException if interrupted while polling
 */
public static void compactAndBlockUntilDone(HBaseAdmin admin, HRegionServer rs, byte[] regionName)
    throws IOException, InterruptedException {
  final String printableName = Bytes.toStringBinary(regionName);
  log("Compacting region: " + printableName);
  admin.majorCompact(regionName);
  log("blocking until compaction is complete: " + printableName);
  // Fixed pause before the first poll -- presumably so the request has time to be queued.
  Threads.sleepWithoutInterrupt(500);
  while (true) {
    if (rs.compactSplitThread.getCompactionQueueSize() <= 0) {
      return;
    }
    Threads.sleep(50);
  }
}
/**
 * Blocks until the region split is complete in META and, if requested, until both
 * daughters are in META and can be read from.
 *
 * <p>The parent row is polled until it is marked as a split parent, the row lookup
 * yields nothing, or {@code timeout} elapses. When {@code waitForDaughters} is set,
 * the remaining time budget is then spent waiting for each daughter to show up in
 * META and to be opened. The META table handle opened here is always closed before
 * returning.
 *
 * @param conf configuration used to open the META table and the user table
 * @param timeout overall time budget in milliseconds
 * @param regionName name of the parent region being split
 * @param waitForDaughters whether to also wait for both daughters
 * @throws IOException if a META lookup fails, or if {@code waitForDaughters} is set
 *           but no daughters were discovered (previously this surfaced as a
 *           {@code NullPointerException})
 * @throws InterruptedException if interrupted while waiting
 */
public static void blockUntilRegionSplit(Configuration conf, long timeout,
    final byte[] regionName, boolean waitForDaughters)
    throws IOException, InterruptedException {
  long start = System.currentTimeMillis();
  log("blocking until region is split:" + Bytes.toStringBinary(regionName));
  HRegionInfo daughterA = null, daughterB = null;
  HTable metaTable = new HTable(conf, HConstants.META_TABLE_NAME);

  try {
    while (System.currentTimeMillis() - start < timeout) {
      Result result = getRegionRow(metaTable, regionName);
      if (result == null) {
        break;
      }
      HRegionInfo region = HRegionInfo.getHRegionInfo(result);
      if (region == null) {
        // The row carries no region info; treat it like a missing row instead of
        // dereferencing null below.
        break;
      }
      if (region.isSplitParent()) {
        log("found parent region: " + region.toString());
        PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result);
        daughterA = pair.getFirst();
        daughterB = pair.getSecond();
        break;
      }
      Threads.sleep(100);
    }

    //if we are here, this means the region split is complete or timed out
    if (waitForDaughters) {
      if (daughterA == null || daughterB == null) {
        // Timed out, the parent row vanished, or META holds no daughter info:
        // fail with a descriptive error rather than an NPE.
        throw new IOException("Split daughters of region " + Bytes.toStringBinary(regionName)
            + " were not found in META within " + timeout + " ms");
      }
      long rem = timeout - (System.currentTimeMillis() - start);
      blockUntilRegionIsInMeta(metaTable, rem, daughterA);

      rem = timeout - (System.currentTimeMillis() - start);
      blockUntilRegionIsInMeta(metaTable, rem, daughterB);

      rem = timeout - (System.currentTimeMillis() - start);
      blockUntilRegionIsOpened(conf, rem, daughterA);

      rem = timeout - (System.currentTimeMillis() - start);
      blockUntilRegionIsOpened(conf, rem, daughterB);
    }
  } finally {
    IOUtils.closeQuietly(metaTable);
  }
}
/**
 * Fetches the row keyed by {@code regionName} from the given table.
 *
 * @param metaTable table to read from
 * @param regionName row key to look up
 * @return whatever {@code metaTable.get} returns for that key
 * @throws IOException if the lookup fails
 */
public static Result getRegionRow(HTable metaTable, byte[] regionName) throws IOException {
  return metaTable.get(new Get(regionName));
}
/**
 * Polls the given META table until the region has a row whose region info is
 * present and not offline, or until {@code timeout} milliseconds have elapsed.
 * Returns silently in both cases.
 *
 * @param metaTable table handle used for the lookups
 * @param timeout time budget in milliseconds; a non-positive value skips polling
 * @param hri region to look for; must not be {@code null}
 * @throws IOException if a META lookup fails
 * @throws InterruptedException if interrupted while waiting
 */
public static void blockUntilRegionIsInMeta(HTable metaTable, long timeout, HRegionInfo hri)
    throws IOException, InterruptedException {
  log("blocking until region is in META: " + hri.getRegionNameAsString());
  final long begin = System.currentTimeMillis();
  while (System.currentTimeMillis() - begin < timeout) {
    Result row = getRegionRow(metaTable, hri.getRegionName());
    HRegionInfo info = (row == null) ? null : HRegionInfo.getHRegionInfo(row);
    if (info != null && !info.isOffline()) {
      log("found region in META: " + hri.getRegionNameAsString());
      return;
    }
    Threads.sleep(10);
  }
}
/**
 * Repeatedly issues a {@code Get} for the region's start key against the region's
 * table until one succeeds, or until {@code timeout} milliseconds have elapsed.
 * Returns silently in both cases. The table handle opened here is always closed.
 *
 * @param conf configuration used to open the table
 * @param timeout time budget in milliseconds; a non-positive value skips probing
 * @param hri region expected to become readable; must not be {@code null}
 * @throws IOException if the table handle cannot be created
 * @throws InterruptedException if interrupted while waiting between probes
 */
public static void blockUntilRegionIsOpened(Configuration conf, long timeout, HRegionInfo hri)
    throws IOException, InterruptedException {
  log("blocking until region is opened for reading:" + hri.getRegionNameAsString());
  final long begin = System.currentTimeMillis();
  HTable probeTable = new HTable(conf, hri.getTableName());

  try {
    Get probe = new Get(hri.getStartKey());
    boolean readable = false;
    while (!readable && System.currentTimeMillis() - begin < timeout) {
      try {
        probeTable.get(probe);
        readable = true;
      } catch (IOException ex) {
        // A failed read is deliberately ignored: wait some more and probe again.
        Threads.sleep(10);
      }
    }
  } finally {
    IOUtils.closeQuietly(probeTable);
  }
}
}

View File

@ -37,6 +37,7 @@ import java.util.Map.Entry;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -71,11 +72,12 @@ import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
@ -402,7 +404,7 @@ public class TestHBaseFsck {
deleteTable(table);
}
}
@Test
public void testHbckFixOrphanTable() throws Exception {
String table = "tableInfo";
@ -411,31 +413,31 @@ public class TestHBaseFsck {
try {
setupTable(table);
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table );
fs = hbaseTableDir.getFileSystem(conf);
FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
tableinfo = status.getPath();
fs.rename(tableinfo, new Path("/.tableinfo"));
//to report error if .tableinfo is missing.
HBaseFsck hbck = doFsck(conf, false);
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
// fix OrphanTable with default .tableinfo (htd not yet cached on master)
hbck = doFsck(conf, true);
assertNoErrors(hbck);
status = null;
status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
assertNotNull(status);
HTableDescriptor htd = admin.getTableDescriptor(table.getBytes());
htd.setValue("NOT_DEFAULT", "true");
admin.disableTable(table);
admin.modifyTable(table.getBytes(), htd);
admin.enableTable(table);
fs.delete(status.getPath(), true);
// fix OrphanTable with cache
htd = admin.getTableDescriptor(table.getBytes()); // warms up cached htd on master
hbck = doFsck(conf, true);
@ -1194,6 +1196,7 @@ public class TestHBaseFsck {
@Test
public void testLingeringSplitParent() throws Exception {
String table = "testLingeringSplitParent";
HTable meta = null;
try {
setupTable(table);
assertEquals(ROWKEYS.length, countRows());
@ -1207,7 +1210,7 @@ public class TestHBaseFsck {
Bytes.toBytes("C"), true, true, false);
// Create a new meta entry to fake it as a split parent.
HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
HRegionInfo hri = location.getRegionInfo();
HRegionInfo a = new HRegionInfo(tbl.getTableName(),
@ -1260,6 +1263,119 @@ public class TestHBaseFsck {
assertEquals(ROWKEYS.length, countRows());
} finally {
deleteTable(table);
IOUtils.closeQuietly(meta);
}
}
/**
 * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
 * valid cases where the daughters are there.
 *
 * <p>Performs a regular split, waits for it to complete, and then checks that hbck
 * reports no errors while the split parent's META row is still present.
 */
@Test
public void testValidLingeringSplitParent() throws Exception {
  // Use a table name unique to this test so it cannot collide with
  // testLingeringSplitParent if that test fails to clean up.
  String table = "testValidLingeringSplitParent";
  HTable meta = null;
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // make sure data in regions, if in hlog only there is no data loss
    TEST_UTIL.getHBaseAdmin().flush(table);
    HRegionLocation location = tbl.getRegionLocation("B");

    meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
    HRegionInfo hri = location.getRegionInfo();

    // do a regular split
    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
    byte[] regionName = hri.getRegionName();
    admin.split(regionName, Bytes.toBytes("BM"));
    TestEndToEndSplitTransaction.blockUntilRegionSplit(
        TEST_UTIL.getConfiguration(), 60000, regionName, true);

    // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
    // for some time until children references are deleted. HBCK erroneously sees this as
    // overlapping regions
    HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, null);
    assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported

    // assert that the split META entry is still there.
    Get get = new Get(hri.getRegionName());
    Result result = meta.get(get);
    assertNotNull(result);
    assertNotNull(HRegionInfo.getHRegionInfo(result));

    assertEquals(ROWKEYS.length, countRows());

    // assert that we still have the split regions
    // SPLITS + 1 is # regions pre-split; the split adds one more.
    // Expected value goes first so a failure message reads correctly.
    assertEquals(SPLITS.length + 1 + 1, tbl.getStartKeys().length);
    assertNoErrors(doFsck(conf, false));
  } finally {
    deleteTable(table);
    IOUtils.closeQuietly(meta);
  }
}
/**
 * Split crashed after write to META finished for the parent region, but
 * failed to write daughters (pre HBASE-7721 codebase)
 *
 * <p>Simulated by performing a regular split and then undeploying both daughters
 * and deleting their META rows while leaving their data in HDFS.
 */
@Test
public void testSplitDaughtersNotInMeta() throws Exception {
  String table = "testSplitdaughtersNotInMeta";
  HTable meta = null;
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // make sure data in regions, if in hlog only there is no data loss
    TEST_UTIL.getHBaseAdmin().flush(table);
    HRegionLocation location = tbl.getRegionLocation("B");

    meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
    HRegionInfo hri = location.getRegionInfo();

    // do a regular split
    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
    byte[] regionName = hri.getRegionName();
    admin.split(regionName, Bytes.toBytes("BM"));
    TestEndToEndSplitTransaction.blockUntilRegionSplit(
        TEST_UTIL.getConfiguration(), 60000, regionName, true);

    PairOfSameType<HRegionInfo> daughters =
        HRegionInfo.getDaughterRegions(meta.get(new Get(regionName)));
    // Fail with a clear assertion instead of an NPE if META has no daughter info.
    assertNotNull(daughters.getFirst());
    assertNotNull(daughters.getSecond());

    // Delete daughter regions from meta, but not hdfs, unassign it.
    Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
    undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst());
    undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond());

    meta.delete(new Delete(daughters.getFirst().getRegionName()));
    meta.delete(new Delete(daughters.getSecond().getRegionName()));
    meta.flushCommits();

    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); //no LINGERING_SPLIT_PARENT

    // now fix it. The fix should not revert the region split, but add daughters to META
    hbck = doFsck(conf, true, true, false, false, false, false, false, false, null);
    assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});

    // assert that the split META entry is still there.
    Get get = new Get(hri.getRegionName());
    Result result = meta.get(get);
    assertNotNull(result);
    assertNotNull(HRegionInfo.getHRegionInfo(result));

    assertEquals(ROWKEYS.length, countRows());

    // assert that we still have the split regions
    // SPLITS + 1 is # regions pre-split; the split adds one more.
    // Expected value goes first so a failure message reads correctly.
    assertEquals(SPLITS.length + 1 + 1, tbl.getStartKeys().length);
    assertNoErrors(doFsck(conf, false)); //should be fixed by now
  } finally {
    deleteTable(table);
    IOUtils.closeQuietly(meta);
  }
}