HBASE-10955 HBCK leaves the region in the master's in-memory RegionStates if the region's HDFS dir is lost

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1587772 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Enis Soztutar 2014-04-16 01:42:55 +00:00
parent 9fcb2ae865
commit 585a4f7778
3 changed files with 44 additions and 18 deletions

View File

@ -516,6 +516,7 @@ public class HBaseFsck extends Configured {
public byte [] metaLastKey;
public byte [] storesFirstKey;
public byte [] storesLastKey;
@Override
public String toString () {
return "regionName=" + Bytes.toStringBinary(regionName) +
"\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
@ -1864,7 +1865,7 @@ public class HBaseFsck extends Configured {
// these problems from META.
if (shouldFixAssignments()) {
errors.print("Trying to fix unassigned region...");
closeRegion(hbi);// Close region will cause RS to abort.
undeployRegions(hbi);
}
if (shouldFixMeta()) {
// wait for it to complete
@ -2021,13 +2022,13 @@ public class HBaseFsck extends Configured {
// rename the contained into the container.
FileSystem fs = targetRegionDir.getFileSystem(getConf());
FileStatus[] dirs = null;
try {
try {
dirs = fs.listStatus(contained.getHdfsRegionDir());
} catch (FileNotFoundException fnfe) {
// region we are attempting to merge in is not present! Since this is a merge, there is
// no harm skipping this region if it does not exist.
if (!fs.exists(contained.getHdfsRegionDir())) {
LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
+ " is missing. Assuming already sidelined or moved.");
} else {
sidelineRegionDir(fs, contained);
@ -2037,7 +2038,7 @@ public class HBaseFsck extends Configured {
if (dirs == null) {
if (!fs.exists(contained.getHdfsRegionDir())) {
LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
+ " already sidelined.");
} else {
sidelineRegionDir(fs, contained);
@ -2087,20 +2088,20 @@ public class HBaseFsck extends Configured {
/**
 * Unit of work that hands one overlap group to a {@link TableIntegrityErrorHandler} when it is
 * executed as a {@link Callable}.
 */
static class WorkItemOverlapMerge implements Callable<Void> {
  /** The overlap group that is passed to the handler by {@link #call()}. */
  Collection<HbckInfo> overlapgroup;
  /** Handler whose {@code handleOverlapGroup} is invoked by {@link #call()}. */
  private TableIntegrityErrorHandler handler;

  /**
   * @param group the overlap group to process
   * @param errorHandler the handler that will receive the group
   */
  WorkItemOverlapMerge(Collection<HbckInfo> group, TableIntegrityErrorHandler errorHandler) {
    this.overlapgroup = group;
    this.handler = errorHandler;
  }

  /**
   * Passes the stored overlap group to the stored handler.
   *
   * @return always {@code null}
   * @throws Exception anything thrown by the handler is propagated unchanged
   */
  @Override
  public Void call() throws Exception {
    this.handler.handleOverlapGroup(this.overlapgroup);
    return null;
  }
}
/**
* Maintain information about a particular table.
*/
@ -2317,7 +2318,7 @@ public class HBaseFsck extends Configured {
HTableDescriptor htd = getTableInfo().getHTD();
HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region);
LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
fixes++;
}
@ -2329,7 +2330,7 @@ public class HBaseFsck extends Configured {
* Cases:
* - Clean regions that overlap
* - Only .oldlogs regions (can't find start/stop range, or figure out)
*
*
* This is basically threadsafe, except for the fixer increment in mergeOverlaps.
*/
@Override
@ -2610,11 +2611,11 @@ public class HBaseFsck extends Configured {
private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
throws IOException {
// we parallelize overlap handler for the case we have lots of groups to fix. We can
// safely assume each group is independent.
// safely assume each group is independent.
List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
List<Future<Void>> rets;
for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
//
//
merges.add(new WorkItemOverlapMerge(overlap, handler));
}
try {
@ -2858,7 +2859,7 @@ public class HBaseFsck extends Configured {
} else {
throw new IOException("Two entries in hbase:meta are same " + previous);
}
PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
for (HRegionInfo mergeRegion : new HRegionInfo[] {
mergeRegions.getFirst(), mergeRegions.getSecond() }) {
@ -3845,7 +3846,7 @@ public class HBaseFsck extends Configured {
return hbck.getRetCode();
}
};
public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
ServiceException, InterruptedException {

View File

@ -40,7 +40,6 @@ import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.zookeeper.KeeperException;
/**
@ -187,12 +186,10 @@ public class HBaseFsckRepair {
HRegionInfo hri, HTableDescriptor htd) throws IOException {
// Create HRegion
Path root = FSUtils.getRootDir(conf);
HRegion region = HRegion.createHRegion(hri, root, conf, htd);
HLog hlog = region.getLog();
HRegion region = HRegion.createHRegion(hri, root, conf, htd, null);
// Close the new region to flush to disk. Close log file too.
region.close();
hlog.closeAndDelete();
return region;
}
}

View File

@ -1481,6 +1481,34 @@ public class TestHBaseFsck {
}
}
/**
* Sets up a table, flushes it, deletes the directory of the region delimited by
* {@code HConstants.EMPTY_START_ROW} and {@code "A"} (presumably the table's first region), and
* checks that hbck reports exactly {@code ERROR_CODE.NOT_IN_HDFS}. It then runs hbck a second
* time to repair the problem and verifies that a final hbck run reports no errors. The table is
* deleted afterwards regardless of the outcome.
* <p>
* NOTE(review): the three trailing boolean arguments of {@code deleteRegion} presumably mean
* "do not unassign, do not remove the meta row, do remove the HDFS data" -- confirm against the
* helper's signature.
*/
@Test(timeout=120000)
public void testRegionDeployedNotInHdfs() throws Exception {
// NOTE(review): the table name says "testSingleRegion..." while the method name does not;
// the mismatch looks like a copy/paste leftover -- confirm it is intentional.
TableName table =
TableName.valueOf("testSingleRegionDeployedNotInHdfs");
try {
setupTable(table);
// Flush the table before damaging it.
TEST_UTIL.getHBaseAdmin().flush(table.getName());
// Mess it up by deleting region dir
deleteRegion(conf, tbl.getTableDescriptor(),
HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
false, true);
// First pass: hbck must flag the region as missing from HDFS and nothing else.
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
// Second pass: run hbck again, this time asking it to repair what it finds
// (presumably the second argument of doFsck enables fixing -- confirm).
doFsck(conf, true);
// Third pass: a fresh hbck run must now come back clean.
assertNoErrors(doFsck(conf, false));
} finally {
deleteTable(table);
}
}
/**
* This creates and fixes a bad table with missing last region -- hole in meta and data missing in
* the fs.