HBASE-10955 HBCK leaves the region in masters in-memory RegionStates if region hdfs dir is lost

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1587772 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Enis Soztutar 2014-04-16 01:42:55 +00:00
parent 9fcb2ae865
commit 585a4f7778
3 changed files with 44 additions and 18 deletions

View File

@ -516,6 +516,7 @@ public class HBaseFsck extends Configured {
public byte [] metaLastKey; public byte [] metaLastKey;
public byte [] storesFirstKey; public byte [] storesFirstKey;
public byte [] storesLastKey; public byte [] storesLastKey;
@Override
public String toString () { public String toString () {
return "regionName=" + Bytes.toStringBinary(regionName) + return "regionName=" + Bytes.toStringBinary(regionName) +
"\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) + "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
@ -1864,7 +1865,7 @@ public class HBaseFsck extends Configured {
// these problems from META. // these problems from META.
if (shouldFixAssignments()) { if (shouldFixAssignments()) {
errors.print("Trying to fix unassigned region..."); errors.print("Trying to fix unassigned region...");
closeRegion(hbi);// Close region will cause RS to abort. undeployRegions(hbi);
} }
if (shouldFixMeta()) { if (shouldFixMeta()) {
// wait for it to complete // wait for it to complete
@ -2021,13 +2022,13 @@ public class HBaseFsck extends Configured {
// rename the contained into the container. // rename the contained into the container.
FileSystem fs = targetRegionDir.getFileSystem(getConf()); FileSystem fs = targetRegionDir.getFileSystem(getConf());
FileStatus[] dirs = null; FileStatus[] dirs = null;
try { try {
dirs = fs.listStatus(contained.getHdfsRegionDir()); dirs = fs.listStatus(contained.getHdfsRegionDir());
} catch (FileNotFoundException fnfe) { } catch (FileNotFoundException fnfe) {
// region we are attempting to merge in is not present! Since this is a merge, there is // region we are attempting to merge in is not present! Since this is a merge, there is
// no harm skipping this region if it does not exist. // no harm skipping this region if it does not exist.
if (!fs.exists(contained.getHdfsRegionDir())) { if (!fs.exists(contained.getHdfsRegionDir())) {
LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
+ " is missing. Assuming already sidelined or moved."); + " is missing. Assuming already sidelined or moved.");
} else { } else {
sidelineRegionDir(fs, contained); sidelineRegionDir(fs, contained);
@ -2037,7 +2038,7 @@ public class HBaseFsck extends Configured {
if (dirs == null) { if (dirs == null) {
if (!fs.exists(contained.getHdfsRegionDir())) { if (!fs.exists(contained.getHdfsRegionDir())) {
LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
+ " already sidelined."); + " already sidelined.");
} else { } else {
sidelineRegionDir(fs, contained); sidelineRegionDir(fs, contained);
@ -2087,20 +2088,20 @@ public class HBaseFsck extends Configured {
static class WorkItemOverlapMerge implements Callable<Void> { static class WorkItemOverlapMerge implements Callable<Void> {
private TableIntegrityErrorHandler handler; private TableIntegrityErrorHandler handler;
Collection<HbckInfo> overlapgroup; Collection<HbckInfo> overlapgroup;
WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) { WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
this.handler = handler; this.handler = handler;
this.overlapgroup = overlapgroup; this.overlapgroup = overlapgroup;
} }
@Override @Override
public Void call() throws Exception { public Void call() throws Exception {
handler.handleOverlapGroup(overlapgroup); handler.handleOverlapGroup(overlapgroup);
return null; return null;
} }
}; };
/** /**
* Maintain information about a particular table. * Maintain information about a particular table.
*/ */
@ -2317,7 +2318,7 @@ public class HBaseFsck extends Configured {
HTableDescriptor htd = getTableInfo().getHTD(); HTableDescriptor htd = getTableInfo().getHTD();
HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey); HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
LOG.info("Plugged hold by creating new empty region: "+ newRegion + " " +region); LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
fixes++; fixes++;
} }
@ -2329,7 +2330,7 @@ public class HBaseFsck extends Configured {
* Cases: * Cases:
* - Clean regions that overlap * - Clean regions that overlap
* - Only .oldlogs regions (can't find start/stop range, or figure out) * - Only .oldlogs regions (can't find start/stop range, or figure out)
* *
* This is basically threadsafe, except for the fixer increment in mergeOverlaps. * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
*/ */
@Override @Override
@ -2610,11 +2611,11 @@ public class HBaseFsck extends Configured {
private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey) private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
throws IOException { throws IOException {
// we parallelize overlap handler for the case we have lots of groups to fix. We can // we parallelize overlap handler for the case we have lots of groups to fix. We can
// safely assume each group is independent. // safely assume each group is independent.
List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size()); List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
List<Future<Void>> rets; List<Future<Void>> rets;
for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) { for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
// //
merges.add(new WorkItemOverlapMerge(overlap, handler)); merges.add(new WorkItemOverlapMerge(overlap, handler));
} }
try { try {
@ -2858,7 +2859,7 @@ public class HBaseFsck extends Configured {
} else { } else {
throw new IOException("Two entries in hbase:meta are same " + previous); throw new IOException("Two entries in hbase:meta are same " + previous);
} }
PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result); PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
for (HRegionInfo mergeRegion : new HRegionInfo[] { for (HRegionInfo mergeRegion : new HRegionInfo[] {
mergeRegions.getFirst(), mergeRegions.getSecond() }) { mergeRegions.getFirst(), mergeRegions.getSecond() }) {
@ -3845,7 +3846,7 @@ public class HBaseFsck extends Configured {
return hbck.getRetCode(); return hbck.getRetCode();
} }
}; };
public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException, public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
ServiceException, InterruptedException { ServiceException, InterruptedException {

View File

@ -40,7 +40,6 @@ import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
/** /**
@ -187,12 +186,10 @@ public class HBaseFsckRepair {
HRegionInfo hri, HTableDescriptor htd) throws IOException { HRegionInfo hri, HTableDescriptor htd) throws IOException {
// Create HRegion // Create HRegion
Path root = FSUtils.getRootDir(conf); Path root = FSUtils.getRootDir(conf);
HRegion region = HRegion.createHRegion(hri, root, conf, htd); HRegion region = HRegion.createHRegion(hri, root, conf, htd, null);
HLog hlog = region.getLog();
// Close the new region to flush to disk. Close log file too. // Close the new region to flush to disk. Close log file too.
region.close(); region.close();
hlog.closeAndDelete();
return region; return region;
} }
} }

View File

@ -1481,6 +1481,34 @@ public class TestHBaseFsck {
} }
} }
/**
 * Creates a table, deletes the HDFS directory of its first region (the one
 * whose start key is the empty byte array), then verifies that hbck first
 * reports NOT_IN_HDFS and that a repair run fixes the resulting hole so a
 * subsequent check is clean.
 */
@Test(timeout=120000)
public void testRegionDeployedNotInHdfs() throws Exception {
  TableName table = TableName.valueOf("testSingleRegionDeployedNotInHdfs");
  try {
    setupTable(table);
    TEST_UTIL.getHBaseAdmin().flush(table.getName());

    // Mess it up by deleting only the region dir from HDFS: the meta entry
    // and the deployment remain, so the region is deployed but NOT_IN_HDFS.
    deleteRegion(conf, tbl.getTableDescriptor(),
        HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
        false, true);

    HBaseFsck fsck = doFsck(conf, false);
    assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });

    // Repair the hole, then confirm a fresh check reports no errors.
    doFsck(conf, true);
    assertNoErrors(doFsck(conf, false));
  } finally {
    deleteTable(table);
  }
}
/** /**
* This creates and fixes a bad table with missing last region -- hole in meta and data missing in * This creates and fixes a bad table with missing last region -- hole in meta and data missing in
* the fs. * the fs.