HBASE-5599 [hbck] handle NO_VERSION_FILE and SHOULD_NOT_BE_DEPLOYED inconsistencies (fulin wang)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1324881 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Hsieh 2012-04-11 17:20:19 +00:00
parent 4e9a9e0585
commit a2ff41653a
4 changed files with 138 additions and 9 deletions

View File

@ -117,7 +117,7 @@ extends RetriesExhaustedException {
addrs.append("servers with issues: ");
Set<String> uniqAddr = new HashSet<String>();
uniqAddr.addAll(hostnamePort);
for(String addr : uniqAddr) {
addrs.append(addr).append(", ");
}

View File

@ -165,6 +165,7 @@ public class HBaseFsck {
private boolean fixHdfsHoles = false; // fix fs holes?
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
private boolean fixHdfsOrphans = false; // fix fs orphans (region dirs missing .regioninfo)
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
// limit fixes to listed tables, if empty attempt to fix all
private List<byte[]> tablesToFix = new ArrayList<byte[]>();
@ -1008,6 +1009,15 @@ public class HBaseFsck {
if (!foundVersionFile) {
errors.reportError(ERROR_CODE.NO_VERSION_FILE,
"Version file does not exist in root dir " + rootDir);
if (shouldFixVersionFile()) {
LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
+ " file.");
setShouldRerun();
FSUtils.setVersion(fs, rootDir, conf.getInt(
HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), conf.getInt(
HConstants.VERSION_FILE_WRITE_ATTEMPTS,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
}
}
// level 1: <HBASE_DIR>/*
@ -1361,10 +1371,14 @@ public class HBaseFsck {
+ " not deployed on any region server.");
tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
} else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, "UNHANDLED CASE:" +
" Region " + descriptiveName + " should not be deployed according " +
errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
"Region " + descriptiveName + " should not be deployed according " +
"to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
// TODO test and handle this case.
if (shouldFixAssignments()) {
errors.print("Trying to close the region " + descriptiveName);
setShouldRerun();
HBaseFsckRepair.fixMultiAssignment(admin, hbi.metaEntry, hbi.deployedOn);
}
} else if (inMeta && inHdfs && isMultiplyDeployed) {
errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
+ " is listed in META on region server " + hbi.metaEntry.regionServer
@ -2739,6 +2753,14 @@ public class HBaseFsck {
return fixHdfsOrphans;
}
/**
 * Enables or disables repair of a missing hbase.version file.
 *
 * @param shouldFix true to have hbck attempt the repair, false to only report
 */
public void setFixVersionFile(boolean shouldFix) {
  this.fixVersionFile = shouldFix;
}
/**
 * @return true if hbck has been asked to repair a missing hbase.version file
 */
public boolean shouldFixVersionFile() {
  return this.fixVersionFile;
}
/**
* @param mm maximum number of regions to merge into a single region.
*/
@ -2797,9 +2819,11 @@ public class HBaseFsck {
System.err.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
System.err.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
System.err.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
System.err.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
System.err.println("");
System.err.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans -fixHdfsOverlaps");
System.err.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile");
System.err.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");
Runtime.getRuntime().exit(-2);
@ -2862,6 +2886,8 @@ public class HBaseFsck {
fsck.setFixHdfsOrphans(true);
} else if (cmd.equals("-fixHdfsOverlaps")) {
fsck.setFixHdfsOverlaps(true);
} else if (cmd.equals("-fixVersionFile")) {
fsck.setFixVersionFile(true);
} else if (cmd.equals("-repair")) {
// this attempts to merge overlapping hdfs regions, needs testing
// under load
@ -2870,6 +2896,7 @@ public class HBaseFsck {
fsck.setFixMeta(true);
fsck.setFixAssignments(true);
fsck.setFixHdfsOverlaps(true);
fsck.setFixVersionFile(true);
} else if (cmd.equals("-repairHoles")) {
// this will make all missing hdfs regions available but may lose data
fsck.setFixHdfsHoles(true);
@ -2919,6 +2946,7 @@ public class HBaseFsck {
fsck.setFixMeta(false);
fsck.setFixHdfsHoles(false);
fsck.setFixHdfsOverlaps(false);
fsck.setFixVersionFile(false);
fsck.errors.resetErrors();
code = fsck.onlineHbck();
}

View File

@ -23,6 +23,7 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
@ -36,7 +37,6 @@ import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Delete;
@ -55,9 +56,15 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -73,6 +80,7 @@ public class TestHBaseFsck {
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private final static Configuration conf = TEST_UTIL.getConfiguration();
private final static byte[] FAM = Bytes.toBytes("fam");
private final static int REGION_ONLINE_TIMEOUT = 300;
// for the instance, reset every test run
private HTable tbl;
@ -851,8 +859,100 @@ public class TestHBaseFsck {
fail("Should have failed with IOException");
}
/**
 * Verifies that hbck reports a missing hbase.version file and that a run
 * with fixes enabled repairs it, so a follow-up check reports no errors.
 */
@Test
public void testNoVersionFile() throws Exception {
  // Remove hbase.version from the HBase root directory to provoke the fault.
  Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));
  FileSystem rootFs = hbaseRoot.getFileSystem(conf);
  Path missingVersionFile = new Path(hbaseRoot, HConstants.VERSION_FILE_NAME);
  rootFs.delete(missingVersionFile, true);

  // A check-only pass must flag the missing version file.
  ERROR_CODE[] expectedErrors = { ERROR_CODE.NO_VERSION_FILE };
  HBaseFsck checkOnly = doFsck(conf, false);
  assertErrors(checkOnly, expectedErrors);

  // Run again with fixes enabled to repair the missing file.
  doFsck(conf, true);

  // After the repair a check-only pass must come back clean.
  HBaseFsck recheck = doFsck(conf, false);
  assertNoErrors(recheck);
}
/**
 * Verifies that hbck reports SHOULD_NOT_BE_DEPLOYED when a region of a
 * disabled table is open on a region server, and that a run with fixes
 * enabled clears the inconsistency.
 *
 * The test creates a multi-region table in META, disables it, opens one of
 * its regions directly on the first region server, waits for ZooKeeper to
 * show the region as opened, and then runs hbck in check, fix, and re-check
 * mode.
 */
@Test
public void testRegionShouldNotDeployed() throws Exception {
String table = "tableRegionShouldNotDeployed";
try {
LOG.info("Starting testRegionShouldNotDeployed.");
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
assertTrue(cluster.waitForActiveAndReadyMaster());
// Create a ZKW to use in the test
ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
// Resolve the fully qualified HBase root directory.
FileSystem filesystem = FileSystem.get(conf);
Path rootdir = filesystem.makeQualified(new Path(conf
.get(HConstants.HBASE_DIR)));
byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
HTableDescriptor htdDisabled = new HTableDescriptor(Bytes.toBytes(table));
htdDisabled.addFamily(new HColumnDescriptor(FAM));
// Write the .tableinfo
FSTableDescriptors
.createTableDescriptor(filesystem, rootdir, htdDisabled);
// Register the table's regions in META using the split keys above.
List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
// Let's just assign everything to first RS
HRegionServer hrs = cluster.getRegionServer(0);
ServerName serverName = hrs.getServerName();
// create region files.
// NOTE(review): presumably the disable/enable cycle is what gets the
// regions, so far only listed in META, assigned and created on disk - confirm.
TEST_UTIL.getHBaseAdmin().disableTable(table);
TEST_UTIL.getHBaseAdmin().enableTable(table);
// Disable the table again, then open one of its regions directly on the
// region server so that a region of a disabled table is deployed.
TEST_UTIL.getHBaseAdmin().disableTable(table);
HRegionInfo region = disabledRegions.remove(0);
ZKAssign.createNodeOffline(zkw, region, serverName);
hrs.openRegion(region);
// Poll ZooKeeper every 100 ms until the region's node reports
// RS_ZK_REGION_OPENED, giving up after REGION_ONLINE_TIMEOUT polls.
int iTimes = 0;
while (true) {
RegionTransitionData rtd = ZKAssign.getData(zkw,
region.getEncodedName());
if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
break;
}
Thread.sleep(100);
iTimes++;
if (iTimes >= REGION_ONLINE_TIMEOUT) {
break;
}
}
// Fails if the loop ended because of the timeout rather than the open event.
assertTrue(iTimes < REGION_ONLINE_TIMEOUT);
// A check-only pass must report the wrongly deployed region.
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
// fix this fault
doFsck(conf, true);
// check result
assertNoErrors(doFsck(conf, false));
} finally {
// NOTE(review): the table is re-enabled before deleteTable; presumably the
// deleteTable helper expects an enabled table - confirm.
TEST_UTIL.getHBaseAdmin().enableTable(table);
deleteTable(table);
}
}
// JUnit rule applied to every test in this class.
// NOTE(review): presumably checks for resources leaked by a test - confirm
// against ResourceCheckerJUnitRule.
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}

View File

@ -29,12 +29,12 @@ import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
public class HbckTestingUtil {
public static HBaseFsck doFsck(Configuration conf, boolean fix) throws Exception {
return doFsck(conf, fix, fix, fix, fix,fix);
return doFsck(conf, fix, fix, fix, fix,fix, fix);
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
boolean fixHdfsOrphans) throws Exception {
boolean fixHdfsOrphans, boolean fixVersionFile) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect();
fsck.setDisplayFullReport(); // i.e. -details
@ -44,6 +44,7 @@ public class HbckTestingUtil {
fsck.setFixHdfsHoles(fixHdfsHoles);
fsck.setFixHdfsOverlaps(fixHdfsOverlaps);
fsck.setFixHdfsOrphans(fixHdfsOrphans);
fsck.setFixVersionFile(fixVersionFile);
fsck.onlineHbck();
return fsck;
}