HBASE-9698 HBCK does not handle tables with no regions left

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1528989 13f79535-47bb-0310-9956-ffa450edef68
Enis Soztutar 2013-10-03 20:11:58 +00:00
parent 3be068f6da
commit a07ef888e1
4 changed files with 135 additions and 23 deletions
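Context for the change below (a hedged summary plus a reproduction sketch; these are not lines from the commit): previously HBCK built its per-table info only from regions it actually found, so a table whose regions were all gone, while its directory and descriptor still existed in HDFS, was silently skipped. The patch loads descriptors for such tables directly from the filesystem and treats an empty split set as a hole covering the whole keyspace, which -fix can then plug. The sketch assumes an HBaseAdmin handle named admin and uses the 0.96-era client classes already imported elsewhere in this diff; the committed test testTableWithNoRegions further down exercises the same scenario through the test utilities.

// Sketch only: end up with a table directory in HDFS but no regions left in it.
TableName table = TableName.valueOf("tableWithNoRegions");
HTableDescriptor desc = new HTableDescriptor(table);
desc.addFamily(new HColumnDescriptor("fam"));  // a table with no CFs is not checked
admin.createTable(desc);                       // creates a single region over the whole keyspace
// ... now remove that lone region's directory under the table dir in HDFS ...
// Before this patch hbck skipped such a table entirely; with it, the table descriptor is
// picked up from HDFS, the missing region is reported (NOT_IN_HDFS in the test below),
// and "hbck -fix" plugs the resulting hole in the region chain.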

View File

@ -807,6 +807,8 @@ public class HBaseFsck extends Configured implements Tool {
}
}
Path hbaseRoot = FSUtils.getRootDir(getConf());
FileSystem fs = hbaseRoot.getFileSystem(getConf());
// serialized table info gathering.
for (HbckInfo hbi: hbckInfos) {
@ -828,12 +830,10 @@ public class HBaseFsck extends Configured implements Tool {
if (modTInfo == null) {
// only executed once per table.
modTInfo = new TableInfo(tableName);
Path hbaseRoot = FSUtils.getRootDir(getConf());
tablesInfo.put(tableName, modTInfo);
try {
HTableDescriptor htd =
FSTableDescriptors.getTableDescriptorFromFs(hbaseRoot.getFileSystem(getConf()),
hbaseRoot, tableName);
FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
modTInfo.htds.add(htd);
} catch (IOException ioe) {
if (!orphanTableDirs.containsKey(tableName)) {
@ -851,6 +851,8 @@ public class HBaseFsck extends Configured implements Tool {
}
}
loadTableInfosForTablesWithNoRegion();
return tablesInfo;
}
@ -1849,6 +1851,8 @@ public class HBaseFsck extends Configured implements Tool {
tablesInfo.put(tableName, modTInfo);
}
loadTableInfosForTablesWithNoRegion();
for (TableInfo tInfo : tablesInfo.values()) {
TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
if (!tInfo.checkRegionChain(handler)) {
@ -1858,6 +1862,21 @@ public class HBaseFsck extends Configured implements Tool {
return tablesInfo;
}
/** Loads table infos for tables that may not have been included, since there are no
* regions reported for the table, but the table dir is still present in hdfs
*/
private void loadTableInfosForTablesWithNoRegion() throws IOException {
Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
for (HTableDescriptor htd : allTables.values()) {
TableName tableName = htd.getTableName();
if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
TableInfo tableInfo = new TableInfo(tableName);
tableInfo.htds.add(htd);
tablesInfo.put(htd.getTableName(), tableInfo);
}
}
}
/**
* Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
* @return number of file move fixes done to merge regions.
@ -2085,6 +2104,7 @@ public class HBaseFsck extends Configured implements Tool {
* missing from META, HBase doesn't acknowledge the existence of the
* table.
*/
@Override
public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
"First region should start with an empty key. Creating a new " +
@ -2102,6 +2122,7 @@ public class HBaseFsck extends Configured implements Tool {
fixes++;
}
@Override
public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
"Last region should end with an empty key. Creating a new "
@ -2121,6 +2142,7 @@ public class HBaseFsck extends Configured implements Tool {
* There is a hole in the hdfs regions that violates the table integrity
* rules. Create a new empty region that patches the hole.
*/
@Override
public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
errors.reportError(
ERROR_CODE.HOLE_IN_REGION_CHAIN,
@ -2305,6 +2327,12 @@ public class HBaseFsck extends Configured implements Tool {
byte[] prevKey = null;
byte[] problemKey = null;
if (splits.size() == 0) {
// no region for this table
handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
}
for (byte[] key : splits) {
Collection<HbckInfo> ranges = regions.get(key);
if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
@ -2462,7 +2490,7 @@ public class HBaseFsck extends Configured implements Tool {
* @return tables that have not been modified recently
* @throws IOException if an error is encountered
*/
HTableDescriptor[] getTables(AtomicInteger numSkipped) {
List<TableName> tableNames = new ArrayList<TableName>();
long now = System.currentTimeMillis();
@ -2482,18 +2510,17 @@ public class HBaseFsck extends Configured implements Tool {
return getHTableDescriptors(tableNames);
}
HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
HTableDescriptor[] htd = new HTableDescriptor[0];
try {
LOG.info("getHTableDescriptors == tableNames => " + tableNames);
htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames);
} catch (IOException e) {
LOG.debug("Exception getting table descriptors", e);
}
return htd;
}
/**
* Gets the entry in regionInfo corresponding to the given encoded
* region name. If the region has not been seen yet, a new entry is added
@ -2575,11 +2602,13 @@ public class HBaseFsck extends Configured implements Tool {
// comparator to sort KeyValues with latest modtime
final Comparator<Cell> comp = new Comparator<Cell>() {
@Override
public int compare(Cell k1, Cell k2) {
return (int)(k1.getTimestamp() - k2.getTimestamp());
}
};
@Override
public boolean processRow(Result result) throws IOException {
try {
@ -2654,6 +2683,7 @@ public class HBaseFsck extends Configured implements Tool {
this.splitB = splitB;
}
@Override
public boolean equals(Object o) {
boolean superEq = super.equals(o);
if (!superEq) {
@ -2701,6 +2731,7 @@ public class HBaseFsck extends Configured implements Tool {
HRegionInfo hri;
ServerName hsa;
@Override
public String toString() {
return hsa.toString() + ";" + hri.getRegionNameAsString();
}
@ -2729,6 +2760,7 @@ public class HBaseFsck extends Configured implements Tool {
this.deployedOn.add(server);
}
@Override
public synchronized String toString() {
StringBuilder sb = new StringBuilder();
sb.append("{ meta => ");
@ -2960,12 +2992,14 @@ public class HBaseFsck extends Configured implements Tool {
// for use by unit tests to verify which errors were discovered
private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
@Override
public void clear() {
errorTables.clear();
errorList.clear();
errorCount = 0;
}
@Override
public synchronized void reportError(ERROR_CODE errorCode, String message) {
if (errorCode == ERROR_CODE.WRONG_USAGE) {
System.err.println(message);
@ -2980,11 +3014,13 @@ public class HBaseFsck extends Configured implements Tool {
showProgress = 0;
}
@Override
public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
errorTables.add(table);
reportError(errorCode, message);
}
@Override
public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
HbckInfo info) {
errorTables.add(table);
@ -2992,6 +3028,7 @@ public class HBaseFsck extends Configured implements Tool {
reportError(errorCode, reference + " " + message);
}
@Override
public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
HbckInfo info1, HbckInfo info2) {
errorTables.add(table);
@ -3000,6 +3037,7 @@ public class HBaseFsck extends Configured implements Tool {
reportError(errorCode, reference + " " + message);
}
@Override
public synchronized void reportError(String message) {
reportError(ERROR_CODE.UNKNOWN, message);
}
@ -3009,6 +3047,7 @@ public class HBaseFsck extends Configured implements Tool {
* where the actual error would have been reported previously.
* @param message
*/
@Override
public synchronized void report(String message) {
if (! summary) {
System.out.println("ERROR: " + message);
@ -3016,6 +3055,7 @@ public class HBaseFsck extends Configured implements Tool {
showProgress = 0;
}
@Override
public synchronized int summarize() {
System.out.println(Integer.toString(errorCount) +
" inconsistencies detected.");
@ -3028,10 +3068,12 @@ public class HBaseFsck extends Configured implements Tool {
}
}
@Override
public ArrayList<ERROR_CODE> getErrorList() {
return errorList;
}
@Override
public synchronized void print(String message) {
if (!summary) {
System.out.println(message);
@ -3048,6 +3090,7 @@ public class HBaseFsck extends Configured implements Tool {
errorCount = 0;
}
@Override
public synchronized void detail(String message) {
if (details) {
System.out.println(message);
@ -3055,6 +3098,7 @@ public class HBaseFsck extends Configured implements Tool {
showProgress = 0;
}
@Override
public synchronized void progress() {
if (showProgress++ == 10) {
if (!summary) {

View File

@ -50,7 +50,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
@ -60,6 +59,7 @@ import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
@ -1066,7 +1066,7 @@ public class TestHBaseFsck {
// make sure data is in the region files; if it is only in the hlog there is no data loss
TEST_UTIL.getHBaseAdmin().flush(table.getName());
// Mess it up by leaving a giant hole in meta
// Mess it up by deleting hdfs dirs
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
Bytes.toBytes("A"), false, false, true); // don't rm meta
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
@ -1076,6 +1076,9 @@ public class TestHBaseFsck {
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
Bytes.toBytes(""), false, false, true); // don't rm meta
// also remove the table directory in hdfs
deleteTableDir(table);
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
@ -1084,7 +1087,7 @@ public class TestHBaseFsck {
assertEquals(0, hbck.getOverlapGroups(table).size());
// fix hole
doFsck(conf, true); // in 0.92+, meta entries auto create regiondirs
doFsck(conf, true); // detect dangling regions and remove those
// check that hole fixed
assertNoErrors(doFsck(conf,false));
@ -1092,6 +1095,15 @@ public class TestHBaseFsck {
TEST_UTIL.getHBaseAdmin().tableExists(table));
}
public void deleteTableDir(TableName table) throws IOException {
Path rootDir = FSUtils.getRootDir(conf);
FileSystem fs = rootDir.getFileSystem(conf);
Path p = FSUtils.getTableDir(rootDir, table);
HBaseFsck.debugLsr(conf, p);
boolean success = fs.delete(p, true);
LOG.info("Deleted " + p + " sucessfully? " + success);
}
/**
* When the hbase.version file is missing, hbck should fix the fault.
*/
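(The body of that test falls outside this diff's hunks. As a rough illustration only, a hedged sketch of what such a check-and-repair looks like, reusing the doFsck/assertErrors helpers this class already imports; it is not code from this commit.)

// Sketch: delete hbase.version from the root dir, then let hbck detect and recreate it.
Path rootDir = FSUtils.getRootDir(conf);
FileSystem fs = rootDir.getFileSystem(conf);
fs.delete(new Path(rootDir, HConstants.VERSION_FILE_NAME), true);

assertErrors(doFsck(conf, false), new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
doFsck(conf, true);                  // the fix options include recreating the version file
assertNoErrors(doFsck(conf, false)); // clean afterwards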
@ -1749,9 +1761,11 @@ public class TestHBaseFsck {
// inject a fault in the hfcc created.
final FileSystem fs = FileSystem.get(conf);
HBaseFsck hbck = new HBaseFsck(conf, exec) {
@Override
public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
boolean attemptedFirstHFile = false;
@Override
protected void checkHFile(Path p) throws IOException {
if (!attemptedFirstHFile) {
attemptedFirstHFile = true;
@ -1778,9 +1792,11 @@ public class TestHBaseFsck {
// inject a fault in the hfcc created.
final FileSystem fs = FileSystem.get(conf);
HBaseFsck hbck = new HBaseFsck(conf, exec) {
@Override
public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
boolean attemptedFirstFamDir = false;
@Override
protected void checkColFamDir(Path p) throws IOException {
if (!attemptedFirstFamDir) {
attemptedFirstFamDir = true;
@ -1805,9 +1821,11 @@ public class TestHBaseFsck {
// inject a fault in the hfcc created.
final FileSystem fs = FileSystem.get(conf);
HBaseFsck hbck = new HBaseFsck(conf, exec) {
@Override
public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
boolean attemptedFirstRegionDir = false;
@Override
protected void checkRegionDir(Path p) throws IOException {
if (!attemptedFirstRegionDir) {
attemptedFirstRegionDir = true;
@ -1927,61 +1945,75 @@ public class TestHBaseFsck {
static class MockErrorReporter implements ErrorReporter {
static int calledCount = 0;
@Override
public void clear() {
calledCount++;
}
@Override
public void report(String message) {
calledCount++;
}
@Override
public void reportError(String message) {
calledCount++;
}
@Override
public void reportError(ERROR_CODE errorCode, String message) {
calledCount++;
}
@Override
public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
calledCount++;
}
@Override
public void reportError(ERROR_CODE errorCode,
String message, TableInfo table, HbckInfo info) {
calledCount++;
}
@Override
public void reportError(ERROR_CODE errorCode, String message,
TableInfo table, HbckInfo info1, HbckInfo info2) {
calledCount++;
}
@Override
public int summarize() {
return ++calledCount;
}
@Override
public void detail(String details) {
calledCount++;
}
@Override
public ArrayList<ERROR_CODE> getErrorList() {
calledCount++;
return new ArrayList<ERROR_CODE>();
}
@Override
public void progress() {
calledCount++;
}
@Override
public void print(String message) {
calledCount++;
}
@Override
public void resetErrors() {
calledCount++;
}
@Override
public boolean tableHasErrors(TableInfo table) {
calledCount++;
return false;
@ -2014,6 +2046,7 @@ public class TestHBaseFsck {
final CountDownLatch latch = new CountDownLatch(1);
new Thread() {
@Override
public void run() {
TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"),
"testCheckTableLocks");
@ -2071,7 +2104,7 @@ public class TestHBaseFsck {
hbck = doFsck(conf, false);
assertNoErrors(hbck);
}
private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
boolean regionInfoOnly) throws IOException, InterruptedException {
HConnection connection = HConnectionManager.getConnection(conf);
@ -2106,7 +2139,41 @@ public class TestHBaseFsck {
HBaseFsck.debugLsr(conf, p);
}
}
@Test
public void testTableWithNoRegions() throws Exception {
// We might end up with a table that has no regions left in hdfs
// see also testNoHdfsTable()
TableName table =
TableName.valueOf(name.getMethodName());
try {
// create table with one region
HTableDescriptor desc = new HTableDescriptor(table);
HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
desc.addFamily(hcd); // If a table has no CFs it doesn't get checked
TEST_UTIL.getHBaseAdmin().createTable(desc);
tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService);
// Mess it up by deleting the only region's hdfs data (assignment and meta are left intact)
deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false,
false, true);
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
doFsck(conf, true);
// fix hole
doFsck(conf, true);
// check that hole fixed
assertNoErrors(doFsck(conf, false));
} finally {
deleteTable(table);
}
}
@org.junit.Rule
public TestName name = new TestName();
}

View File

@ -21,7 +21,6 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
@ -97,7 +96,8 @@ public class TestOfflineMetaRebuildHole extends OfflineMetaRebuildTestCore {
assertErrors(doFsck(conf, false), new ERROR_CODE[] {
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.HOLE_IN_REGION_CHAIN});
}
}

View File

@ -79,7 +79,7 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore {
// bring up the minicluster
TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default
TEST_UTIL.restartHBaseCluster(3);
ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
LOG.info("Waiting for no more RIT");
@ -93,7 +93,7 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore {
.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition());
Thread.sleep(1000);
}
// Meta still messed up.
assertEquals(1, scanMeta());
HTableDescriptor[] htbls = TEST_UTIL.getHBaseAdmin().listTables();
@ -107,7 +107,8 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore {
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.HOLE_IN_REGION_CHAIN});
}
}