HBASE-3695 Some improvements to Hbck to test the entire region chain in Meta and provide better error reporting

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1099566 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-05-04 19:28:45 +00:00
parent 4e4ded4cde
commit 3d6b1485bf
4 changed files with 340 additions and 120 deletions

View File

@ -285,6 +285,8 @@ Release 0.90.3 - Unreleased
HBASE-3580 Remove RS from DeadServer when new instance checks in
HBASE-2470 Add Scan.setTimeRange() support in Shell (Harsh J Chouraria)
HBASE-3805 Log RegionState that are processed too late in the master
HBASE-3695 Some improvements to Hbck to test the entire region chain in
Meta and provide better error reporting (Marc Limotte)
TASKS
HBASE-3748 Add rolling of thrift/rest daemons to graceful_stop.sh script

View File

@ -20,14 +20,7 @@
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@ -69,6 +62,8 @@ import org.apache.zookeeper.KeeperException;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import static org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
/**
* Check consistency among the in-memory states of the master and the
* region server(s) and the state of data in HDFS.
@ -101,7 +96,7 @@ public class HBaseFsck {
private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
private int numThreads = MAX_NUM_THREADS;
ThreadPoolExecutor executor; // threads to retrieve data from regionservers
ThreadPoolExecutor executor; // threads to retrieve data from regionservers
/**
* Constructor
@ -140,20 +135,21 @@ public class HBaseFsck {
tablesInfo.clear();
emptyRegionInfoQualifiers.clear();
disabledTables.clear();
errors.clear();
// get a list of all regions from the master. This involves
// scanning the META table
if (!recordRootRegion()) {
// Will remove later if we can fix it
errors.reportError("Encountered fatal error. Exitting...");
errors.reportError("Encountered fatal error. Exiting...");
return -1;
}
getMetaEntries();
// Check if .META. is found only once and on the right place
// Check if .META. is found only once and in the right place
if (!checkMetaEntries()) {
// Will remove later if we can fix it
errors.reportError("Encountered fatal error. Exitting...");
errors.reportError("Encountered fatal error. Exiting...");
return -1;
}
@ -225,6 +221,10 @@ public class HBaseFsck {
return errors.summarize();
}
public ErrorReporter getErrors() {
return errors;
}
/**
* Load the list of disabled tables in ZK into local set.
* @throws ZooKeeperConnectionException
@ -281,7 +281,8 @@ public class HBaseFsck {
// verify that version file exists
if (!foundVersionFile) {
errors.reportError("Version file does not exist in root dir " + rootDir);
errors.reportError(ERROR_CODE.NO_VERSION_FILE,
"Version file does not exist in root dir " + rootDir);
}
// level 1: <HBASE_DIR>/*
@ -315,7 +316,8 @@ public class HBaseFsck {
// Check if Root region is valid and existing
if (rootLocation == null || rootLocation.getRegionInfo() == null ||
rootLocation.getHostname() == null) {
errors.reportError("Root Region or some of its attributes is null.");
errors.reportError(ERROR_CODE.NULL_ROOT_REGION,
"Root Region or some of its attributes are null.");
return false;
}
ServerName sn;
@ -428,7 +430,7 @@ public class HBaseFsck {
LOG.debug("Region " + descriptiveName + " offline, ignoring.");
return;
} else if (recentlyModified) {
LOG.info("Region " + descriptiveName + " was recently modified -- skipping");
LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
return;
}
// ========== Cases where the region is not in META =============
@ -436,24 +438,29 @@ public class HBaseFsck {
// We shouldn't have record of this region at all then!
assert false : "Entry for region with no data";
} else if (!inMeta && !inHdfs && isDeployed) {
errors.reportError("Region " + descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
"deployed on " + Joiner.on(", ").join(hbi.deployedOn));
errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
+ descriptiveName + ", key=" + key + ", not on HDFS or in META but " +
"deployed on " + Joiner.on(", ").join(hbi.deployedOn));
} else if (!inMeta && inHdfs && !isDeployed) {
errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " +
"or deployed on any region server.");
errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
+ descriptiveName + " on HDFS, but not listed in META " +
"or deployed on any region server.");
} else if (!inMeta && inHdfs && isDeployed) {
errors.reportError("Region " + descriptiveName + " not in META, but deployed on " +
Joiner.on(", ").join(hbi.deployedOn));
errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
+ " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
// ========== Cases where the region is in META =============
} else if (inMeta && !inHdfs && !isDeployed) {
errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS " +
"or deployed on any region server.");
errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
+ descriptiveName + " found in META, but not in HDFS "
+ "or deployed on any region server.");
} else if (inMeta && !inHdfs && isDeployed) {
errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS, " +
"and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
+ " found in META, but not in HDFS, " +
"and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
} else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
errors.reportError("Region " + descriptiveName + " not deployed on any region server.");
errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
+ " not deployed on any region server.");
// If we are trying to fix the errors
if (shouldFix()) {
errors.print("Trying to fix unassigned region...");
@ -461,12 +468,14 @@ public class HBaseFsck {
HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry);
}
} else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
errors.reportError("Region " + descriptiveName + " should not be deployed according " +
"to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, "Region "
+ descriptiveName + " should not be deployed according " +
"to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
} else if (inMeta && inHdfs && isMultiplyDeployed) {
errors.reportError("Region " + descriptiveName + " is listed in META on region server " +
hbi.metaEntry.regionServer + " but is multiply assigned to region servers " +
Joiner.on(", ").join(hbi.deployedOn));
errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
+ " is listed in META on region server " + hbi.metaEntry.regionServer
+ " but is multiply assigned to region servers " +
Joiner.on(", ").join(hbi.deployedOn));
// If we are trying to fix the errors
if (shouldFix()) {
errors.print("Trying to fix assignment error...");
@ -474,9 +483,10 @@ public class HBaseFsck {
HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
}
} else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
errors.reportError("Region " + descriptiveName + " listed in META on region server " +
hbi.metaEntry.regionServer + " but found on region server " +
hbi.deployedOn.get(0));
errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
+ descriptiveName + " listed in META on region server " +
hbi.metaEntry.regionServer + " but found on region server " +
hbi.deployedOn.get(0));
// If we are trying to fix the errors
if (shouldFix()) {
errors.print("Trying to fix assignment error...");
@ -484,13 +494,14 @@ public class HBaseFsck {
HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
}
} else {
errors.reportError("Region " + descriptiveName + " is in an unforeseen state:" +
" inMeta=" + inMeta +
" inHdfs=" + inHdfs +
" isDeployed=" + isDeployed +
" isMultiplyDeployed=" + isMultiplyDeployed +
" deploymentMatchesMeta=" + deploymentMatchesMeta +
" shouldBeDeployed=" + shouldBeDeployed);
errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
" is in an unforeseen state:" +
" inMeta=" + inMeta +
" inHdfs=" + inHdfs +
" isDeployed=" + isDeployed +
" isMultiplyDeployed=" + isMultiplyDeployed +
" deploymentMatchesMeta=" + deploymentMatchesMeta +
" shouldBeDeployed=" + shouldBeDeployed);
}
}
@ -504,10 +515,15 @@ public class HBaseFsck {
// Check only valid, working regions
if (hbi.metaEntry == null) continue;
if (hbi.metaEntry.regionServer == null) continue;
if (hbi.foundRegionDir == null) continue;
if (hbi.deployedOn.size() != 1) continue;
if (hbi.onlyEdits) continue;
// Missing regionDir or over-deployment is checked elsewhere. Include
// these cases in modTInfo, so we can evaluate those regions as part of
// the region chain in META
//if (hbi.foundRegionDir == null) continue;
//if (hbi.deployedOn.size() != 1) continue;
if (hbi.deployedOn.size() == 0) continue;
// We should be safe here
String tableName = hbi.metaEntry.getTableDesc().getNameAsString();
TInfo modTInfo = tablesInfo.get(tableName);
@ -517,13 +533,16 @@ public class HBaseFsck {
for (ServerName server : hbi.deployedOn) {
modTInfo.addServer(server);
}
modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey());
//modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey());
modTInfo.addRegionInfo(hbi);
tablesInfo.put(tableName, modTInfo);
}
for (TInfo tInfo : tablesInfo.values()) {
if (!tInfo.check()) {
errors.reportError("Found inconsistency in table " + tInfo.getName());
if (!tInfo.checkRegionChain()) {
errors.report("Found inconsistency in table " + tInfo.getName());
}
}
}
@ -533,17 +552,16 @@ public class HBaseFsck {
*/
private class TInfo {
String tableName;
TreeMap <byte[], byte[]> edges;
TreeSet <ServerName> deployedOn;
List<HbckInfo> regions = new ArrayList<HbckInfo>();
TInfo(String name) {
this.tableName = name;
edges = new TreeMap <byte[], byte[]> (Bytes.BYTES_COMPARATOR);
deployedOn = new TreeSet <ServerName>();
}
public void addEdge(byte[] fromNode, byte[] toNode) {
this.edges.put(fromNode, toNode);
public void addRegionInfo (HbckInfo r) {
regions.add(r);
}
public void addServer(ServerName server) {
@ -555,46 +573,77 @@ public class HBaseFsck {
}
public int getNumRegions() {
return edges.size();
return regions.size();
}
public boolean check() {
byte[] last = new byte[0];
byte[] next = new byte[0];
TreeSet <byte[]> visited = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
// Each table should start with a zero-length byte[] and end at a
// zero-length byte[]. Just follow the edges to see if this is true
while (true) {
// Check if chain is broken
if (!edges.containsKey(last)) {
errors.detail("Chain of regions in table " + tableName +
" is broken; edges does not contain " + Bytes.toString(last));
return false;
}
next = edges.get(last);
// Found a cycle
if (visited.contains(next)) {
errors.detail("Chain of regions in table " + tableName +
" has a cycle around " + Bytes.toString(next));
return false;
}
// Mark next node as visited
visited.add(next);
// If next is zero-length byte[] we are possibly at the end of the chain
if (next.length == 0) {
// If we have visited all elements we are fine
if (edges.size() != visited.size()) {
errors.detail("Chain of regions in table " + tableName +
" contains less elements than are listed in META; visited=" + visited.size() +
", edges=" + edges.size());
return false;
/**
* Check the region chain (from META) of this table. We are looking for
* holes, overlaps, and cycles.
* @return false if there are errors
*/
public boolean checkRegionChain() {
Collections.sort(regions);
HbckInfo last = null;
for (HbckInfo r : regions) {
if (last == null) {
// This is the first region, check that the start key is empty
if (! Bytes.equals(r.metaEntry.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) {
errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
"First region should start with an empty key.",
this, r);
}
return true;
} else {
// Check if endKey < startKey
// Previous implementation of this code checked for a cycle in the
// region chain. A cycle would imply that the endKey comes before
// the startKey (i.e. endKey < startKey).
if (! Bytes.equals(r.metaEntry.getEndKey(), HConstants.EMPTY_BYTE_ARRAY)) {
// continue with this check if this is not the last region
int cmpRegionKeys = Bytes.compareTo(r.metaEntry.getStartKey(),
r.metaEntry.getEndKey());
if (cmpRegionKeys > 0) {
errors.reportError(ERROR_CODE.REGION_CYCLE,
String.format("The endkey for this region comes before the "
+ "startkey, startkey=%s, endkey=%s",
Bytes.toString(r.metaEntry.getStartKey()),
Bytes.toString(r.metaEntry.getEndKey())),
this, r, last);
}
}
// Check if the startkeys are different
if (Bytes.equals(r.metaEntry.getStartKey(), last.metaEntry.getStartKey())) {
errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
"Two regions have the same startkey: "
+ Bytes.toString(r.metaEntry.getStartKey()),
this, r, last);
} else {
// Check that the startkey is the same as the previous end key
int cmp = Bytes.compareTo(r.metaEntry.getStartKey(),
last.metaEntry.getEndKey());
if (cmp > 0) {
// hole
errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN,
"There is a hole in the region chain.",
this, r, last);
} else if (cmp < 0) {
// overlap
errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
"There is an overlap in the region chain.",
this, r, last);
}
}
}
last = next;
last = r;
}
// How did we get here?
return errors.getErrorList().size() == 0;
}
}
/**
@ -603,7 +652,6 @@ public class HBaseFsck {
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
* milliseconds specified by timelag, then the table is a candidate to be returned.
* @param regionList - all entries found in .META
* @return tables that have not been modified recently
* @throws IOException if an error is encountered
*/
@ -668,7 +716,7 @@ public class HBaseFsck {
// If there is no region holding .META.
if (metaRegions.size() == 0) {
errors.reportError(".META. is not found on any region.");
errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
if (shouldFix()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
@ -678,7 +726,7 @@ public class HBaseFsck {
}
// If there are more than one regions pretending to hold the .META.
else if (metaRegions.size() > 1) {
errors.reportError(".META. is found on more than one region.");
errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
if (shouldFix()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
@ -773,7 +821,7 @@ public class HBaseFsck {
/**
* Maintain information about a particular region.
*/
static class HbckInfo {
static class HbckInfo implements Comparable {
boolean onlyEdits = false;
MetaEntry metaEntry = null;
FileStatus foundRegionDir = null;
@ -796,6 +844,16 @@ public class HBaseFsck {
return "UNKNOWN_REGION on " + Joiner.on(", ").join(deployedOn);
}
}
@Override
public int compareTo(Object o) {
HbckInfo other = (HbckInfo) o;
int startComparison = Bytes.compareTo(this.metaEntry.getStartKey(), other.metaEntry.getStartKey());
if (startComparison != 0)
return startComparison;
else
return Bytes.compareTo(this.metaEntry.getEndKey(), other.metaEntry.getEndKey());
}
}
/**
@ -804,10 +862,10 @@ public class HBaseFsck {
private void printTableSummary() {
System.out.println("Summary:");
for (TInfo tInfo : tablesInfo.values()) {
if (tInfo.check()) {
System.out.println(" " + tInfo.getName() + " is okay.");
} else {
if (errors.tableHasErrors(tInfo)) {
System.out.println("Table " + tInfo.getName() + " is inconsistent.");
} else {
System.out.println(" " + tInfo.getName() + " is okay.");
}
System.out.println(" Number of regions: " + tInfo.getNumRegions());
System.out.print(" Deployed on: ");
@ -819,19 +877,45 @@ public class HBaseFsck {
}
interface ErrorReporter {
public static enum ERROR_CODE {
UNKNOWN, NO_META_REGION, NULL_ROOT_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
FIRST_REGION_STARTKEY_NOT_EMPTY, DUPE_STARTKEYS,
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE
}
public void clear();
public void report(String message);
public void reportError(String message);
public void reportError(ERROR_CODE errorCode, String message);
public void reportError(ERROR_CODE errorCode, String message, TInfo table, HbckInfo info);
public void reportError(ERROR_CODE errorCode, String message, TInfo table, HbckInfo info1, HbckInfo info2);
public int summarize();
public void detail(String details);
public ArrayList<ERROR_CODE> getErrorList();
public void progress();
public void print(String message);
public void resetErrors();
public boolean tableHasErrors(TInfo table);
}
private static class PrintingErrorReporter implements ErrorReporter {
public int errorCount = 0;
private int showProgress;
public synchronized void reportError(String message) {
Set<TInfo> errorTables = new HashSet<TInfo>();
// for use by unit tests to verify which errors were discovered
private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
public void clear() {
errorTables.clear();
errorList.clear();
errorCount = 0;
}
public synchronized void reportError(ERROR_CODE errorCode, String message) {
errorList.add(errorCode);
if (!summary) {
System.out.println("ERROR: " + message);
}
@ -839,6 +923,37 @@ public class HBaseFsck {
showProgress = 0;
}
public synchronized void reportError(ERROR_CODE errorCode, String message, TInfo table,
HbckInfo info) {
errorTables.add(table);
String reference = "(region " + info.metaEntry.getRegionNameAsString() + ")";
reportError(errorCode, reference + " " + message);
}
public synchronized void reportError(ERROR_CODE errorCode, String message, TInfo table,
HbckInfo info1, HbckInfo info2) {
errorTables.add(table);
String reference = "(regions " + info1.metaEntry.getRegionNameAsString()
+ " and " + info2.metaEntry.getRegionNameAsString() + ")";
reportError(errorCode, reference + " " + message);
}
public synchronized void reportError(String message) {
reportError(ERROR_CODE.UNKNOWN, message);
}
/**
* Report error information, but do not increment the error count. Intended for cases
* where the actual error would have been reported previously.
* @param message
*/
public synchronized void report(String message) {
if (! summary) {
System.out.println("ERROR: " + message);
}
showProgress = 0;
}
public synchronized int summarize() {
System.out.println(Integer.toString(errorCount) +
" inconsistencies detected.");
@ -851,12 +966,21 @@ public class HBaseFsck {
}
}
public ArrayList<ERROR_CODE> getErrorList() {
return errorList;
}
public synchronized void print(String message) {
if (!summary) {
System.out.println(message);
}
}
@Override
public boolean tableHasErrors(TInfo table) {
return errorTables.contains(table);
}
@Override
public void resetErrors() {
errorCount = 0;
@ -923,14 +1047,14 @@ public class HBaseFsck {
}
}
// check to see if the existance of this region matches the region in META
// check to see if the existence of this region matches the region in META
for (HRegionInfo r:regions) {
HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
hbi.addServer(rsinfo);
}
} catch (IOException e) { // unable to connect to the region server.
errors.reportError("RegionServer: " + rsinfo.getServerName() +
" Unable to fetch region information. " + e);
errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
" Unable to fetch region information. " + e);
} finally {
done = true;
notifyAll(); // wakeup anybody waiting for this item to be done
@ -1000,7 +1124,7 @@ public class HBaseFsck {
}
}
} catch (IOException e) { // unable to connect to the region server.
errors.reportError("Table Directory: " + tableDir.getPath().getName() +
errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: " + tableDir.getPath().getName() +
" Unable to fetch region information. " + e);
} finally {
done = true;

View File

@ -89,6 +89,7 @@ public class HBaseTestingUtility {
*/
private boolean passedZkCluster = false;
private MiniDFSCluster dfsCluster = null;
private MiniHBaseCluster hbaseCluster = null;
private MiniMRCluster mrCluster = null;
// If non-null, then already a cluster running.
@ -113,6 +114,10 @@ public class HBaseTestingUtility {
this.conf = conf;
}
public MiniHBaseCluster getHbaseCluster() {
return hbaseCluster;
}
/**
* Returns this classes's instance of {@link Configuration}. Be careful how
* you use the returned Configuration since {@link HConnection} instances

View File

@ -19,23 +19,28 @@
*/
package org.apache.hadoop.hbase.util;
import static org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
public class TestHBaseFsck {
final Log LOG = LogFactory.getLog(getClass());
@ -50,32 +55,39 @@ public class TestHBaseFsck {
TEST_UTIL.startMiniCluster(3);
}
private int doFsck(boolean fix) throws Exception {
private List doFsck(boolean fix) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf);
fsck.displayFullReport();
fsck.displayFullReport(); // i.e. -details
fsck.setTimeLag(0);
fsck.setFixErrors(fix);
// Most basic check ever, 0 tables
return fsck.doWork();
fsck.doWork();
return fsck.getErrors().getErrorList();
}
private void assertNoErrors(List errs) throws Exception {
assertEquals(0, errs.size());
}
private void assertErrors(List errs, ERROR_CODE[] expectedErrors) {
assertEquals(Arrays.asList(expectedErrors), errs);
}
@Test
public void testHBaseFsck() throws Exception {
int result = doFsck(false);
assertEquals(0, result);
assertNoErrors(doFsck(false));
TEST_UTIL.createTable(TABLE, FAM);
// We created 1 table, should be fine
result = doFsck(false);
assertEquals(0, result);
assertNoErrors(doFsck(false));
// Now let's mess it up and change the assignment in .META. to
// point to a different region server
HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
ResultScanner scanner = meta.getScanner(new Scan());
resforloop : for (Result res : scanner) {
resforloop:
for (Result res : scanner) {
long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
HConstants.STARTCODE_QUALIFIER));
@ -98,13 +110,90 @@ public class TestHBaseFsck {
}
// Try to fix the data
result = doFsck(true);
assertEquals(-1, result);
assertErrors(doFsck(true), new ERROR_CODE[]{
ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
Thread.sleep(15000);
result = doFsck(false);
// Should have fixed
assertEquals(0, result);
// Should be fixed now
assertNoErrors(doFsck(false));
// comment needed - what is the purpose of this line
new HTable(conf, TABLE).getScanner(new Scan());
}
private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
htd, byte[] startKey, byte[] endKey)
throws IOException {
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
HRegionInfo hri = new HRegionInfo(htd, startKey, endKey);
Put put = new Put(hri.getRegionName());
put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
Writables.getBytes(hri));
meta.put(put);
return hri;
}
@Test
/**
* Tests for inconsistencies in the META data (duplicate start keys, or holes)
*/
public void testHBaseFsckMeta() throws Exception {
assertNoErrors(doFsck(false));
HTable tbl = TEST_UTIL.createTable(Bytes.toBytes("table2"), FAM);
HRegionInfo hriOrig = tbl.getRegionsInfo().keySet().iterator().next();
HServerAddress rsAddressOrig = tbl.getRegionsInfo().get(hriOrig);
byte[][] startKeys = new byte[][]{
HConstants.EMPTY_BYTE_ARRAY,
Bytes.toBytes("A"),
Bytes.toBytes("B"),
Bytes.toBytes("C")
};
TEST_UTIL.createMultiRegions(conf, tbl, FAM, startKeys);
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
FileSystem fs = rootDir.getFileSystem(conf);
Path p = new Path(rootDir + "/table2", hriOrig.getEncodedName());
fs.delete(p, true);
Thread.sleep(1 * 1000);
ArrayList servers = new ArrayList();
servers.add(rsAddressOrig);
HBaseFsckRepair.fixDupeAssignment(conf, hriOrig, servers);
// We created 1 table, should be fine
assertNoErrors(doFsck(false));
// Now let's mess it up, by adding a region with a duplicate startkey
HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
Bytes.toBytes("A"), Bytes.toBytes("A2"));
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
.waitForAssignment(hriDupe);
assertErrors(doFsck(false), new ERROR_CODE[]{ERROR_CODE.DUPE_STARTKEYS});
// Mess it up by creating an overlap in the metadata
HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
Bytes.toBytes("A2"), Bytes.toBytes("B2"));
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
.waitForAssignment(hriOverlap);
assertErrors(doFsck(false), new ERROR_CODE[]{
ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
// Mess it up by leaving a hole in the meta data
HRegionInfo hriHole = createRegion(conf, tbl.getTableDescriptor(),
Bytes.toBytes("D"), Bytes.toBytes("E"));
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriHole);
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
.waitForAssignment(hriHole);
// assertError(doFsck(false), ERROR_CODE.OVERLAP_IN_REGION_CHAIN);
assertErrors(doFsck(false), new ERROR_CODE[]{ ERROR_CODE.DUPE_STARTKEYS,
ERROR_CODE.OVERLAP_IN_REGION_CHAIN, ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
ERROR_CODE.HOLE_IN_REGION_CHAIN });
}
}