HBASE-5128 [uber hbck] Online automated repair of table integrity and region consistency problems

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1304665 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Hsieh 2012-03-23 23:53:55 +00:00
parent 560173f756
commit f2d637ffa5
14 changed files with 2254 additions and 432 deletions

View File

@ -509,8 +509,16 @@ public class HFile {
preferredEncodingInCache, hfs); preferredEncodingInCache, hfs);
} }
/**
*
* @param fs filesystem
* @param path Path to file to read
* @param cacheConf This must not be null. See {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
* @return an active Reader instance.
*/
public static Reader createReader( public static Reader createReader(
FileSystem fs, Path path, CacheConfig cacheConf) throws IOException { FileSystem fs, Path path, CacheConfig cacheConf) throws IOException {
Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
return createReaderWithEncoding(fs, path, cacheConf, return createReaderWithEncoding(fs, path, cacheConf,
DataBlockEncoding.NONE); DataBlockEncoding.NONE);
} }

View File

@ -218,7 +218,6 @@ public interface HMasterInterface extends VersionedProtocol {
public void unassign(final byte [] regionName, final boolean force) public void unassign(final byte [] regionName, final boolean force)
throws IOException; throws IOException;
/** /**
* Offline a region from the assignment manager's in-memory state. The * Offline a region from the assignment manager's in-memory state. The
* region should be in a closed state and there will be no attempt to * region should be in a closed state and there will be no attempt to

View File

@ -1034,8 +1034,9 @@ public class AssignmentManager extends ZooKeeperListener {
regionInfo = regionState.getRegion(); regionInfo = regionState.getRegion();
} else { } else {
try { try {
regionInfo = MetaReader.getRegion(catalogTracker, byte[] name = data.getRegionName();
data.getRegionName()).getFirst(); Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
regionInfo = p.getFirst();
} catch (IOException e) { } catch (IOException e) {
LOG.info("Exception reading META doing HBCK repair operation", e); LOG.info("Exception reading META doing HBCK repair operation", e);
return; return;

View File

@ -1904,18 +1904,17 @@ Server {
public double getAverageLoad() { public double getAverageLoad() {
return this.assignmentManager.getAverageLoad(); return this.assignmentManager.getAverageLoad();
} }
/** /**
* Special method, only used by hbck. * Special method, only used by hbck.
*/ */
@Override @Override
public void offline(final byte[] regionName) public void offline(final byte[] regionName) throws IOException {
throws IOException {
Pair<HRegionInfo, ServerName> pair = Pair<HRegionInfo, ServerName> pair =
MetaReader.getRegion(this.catalogTracker, regionName); MetaReader.getRegion(this.catalogTracker, regionName);
if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName)); if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
HRegionInfo hri = pair.getFirst(); HRegionInfo hri = pair.getFirst();
this.assignmentManager.regionOffline(hri); this.assignmentManager.regionOffline(hri);
} }
/** /**

File diff suppressed because it is too large Load Diff

View File

@ -21,44 +21,55 @@ package org.apache.hadoop.hbase.util;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
/**
* This class contains helper methods that repair parts of hbase's filesystem
* contents.
*/
@InterfaceAudience.Public @InterfaceAudience.Public
@InterfaceStability.Evolving @InterfaceStability.Evolving
public class HBaseFsckRepair { public class HBaseFsckRepair {
public static final Log LOG = LogFactory.getLog(HBaseFsckRepair.class);
/** /**
* Fix dupe assignment by doing silent closes on each RS hosting the region * Fix multiple assignment by doing silent closes on each RS hosting the region
* and then force ZK unassigned node to OFFLINE to trigger assignment by * and then force ZK unassigned node to OFFLINE to trigger assignment by
* master. * master.
* @param admin *
* @param region * @param admin HBase admin used to undeploy
* @param servers * @param region Region to undeploy
* @throws IOException * @param servers list of Servers to undeploy from
* @throws KeeperException
* @throws InterruptedException
*/ */
public static void fixDupeAssignment(HBaseAdmin admin, HRegionInfo region, public static void fixMultiAssignment(HBaseAdmin admin, HRegionInfo region,
List<ServerName> servers) List<ServerName> servers)
throws IOException, KeeperException, InterruptedException { throws IOException, KeeperException, InterruptedException {
HRegionInfo actualRegion = new HRegionInfo(region); HRegionInfo actualRegion = new HRegionInfo(region);
// Close region on the servers silently // Close region on the servers silently
for(ServerName server : servers) { for(ServerName server : servers) {
closeRegionSilentlyAndWait(admin.getConfiguration(), server, actualRegion); closeRegionSilentlyAndWait(admin, server, actualRegion);
} }
// Force ZK node to OFFLINE so master assigns // Force ZK node to OFFLINE so master assigns
@ -67,58 +78,133 @@ public class HBaseFsckRepair {
/** /**
* Fix unassigned by creating/transition the unassigned ZK node for this * Fix unassigned by creating/transition the unassigned ZK node for this
* region to OFFLINE state with a special flag to tell the master that this * region to OFFLINE state with a special flag to tell the master that this is
* is a forced operation by HBCK. * a forced operation by HBCK.
* @param admin *
* This assumes that info is in META.
*
* @param admin
* @param region * @param region
* @throws IOException * @throws IOException
* @throws KeeperException * @throws KeeperException
*/ */
public static void fixUnassigned(HBaseAdmin admin, HRegionInfo region) public static void fixUnassigned(HBaseAdmin admin, HRegionInfo region)
throws IOException, KeeperException { throws IOException, KeeperException {
HRegionInfo actualRegion = new HRegionInfo(region); HRegionInfo actualRegion = new HRegionInfo(region);
// Force ZK node to OFFLINE so master assigns // Force ZK node to OFFLINE so master assigns
forceOfflineInZK(admin, actualRegion); forceOfflineInZK(admin, actualRegion);
} }
/**
* In 0.90, this forces an HRI offline by setting the RegionTransitionData
* in ZK to have HBCK_CODE_NAME as the server. This is a special case in
* the AssignmentManager that attempts an assign call by the master.
*
* @see org.apache.hadoop.hbase.master.AssignmentManager#handleHBCK
*
* This doesn't seem to work properly in the updated version of 0.92+'s hbck
* so we use assign to force the region into transition. This has the
* side-effect of requiring a HRegionInfo that considers regionId (timestamp)
* in comparators that is addressed by HBASE-5563.
*/
private static void forceOfflineInZK(HBaseAdmin admin, final HRegionInfo region) private static void forceOfflineInZK(HBaseAdmin admin, final HRegionInfo region)
throws ZooKeeperConnectionException, KeeperException, IOException { throws ZooKeeperConnectionException, KeeperException, IOException {
admin.assign(region.getRegionName()); admin.assign(region.getRegionName());
} }
private static void closeRegionSilentlyAndWait(Configuration conf, /*
ServerName server, HRegionInfo region) throws IOException, * Should we check all assignments or just not in RIT?
InterruptedException { */
HConnection connection = HConnectionManager.getConnection(conf); public static void waitUntilAssigned(HBaseAdmin admin,
boolean success = false; HRegionInfo region) throws IOException, InterruptedException {
HConnection connection = admin.getConnection();
try { try {
HRegionInterface rs = long timeout = admin.getConfiguration().getLong("hbase.hbck.assign.timeout", 120000);
connection.getHRegionConnection(server.getHostname(), server.getPort());
rs.closeRegion(region, false);
long timeout = conf.getLong("hbase.hbck.close.timeout", 120000);
long expiration = timeout + System.currentTimeMillis(); long expiration = timeout + System.currentTimeMillis();
while (System.currentTimeMillis() < expiration) { while (System.currentTimeMillis() < expiration) {
try { try {
HRegionInfo rsRegion = rs.getRegionInfo(region.getRegionName()); Map<String, RegionState> rits=
if (rsRegion == null) admin.getClusterStatus().getRegionsInTransition();
throw new NotServingRegionException();
} catch (Exception e) { if (rits.keySet() != null && !rits.keySet().contains(region.getEncodedName())) {
success = true; // yay! no longer RIT
return; return;
}
// still in rit
LOG.info("Region still in transition, waiting for "
+ "it to become assigned: " + region);
} catch (IOException e) {
LOG.warn("Exception when waiting for region to become assigned,"
+ " retrying", e);
} }
Thread.sleep(1000); Thread.sleep(1000);
} }
throw new IOException("Region " + region + " failed to close within" throw new IOException("Region " + region + " failed to move out of " +
+ " timeout " + timeout); "transition within timeout " + timeout + "ms");
} finally { } finally {
try { try {
connection.close(); connection.close();
} catch (IOException ioe) { } catch (IOException ioe) {
if (success) { throw ioe;
throw ioe;
}
} }
} }
} }
/**
 * Asks a region server directly (without going through the active master)
 * to close a region, then polls that server until it no longer reports the
 * region or until the wait budget is exhausted.
 *
 * The wait budget is read from hbase.hbck.close.timeout (milliseconds,
 * default 120000) and the server is polled once per second.
 *
 * @param admin admin whose connection and configuration are used
 * @param server region server to contact
 * @param region region to close on that server
 * @throws IOException if the close request fails, or if the server still
 *         reports the region when the timeout expires
 * @throws InterruptedException if interrupted while sleeping between polls
 */
public static void closeRegionSilentlyAndWait(HBaseAdmin admin,
    ServerName server, HRegionInfo region) throws IOException, InterruptedException {
  HRegionInterface regionServer = admin.getConnection()
      .getHRegionConnection(server.getHostname(), server.getPort());
  regionServer.closeRegion(region, false);

  long timeout = admin.getConfiguration()
      .getLong("hbase.hbck.close.timeout", 120000);
  long deadline = timeout + System.currentTimeMillis();
  while (System.currentTimeMillis() < deadline) {
    HRegionInfo stillHosted;
    try {
      stillHosted = regionServer.getRegionInfo(region.getRegionName());
    } catch (IOException ioe) {
      // Any failure to look the region up is taken to mean it is closed.
      // NOTE(review): presumably this is a not-serving-region error; confirm.
      return;
    }
    if (stillHosted == null) {
      // The server no longer knows the region: close completed.
      return;
    }
    Thread.sleep(1000);
  }
  throw new IOException("Region " + region + " failed to close within"
      + " timeout " + timeout);
}
/**
 * Puts the specified HRegionInfo into META.
 *
 * The serialized region info is written to the catalog family under the
 * region-info qualifier, keyed by the region name. The META table handle
 * opened for the write is always closed, even when the put fails.
 *
 * @param conf configuration used to open the META table
 * @param hri region info to write
 * @throws IOException if META cannot be opened or the put fails
 */
public static void fixMetaHoleOnline(Configuration conf,
    HRegionInfo hri) throws IOException {
  Put p = new Put(hri.getRegionName());
  p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
      Writables.getBytes(hri));
  HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
  try {
    meta.put(p);
  } finally {
    // Release the table handle even if the put throws.
    meta.close();
  }
}
/**
 * Creates, flushes, and closes a new region.
 *
 * The region is created under the HBase root directory taken from the
 * configuration. It is closed before this method returns, so the returned
 * HRegion is a handle to an already-closed region. The region's log is
 * closed and deleted even if closing the region fails.
 *
 * @param conf configuration used to locate the HBase root directory
 * @param hri region info describing the region to create
 * @param htd descriptor of the table the region belongs to
 * @return the newly created, already closed region
 * @throws IOException if the region cannot be created or closed, or the log
 *         cannot be closed and deleted
 */
public static HRegion createHDFSRegionDir(Configuration conf,
    HRegionInfo hri, HTableDescriptor htd) throws IOException {
  // Create HRegion
  Path root = FSUtils.getRootDir(conf);
  HRegion region = HRegion.createHRegion(hri, root, conf, htd);
  HLog hlog = region.getLog();

  try {
    // Close the new region to flush to disk.
    region.close();
  } finally {
    // Close log file too, even when the region close throws, so the log
    // is not left open.
    hlog.closeAndDelete();
  }
  return region;
}
} }

View File

@ -44,7 +44,6 @@ import org.apache.hadoop.io.MultipleIOException;
@InterfaceStability.Evolving @InterfaceStability.Evolving
public class OfflineMetaRepair { public class OfflineMetaRepair {
private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName()); private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
HBaseFsck fsck;
protected static void printUsageAndExit() { protected static void printUsageAndExit() {
System.err.println("Usage: OfflineMetaRepair [opts] "); System.err.println("Usage: OfflineMetaRepair [opts] ");
@ -52,6 +51,8 @@ public class OfflineMetaRepair {
System.err System.err
.println(" -details Display full report of all regions."); .println(" -details Display full report of all regions.");
System.err.println(" -base <hdfs://> Base Hbase Data directory"); System.err.println(" -base <hdfs://> Base Hbase Data directory");
System.err.println(" -fix Auto fix as many problems as possible");
System.err.println(" -fixHoles Auto fix as region holes");
Runtime.getRuntime().exit(-2); Runtime.getRuntime().exit(-2);
} }
@ -67,18 +68,24 @@ public class OfflineMetaRepair {
Configuration conf = HBaseConfiguration.create(); Configuration conf = HBaseConfiguration.create();
conf.set("fs.defaultFS", conf.get(HConstants.HBASE_DIR)); conf.set("fs.defaultFS", conf.get(HConstants.HBASE_DIR));
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
boolean fixHoles = false;
// Process command-line args. // Process command-line args.
for (int i = 0; i < args.length; i++) { for (int i = 0; i < args.length; i++) {
String cmd = args[i]; String cmd = args[i];
if (cmd.equals("-details")) { if (cmd.equals("-details")) {
fsck.displayFullReport(); fsck.setDisplayFullReport();
} else if (cmd.equals("-base")) { } else if (cmd.equals("-base")) {
// update hbase root dir to user-specified base // update hbase root dir to user-specified base
i++; i++;
String path = args[i]; String path = args[i];
conf.set(HConstants.HBASE_DIR, path); conf.set(HConstants.HBASE_DIR, path);
conf.set("fs.defaultFS", conf.get(HConstants.HBASE_DIR)); conf.set("fs.defaultFS", conf.get(HConstants.HBASE_DIR));
} else if (cmd.equals("-fixHoles")) {
fixHoles = true;
} else if (cmd.equals("-fix")) {
// make all fix options true
fixHoles = true;
} else { } else {
String str = "Unknown command line option : " + cmd; String str = "Unknown command line option : " + cmd;
LOG.info(str); LOG.info(str);
@ -91,7 +98,7 @@ public class OfflineMetaRepair {
// threads cleanly, so we do a System.exit. // threads cleanly, so we do a System.exit.
boolean success = false; boolean success = false;
try { try {
success = fsck.rebuildMeta(); success = fsck.rebuildMeta(fixHoles);
} catch (MultipleIOException mioes) { } catch (MultipleIOException mioes) {
for (IOException ioe : mioes.getExceptions()) { for (IOException ioe : mioes.getExceptions()) {
LOG.error("Bailed out due to:", ioe); LOG.error("Bailed out due to:", ioe);

View File

@ -1211,11 +1211,16 @@ public class HBaseTestingUtility {
List<byte[]> rows = new ArrayList<byte[]>(); List<byte[]> rows = new ArrayList<byte[]>();
ResultScanner s = t.getScanner(new Scan()); ResultScanner s = t.getScanner(new Scan());
for (Result result : s) { for (Result result : s) {
HRegionInfo info = Writables.getHRegionInfo( byte[] val = result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); if (val == null) {
LOG.error("No region info for row " + Bytes.toString(result.getRow()));
// TODO figure out what to do for this new hosed case.
continue;
}
HRegionInfo info = Writables.getHRegionInfo(val);
if (Bytes.compareTo(info.getTableName(), tableName) == 0) { if (Bytes.compareTo(info.getTableName(), tableName) == 0) {
LOG.info("getMetaTableRows: row -> " + LOG.info("getMetaTableRows: row -> " +
Bytes.toStringBinary(result.getRow())); Bytes.toStringBinary(result.getRow()) + info);
rows.add(result.getRow()); rows.add(result.getRow());
} }
} }

View File

@ -23,8 +23,12 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -32,16 +36,27 @@ import java.util.Map.Entry;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass; import org.junit.AfterClass;
@ -54,16 +69,20 @@ import org.junit.experimental.categories.Category;
*/ */
@Category(MediumTests.class) @Category(MediumTests.class)
public class TestHBaseFsck { public class TestHBaseFsck {
final Log LOG = LogFactory.getLog(getClass()); final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private final static Configuration conf = TEST_UTIL.getConfiguration(); private final static Configuration conf = TEST_UTIL.getConfiguration();
private final static byte[] FAM = Bytes.toBytes("fam"); private final static byte[] FAM = Bytes.toBytes("fam");
// for the instance, reset every test run // for the instance, reset every test run
private HTable tbl; private HTable tbl;
private final static byte[][] splits= new byte[][] { Bytes.toBytes("A"), private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
Bytes.toBytes("B"), Bytes.toBytes("C") }; Bytes.toBytes("B"), Bytes.toBytes("C") };
// one row per region.
private final static byte[][] ROWKEYS= new byte[][] {
Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
@BeforeClass @BeforeClass
public static void setUpBeforeClass() throws Exception { public static void setUpBeforeClass() throws Exception {
TEST_UTIL.getConfiguration().setBoolean("hbase.master.distributed.log.splitting", false); TEST_UTIL.getConfiguration().setBoolean("hbase.master.distributed.log.splitting", false);
@ -117,8 +136,8 @@ public class TestHBaseFsck {
assertErrors(doFsck(conf, true), new ERROR_CODE[]{ assertErrors(doFsck(conf, true), new ERROR_CODE[]{
ERROR_CODE.SERVER_DOES_NOT_MATCH_META}); ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
// fixing assignements require opening regions is not synchronous. To make // fixing assignments require opening regions is not synchronous. To make
// the test pass consistentyl so for now we bake in some sleep to let it // the test pass consistently so for now we bake in some sleep to let it
// finish. 1s seems sufficient. // finish. 1s seems sufficient.
Thread.sleep(1000); Thread.sleep(1000);
@ -135,6 +154,9 @@ public class TestHBaseFsck {
meta.close(); meta.close();
} }
/**
* Create a new region in META.
*/
private HRegionInfo createRegion(Configuration conf, final HTableDescriptor private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
htd, byte[] startKey, byte[] endKey) htd, byte[] startKey, byte[] endKey)
throws IOException { throws IOException {
@ -147,47 +169,102 @@ public class TestHBaseFsck {
return hri; return hri;
} }
public void dumpMeta(HTableDescriptor htd) throws IOException { /**
List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(htd.getName()); * Debugging method to dump the contents of meta.
*/
private void dumpMeta(byte[] tableName) throws IOException {
List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
for (byte[] row : metaRows) { for (byte[] row : metaRows) {
LOG.info(Bytes.toString(row)); LOG.info(Bytes.toString(row));
} }
} }
private void deleteRegion(Configuration conf, final HTableDescriptor htd, /**
byte[] startKey, byte[] endKey) throws IOException { * This method is used to undeploy a region -- close it and attempt to
* remove its state from the Master.
*/
private void undeployRegion(HBaseAdmin admin, ServerName sn,
    HRegionInfo hri) throws IOException, InterruptedException {
  // Best effort: close the region directly on the hosting server, then ask
  // the master to mark it offline. An IOException from either step is
  // logged and not rethrown, so callers continue with the rest of their setup.
  try {
    HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
    admin.getMaster().offline(hri.getRegionName());
  } catch (IOException ioe) {
    // Region names can embed arbitrary key bytes, so render them with the
    // binary-safe formatter to keep the log line readable.
    LOG.warn("Got exception when attempting to offline region "
        + Bytes.toStringBinary(hri.getRegionName()), ioe);
  }
}
/**
 * Removes the region with the given start and end keys from assignments,
 * from META, and/or from HDFS. The region's .regioninfo file is never
 * singled out for removal by this overload.
 *
 * @param unassign if true unassign region if assigned
 * @param metaRow if true remove region's row from META
 * @param hdfs if true remove region's dir in HDFS
 */
private void deleteRegion(Configuration conf, final HTableDescriptor htd,
    byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
    boolean hdfs) throws IOException, InterruptedException {
  // Delegate to the full variant without the ".regioninfo only" option.
  final boolean regionInfoOnly = false;
  deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs,
      regionInfoOnly);
}
LOG.info("Before delete:"); /**
dumpMeta(htd); * Delete a region from assignments, meta, or completely from hdfs.
* @param unassign if true unassign region if assigned
* @param metaRow if true remove region's row from META
* @param hdfs if true remove region's dir in HDFS
* @param regionInfoOnly if true remove a region dir's .regioninfo file
*/
private void deleteRegion(Configuration conf, final HTableDescriptor htd,
byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
LOG.info("** Before delete:");
dumpMeta(htd.getName());
Map<HRegionInfo, HServerAddress> hris = tbl.getRegionsInfo(); Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
for (Entry<HRegionInfo, HServerAddress> e: hris.entrySet()) { for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
HRegionInfo hri = e.getKey(); HRegionInfo hri = e.getKey();
HServerAddress hsa = e.getValue(); ServerName hsa = e.getValue();
if (Bytes.compareTo(hri.getStartKey(), startKey) == 0 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
&& Bytes.compareTo(hri.getEndKey(), endKey) == 0) { && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
LOG.info("RegionName: " +hri.getRegionNameAsString()); LOG.info("RegionName: " +hri.getRegionNameAsString());
byte[] deleteRow = hri.getRegionName(); byte[] deleteRow = hri.getRegionName();
TEST_UTIL.getHBaseAdmin().unassign(deleteRow, true);
LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString()); if (unassign) {
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR)); LOG.info("Undeploying region " + hri + " from server " + hsa);
FileSystem fs = rootDir.getFileSystem(conf); undeployRegion(new HBaseAdmin(conf), hsa, hri);
Path p = new Path(rootDir + "/" + htd.getNameAsString(), hri.getEncodedName()); }
fs.delete(p, true);
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME); if (regionInfoOnly) {
Delete delete = new Delete(deleteRow); LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
meta.delete(delete); Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
FileSystem fs = rootDir.getFileSystem(conf);
Path p = new Path(rootDir + "/" + htd.getNameAsString(), hri.getEncodedName());
Path hriPath = new Path(p, HRegion.REGIONINFO_FILE);
fs.delete(hriPath, true);
}
if (hdfs) {
LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
FileSystem fs = rootDir.getFileSystem(conf);
Path p = new Path(rootDir + "/" + htd.getNameAsString(), hri.getEncodedName());
HBaseFsck.debugLsr(conf, p);
boolean success = fs.delete(p, true);
LOG.info("Deleted " + p + " sucessfully? " + success);
HBaseFsck.debugLsr(conf, p);
}
if (metaRow) {
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
Delete delete = new Delete(deleteRow);
meta.delete(delete);
}
} }
LOG.info(hri.toString() + hsa.toString()); LOG.info(hri.toString() + hsa.toString());
} }
TEST_UTIL.getMetaTableRows(htd.getName()); TEST_UTIL.getMetaTableRows(htd.getName());
LOG.info("After delete:"); LOG.info("*** After delete:");
dumpMeta(htd); dumpMeta(htd.getName());
} }
/** /**
@ -201,11 +278,32 @@ public class TestHBaseFsck {
HTableDescriptor desc = new HTableDescriptor(tablename); HTableDescriptor desc = new HTableDescriptor(tablename);
HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM)); HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
desc.addFamily(hcd); // If a table has no CF's it doesn't get checked desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
TEST_UTIL.getHBaseAdmin().createTable(desc, splits); TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
tbl = new HTable(TEST_UTIL.getConfiguration(), tablename); tbl = new HTable(TEST_UTIL.getConfiguration(), tablename);
List<Put> puts = new ArrayList<Put>();
for (byte[] row : ROWKEYS) {
Put p = new Put(row);
p.add(FAM, Bytes.toBytes("val"), row);
puts.add(p);
}
tbl.put(puts);
tbl.flushCommits();
return tbl; return tbl;
} }
/**
 * Counts the rows in the current test table with a full scan. Tests use the
 * result to verify whether a repair lost data.
 *
 * @return number of rows returned by an unrestricted scan of the table
 * @throws IOException if the scan fails
 */
int countRows() throws IOException {
  ResultScanner rs = tbl.getScanner(new Scan());
  try {
    int count = 0;
    while (rs.next() != null) {
      count++;
    }
    return count;
  } finally {
    // Always close the scanner so it is not left open between tests.
    rs.close();
  }
}
/** /**
* delete table in preparation for next test * delete table in preparation for next test
@ -214,14 +312,21 @@ public class TestHBaseFsck {
* @throws IOException * @throws IOException
*/ */
void deleteTable(String tablename) throws IOException { void deleteTable(String tablename) throws IOException {
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); HBaseAdmin admin = new HBaseAdmin(conf);
admin.getConnection().clearRegionCache();
byte[] tbytes = Bytes.toBytes(tablename); byte[] tbytes = Bytes.toBytes(tablename);
admin.disableTable(tbytes); admin.disableTableAsync(tbytes);
while (!admin.isTableDisabled(tbytes)) {
try {
Thread.sleep(250);
} catch (InterruptedException e) {
e.printStackTrace();
fail("Interrupted when trying to disable table " + tablename);
}
}
admin.deleteTable(tbytes); admin.deleteTable(tbytes);
} }
/** /**
* This creates a clean table and confirms that the table is clean. * This creates a clean table and confirms that the table is clean.
*/ */
@ -234,18 +339,21 @@ public class TestHBaseFsck {
assertNoErrors(hbck); assertNoErrors(hbck);
setupTable(table); setupTable(table);
assertEquals(ROWKEYS.length, countRows());
// We created 1 table, should be fine // We created 1 table, should be fine
hbck = doFsck(conf, false); hbck = doFsck(conf, false);
assertNoErrors(hbck); assertNoErrors(hbck);
assertEquals(0, hbck.getOverlapGroups(table).size()); assertEquals(0, hbck.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows());
} finally { } finally {
deleteTable(table); deleteTable(table);
} }
} }
/** /**
* This creates a bad table with regions that have a duplicate start key * This create and fixes a bad table with regions that have a duplicate
* start key
*/ */
@Test @Test
public void testDupeStartKey() throws Exception { public void testDupeStartKey() throws Exception {
@ -253,6 +361,7 @@ public class TestHBaseFsck {
try { try {
setupTable(table); setupTable(table);
assertNoErrors(doFsck(conf, false)); assertNoErrors(doFsck(conf, false));
assertEquals(ROWKEYS.length, countRows());
// Now let's mess it up, by adding a region with a duplicate startkey // Now let's mess it up, by adding a region with a duplicate startkey
HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(), HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
@ -265,13 +374,112 @@ public class TestHBaseFsck {
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS, assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
ERROR_CODE.DUPE_STARTKEYS}); ERROR_CODE.DUPE_STARTKEYS});
assertEquals(2, hbck.getOverlapGroups(table).size()); assertEquals(2, hbck.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
// fix the degenerate region.
doFsck(conf,true);
// check that the degenerate region is gone and no data loss
HBaseFsck hbck2 = doFsck(conf,false);
assertNoErrors(hbck2);
assertEquals(0, hbck2.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows());
} finally { } finally {
deleteTable(table); deleteTable(table);
} }
} }
/** /**
* This creates a bad table with regions that has startkey == endkey * Get region info from local cluster.
*/
Map<ServerName, List<String>> getDeployedHRIs(HBaseAdmin admin)
    throws IOException {
  // Builds a map from each region server reported by the master to the
  // names of the regions that server says it currently has online.
  Collection<ServerName> liveServers =
      admin.getMaster().getClusterStatus().getServers();
  Map<ServerName, List<String>> deployed =
      new HashMap<ServerName, List<String>>();
  HConnection conn = admin.getConnection();

  for (ServerName sn : liveServers) {
    HRegionInterface rs =
        conn.getHRegionConnection(sn.getHostname(), sn.getPort());

    // Ask the region server itself which regions it hosts.
    List<String> onlineNames = new ArrayList<String>();
    for (HRegionInfo online : rs.getOnlineRegions()) {
      onlineNames.add(online.getRegionNameAsString());
    }
    deployed.put(sn, onlineNames);
  }
  return deployed;
}
/**
 * Finds the server that is hosting the given region.
 *
 * @param mm map from server to the names of the regions it has online
 * @param hri region to look for, matched by its region name string
 * @return the first server, in map iteration order, whose region list
 *         contains the region's name, or null if no server lists it
 */
ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
  for (Map.Entry<ServerName, List<String>> deployment : mm.entrySet()) {
    List<String> hostedRegionNames = deployment.getValue();
    if (!hostedRegionNames.contains(hri.getRegionNameAsString())) {
      continue;
    }
    return deployment.getKey();
  }
  return null;
}
/**
 * This creates and fixes a bad table that contains a duplicate region: an
 * extra region whose start key ("A") and end key ("B") are both the same as
 * those of a region the table presumably already has after setupTable.
 */
@Test
public void testDupeRegion() throws Exception {
  String table = "tableDupeRegion";
  try {
    setupTable(table);
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());

    // Now let's mess it up, by adding a region with the same start and end
    // keys as an existing one
    HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
        Bytes.toBytes("A"), Bytes.toBytes("B"));

    TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
        .waitForAssignment(hriDupe);

    // Yikes! The assignment manager can't tell the difference between two
    // different regions with the same start/endkeys since it doesn't
    // differentiate on ts/regionId! We actually need to recheck
    // deployments!
    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
    // Poll the region servers until one reports the dupe region online, but
    // give up after a generous bound so a failed assignment fails the test
    // instead of hanging the whole run.
    long deadline = System.currentTimeMillis() + 60000L;
    while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
      if (System.currentTimeMillis() > deadline) {
        fail("Timed out waiting for dupe region to be deployed");
      }
      Thread.sleep(250);
    }

    LOG.debug("Finished assignment of dupe region");

    // TODO why is dupe region different from dupe start keys?
    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
          ERROR_CODE.DUPE_STARTKEYS});
    assertEquals(2, hbck.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.

    // fix the duplicate region.
    doFsck(conf, true);

    // check that the duplicate region is gone and no data loss
    HBaseFsck hbck2 = doFsck(conf, false);
    assertNoErrors(hbck2);
    assertEquals(0, hbck2.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());
  } finally {
    deleteTable(table);
  }
}
/**
* This creates and fixes a bad table with regions that has startkey == endkey
*/ */
@Test @Test
public void testDegenerateRegions() throws Exception { public void testDegenerateRegions() throws Exception {
@ -279,6 +487,7 @@ public class TestHBaseFsck {
try { try {
setupTable(table); setupTable(table);
assertNoErrors(doFsck(conf,false)); assertNoErrors(doFsck(conf,false));
assertEquals(ROWKEYS.length, countRows());
// Now let's mess it up, by adding a region with a duplicate startkey // Now let's mess it up, by adding a region with a duplicate startkey
HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(), HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
@ -291,19 +500,111 @@ public class TestHBaseFsck {
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS}); ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
assertEquals(2, hbck.getOverlapGroups(table).size()); assertEquals(2, hbck.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows());
// fix the degenerate region.
doFsck(conf,true);
// check that the degenerate region is gone and no data loss
HBaseFsck hbck2 = doFsck(conf,false);
assertNoErrors(hbck2);
assertEquals(0, hbck2.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows());
} finally { } finally {
deleteTable(table); deleteTable(table);
} }
} }
/** /**
* This creates a bad table where a start key contained in another region. * This creates and fixes a bad table where a region is completely contained
* by another region.
*/
@Test
public void testContainedRegionOverlap() throws Exception {
  final String table = "tableContainedRegionOverlap";
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // Break the table: create and assign an extra region spanning "A2" to
    // "B", then wait for the assignment manager to finish with it.
    byte[] overlapStart = Bytes.toBytes("A2");
    byte[] overlapEnd = Bytes.toBytes("B");
    HRegionInfo containedRegion = createRegion(conf,
        tbl.getTableDescriptor(), overlapStart, overlapEnd);
    TEST_UTIL.getHBaseCluster().getMaster().assignRegion(containedRegion);
    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
        .waitForAssignment(containedRegion);

    // A read-only hbck pass must report exactly one overlap error, two
    // overlap groups, and all rows still readable.
    HBaseFsck brokenReport = doFsck(conf, false);
    ERROR_CODE[] expectedErrors = { ERROR_CODE.OVERLAP_IN_REGION_CHAIN };
    assertErrors(brokenReport, expectedErrors);
    assertEquals(2, brokenReport.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());

    // Run hbck again with every fix option switched on.
    doFsck(conf, true);

    // A second read-only pass must now be clean, with no overlap groups
    // and no rows lost.
    HBaseFsck repairedReport = doFsck(conf, false);
    assertNoErrors(repairedReport);
    assertEquals(0, repairedReport.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());
  } finally {
    deleteTable(table);
  }
}
/**
 * Creates and repairs a bad table in which one region lies entirely inside
 * the key range of another and the chain also has a hole -- roughly what a
 * botched split leaves behind.
 */
@Test
public void testOverlapAndOrphan() throws Exception {
  final String table = "tableOverlapAndOrphan";
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // Break the table, step 1: with the table disabled, remove the region
    // from "A" to "B" using the four-flag deleteRegion variant.
    // NOTE(review): the flag meanings are defined by deleteRegion, which is
    // not visible here -- confirm against that helper.
    TEST_UTIL.getHBaseAdmin().disableTable(table);
    byte[] removedStart = Bytes.toBytes("A");
    byte[] removedEnd = Bytes.toBytes("B");
    deleteRegion(conf, tbl.getTableDescriptor(), removedStart, removedEnd,
        true, true, false, true);
    TEST_UTIL.getHBaseAdmin().enableTable(table);

    // Step 2: create and assign a narrower region from "A2" to "B".
    byte[] overlapStart = Bytes.toBytes("A2");
    byte[] overlapEnd = Bytes.toBytes("B");
    HRegionInfo narrowRegion = createRegion(conf, tbl.getTableDescriptor(),
        overlapStart, overlapEnd);
    TEST_UTIL.getHBaseCluster().getMaster().assignRegion(narrowRegion);
    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
        .waitForAssignment(narrowRegion);

    // The read-only pass must report these three errors, in this order.
    HBaseFsck brokenReport = doFsck(conf, false);
    ERROR_CODE[] expectedErrors = {
        ERROR_CODE.ORPHAN_HDFS_REGION,
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.HOLE_IN_REGION_CHAIN };
    assertErrors(brokenReport, expectedErrors);

    // Run hbck with every fix option switched on.
    doFsck(conf, true);

    // Afterwards the table must be clean, overlap-free and complete.
    HBaseFsck repairedReport = doFsck(conf, false);
    assertNoErrors(repairedReport);
    assertEquals(0, repairedReport.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());
  } finally {
    deleteTable(table);
  }
}
/**
* This creates and fixes a bad table where a region overlaps two regions --
* a start key contained in another region and its end key is contained in
* yet another region.
*/ */
@Test @Test
public void testCoveredStartKey() throws Exception { public void testCoveredStartKey() throws Exception {
String table = "tableCoveredStartKey"; String table = "tableCoveredStartKey";
try { try {
setupTable(table); setupTable(table);
assertEquals(ROWKEYS.length, countRows());
// Mess it up by creating an overlap in the metadata // Mess it up by creating an overlap in the metadata
HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(), HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
@ -317,40 +618,239 @@ public class TestHBaseFsck {
ERROR_CODE.OVERLAP_IN_REGION_CHAIN, ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
assertEquals(3, hbck.getOverlapGroups(table).size()); assertEquals(3, hbck.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows());
// fix the problem.
doFsck(conf, true);
// verify that overlaps are fixed
HBaseFsck hbck2 = doFsck(conf, false);
assertErrors(hbck2, new ERROR_CODE[0]);
assertEquals(0, hbck2.getOverlapGroups(table).size());
assertEquals(ROWKEYS.length, countRows());
} finally { } finally {
deleteTable(table); deleteTable(table);
} }
} }
/** /**
* This creates a bad table with a hole in meta. * This creates and fixes a bad table with a missing region -- hole in meta
* and data missing in the fs.
*/ */
@Test @Test
public void testMetaHole() throws Exception { public void testRegionHole() throws Exception {
String table = "tableMetaHole"; String table = "tableRegionHole";
try { try {
setupTable(table); setupTable(table);
assertEquals(ROWKEYS.length, countRows());
// Mess it up by leaving a hole in the meta data // Mess it up by leaving a hole in the assignment, meta, and hdfs data
HRegionInfo hriHole = createRegion(conf, tbl.getTableDescriptor(),
Bytes.toBytes("D"), Bytes.toBytes(""));
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriHole);
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
.waitForAssignment(hriHole);
TEST_UTIL.getHBaseAdmin().disableTable(table); TEST_UTIL.getHBaseAdmin().disableTable(table);
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes("")); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
Bytes.toBytes("C"), true, true, true);
TEST_UTIL.getHBaseAdmin().enableTable(table); TEST_UTIL.getHBaseAdmin().enableTable(table);
HBaseFsck hbck = doFsck(conf, false); HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN }); assertErrors(hbck, new ERROR_CODE[] {
ERROR_CODE.HOLE_IN_REGION_CHAIN});
// holes are separate from overlap groups // holes are separate from overlap groups
assertEquals(0, hbck.getOverlapGroups(table).size()); assertEquals(0, hbck.getOverlapGroups(table).size());
// fix hole
doFsck(conf, true);
// check that hole fixed
assertNoErrors(doFsck(conf,false));
assertEquals(ROWKEYS.length - 2 , countRows()); // lost a region so lost a row
} finally { } finally {
deleteTable(table); deleteTable(table);
} }
} }
/**
 * Creates and repairs a bad table with a missing region: there is a hole in
 * meta, and the region's data is still present in the fs but its
 * .regioninfo is missing (an orphan hdfs region).
 */
@Test
public void testHDFSRegioninfoMissing() throws Exception {
  final String table = "tableHDFSRegioininfoMissing";
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // Break the table: with the table disabled, remove the region from "B"
    // to "C" using the four-flag deleteRegion variant.
    // NOTE(review): flag meanings come from deleteRegion, which is not
    // visible here -- confirm against that helper.
    TEST_UTIL.getHBaseAdmin().disableTable(table);
    byte[] holeStart = Bytes.toBytes("B");
    byte[] holeEnd = Bytes.toBytes("C");
    deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd,
        true, true, false, true);
    TEST_UTIL.getHBaseAdmin().enableTable(table);

    // The read-only pass must report these three errors, in this order.
    HBaseFsck brokenReport = doFsck(conf, false);
    ERROR_CODE[] expectedErrors = {
        ERROR_CODE.ORPHAN_HDFS_REGION,
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.HOLE_IN_REGION_CHAIN };
    assertErrors(brokenReport, expectedErrors);
    // holes are tracked separately from overlap groups
    assertEquals(0, brokenReport.getOverlapGroups(table).size());

    // Run hbck with every fix option switched on.
    doFsck(conf, true);

    // The hole must be gone and every row must still be readable.
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());
  } finally {
    deleteTable(table);
  }
}
/**
 * Creates and repairs a bad table containing a region that has no meta
 * entry and is not assigned to any region server.
 */
@Test
public void testNotInMetaOrDeployedHole() throws Exception {
  final String table = "tableNotInMetaOrDeployedHole";
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // Break the table: with the table disabled, remove the region from "B"
    // to "C" but leave its files in the fs (third flag is false).
    TEST_UTIL.getHBaseAdmin().disableTable(table);
    byte[] holeStart = Bytes.toBytes("B");
    byte[] holeEnd = Bytes.toBytes("C");
    deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd,
        true, true, false); // don't rm from fs
    TEST_UTIL.getHBaseAdmin().enableTable(table);

    // The read-only pass must report both errors, in this order.
    HBaseFsck brokenReport = doFsck(conf, false);
    ERROR_CODE[] expectedBeforeFix = {
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.HOLE_IN_REGION_CHAIN };
    assertErrors(brokenReport, expectedBeforeFix);
    // holes are tracked separately from overlap groups
    assertEquals(0, brokenReport.getOverlapGroups(table).size());

    // The fixing pass is expected to report the same two errors while it
    // repairs them.
    HBaseFsck fixingRun = doFsck(conf, true);
    ERROR_CODE[] expectedDuringFix = {
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.HOLE_IN_REGION_CHAIN };
    assertErrors(fixingRun, expectedDuringFix);

    // The hole must be gone and every row must still be readable.
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());
  } finally {
    deleteTable(table);
  }
}
/**
 * Creates and repairs a bad table that has a hole in meta.
 */
@Test
public void testNotInMetaHole() throws Exception {
  final String table = "tableNotInMetaHole";
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // Break the table: with the table disabled, call deleteRegion for the
    // region from "B" to "C" with flags (false, true, false); the region's
    // files stay in the fs.
    TEST_UTIL.getHBaseAdmin().disableTable(table);
    byte[] holeStart = Bytes.toBytes("B");
    byte[] holeEnd = Bytes.toBytes("C");
    deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd,
        false, true, false); // don't rm from fs
    TEST_UTIL.getHBaseAdmin().enableTable(table);

    // The read-only pass must report both errors, in this order.
    HBaseFsck brokenReport = doFsck(conf, false);
    ERROR_CODE[] expectedBeforeFix = {
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.HOLE_IN_REGION_CHAIN };
    assertErrors(brokenReport, expectedBeforeFix);
    // holes are tracked separately from overlap groups
    assertEquals(0, brokenReport.getOverlapGroups(table).size());

    // The fixing pass is expected to report the same two errors while it
    // repairs them.
    HBaseFsck fixingRun = doFsck(conf, true);
    ERROR_CODE[] expectedDuringFix = {
        ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.HOLE_IN_REGION_CHAIN };
    assertErrors(fixingRun, expectedDuringFix);

    // The hole must be gone and every row must still be readable.
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());
  } finally {
    deleteTable(table);
  }
}
/**
 * Creates and repairs a bad table containing a region that is still listed
 * in meta but has neither a deployment nor data in hdfs.
 */
@Test
public void testNotInHdfs() throws Exception {
  final String table = "tableNotInHdfs";
  try {
    setupTable(table);
    assertEquals(ROWKEYS.length, countRows());

    // Flush first so the data lives in the region files; if it were only in
    // the hlog there would be no data loss to observe.
    TEST_UTIL.getHBaseAdmin().flush(table);

    // Break the table: remove the hdfs data of the region from "B" to "C"
    // while leaving its meta row alone.
    byte[] holeStart = Bytes.toBytes("B");
    byte[] holeEnd = Bytes.toBytes("C");
    deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd,
        false, false, true); // don't rm meta

    // The read-only pass must report exactly one NOT_IN_HDFS error.
    HBaseFsck brokenReport = doFsck(conf, false);
    ERROR_CODE[] expectedErrors = { ERROR_CODE.NOT_IN_HDFS };
    assertErrors(brokenReport, expectedErrors);
    // holes are tracked separately from overlap groups
    assertEquals(0, brokenReport.getOverlapGroups(table).size());

    // Run hbck with every fix option switched on.
    doFsck(conf, true);

    // The table must be consistent again, but two rows fewer are expected
    // because the deleted region's data is gone.
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length - 2, countRows());
  } finally {
    deleteTable(table);
  }
}
/**
 * Leaves a table's entries in META while removing all of its hdfs data.
 * Repair is expected to remove the table cleanly.
 */
@Test
public void testNoHdfsTable() throws Exception {
  final String table = "NoHdfsTable";
  setupTable(table);
  assertEquals(ROWKEYS.length, countRows());

  // Flush first so the data lives in the region files; if it were only in
  // the hlog there would be no data loss to observe.
  TEST_UTIL.getHBaseAdmin().flush(table);

  // Break the table: remove the hdfs data of every region, walking the
  // consecutive key boundaries, while leaving the meta rows alone.
  String[] boundaries = { "", "A", "B", "C", "" };
  for (int i = 0; i + 1 < boundaries.length; i++) {
    deleteRegion(conf, tbl.getTableDescriptor(),
        Bytes.toBytes(boundaries[i]), Bytes.toBytes(boundaries[i + 1]),
        false, false, true); // don't rm meta
  }

  // The read-only pass must report one NOT_IN_HDFS error per region.
  HBaseFsck brokenReport = doFsck(conf, false);
  ERROR_CODE[] expectedErrors = {
      ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
      ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS };
  assertErrors(brokenReport, expectedErrors);
  // holes are tracked separately from overlap groups
  assertEquals(0, brokenReport.getOverlapGroups(table).size());

  // Run hbck with every fix option switched on.
  doFsck(conf, true); // in 0.92+, meta entries auto create regiondirs

  // The follow-up read-only pass must be clean.
  assertNoErrors(doFsck(conf, false));

  // Counting rows is expected to throw because the table no longer exists.
  boolean tableAlreadyDeleted = false;
  try {
    assertEquals(0, countRows());
  } catch (IOException expected) {
    // we've actually deleted the table already. :)
    tableAlreadyDeleted = true;
  }
  if (!tableAlreadyDeleted) {
    fail("Should have failed with IOException");
  }
}
@org.junit.Rule @org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();

View File

@ -96,14 +96,6 @@ public class TestHBaseFsckComparator {
assertTrue(HBaseFsck.cmp.compare(hi2, hi1) > 0); assertTrue(HBaseFsck.cmp.compare(hi2, hi1) > 0);
} }
@Test
public void testTiebreaker() {
  // Two infos built from the same table and key range; only the last
  // genHbckInfo argument differs (0 versus 1).
  HbckInfo lower = genHbckInfo(table, keyA, keyC, 0);
  HbckInfo higher = genHbckInfo(table, keyA, keyC, 1);

  // The comparator must order the 0 variant before the 1 variant, and the
  // reverse comparison must agree.
  int forward = HBaseFsck.cmp.compare(lower, higher);
  assertTrue(forward < 0);
  int backward = HBaseFsck.cmp.compare(higher, lower);
  assertTrue(backward > 0);
}
@org.junit.Rule @org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.util.hbck;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -28,18 +29,29 @@ import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
public class HbckTestingUtil { public class HbckTestingUtil {
public static HBaseFsck doFsck(Configuration conf, boolean fix) throws Exception { public static HBaseFsck doFsck(Configuration conf, boolean fix) throws Exception {
return doFsck(conf, fix, fix, fix, fix,fix);
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
boolean fixHdfsOrphans) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect(); fsck.connect();
fsck.displayFullReport(); // i.e. -details fsck.setDisplayFullReport(); // i.e. -details
fsck.setTimeLag(0); fsck.setTimeLag(0);
fsck.setFixErrors(fix); fsck.setFixAssignments(fixAssignments);
fsck.doWork(); fsck.setFixMeta(fixMeta);
fsck.setFixHdfsHoles(fixHdfsHoles);
fsck.setFixHdfsOverlaps(fixHdfsOverlaps);
fsck.setFixHdfsOrphans(fixHdfsOrphans);
fsck.onlineHbck();
return fsck; return fsck;
} }
public static void assertNoErrors(HBaseFsck fsck) throws Exception { public static void assertNoErrors(HBaseFsck fsck) throws Exception {
List<ERROR_CODE> errs = fsck.getErrors().getErrorList(); List<ERROR_CODE> errs = fsck.getErrors().getErrorList();
assertEquals(0, errs.size()); assertEquals(new ArrayList<ERROR_CODE>(), errs);
} }
public static void assertErrors(HBaseFsck fsck, ERROR_CODE[] expectedErrors) { public static void assertErrors(HBaseFsck fsck, ERROR_CODE[] expectedErrors) {

View File

@ -61,7 +61,7 @@ public class TestOfflineMetaRebuildBase extends OfflineMetaRebuildTestCore {
// rebuild meta table from scratch // rebuild meta table from scratch
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
assertTrue(fsck.rebuildMeta()); assertTrue(fsck.rebuildMeta(false));
// bring up the minicluster // bring up the minicluster
TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default

View File

@ -21,6 +21,7 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.Arrays; import java.util.Arrays;
@ -64,7 +65,7 @@ public class TestOfflineMetaRebuildHole extends OfflineMetaRebuildTestCore {
// attempt to rebuild meta table from scratch // attempt to rebuild meta table from scratch
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
assertFalse(fsck.rebuildMeta()); assertFalse(fsck.rebuildMeta(false));
// bring up the minicluster // bring up the minicluster
TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default

View File

@ -69,7 +69,7 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore {
// attempt to rebuild meta table from scratch // attempt to rebuild meta table from scratch
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
assertFalse(fsck.rebuildMeta()); assertFalse(fsck.rebuildMeta(false));
Multimap<byte[], HbckInfo> problems = fsck.getOverlapGroups(table); Multimap<byte[], HbckInfo> problems = fsck.getOverlapGroups(table);
assertEquals(1, problems.keySet().size()); assertEquals(1, problems.keySet().size());