HBASE-5128 [uber hbck] Online automated repair of table integrity and region consistency problems
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1304665 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
560173f756
commit
f2d637ffa5
|
@ -509,8 +509,16 @@ public class HFile {
|
|||
preferredEncodingInCache, hfs);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param fs filesystem
|
||||
* @param path Path to file to read
|
||||
* @param cacheConf This must not be null. @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
|
||||
* @return an active Reader instance.
|
||||
*/
|
||||
public static Reader createReader(
|
||||
FileSystem fs, Path path, CacheConfig cacheConf) throws IOException {
|
||||
Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
|
||||
return createReaderWithEncoding(fs, path, cacheConf,
|
||||
DataBlockEncoding.NONE);
|
||||
}
|
||||
|
|
|
@ -218,7 +218,6 @@ public interface HMasterInterface extends VersionedProtocol {
|
|||
public void unassign(final byte [] regionName, final boolean force)
|
||||
throws IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Offline a region from the assignment manager's in-memory state. The
|
||||
* region should be in a closed state and there will be no attempt to
|
||||
|
|
|
@ -1034,8 +1034,9 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
regionInfo = regionState.getRegion();
|
||||
} else {
|
||||
try {
|
||||
regionInfo = MetaReader.getRegion(catalogTracker,
|
||||
data.getRegionName()).getFirst();
|
||||
byte[] name = data.getRegionName();
|
||||
Pair<HRegionInfo, ServerName> p = MetaReader.getRegion(catalogTracker, name);
|
||||
regionInfo = p.getFirst();
|
||||
} catch (IOException e) {
|
||||
LOG.info("Exception reading META doing HBCK repair operation", e);
|
||||
return;
|
||||
|
|
|
@ -1904,18 +1904,17 @@ Server {
|
|||
public double getAverageLoad() {
|
||||
return this.assignmentManager.getAverageLoad();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Special method, only used by hbck.
|
||||
*/
|
||||
@Override
|
||||
public void offline(final byte[] regionName)
|
||||
throws IOException {
|
||||
public void offline(final byte[] regionName) throws IOException {
|
||||
Pair<HRegionInfo, ServerName> pair =
|
||||
MetaReader.getRegion(this.catalogTracker, regionName);
|
||||
if (pair == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
|
||||
HRegionInfo hri = pair.getFirst();
|
||||
this.assignmentManager.regionOffline(hri);
|
||||
this.assignmentManager.regionOffline(hri);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -21,44 +21,55 @@ package org.apache.hadoop.hbase.util;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.NotServingRegionException;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HConnection;
|
||||
import org.apache.hadoop.hbase.client.HConnectionManager;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.wal.HLog;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
||||
/**
|
||||
* This class contains helper methods that repair parts of hbase's filesystem
|
||||
* contents.
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
@InterfaceStability.Evolving
|
||||
public class HBaseFsckRepair {
|
||||
public static final Log LOG = LogFactory.getLog(HBaseFsckRepair.class);
|
||||
|
||||
/**
|
||||
* Fix dupe assignment by doing silent closes on each RS hosting the region
|
||||
* Fix multiple assignment by doing silent closes on each RS hosting the region
|
||||
* and then force ZK unassigned node to OFFLINE to trigger assignment by
|
||||
* master.
|
||||
* @param admin
|
||||
* @param region
|
||||
* @param servers
|
||||
* @throws IOException
|
||||
* @throws KeeperException
|
||||
* @throws InterruptedException
|
||||
*
|
||||
* @param admin HBase admin used to undeploy
|
||||
* @param region Region to undeploy
|
||||
* @param servers list of Servers to undeploy from
|
||||
*/
|
||||
public static void fixDupeAssignment(HBaseAdmin admin, HRegionInfo region,
|
||||
public static void fixMultiAssignment(HBaseAdmin admin, HRegionInfo region,
|
||||
List<ServerName> servers)
|
||||
throws IOException, KeeperException, InterruptedException {
|
||||
|
||||
HRegionInfo actualRegion = new HRegionInfo(region);
|
||||
|
||||
// Close region on the servers silently
|
||||
for(ServerName server : servers) {
|
||||
closeRegionSilentlyAndWait(admin.getConfiguration(), server, actualRegion);
|
||||
closeRegionSilentlyAndWait(admin, server, actualRegion);
|
||||
}
|
||||
|
||||
// Force ZK node to OFFLINE so master assigns
|
||||
|
@ -67,58 +78,133 @@ public class HBaseFsckRepair {
|
|||
|
||||
/**
|
||||
* Fix unassigned by creating/transition the unassigned ZK node for this
|
||||
* region to OFFLINE state with a special flag to tell the master that this
|
||||
* is a forced operation by HBCK.
|
||||
* @param admin
|
||||
* region to OFFLINE state with a special flag to tell the master that this is
|
||||
* a forced operation by HBCK.
|
||||
*
|
||||
* This assumes that info is in META.
|
||||
*
|
||||
* @param conf
|
||||
* @param region
|
||||
* @throws IOException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
public static void fixUnassigned(HBaseAdmin admin, HRegionInfo region)
|
||||
throws IOException, KeeperException {
|
||||
throws IOException, KeeperException {
|
||||
HRegionInfo actualRegion = new HRegionInfo(region);
|
||||
|
||||
// Force ZK node to OFFLINE so master assigns
|
||||
forceOfflineInZK(admin, actualRegion);
|
||||
}
|
||||
|
||||
/**
|
||||
* In 0.90, this forces an HRI offline by setting the RegionTransitionData
|
||||
* in ZK to have HBCK_CODE_NAME as the server. This is a special case in
|
||||
* the AssignmentManager that attempts an assign call by the master.
|
||||
*
|
||||
* @see org.apache.hadoop.hbase.master.AssignementManager#handleHBCK
|
||||
*
|
||||
* This doesn't seem to work properly in the updated version of 0.92+'s hbck
|
||||
* so we use assign to force the region into transition. This has the
|
||||
* side-effect of requiring a HRegionInfo that considers regionId (timestamp)
|
||||
* in comparators that is addressed by HBASE-5563.
|
||||
*/
|
||||
private static void forceOfflineInZK(HBaseAdmin admin, final HRegionInfo region)
|
||||
throws ZooKeeperConnectionException, KeeperException, IOException {
|
||||
admin.assign(region.getRegionName());
|
||||
}
|
||||
|
||||
private static void closeRegionSilentlyAndWait(Configuration conf,
|
||||
ServerName server, HRegionInfo region) throws IOException,
|
||||
InterruptedException {
|
||||
HConnection connection = HConnectionManager.getConnection(conf);
|
||||
boolean success = false;
|
||||
/*
|
||||
* Should we check all assignments or just not in RIT?
|
||||
*/
|
||||
public static void waitUntilAssigned(HBaseAdmin admin,
|
||||
HRegionInfo region) throws IOException, InterruptedException {
|
||||
HConnection connection = admin.getConnection();
|
||||
|
||||
try {
|
||||
HRegionInterface rs =
|
||||
connection.getHRegionConnection(server.getHostname(), server.getPort());
|
||||
rs.closeRegion(region, false);
|
||||
long timeout = conf.getLong("hbase.hbck.close.timeout", 120000);
|
||||
long timeout = admin.getConfiguration().getLong("hbase.hbck.assign.timeout", 120000);
|
||||
long expiration = timeout + System.currentTimeMillis();
|
||||
while (System.currentTimeMillis() < expiration) {
|
||||
try {
|
||||
HRegionInfo rsRegion = rs.getRegionInfo(region.getRegionName());
|
||||
if (rsRegion == null)
|
||||
throw new NotServingRegionException();
|
||||
} catch (Exception e) {
|
||||
success = true;
|
||||
return;
|
||||
Map<String, RegionState> rits=
|
||||
admin.getClusterStatus().getRegionsInTransition();
|
||||
|
||||
if (rits.keySet() != null && !rits.keySet().contains(region.getEncodedName())) {
|
||||
// yay! no longer RIT
|
||||
return;
|
||||
}
|
||||
// still in rit
|
||||
LOG.info("Region still in transition, waiting for "
|
||||
+ "it to become assigned: " + region);
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Exception when waiting for region to become assigned,"
|
||||
+ " retrying", e);
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
throw new IOException("Region " + region + " failed to close within"
|
||||
+ " timeout " + timeout);
|
||||
throw new IOException("Region " + region + " failed to move out of " +
|
||||
"transition within timeout " + timeout + "ms");
|
||||
} finally {
|
||||
try {
|
||||
connection.close();
|
||||
} catch (IOException ioe) {
|
||||
if (success) {
|
||||
throw ioe;
|
||||
}
|
||||
throw ioe;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Contacts a region server and waits up to hbase.hbck.close.timeout ms
|
||||
* (default 120s) to close the region. This bypasses the active hmaster.
|
||||
*/
|
||||
public static void closeRegionSilentlyAndWait(HBaseAdmin admin,
|
||||
ServerName server, HRegionInfo region) throws IOException, InterruptedException {
|
||||
HConnection connection = admin.getConnection();
|
||||
HRegionInterface rs = connection.getHRegionConnection(server.getHostname(),
|
||||
server.getPort());
|
||||
rs.closeRegion(region, false);
|
||||
long timeout = admin.getConfiguration()
|
||||
.getLong("hbase.hbck.close.timeout", 120000);
|
||||
long expiration = timeout + System.currentTimeMillis();
|
||||
while (System.currentTimeMillis() < expiration) {
|
||||
try {
|
||||
HRegionInfo rsRegion = rs.getRegionInfo(region.getRegionName());
|
||||
if (rsRegion == null)
|
||||
return;
|
||||
} catch (IOException ioe) {
|
||||
return;
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
throw new IOException("Region " + region + " failed to close within"
|
||||
+ " timeout " + timeout);
|
||||
}
|
||||
|
||||
/**
|
||||
* Puts the specified HRegionInfo into META.
|
||||
*/
|
||||
public static void fixMetaHoleOnline(Configuration conf,
|
||||
HRegionInfo hri) throws IOException {
|
||||
Put p = new Put(hri.getRegionName());
|
||||
p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||
Writables.getBytes(hri));
|
||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
meta.put(p);
|
||||
meta.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates, flushes, and closes a new region.
|
||||
*/
|
||||
public static HRegion createHDFSRegionDir(Configuration conf,
|
||||
HRegionInfo hri, HTableDescriptor htd) throws IOException {
|
||||
// Create HRegion
|
||||
Path root = FSUtils.getRootDir(conf);
|
||||
HRegion region = HRegion.createHRegion(hri, root, conf, htd);
|
||||
HLog hlog = region.getLog();
|
||||
|
||||
// Close the new region to flush to disk. Close log file too.
|
||||
region.close();
|
||||
hlog.closeAndDelete();
|
||||
return region;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,7 +44,6 @@ import org.apache.hadoop.io.MultipleIOException;
|
|||
@InterfaceStability.Evolving
|
||||
public class OfflineMetaRepair {
|
||||
private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
|
||||
HBaseFsck fsck;
|
||||
|
||||
protected static void printUsageAndExit() {
|
||||
System.err.println("Usage: OfflineMetaRepair [opts] ");
|
||||
|
@ -52,6 +51,8 @@ public class OfflineMetaRepair {
|
|||
System.err
|
||||
.println(" -details Display full report of all regions.");
|
||||
System.err.println(" -base <hdfs://> Base Hbase Data directory");
|
||||
System.err.println(" -fix Auto fix as many problems as possible");
|
||||
System.err.println(" -fixHoles Auto fix as region holes");
|
||||
Runtime.getRuntime().exit(-2);
|
||||
}
|
||||
|
||||
|
@ -67,18 +68,24 @@ public class OfflineMetaRepair {
|
|||
Configuration conf = HBaseConfiguration.create();
|
||||
conf.set("fs.defaultFS", conf.get(HConstants.HBASE_DIR));
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
boolean fixHoles = false;
|
||||
|
||||
// Process command-line args.
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String cmd = args[i];
|
||||
if (cmd.equals("-details")) {
|
||||
fsck.displayFullReport();
|
||||
fsck.setDisplayFullReport();
|
||||
} else if (cmd.equals("-base")) {
|
||||
// update hbase root dir to user-specified base
|
||||
i++;
|
||||
String path = args[i];
|
||||
conf.set(HConstants.HBASE_DIR, path);
|
||||
conf.set("fs.defaultFS", conf.get(HConstants.HBASE_DIR));
|
||||
} else if (cmd.equals("-fixHoles")) {
|
||||
fixHoles = true;
|
||||
} else if (cmd.equals("-fix")) {
|
||||
// make all fix options true
|
||||
fixHoles = true;
|
||||
} else {
|
||||
String str = "Unknown command line option : " + cmd;
|
||||
LOG.info(str);
|
||||
|
@ -91,7 +98,7 @@ public class OfflineMetaRepair {
|
|||
// threads cleanly, so we do a System.exit.
|
||||
boolean success = false;
|
||||
try {
|
||||
success = fsck.rebuildMeta();
|
||||
success = fsck.rebuildMeta(fixHoles);
|
||||
} catch (MultipleIOException mioes) {
|
||||
for (IOException ioe : mioes.getExceptions()) {
|
||||
LOG.error("Bailed out due to:", ioe);
|
||||
|
|
|
@ -1211,11 +1211,16 @@ public class HBaseTestingUtility {
|
|||
List<byte[]> rows = new ArrayList<byte[]>();
|
||||
ResultScanner s = t.getScanner(new Scan());
|
||||
for (Result result : s) {
|
||||
HRegionInfo info = Writables.getHRegionInfo(
|
||||
result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
|
||||
byte[] val = result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
|
||||
if (val == null) {
|
||||
LOG.error("No region info for row " + Bytes.toString(result.getRow()));
|
||||
// TODO figure out what to do for this new hosed case.
|
||||
continue;
|
||||
}
|
||||
HRegionInfo info = Writables.getHRegionInfo(val);
|
||||
if (Bytes.compareTo(info.getTableName(), tableName) == 0) {
|
||||
LOG.info("getMetaTableRows: row -> " +
|
||||
Bytes.toStringBinary(result.getRow()));
|
||||
Bytes.toStringBinary(result.getRow()) + info);
|
||||
rows.add(result.getRow());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,8 +23,12 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
|
|||
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
|
||||
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
@ -32,16 +36,27 @@ import java.util.Map.Entry;
|
|||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.*;
|
||||
import org.apache.hadoop.hbase.ClusterStatus;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.MediumTests;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.client.Delete;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HConnection;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -54,16 +69,20 @@ import org.junit.experimental.categories.Category;
|
|||
*/
|
||||
@Category(MediumTests.class)
|
||||
public class TestHBaseFsck {
|
||||
final Log LOG = LogFactory.getLog(getClass());
|
||||
final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
|
||||
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||
private final static Configuration conf = TEST_UTIL.getConfiguration();
|
||||
private final static byte[] FAM = Bytes.toBytes("fam");
|
||||
|
||||
// for the instance, reset every test run
|
||||
private HTable tbl;
|
||||
private final static byte[][] splits= new byte[][] { Bytes.toBytes("A"),
|
||||
private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
|
||||
Bytes.toBytes("B"), Bytes.toBytes("C") };
|
||||
|
||||
// one row per region.
|
||||
private final static byte[][] ROWKEYS= new byte[][] {
|
||||
Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
|
||||
Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
TEST_UTIL.getConfiguration().setBoolean("hbase.master.distributed.log.splitting", false);
|
||||
|
@ -117,8 +136,8 @@ public class TestHBaseFsck {
|
|||
assertErrors(doFsck(conf, true), new ERROR_CODE[]{
|
||||
ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
|
||||
|
||||
// fixing assignements require opening regions is not synchronous. To make
|
||||
// the test pass consistentyl so for now we bake in some sleep to let it
|
||||
// Fixing assignments requires opening regions, which is not synchronous. To make
|
||||
// the test pass consistently, for now we bake in some sleep to let it
|
||||
// finish. 1s seems sufficient.
|
||||
Thread.sleep(1000);
|
||||
|
||||
|
@ -135,6 +154,9 @@ public class TestHBaseFsck {
|
|||
meta.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new region in META.
|
||||
*/
|
||||
private HRegionInfo createRegion(Configuration conf, final HTableDescriptor
|
||||
htd, byte[] startKey, byte[] endKey)
|
||||
throws IOException {
|
||||
|
@ -147,47 +169,102 @@ public class TestHBaseFsck {
|
|||
return hri;
|
||||
}
|
||||
|
||||
public void dumpMeta(HTableDescriptor htd) throws IOException {
|
||||
List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(htd.getName());
|
||||
/**
|
||||
* Debugging method to dump the contents of meta.
|
||||
*/
|
||||
private void dumpMeta(byte[] tableName) throws IOException {
|
||||
List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
|
||||
for (byte[] row : metaRows) {
|
||||
LOG.info(Bytes.toString(row));
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteRegion(Configuration conf, final HTableDescriptor htd,
|
||||
byte[] startKey, byte[] endKey) throws IOException {
|
||||
/**
|
||||
* This method is used to undeploy a region -- close it and attempt to
|
||||
* remove its state from the Master.
|
||||
*/
|
||||
private void undeployRegion(HBaseAdmin admin, ServerName sn,
|
||||
HRegionInfo hri) throws IOException, InterruptedException {
|
||||
try {
|
||||
HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
|
||||
admin.getMaster().offline(hri.getRegionName());
|
||||
} catch (IOException ioe) {
|
||||
LOG.warn("Got exception when attempting to offline region "
|
||||
+ Bytes.toString(hri.getRegionName()), ioe);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Delete a region from assignments, meta, or completely from hdfs.
|
||||
* @param unassign if true unassign region if assigned
|
||||
* @param metaRow if true remove region's row from META
|
||||
* @param hdfs if true remove region's dir in HDFS
|
||||
*/
|
||||
private void deleteRegion(Configuration conf, final HTableDescriptor htd,
|
||||
byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
|
||||
boolean hdfs) throws IOException, InterruptedException {
|
||||
deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false);
|
||||
}
|
||||
|
||||
LOG.info("Before delete:");
|
||||
dumpMeta(htd);
|
||||
/**
|
||||
* Delete a region from assignments, meta, or completely from hdfs.
|
||||
* @param unassign if true unassign region if assigned
|
||||
* @param metaRow if true remove region's row from META
|
||||
* @param hdfs if true remove region's dir in HDFS
|
||||
* @param regionInfoOnly if true remove a region dir's .regioninfo file
|
||||
*/
|
||||
private void deleteRegion(Configuration conf, final HTableDescriptor htd,
|
||||
byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
|
||||
boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException {
|
||||
LOG.info("** Before delete:");
|
||||
dumpMeta(htd.getName());
|
||||
|
||||
Map<HRegionInfo, HServerAddress> hris = tbl.getRegionsInfo();
|
||||
for (Entry<HRegionInfo, HServerAddress> e: hris.entrySet()) {
|
||||
Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
|
||||
for (Entry<HRegionInfo, ServerName> e: hris.entrySet()) {
|
||||
HRegionInfo hri = e.getKey();
|
||||
HServerAddress hsa = e.getValue();
|
||||
if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
|
||||
ServerName hsa = e.getValue();
|
||||
if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
|
||||
&& Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
|
||||
|
||||
LOG.info("RegionName: " +hri.getRegionNameAsString());
|
||||
byte[] deleteRow = hri.getRegionName();
|
||||
TEST_UTIL.getHBaseAdmin().unassign(deleteRow, true);
|
||||
|
||||
LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
|
||||
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
|
||||
FileSystem fs = rootDir.getFileSystem(conf);
|
||||
Path p = new Path(rootDir + "/" + htd.getNameAsString(), hri.getEncodedName());
|
||||
fs.delete(p, true);
|
||||
if (unassign) {
|
||||
LOG.info("Undeploying region " + hri + " from server " + hsa);
|
||||
undeployRegion(new HBaseAdmin(conf), hsa, hri);
|
||||
}
|
||||
|
||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
Delete delete = new Delete(deleteRow);
|
||||
meta.delete(delete);
|
||||
if (regionInfoOnly) {
|
||||
LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
|
||||
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
|
||||
FileSystem fs = rootDir.getFileSystem(conf);
|
||||
Path p = new Path(rootDir + "/" + htd.getNameAsString(), hri.getEncodedName());
|
||||
Path hriPath = new Path(p, HRegion.REGIONINFO_FILE);
|
||||
fs.delete(hriPath, true);
|
||||
}
|
||||
|
||||
if (hdfs) {
|
||||
LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
|
||||
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
|
||||
FileSystem fs = rootDir.getFileSystem(conf);
|
||||
Path p = new Path(rootDir + "/" + htd.getNameAsString(), hri.getEncodedName());
|
||||
HBaseFsck.debugLsr(conf, p);
|
||||
boolean success = fs.delete(p, true);
|
||||
LOG.info("Deleted " + p + " sucessfully? " + success);
|
||||
HBaseFsck.debugLsr(conf, p);
|
||||
}
|
||||
|
||||
if (metaRow) {
|
||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
Delete delete = new Delete(deleteRow);
|
||||
meta.delete(delete);
|
||||
}
|
||||
}
|
||||
LOG.info(hri.toString() + hsa.toString());
|
||||
}
|
||||
|
||||
TEST_UTIL.getMetaTableRows(htd.getName());
|
||||
LOG.info("After delete:");
|
||||
dumpMeta(htd);
|
||||
|
||||
LOG.info("*** After delete:");
|
||||
dumpMeta(htd.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -201,11 +278,32 @@ public class TestHBaseFsck {
|
|||
HTableDescriptor desc = new HTableDescriptor(tablename);
|
||||
HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
|
||||
desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
|
||||
TEST_UTIL.getHBaseAdmin().createTable(desc, splits);
|
||||
TEST_UTIL.getHBaseAdmin().createTable(desc, SPLITS);
|
||||
tbl = new HTable(TEST_UTIL.getConfiguration(), tablename);
|
||||
|
||||
List<Put> puts = new ArrayList<Put>();
|
||||
for (byte[] row : ROWKEYS) {
|
||||
Put p = new Put(row);
|
||||
p.add(FAM, Bytes.toBytes("val"), row);
|
||||
puts.add(p);
|
||||
}
|
||||
tbl.put(puts);
|
||||
tbl.flushCommits();
|
||||
return tbl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts the number of row to verify data loss or non-dataloss.
|
||||
*/
|
||||
int countRows() throws IOException {
|
||||
Scan s = new Scan();
|
||||
ResultScanner rs = tbl.getScanner(s);
|
||||
int i = 0;
|
||||
while(rs.next() !=null) {
|
||||
i++;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* delete table in preparation for next test
|
||||
|
@ -214,14 +312,21 @@ public class TestHBaseFsck {
|
|||
* @throws IOException
|
||||
*/
|
||||
void deleteTable(String tablename) throws IOException {
|
||||
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
|
||||
HBaseAdmin admin = new HBaseAdmin(conf);
|
||||
admin.getConnection().clearRegionCache();
|
||||
byte[] tbytes = Bytes.toBytes(tablename);
|
||||
admin.disableTable(tbytes);
|
||||
admin.disableTableAsync(tbytes);
|
||||
while (!admin.isTableDisabled(tbytes)) {
|
||||
try {
|
||||
Thread.sleep(250);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
fail("Interrupted when trying to disable table " + tablename);
|
||||
}
|
||||
}
|
||||
admin.deleteTable(tbytes);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* This creates a clean table and confirms that the table is clean.
|
||||
*/
|
||||
|
@ -234,18 +339,21 @@ public class TestHBaseFsck {
|
|||
assertNoErrors(hbck);
|
||||
|
||||
setupTable(table);
|
||||
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// We created 1 table, should be fine
|
||||
hbck = doFsck(conf, false);
|
||||
assertNoErrors(hbck);
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates a bad table with regions that have a duplicate start key
|
||||
* This creates and fixes a bad table with regions that have a duplicate
|
||||
* start key
|
||||
*/
|
||||
@Test
|
||||
public void testDupeStartKey() throws Exception {
|
||||
|
@ -253,6 +361,7 @@ public class TestHBaseFsck {
|
|||
try {
|
||||
setupTable(table);
|
||||
assertNoErrors(doFsck(conf, false));
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Now let's mess it up, by adding a region with a duplicate startkey
|
||||
HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
|
||||
|
@ -265,13 +374,112 @@ public class TestHBaseFsck {
|
|||
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
|
||||
ERROR_CODE.DUPE_STARTKEYS});
|
||||
assertEquals(2, hbck.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
|
||||
|
||||
// fix the degenerate region.
|
||||
doFsck(conf,true);
|
||||
|
||||
// check that the degenerate region is gone and no data loss
|
||||
HBaseFsck hbck2 = doFsck(conf,false);
|
||||
assertNoErrors(hbck2);
|
||||
assertEquals(0, hbck2.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This creates a bad table with regions that has startkey == endkey
|
||||
* Get region info from local cluster.
|
||||
*/
|
||||
Map<ServerName, List<String>> getDeployedHRIs(HBaseAdmin admin)
|
||||
throws IOException {
|
||||
ClusterStatus status = admin.getMaster().getClusterStatus();
|
||||
Collection<ServerName> regionServers = status.getServers();
|
||||
Map<ServerName, List<String>> mm =
|
||||
new HashMap<ServerName, List<String>>();
|
||||
HConnection connection = admin.getConnection();
|
||||
for (ServerName hsi : regionServers) {
|
||||
HRegionInterface server =
|
||||
connection.getHRegionConnection(hsi.getHostname(), hsi.getPort());
|
||||
|
||||
// list all online regions from this region server
|
||||
List<HRegionInfo> regions = server.getOnlineRegions();
|
||||
List<String> regionNames = new ArrayList<String>();
|
||||
for (HRegionInfo hri : regions) {
|
||||
regionNames.add(hri.getRegionNameAsString());
|
||||
}
|
||||
mm.put(hsi, regionNames);
|
||||
}
|
||||
return mm;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the HSI a region info is on.
|
||||
*/
|
||||
ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
|
||||
for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
|
||||
if (e.getValue().contains(hri.getRegionNameAsString())) {
|
||||
return e.getKey();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* This create and fixes a bad table with regions that have a duplicate
|
||||
* start key
|
||||
*/
|
||||
@Test
|
||||
public void testDupeRegion() throws Exception {
|
||||
String table = "tableDupeRegion";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertNoErrors(doFsck(conf, false));
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Now let's mess it up, by adding a region with a duplicate startkey
|
||||
HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
|
||||
Bytes.toBytes("A"), Bytes.toBytes("B"));
|
||||
|
||||
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
|
||||
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
|
||||
.waitForAssignment(hriDupe);
|
||||
|
||||
// Yikes! The assignment manager can't tell between diff between two
|
||||
// different regions with the same start/endkeys since it doesn't
|
||||
// differentiate on ts/regionId! We actually need to recheck
|
||||
// deployments!
|
||||
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
|
||||
ServerName hsi;
|
||||
while ( (hsi = findDeployedHSI(getDeployedHRIs(admin), hriDupe)) == null) {
|
||||
Thread.sleep(250);
|
||||
}
|
||||
|
||||
LOG.debug("Finished assignment of dupe region");
|
||||
|
||||
// TODO why is dupe region different from dupe start keys?
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
|
||||
ERROR_CODE.DUPE_STARTKEYS});
|
||||
assertEquals(2, hbck.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
|
||||
|
||||
// fix the degenerate region.
|
||||
doFsck(conf,true);
|
||||
|
||||
// check that the degenerate region is gone and no data loss
|
||||
HBaseFsck hbck2 = doFsck(conf,false);
|
||||
assertNoErrors(hbck2);
|
||||
assertEquals(0, hbck2.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table with a region that has startkey == endkey
|
||||
*/
|
||||
@Test
|
||||
public void testDegenerateRegions() throws Exception {
|
||||
|
@ -279,6 +487,7 @@ public class TestHBaseFsck {
|
|||
try {
|
||||
setupTable(table);
|
||||
assertNoErrors(doFsck(conf,false));
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Now let's mess it up, by adding a region with a duplicate startkey
|
||||
HRegionInfo hriDupe = createRegion(conf, tbl.getTableDescriptor(),
|
||||
|
@ -291,19 +500,111 @@ public class TestHBaseFsck {
|
|||
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION,
|
||||
ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS});
|
||||
assertEquals(2, hbck.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// fix the degenerate region.
|
||||
doFsck(conf,true);
|
||||
|
||||
// check that the degenerate region is gone and no data loss
|
||||
HBaseFsck hbck2 = doFsck(conf,false);
|
||||
assertNoErrors(hbck2);
|
||||
assertEquals(0, hbck2.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This creates a bad table where a start key contained in another region.
|
||||
* This creates and fixes a bad table where a region is completely contained
|
||||
* by another region.
|
||||
*/
|
||||
@Test
|
||||
public void testContainedRegionOverlap() throws Exception {
|
||||
String table = "tableContainedRegionOverlap";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by creating an overlap in the metadata
|
||||
HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
|
||||
Bytes.toBytes("A2"), Bytes.toBytes("B"));
|
||||
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
|
||||
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
|
||||
.waitForAssignment(hriOverlap);
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {
|
||||
ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
|
||||
assertEquals(2, hbck.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// fix the problem.
|
||||
doFsck(conf, true);
|
||||
|
||||
// verify that overlaps are fixed
|
||||
HBaseFsck hbck2 = doFsck(conf,false);
|
||||
assertNoErrors(hbck2);
|
||||
assertEquals(0, hbck2.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table where a region is completely contained
|
||||
* by another region, and there is a hole (sort of like a bad split)
|
||||
*/
|
||||
@Test
|
||||
public void testOverlapAndOrphan() throws Exception {
|
||||
String table = "tableOverlapAndOrphan";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by creating an overlap in the metadata
|
||||
TEST_UTIL.getHBaseAdmin().disableTable(table);
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
|
||||
Bytes.toBytes("B"), true, true, false, true);
|
||||
TEST_UTIL.getHBaseAdmin().enableTable(table);
|
||||
|
||||
HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
|
||||
Bytes.toBytes("A2"), Bytes.toBytes("B"));
|
||||
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
|
||||
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
|
||||
.waitForAssignment(hriOverlap);
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {
|
||||
ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
|
||||
ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||
|
||||
// fix the problem.
|
||||
doFsck(conf, true);
|
||||
|
||||
// verify that overlaps are fixed
|
||||
HBaseFsck hbck2 = doFsck(conf,false);
|
||||
assertNoErrors(hbck2);
|
||||
assertEquals(0, hbck2.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table where a region overlaps two regions --
|
||||
* its start key is contained in one region and its end key is contained in
|
||||
* yet another region.
|
||||
*/
|
||||
@Test
|
||||
public void testCoveredStartKey() throws Exception {
|
||||
String table = "tableCoveredStartKey";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by creating an overlap in the metadata
|
||||
HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
|
||||
|
@ -317,40 +618,239 @@ public class TestHBaseFsck {
|
|||
ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
|
||||
ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
|
||||
assertEquals(3, hbck.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// fix the problem.
|
||||
doFsck(conf, true);
|
||||
|
||||
// verify that overlaps are fixed
|
||||
HBaseFsck hbck2 = doFsck(conf, false);
|
||||
assertErrors(hbck2, new ERROR_CODE[0]);
|
||||
assertEquals(0, hbck2.getOverlapGroups(table).size());
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates a bad table with a hole in meta.
|
||||
* This creates and fixes a bad table with a missing region -- hole in meta
|
||||
* and data missing in the fs.
|
||||
*/
|
||||
@Test
|
||||
public void testMetaHole() throws Exception {
|
||||
String table = "tableMetaHole";
|
||||
public void testRegionHole() throws Exception {
|
||||
String table = "tableRegionHole";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by leaving a hole in the meta data
|
||||
HRegionInfo hriHole = createRegion(conf, tbl.getTableDescriptor(),
|
||||
Bytes.toBytes("D"), Bytes.toBytes(""));
|
||||
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriHole);
|
||||
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
|
||||
.waitForAssignment(hriHole);
|
||||
|
||||
// Mess it up by leaving a hole in the assignment, meta, and hdfs data
|
||||
TEST_UTIL.getHBaseAdmin().disableTable(table);
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""));
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
|
||||
Bytes.toBytes("C"), true, true, true);
|
||||
TEST_UTIL.getHBaseAdmin().enableTable(table);
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
|
||||
assertErrors(hbck, new ERROR_CODE[] {
|
||||
ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||
// holes are separate from overlap groups
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
|
||||
// fix hole
|
||||
doFsck(conf, true);
|
||||
|
||||
// check that hole fixed
|
||||
assertNoErrors(doFsck(conf,false));
|
||||
assertEquals(ROWKEYS.length - 2 , countRows()); // lost a region so lost a row
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table with a missing region -- hole in meta
|
||||
* and data present but .regioninfo missing (an orphan hdfs region) in the fs.
|
||||
*/
|
||||
@Test
|
||||
public void testHDFSRegioninfoMissing() throws Exception {
|
||||
String table = "tableHDFSRegioininfoMissing";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by leaving a hole in the meta data
|
||||
TEST_UTIL.getHBaseAdmin().disableTable(table);
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
|
||||
Bytes.toBytes("C"), true, true, false, true);
|
||||
TEST_UTIL.getHBaseAdmin().enableTable(table);
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {
|
||||
ERROR_CODE.ORPHAN_HDFS_REGION,
|
||||
ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
|
||||
ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||
// holes are separate from overlap groups
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
|
||||
// fix hole
|
||||
doFsck(conf, true);
|
||||
|
||||
// check that hole fixed
|
||||
assertNoErrors(doFsck(conf, false));
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table with a region that is missing meta and
|
||||
* not assigned to a region server.
|
||||
*/
|
||||
@Test
|
||||
public void testNotInMetaOrDeployedHole() throws Exception {
|
||||
String table = "tableNotInMetaOrDeployedHole";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by leaving a hole in the meta data
|
||||
TEST_UTIL.getHBaseAdmin().disableTable(table);
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
|
||||
Bytes.toBytes("C"), true, true, false); // don't rm from fs
|
||||
TEST_UTIL.getHBaseAdmin().enableTable(table);
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {
|
||||
ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||
// holes are separate from overlap groups
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
|
||||
// fix hole
|
||||
assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
|
||||
ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||
|
||||
// check that hole fixed
|
||||
assertNoErrors(doFsck(conf,false));
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table with a hole in meta.
|
||||
*/
|
||||
@Test
|
||||
public void testNotInMetaHole() throws Exception {
|
||||
String table = "tableNotInMetaHole";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// Mess it up by leaving a hole in the meta data
|
||||
TEST_UTIL.getHBaseAdmin().disableTable(table);
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
|
||||
Bytes.toBytes("C"), false, true, false); // don't rm from fs
|
||||
TEST_UTIL.getHBaseAdmin().enableTable(table);
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {
|
||||
ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||
// holes are separate from overlap groups
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
|
||||
// fix hole
|
||||
assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
|
||||
ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
|
||||
|
||||
// check that hole fixed
|
||||
assertNoErrors(doFsck(conf,false));
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates and fixes a bad table with a region that is in meta but has
|
||||
* no deployment or data in hdfs
|
||||
*/
|
||||
@Test
|
||||
public void testNotInHdfs() throws Exception {
|
||||
String table = "tableNotInHdfs";
|
||||
try {
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// make sure data is in regions; if it is in the hlog only, there is no data loss
|
||||
TEST_UTIL.getHBaseAdmin().flush(table);
|
||||
|
||||
// Mess it up by leaving a hole in the hdfs data
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
|
||||
Bytes.toBytes("C"), false, false, true); // don't rm meta
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
|
||||
// holes are separate from overlap groups
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
|
||||
// fix hole
|
||||
doFsck(conf, true);
|
||||
|
||||
// check that hole fixed
|
||||
assertNoErrors(doFsck(conf,false));
|
||||
assertEquals(ROWKEYS.length - 2, countRows());
|
||||
} finally {
|
||||
deleteTable(table);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This creates entries in META with no hdfs data. This should cleanly
|
||||
* remove the table.
|
||||
*/
|
||||
@Test
|
||||
public void testNoHdfsTable() throws Exception {
|
||||
String table = "NoHdfsTable";
|
||||
setupTable(table);
|
||||
assertEquals(ROWKEYS.length, countRows());
|
||||
|
||||
// make sure data is in regions; if it is in the hlog only, there is no data loss
|
||||
TEST_UTIL.getHBaseAdmin().flush(table);
|
||||
|
||||
// Mess it up by leaving a giant hole in the hdfs data
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
|
||||
Bytes.toBytes("A"), false, false, true); // don't rm meta
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
|
||||
Bytes.toBytes("B"), false, false, true); // don't rm meta
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
|
||||
Bytes.toBytes("C"), false, false, true); // don't rm meta
|
||||
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
|
||||
Bytes.toBytes(""), false, false, true); // don't rm meta
|
||||
|
||||
HBaseFsck hbck = doFsck(conf, false);
|
||||
assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
|
||||
ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
|
||||
ERROR_CODE.NOT_IN_HDFS,});
|
||||
// holes are separate from overlap groups
|
||||
assertEquals(0, hbck.getOverlapGroups(table).size());
|
||||
|
||||
// fix hole
|
||||
doFsck(conf, true); // in 0.92+, meta entries auto create regiondirs
|
||||
|
||||
// check that hole fixed
|
||||
assertNoErrors(doFsck(conf,false));
|
||||
|
||||
try {
|
||||
assertEquals(0, countRows());
|
||||
} catch (IOException ioe) {
|
||||
// we've actually deleted the table already. :)
|
||||
return;
|
||||
}
|
||||
fail("Should have failed with IOException");
|
||||
}
|
||||
|
||||
@org.junit.Rule
|
||||
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
||||
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
|
||||
|
|
|
@ -96,14 +96,6 @@ public class TestHBaseFsckComparator {
|
|||
assertTrue(HBaseFsck.cmp.compare(hi2, hi1) > 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTiebreaker() {
|
||||
HbckInfo hi1 = genHbckInfo(table, keyA, keyC, 0);
|
||||
HbckInfo hi2 = genHbckInfo(table, keyA, keyC, 1);
|
||||
assertTrue(HBaseFsck.cmp.compare(hi1, hi2) < 0);
|
||||
assertTrue(HBaseFsck.cmp.compare(hi2, hi1) > 0);
|
||||
}
|
||||
|
||||
@org.junit.Rule
|
||||
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
||||
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.util.hbck;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -28,18 +29,29 @@ import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
|
|||
|
||||
public class HbckTestingUtil {
|
||||
public static HBaseFsck doFsck(Configuration conf, boolean fix) throws Exception {
|
||||
return doFsck(conf, fix, fix, fix, fix,fix);
|
||||
}
|
||||
|
||||
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
|
||||
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
|
||||
boolean fixHdfsOrphans) throws Exception {
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
fsck.connect();
|
||||
fsck.displayFullReport(); // i.e. -details
|
||||
fsck.setDisplayFullReport(); // i.e. -details
|
||||
fsck.setTimeLag(0);
|
||||
fsck.setFixErrors(fix);
|
||||
fsck.doWork();
|
||||
fsck.setFixAssignments(fixAssignments);
|
||||
fsck.setFixMeta(fixMeta);
|
||||
fsck.setFixHdfsHoles(fixHdfsHoles);
|
||||
fsck.setFixHdfsOverlaps(fixHdfsOverlaps);
|
||||
fsck.setFixHdfsOrphans(fixHdfsOrphans);
|
||||
fsck.onlineHbck();
|
||||
return fsck;
|
||||
}
|
||||
|
||||
|
||||
public static void assertNoErrors(HBaseFsck fsck) throws Exception {
|
||||
List<ERROR_CODE> errs = fsck.getErrors().getErrorList();
|
||||
assertEquals(0, errs.size());
|
||||
assertEquals(new ArrayList<ERROR_CODE>(), errs);
|
||||
}
|
||||
|
||||
public static void assertErrors(HBaseFsck fsck, ERROR_CODE[] expectedErrors) {
|
||||
|
|
|
@ -61,7 +61,7 @@ public class TestOfflineMetaRebuildBase extends OfflineMetaRebuildTestCore {
|
|||
|
||||
// rebuild meta table from scratch
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
assertTrue(fsck.rebuildMeta());
|
||||
assertTrue(fsck.rebuildMeta(false));
|
||||
|
||||
// bring up the minicluster
|
||||
TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default
|
||||
|
|
|
@ -21,6 +21,7 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
|
|||
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
|
@ -64,7 +65,7 @@ public class TestOfflineMetaRebuildHole extends OfflineMetaRebuildTestCore {
|
|||
|
||||
// attempt to rebuild meta table from scratch
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
assertFalse(fsck.rebuildMeta());
|
||||
assertFalse(fsck.rebuildMeta(false));
|
||||
|
||||
// bring up the minicluster
|
||||
TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default
|
||||
|
|
|
@ -69,7 +69,7 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore {
|
|||
|
||||
// attempt to rebuild meta table from scratch
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
assertFalse(fsck.rebuildMeta());
|
||||
assertFalse(fsck.rebuildMeta(false));
|
||||
|
||||
Multimap<byte[], HbckInfo> problems = fsck.getOverlapGroups(table);
|
||||
assertEquals(1, problems.keySet().size());
|
||||
|
|
Loading…
Reference in New Issue