HBASE-2819 hbck should have the ability to repair basic problems

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1031694 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2010-11-05 18:20:43 +00:00
parent 92e0f47b8b
commit b09838d4f4
13 changed files with 963 additions and 336 deletions

View File

@ -1102,7 +1102,7 @@ Release 0.21.0 - Unreleased
a minute
HBASE-3189 Stagger Major Compactions (Nicolas Spiegelberg via Stack)
HBASE-2564 [rest] Tests use deprecated foundation
HBASE-2819 hbck should have the ability to repair basic problems
NEW FEATURES
HBASE-1961 HBase EC2 scripts

View File

@ -325,9 +325,10 @@ public class MetaReader {
*/
public static Pair<HRegionInfo, HServerInfo> metaRowToRegionPairWithInfo(
Result data) throws IOException {
HRegionInfo info = Writables.getHRegionInfo(
data.getValue(HConstants.CATALOG_FAMILY,
HConstants.REGIONINFO_QUALIFIER));
byte [] bytes = data.getValue(HConstants.CATALOG_FAMILY,
HConstants.REGIONINFO_QUALIFIER);
if (bytes == null) return null;
HRegionInfo info = Writables.getHRegionInfo(bytes);
final byte[] value = data.getValue(HConstants.CATALOG_FAMILY,
HConstants.SERVER_QUALIFIER);
if (value != null && value.length > 0) {

View File

@ -737,12 +737,22 @@ public class HBaseAdmin implements Abortable {
HServerAddress hsa = new HServerAddress(hostAndPort);
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(ct, regionname);
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(regionname) + "; pair=" + pair);
} else {
closeRegion(hsa, pair.getFirst());
}
} else {
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(ct, regionname);
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(regionname) + "; pair=" + pair);
} else {
closeRegion(pair.getSecond(), pair.getFirst());
}
}
} finally {
cleanupCatalogTracker(ct);
}
@ -783,12 +793,18 @@ public class HBaseAdmin implements Abortable {
if (isRegionName) {
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
} else {
flush(pair.getSecond(), pair.getFirst());
}
} else {
List<Pair<HRegionInfo, HServerAddress>> pairs =
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
Bytes.toString(tableNameOrRegionName));
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
if (pair.getSecond() == null) continue;
flush(pair.getSecond(), pair.getFirst());
}
}
@ -871,12 +887,18 @@ public class HBaseAdmin implements Abortable {
if (isRegionName(tableNameOrRegionName)) {
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(ct, tableNameOrRegionName);
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
} else {
compact(pair.getSecond(), pair.getFirst(), major);
}
} else {
List<Pair<HRegionInfo, HServerAddress>> pairs =
MetaReader.getTableRegionsAndLocations(ct,
Bytes.toString(tableNameOrRegionName));
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
if (pair.getSecond() == null) continue;
compact(pair.getSecond(), pair.getFirst(), major);
}
}
@ -956,12 +978,19 @@ public class HBaseAdmin implements Abortable {
// It's a possible region name.
Pair<HRegionInfo, HServerAddress> pair =
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
if (pair == null || pair.getSecond() == null) {
LOG.info("No server in .META. for " +
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
} else {
split(pair.getSecond(), pair.getFirst());
}
} else {
List<Pair<HRegionInfo, HServerAddress>> pairs =
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
Bytes.toString(tableNameOrRegionName));
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
// May not be a server for a particular row
if (pair.getSecond() == null) continue;
split(pair.getSecond(), pair.getFirst());
}
}

View File

@ -23,7 +23,7 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.NavigableSet;
import java.util.List;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
@ -45,30 +46,36 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
/**
* Check consistency among the in-memory states of the master and the
* region server(s) and the state of data in HDFS.
*/
public class HBaseFsck extends HBaseAdmin {
public class HBaseFsck {
public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
private Configuration conf;
private FileSystem fs;
private Path rootDir;
private ClusterStatus status;
private HMasterInterface master;
private HConnection connection;
private TreeMap<HRegionInfo, MetaEntry> metaEntries;
private boolean details = false; // do we display the full report?
private TreeMap<String, HbckInfo> regionInfo = new TreeMap<String, HbckInfo>();
private TreeMap<String, TInfo> tablesInfo = new TreeMap<String, TInfo>();
ErrorReporter errors = new PrintingErrorReporter();
private static boolean details = false; // do we display the full report
private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
private boolean fix = false; // do we want to try fixing the errors?
private boolean rerun = false; // if we tried to fix something rerun hbck
private static boolean summary = false; // if we want to print less output
/**
* Constructor
@ -79,19 +86,11 @@ public class HBaseFsck extends HBaseAdmin {
*/
public HBaseFsck(Configuration conf)
throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
super(conf);
this.conf = conf;
// setup filesystem properties
this.rootDir = new Path(conf.get(HConstants.HBASE_DIR));
this.fs = rootDir.getFileSystem(conf);
// fetch information from master
master = getMaster();
status = master.getClusterStatus();
connection = getConnection();
this.metaEntries = new TreeMap<HRegionInfo, MetaEntry>();
HBaseAdmin admin = new HBaseAdmin(conf);
status = admin.getMaster().getClusterStatus();
connection = admin.getConnection();
}
/**
@ -101,250 +100,416 @@ public class HBaseFsck extends HBaseAdmin {
*/
int doWork() throws IOException {
// print hbase server version
System.out.println("Version: " + status.getHBaseVersion());
errors.print("Version: " + status.getHBaseVersion());
// Make sure regionInfo is empty before starting
regionInfo.clear();
tablesInfo.clear();
// get a list of all regions from the master. This involves
// scanning the META table
getMetaEntries(metaEntries);
if (!recordRootRegion()) {
// Will remove later if we can fix it
errors.reportError("Encountered fatal error. Exitting...");
return -1;
}
getMetaEntries();
// Check if .META. is found only once and on the right place
if (!checkMetaEntries()) {
// Will remove later if we can fix it
errors.reportError("Encountered fatal error. Exitting...");
return -1;
}
// get a list of all tables that have not changed recently.
AtomicInteger numSkipped = new AtomicInteger(0);
HTableDescriptor[] allTables = getTables(metaEntries, numSkipped);
System.out.println("Number of Tables: " + allTables.length);
HTableDescriptor[] allTables = getTables(numSkipped);
errors.print("Number of Tables: " + allTables.length);
if (details) {
if (numSkipped.get() > 0) {
System.out.println("\n Number of Tables in flux: " + numSkipped.get());
errors.detail("Number of Tables in flux: " + numSkipped.get());
}
for (HTableDescriptor td : allTables) {
String tableName = td.getNameAsString();
System.out.println("\t Table: " + tableName + "\t" +
errors.detail(" Table: " + tableName + "\t" +
(td.isReadOnly() ? "ro" : "rw") + "\t" +
(td.isRootRegion() ? "ROOT" :
(td.isMetaRegion() ? "META" : " ")) + "\t" +
" families:" + td.getFamilies().size());
" families: " + td.getFamilies().size());
}
}
// From the master, get a list of all known live region servers
Collection<HServerInfo> regionServers = status.getServerInfo();
System.out.println("Number of live region servers:" +
errors.print("Number of live region servers: " +
regionServers.size());
if (details) {
for (HServerInfo rsinfo: regionServers) {
System.out.println("\t RegionServer:" + rsinfo.getServerName());
errors.print(" " + rsinfo.getServerName());
}
}
// From the master, get a list of all dead region servers
Collection<String> deadRegionServers = status.getDeadServerNames();
System.out.println("Number of dead region servers:" +
errors.print("Number of dead region servers: " +
deadRegionServers.size());
if (details) {
for (String name: deadRegionServers) {
System.out.println("\t RegionServer(dead):" + name);
errors.print(" " + name);
}
}
// process information from all region servers
boolean status1 = processRegionServers(regionServers);
// Determine what's deployed
processRegionServers(regionServers);
// match HDFS with META
boolean status2 = checkHdfs();
// Determine what's on HDFS
checkHdfs();
if (status1 == true && status2 == true) {
System.out.println("\nRest easy, buddy! HBase is clean. ");
return 0;
// Check consistency
checkConsistency();
// Check integrity
checkIntegrity();
// Print table summary
printTableSummary();
return errors.summarize();
}
/**
* Scan HDFS for all regions, recording their information into
* regionInfo
*/
void checkHdfs() throws IOException {
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
FileSystem fs = rootDir.getFileSystem(conf);
// list all tables from HDFS
List<FileStatus> tableDirs = Lists.newArrayList();
boolean foundVersionFile = false;
FileStatus[] files = fs.listStatus(rootDir);
for (FileStatus file : files) {
if (file.getPath().getName().equals(HConstants.VERSION_FILE_NAME)) {
foundVersionFile = true;
} else {
System.out.println("\nInconsistencies detected.");
return -1;
tableDirs.add(file);
}
}
// verify that version file exists
if (!foundVersionFile) {
errors.reportError("Version file does not exist in root dir " + rootDir);
}
// level 1: <HBASE_DIR>/*
for (FileStatus tableDir : tableDirs) {
String tableName = tableDir.getPath().getName();
// ignore hidden files
if (tableName.startsWith(".") &&
!tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME)))
continue;
// level 2: <HBASE_DIR>/<table>/*
FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
for (FileStatus regionDir : regionDirs) {
String encodedName = regionDir.getPath().getName();
// ignore directories that aren't hexadecimal
if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue;
HbckInfo hbi = getOrCreateInfo(encodedName);
hbi.foundRegionDir = regionDir;
// Set a flag if this region contains only edits
// This is special case if a region is left after split
hbi.onlyEdits = true;
FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath());
for (FileStatus subDir : subDirs) {
String sdName = subDir.getPath().getName();
if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
hbi.onlyEdits = false;
break;
}
}
}
}
}
/**
* Checks HDFS and META
* @return true if there were no errors, otherwise return false
* Record the location of the ROOT region as found in ZooKeeper,
* as if it were in a META table. This is so that we can check
* deployment of ROOT.
*/
boolean checkHdfs() throws IOException {
boolean recordRootRegion() throws IOException {
HRegionLocation rootLocation = connection.locateRegion(
HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
boolean status = true; // success
// make a copy of all tables in META
TreeMap<String, MetaEntry> regions = new TreeMap<String, MetaEntry>();
for (MetaEntry meta: metaEntries.values()) {
regions.put(meta.getTableDesc().getNameAsString(), meta);
// Check if Root region is valid and existing
if (rootLocation == null || rootLocation.getRegionInfo() == null ||
rootLocation.getServerAddress() == null) {
errors.reportError("Root Region or some of its attributes is null.");
return false;
}
// list all tables from HDFS
TreeMap<Path, FileStatus> allTableDirs = new TreeMap<Path, FileStatus>();
FileStatus[] files = fs.listStatus(rootDir);
for (int i = 0; files != null && i < files.length; i++) {
allTableDirs.put(files[i].getPath(), files[i]);
}
// verify that -ROOT-, .META directories exists.
Path rdir = new Path(rootDir, Bytes.toString(HConstants.ROOT_TABLE_NAME));
FileStatus ignore = allTableDirs.remove(rdir);
if (ignore == null) {
status = false;
System.out.print("\nERROR: Path " + rdir + " for ROOT table does not exist.");
}
Path mdir = new Path(rootDir, Bytes.toString(HConstants.META_TABLE_NAME));
ignore = allTableDirs.remove(mdir);
if (ignore == null) {
status = false;
System.out.print("\nERROR: Path " + mdir + " for META table does not exist.");
}
// verify that version file exists
Path vfile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
ignore = allTableDirs.remove(vfile);
if (ignore == null) {
status = false;
System.out.print("\nERROR: Version file " + vfile + " does not exist.");
}
// filter out all valid regions found in the META
for (HRegionInfo rinfo: metaEntries.values()) {
Path tableDir = HTableDescriptor.getTableDir(rootDir,
rinfo.getTableDesc().getName());
// Path regionDir = HRegion.getRegionDir(tableDir, rinfo.getEncodedName());
// if the entry exists in allTableDirs, then remove it from allTableDirs as well
// as from the META tmp list
FileStatus found = allTableDirs.remove(tableDir);
if (found != null) {
regions.remove(tableDir.getName());
}
}
// The remaining entries in allTableDirs do not have entries in .META
// However, if the path name was modified in the last few milliseconds
// as specified by timelag, then do not flag it as an inconsistency.
long now = System.currentTimeMillis();
for (FileStatus region: allTableDirs.values()) {
if (region.getModificationTime() + timelag < now) {
String finalComponent = region.getPath().getName();
if (!finalComponent.startsWith(".")) {
// ignore .logs and .oldlogs directories
System.out.print("\nERROR: Path " + region.getPath() +
" does not have a corresponding entry in META.");
status = false;
}
}
}
// the remaining entries in tmp do not have entries in HDFS
for (HRegionInfo rinfo: regions.values()) {
System.out.println("\nERROR: Region " + rinfo.getRegionNameAsString() +
" does not have a corresponding entry in HDFS.");
status = false;
}
return status;
MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(),
rootLocation.getServerAddress(), null, System.currentTimeMillis());
HbckInfo hbInfo = new HbckInfo(m);
regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
return true;
}
/**
* Contacts each regionserver and fetches metadata about regions.
* @param regionServerList - the list of region servers to connect to
* @throws IOException if a remote or network exception occurs
* @return true if there were no errors, otherwise return false
*/
boolean processRegionServers(Collection<HServerInfo> regionServerList)
void processRegionServers(Collection<HServerInfo> regionServerList)
throws IOException {
// make a copy of all entries in META
TreeMap<HRegionInfo, MetaEntry> tmp =
new TreeMap<HRegionInfo, MetaEntry>(metaEntries);
long errorCount = 0; // number of inconsistencies detected
int showProgress = 0;
// loop to contact each region server
for (HServerInfo rsinfo: regionServerList) {
showProgress++; // one more server.
errors.progress();
try {
HRegionInterface server = connection.getHRegionConnection(
rsinfo.getServerAddress());
// list all online regions from this region server
NavigableSet<HRegionInfo> regions = server.getOnlineRegions();
List<HRegionInfo> regions = server.getOnlineRegions();
if (details) {
System.out.print("\nRegionServer:" + rsinfo.getServerName() +
" number of regions:" + regions.size());
errors.detail("RegionServer: " + rsinfo.getServerName() +
" number of regions: " + regions.size());
for (HRegionInfo rinfo: regions) {
System.out.print("\n\t name:" + rinfo.getRegionNameAsString() +
" id:" + rinfo.getRegionId() +
" encoded name:" + rinfo.getEncodedName() +
" start :" + Bytes.toStringBinary(rinfo.getStartKey()) +
" end :" + Bytes.toStringBinary(rinfo.getEndKey()));
errors.detail(" " + rinfo.getRegionNameAsString() +
" id: " + rinfo.getRegionId() +
" encoded_name: " + rinfo.getEncodedName() +
" start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
" end: " + Bytes.toStringBinary(rinfo.getEndKey()));
}
showProgress = 0;
}
// check to see if the existence of this region matches the region in META
for (HRegionInfo r: regions) {
MetaEntry metaEntry = metaEntries.get(r);
// this entry exists in the region server but is not in the META
if (metaEntry == null) {
if (r.isMetaRegion()) {
continue; // this is ROOT or META region
}
System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
" found on server " + rsinfo.getServerAddress() +
" but is not listed in META.");
errorCount++;
showProgress = 0;
continue;
}
if (!metaEntry.regionServer.equals(rsinfo.getServerAddress())) {
System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
" found on server " + rsinfo.getServerAddress() +
" but is listed in META to be on server " +
metaEntry.regionServer);
errorCount++;
showProgress = 0;
}
// The region server is indeed serving a valid region. Remove it from tmp
tmp.remove(r);
for (HRegionInfo r:regions) {
HbckInfo hbi = getOrCreateInfo(r.getEncodedName());
hbi.deployedOn.add(rsinfo.getServerAddress());
}
} catch (IOException e) { // unable to connect to the region server.
if (details) {
System.out.print("\nRegionServer:" + rsinfo.getServerName() +
errors.reportError("\nRegionServer:" + rsinfo.getServerName() +
" Unable to fetch region information. " + e);
}
}
if (showProgress % 10 == 0) {
System.out.print("."); // show progress to user
showProgress = 0;
}
}
// all the region left in tmp are not found on any region server
for (MetaEntry metaEntry: tmp.values()) {
// An offlined region will not be present out on a regionserver. A region
// is offlined if table is offlined -- will still have an entry in .META.
// or a region is offlined because it's a parent region and its daughters
// still have references.
if (metaEntry.isOffline()) continue;
System.out.print("\nERROR: Region " + metaEntry.getRegionNameAsString() +
" is not served by any region server " +
" but is listed in META to be on server " +
metaEntry.regionServer);
errorCount++;
}
if (errorCount > 0) {
System.out.println("\nDetected " + errorCount + " inconsistencies. " +
"This might not indicate a real problem because these regions " +
"could be in the midst of a split. Consider re-running with a " +
"larger value of -timelag.");
return false;
}
return true; // no errors
}
/**
* Return a list of table names whose metadata have not been modified in the
* last few milliseconds specified by timelag
* Check consistency of all regions that have been found in previous phases.
*/
void checkConsistency() throws IOException {
  // Run the per-region check over every region recorded in regionInfo.
  Collection<HbckInfo> knownRegions = regionInfo.values();
  for (HbckInfo region : knownRegions) {
    doConsistencyCheck(region);
  }
}
/**
 * Check a single region for consistency and correct deployment.
 *
 * The region is classified by three facts gathered earlier: whether it has
 * a META entry, whether a region directory was found on HDFS, and which
 * region servers report it as online. Healthy, offline and recently
 * modified regions return silently; every other combination is reported
 * through {@code errors}. When {@link #shouldFix()} is true, unassigned
 * and wrongly/multiply assigned regions are handed to
 * {@code HBaseFsckRepair} and a rerun is requested.
 *
 * @param hbi the collected information about the region to check
 * @throws IOException declared for the repair calls made when fixing
 */
void doConsistencyCheck(HbckInfo hbi) throws IOException {
  String descriptiveName = hbi.toString();

  boolean inMeta = hbi.metaEntry != null;
  boolean inHdfs = hbi.foundRegionDir != null;
  boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
  boolean isDeployed = !hbi.deployedOn.isEmpty();
  boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
  boolean deploymentMatchesMeta =
    hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
    hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
  boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline();
  // A directory touched within the last 'timelag' ms may still be in flux.
  boolean recentlyModified = inHdfs &&
    hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();

  // ========== First the healthy cases =============
  if (hbi.onlyEdits) {
    // Region flagged by the HDFS scan as holding only recovered edits.
    return;
  }
  if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
    return;
  }
  if (inMeta && !shouldBeDeployed && !isDeployed) {
    // offline regions shouldn't cause complaints
    LOG.debug("Region " + descriptiveName + " offline, ignoring.");
    return;
  }
  if (recentlyModified) {
    LOG.info("Region " + descriptiveName + " was recently modified -- skipping");
    return;
  }

  // ========== Cases where the region is not in META =============
  if (!inMeta && !inHdfs && !isDeployed) {
    // We shouldn't have record of this region at all then!
    assert false : "Entry for region with no data";
  } else if (!inMeta && !inHdfs && isDeployed) {
    errors.reportError("Region " + descriptiveName + " not on HDFS or in META but " +
      "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
  } else if (!inMeta && inHdfs && !isDeployed) {
    errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " +
      "or deployed on any region server.");
  } else if (!inMeta && inHdfs && isDeployed) {
    errors.reportError("Region " + descriptiveName + " not in META, but deployed on " +
      Joiner.on(", ").join(hbi.deployedOn));

  // ========== Cases where the region is in META =============
  } else if (inMeta && !inHdfs && !isDeployed) {
    errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS " +
      "or deployed on any region server.");
  } else if (inMeta && !inHdfs && isDeployed) {
    errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS, " +
      "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
  } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
    errors.reportError("Region " + descriptiveName + " not deployed on any region server.");
    // If we are trying to fix the errors
    if (shouldFix()) {
      errors.print("Trying to fix unassigned region...");
      setShouldRerun();
      HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry);
    }
  } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
    // Message fixed: the original read "has should not be deployed".
    errors.reportError("Region " + descriptiveName + " should not be deployed according " +
      "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
  } else if (inMeta && inHdfs && isMultiplyDeployed) {
    errors.reportError("Region " + descriptiveName + " is listed in META on region server " +
      hbi.metaEntry.regionServer + " but is multiply assigned to region servers " +
      Joiner.on(", ").join(hbi.deployedOn));
    // If we are trying to fix the errors
    if (shouldFix()) {
      errors.print("Trying to fix assignment error...");
      setShouldRerun();
      HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
    }
  } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
    errors.reportError("Region " + descriptiveName + " listed in META on region server " +
      hbi.metaEntry.regionServer + " but found on region server " +
      hbi.deployedOn.get(0));
    // If we are trying to fix the errors
    if (shouldFix()) {
      errors.print("Trying to fix assignment error...");
      setShouldRerun();
      HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
    }
  } else {
    // Dump every flag so an unexpected combination can be diagnosed.
    errors.reportError("Region " + descriptiveName + " is in an unforeseen state:" +
      " inMeta=" + inMeta +
      " inHdfs=" + inHdfs +
      " isDeployed=" + isDeployed +
      " isMultiplyDeployed=" + isMultiplyDeployed +
      " deploymentMatchesMeta=" + deploymentMatchesMeta +
      " shouldBeDeployed=" + shouldBeDeployed);
  }
}
/**
 * Checks table integrity. Walks every recorded region, groups the usable
 * ones by table name into {@code tablesInfo} (start key to end key edges
 * plus the servers they are deployed on), then asks each table record to
 * verify its chain of regions and reports any table whose check fails.
 */
void checkIntegrity() {
  for (HbckInfo region : regionInfo.values()) {
    // Only regions that are in META with a server, present on HDFS,
    // deployed on exactly one server and not edits-only take part.
    boolean unusable = region.metaEntry == null
        || region.metaEntry.regionServer == null
        || region.foundRegionDir == null
        || region.deployedOn.size() != 1
        || region.onlyEdits;
    if (unusable) {
      continue;
    }

    String table = region.metaEntry.getTableDesc().getNameAsString();
    TInfo tableRecord = tablesInfo.get(table);
    if (tableRecord == null) {
      tableRecord = new TInfo(table);
      tablesInfo.put(table, tableRecord);
    }
    for (HServerAddress host : region.deployedOn) {
      tableRecord.addServer(host);
    }
    tableRecord.addEdge(region.metaEntry.getStartKey(), region.metaEntry.getEndKey());
  }

  for (TInfo tableRecord : tablesInfo.values()) {
    boolean consistent = tableRecord.check();
    if (!consistent) {
      errors.reportError("Found inconsistency in table " + tableRecord.getName());
    }
  }
}
/**
 * Per-table bookkeeping: the table name, one edge (start key to end key)
 * per region added, and the set of servers those regions are deployed on.
 */
private class TInfo {
  String tableName;
  TreeMap<byte[], byte[]> edges;
  TreeSet<HServerAddress> deployedOn;

  TInfo(String name) {
    this.tableName = name;
    this.edges = new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
    this.deployedOn = new TreeSet<HServerAddress>();
  }

  /** Records a region as an edge from its start key to its end key. */
  public void addEdge(byte[] fromNode, byte[] toNode) {
    edges.put(fromNode, toNode);
  }

  /** Records a server hosting one of this table's regions. */
  public void addServer(HServerAddress server) {
    deployedOn.add(server);
  }

  public String getName() {
    return tableName;
  }

  /** Number of edges recorded, i.e. distinct region start keys. */
  public int getNumRegions() {
    return edges.size();
  }

  /**
   * Follows the edges from the zero-length start key until a zero-length
   * end key is reached.
   *
   * @return true when the walk reaches the end and has visited as many
   *   keys as there are edges; false (after emitting a detail message)
   *   when the chain is broken, loops, or leaves edges unvisited
   */
  public boolean check() {
    TreeSet<byte[]> seenEndKeys = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
    byte[] cursor = new byte[0];

    for (;;) {
      // No region starts where the previous one ended.
      if (!edges.containsKey(cursor)) {
        errors.detail("Chain of regions in table " + tableName +
          " is broken.");
        return false;
      }

      byte[] following = edges.get(cursor);

      // Reaching an end key twice means the chain loops.
      if (seenEndKeys.contains(following)) {
        errors.detail("Chain of regions in table " + tableName +
          " has a cycle.");
        return false;
      }
      seenEndKeys.add(following);

      if (following.length != 0) {
        cursor = following;
        continue;
      }

      // Zero-length end key: end of the chain; every edge must have been used.
      if (edges.size() != seenEndKeys.size()) {
        errors.detail("Chain of regions in table " + tableName +
          " contains less elements than are listed in META.");
        return false;
      }
      return true;
    }
  }
}
/**
* Return a list of user-space table names whose metadata have not been
* modified in the last few milliseconds specified by timelag
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
* milliseconds specified by timelag, then the table is a candidate to be returned.
@ -352,18 +517,17 @@ public class HBaseFsck extends HBaseAdmin {
* @return tables that have not been modified recently
* @throws IOException if an error is encountered
*/
HTableDescriptor[] getTables(final TreeMap<HRegionInfo, MetaEntry> regionList,
AtomicInteger numSkipped) {
HTableDescriptor[] getTables(AtomicInteger numSkipped) {
TreeSet<HTableDescriptor> uniqueTables = new TreeSet<HTableDescriptor>();
long now = System.currentTimeMillis();
for (MetaEntry m: regionList.values()) {
HRegionInfo info = m;
for (HbckInfo hbi : regionInfo.values()) {
MetaEntry info = hbi.metaEntry;
// if the start key is zero, then we have found the first region of a table.
// pick only those tables that were not modified in the last few milliseconds.
if (info != null && info.getStartKey().length == 0) {
if (m.modTime + timelag < now) {
if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
if (info.modTime + timelag < now) {
uniqueTables.add(info.getTableDesc());
} else {
numSkipped.incrementAndGet(); // one more in-flux table
@ -374,11 +538,77 @@ public class HBaseFsck extends HBaseAdmin {
}
/**
* Scan META. Returns a list of all regions of all known tables.
* @param regionList - fill up all entries found in .META
* Gets the entry in regionInfo corresponding to the given encoded
* region name. If the region has not been seen yet, a new entry is added
* and returned.
*/
private HbckInfo getOrCreateInfo(String name) {
  HbckInfo existing = regionInfo.get(name);
  if (existing != null) {
    return existing;
  }
  // First sighting of this encoded name: register a record with no META entry.
  HbckInfo created = new HbckInfo(null);
  regionInfo.put(name, created);
  return created;
}
/**
 * Checks the values in regionInfo for .META.
 *
 * Counts the recorded regions whose META entry says they belong to the
 * meta table. Exactly one is the healthy case. Zero or more than one is
 * reported as an error and, when {@link #shouldFix()} is true, a repair is
 * attempted through {@code HBaseFsckRepair} using the recorded ROOT region
 * entry, after which a rerun is requested.
 *
 * @return true if exactly one .META. region was found, false otherwise
 * @throws IOException from locating ROOT or from HBaseFsckRepair functions
 */
boolean checkMetaEntries() throws IOException {
  List<HbckInfo> metaRegions = Lists.newArrayList();
  for (HbckInfo value : regionInfo.values()) {
    // Records created without a META row (metaEntry == null) cannot be
    // .META. regions; skipping them avoids a NullPointerException.
    if (value.metaEntry != null && value.metaEntry.isMetaTable()) {
      metaRegions.add(value);
    }
  }

  // no errors, so continue normally
  if (metaRegions.size() == 1) {
    return true;
  }

  // Something is wrong: look up the recorded ROOT entry used by the repairs.
  HRegionLocation rootLocation = connection.locateRegion(
    HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
  HbckInfo root = null;
  if (rootLocation != null && rootLocation.getRegionInfo() != null) {
    root = regionInfo.get(rootLocation.getRegionInfo().getEncodedName());
  }
  boolean rootKnown = root != null && root.metaEntry != null;

  if (metaRegions.isEmpty()) {
    // There is no region holding .META.
    errors.reportError(".META. is not found on any region.");
    if (shouldFix()) {
      if (rootKnown) {
        errors.print("Trying to fix a problem with .META...");
        setShouldRerun();
        // try to fix it (treat it as unassigned region)
        HBaseFsckRepair.fixUnassigned(conf, root.metaEntry);
      } else {
        errors.print("Unable to fix .META.: no record of the ROOT region.");
      }
    }
  } else {
    // More than one region claims to hold .META.
    errors.reportError(".META. is found on more than one region.");
    if (shouldFix()) {
      if (rootKnown) {
        errors.print("Trying to fix a problem with .META...");
        setShouldRerun();
        // try to fix it (treat it as a dupe assignment)
        List<HServerAddress> deployedOn = Lists.newArrayList();
        for (HbckInfo mRegion : metaRegions) {
          deployedOn.add(mRegion.metaEntry.regionServer);
        }
        HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn);
      } else {
        errors.print("Unable to fix .META.: no record of the ROOT region.");
      }
    }
  }
  // rerun hbck with hopefully fixed META
  return false;
}
/**
* Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
* @throws IOException if an error is encountered
*/
void getMetaEntries(final TreeMap<HRegionInfo,MetaEntry> regionList) throws IOException {
void getMetaEntries() throws IOException {
MetaScannerVisitor visitor = new MetaScannerVisitor() {
int countRecord = 1;
@ -420,14 +650,15 @@ public class HBaseFsck extends HBaseAdmin {
startCode = value;
}
MetaEntry m = new MetaEntry(info, server, startCode, ts);
m = regionList.put(m ,m);
if (m != null) {
throw new IOException("Two entries in META are same " + m);
HbckInfo hbInfo = new HbckInfo(m);
HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo);
if (previous != null) {
throw new IOException("Two entries in META are same " + previous);
}
// show proof of progress to the user, once for every 100 records.
if (countRecord % 100 == 0) {
System.out.print(".");
errors.progress();
}
countRecord++;
return true;
@ -437,8 +668,14 @@ public class HBaseFsck extends HBaseAdmin {
}
}
};
// Scan -ROOT- to pick up META regions
MetaScanner.metaScan(conf, visitor, null, null,
Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME);
// Scan .META. to pick up user regions
MetaScanner.metaScan(conf, visitor);
System.out.println("");
errors.print("");
}
/**
@ -446,26 +683,158 @@ public class HBaseFsck extends HBaseAdmin {
*/
private static class MetaEntry extends HRegionInfo {
  // Server recorded as hosting this region.
  HServerAddress regionServer;
  // Start code bytes supplied by the creator (null is passed for ROOT).
  byte[] startCode;
  // Timestamp of the most recent modification of the metadata.
  long modTime;

  /**
   * Copies the given region info and attaches the hosting server, start
   * code and modification time to it.
   */
  public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
      byte[] startCode, long modTime) {
    super(rinfo);
    this.modTime = modTime;
    this.startCode = startCode;
    this.regionServer = regionServer;
  }
}
/**
* Display the full report from fsck. This displays all live and dead region servers ,
* and all known regions.
* Maintain information about a particular region.
*/
static class HbckInfo {
  /** Defaults to false; not modified inside this class. */
  boolean onlyEdits;
  /** Catalog entry for the region, or null when none was supplied. */
  MetaEntry metaEntry;
  /** Region directory status, or null when not set. */
  FileStatus foundRegionDir;
  /** Servers the region was found deployed on. */
  List<HServerAddress> deployedOn = Lists.newArrayList();

  /**
   * @param metaEntry catalog entry for this region; may be null
   */
  HbckInfo(MetaEntry metaEntry) {
    this.metaEntry = metaEntry;
  }

  /**
   * Names the region by the best information available: the region name
   * from the catalog entry, else the path of the region directory, else
   * the list of servers it is deployed on.
   */
  public String toString() {
    if (metaEntry != null) {
      return metaEntry.getRegionNameAsString();
    }
    if (foundRegionDir != null) {
      return foundRegionDir.getPath().toString();
    }
    String servers = Joiner.on(", ").join(deployedOn);
    return "unknown region on " + servers;
  }
}
/**
* Prints summary of all tables found on the system.
*/
private void printTableSummary() {
  // One section per table: verdict line, region count, then the servers
  // the table is deployed on.
  System.out.println("Summary:");
  for (TInfo table : tablesInfo.values()) {
    String verdict = table.check()
        ? " " + table.getName() + " is okay."
        : "Table " + table.getName() + " is inconsistent.";
    System.out.println(verdict);
    System.out.println(" Number of regions: " + table.getNumRegions());

    // Assemble the whole "Deployed on" line first, then emit it once.
    StringBuilder hosts = new StringBuilder(" Deployed on: ");
    for (HServerAddress address : table.deployedOn) {
      hosts.append(" ").append(address.toString());
    }
    System.out.println(hosts.toString());
  }
}
/**
 * Sink for everything hbck wants to tell the user: errors, detail lines,
 * plain messages and progress ticks.
 */
interface ErrorReporter {
  /**
   * Records one inconsistency.
   * @param message description of the problem
   */
  void reportError(String message);

  /**
   * Emits the final verdict.
   * @return a status code for the run; the printing implementation in this
   * file returns 0 when no errors were reported and -1 otherwise
   */
  int summarize();

  /**
   * Emits a line that belongs to the detailed report.
   * @param details text of the detail line
   */
  void detail(String details);

  /** Signals that some work was done, so a progress indicator can advance. */
  void progress();

  /**
   * Emits an ordinary message.
   * @param message text to show
   */
  void print(String message);
}
/**
 * {@link ErrorReporter} that writes to standard output, honouring the
 * enclosing class's {@code summary} and {@code details} flags.
 */
private static class PrintingErrorReporter implements ErrorReporter {
  /** Number of errors reported so far. */
  public int errorCount = 0;
  /** Progress calls seen since the last dot, error or detail line. */
  private int showProgress;

  /** Counts the error and, unless in summary mode, prints it. */
  public void reportError(String message) {
    if (!summary) {
      System.out.println("ERROR: " + message);
    }
    showProgress = 0;
    errorCount++;
  }

  /**
   * Prints the error count and overall status.
   * @return 0 when no errors were reported, -1 otherwise
   */
  public int summarize() {
    System.out.println(errorCount + " inconsistencies detected.");
    boolean consistent = errorCount == 0;
    System.out.println(consistent ? "Status: OK" : "Status: INCONSISTENT");
    return consistent ? 0 : -1;
  }

  /** Prints the message unless summary mode is on. */
  public void print(String message) {
    if (summary) {
      return;
    }
    System.out.println(message);
  }

  /** Prints the message only in details mode; always resets the progress counter. */
  public void detail(String message) {
    if (details) {
      System.out.println(message);
    }
    showProgress = 0;
  }

  /**
   * Advances the progress counter; when it had already reached 10 a dot is
   * printed (unless in summary mode) and the counter starts over.
   */
  public void progress() {
    boolean due = showProgress == 10;
    showProgress++;
    if (!due) {
      return;
    }
    if (!summary) {
      System.out.print(".");
    }
    showProgress = 0;
  }
}
/**
 * Display the full report from fsck.
 * This displays all live and dead region servers, and all known regions.
 * Implemented by switching on the <code>details</code> flag, which the
 * printing error reporter checks before emitting detail lines.
 */
void displayFullReport() {
details = true;
}
/**
 * Set summary mode.
 * Print only summary of the tables and status (OK or INCONSISTENT).
 * With the <code>summary</code> flag on, the printing error reporter
 * suppresses individual error lines, plain messages and progress dots.
 */
void setSummary() {
summary = true;
}
/**
 * Record that fsck should be run again. Set when fsck has tried to fix
 * something, so that the result of the fix can be verified by a second
 * pass; query the flag with {@link #shouldRerun()}.
 */
void setShouldRerun() {
rerun = true;
}
/**
 * @return true if {@link #setShouldRerun()} has been called, i.e. fsck
 * should be run once more
 */
boolean shouldRerun() {
return rerun;
}
/**
 * Fix inconsistencies found by fsck. This should try to fix errors (if any)
 * found by fsck utility. Only sets the <code>fix</code> flag; query it with
 * {@link #shouldFix()}.
 */
void setFixErrors() {
fix = true;
}
/**
 * @return true if fixing was requested through {@link #setFixErrors()}
 */
boolean shouldFix() {
return fix;
}
/**
* We are interested in only those tables that have not changed their state in
* META during the last few seconds specified by hbase.admin.fsck.timelag
@ -482,6 +851,9 @@ public class HBaseFsck extends HBaseAdmin {
System.err.println(" -timelag {timeInSeconds} Process only regions that " +
" have not experienced any metadata updates in the last " +
" {{timeInSeconds} seconds.");
System.err.println(" -fix Try to fix some of the errors.");
System.err.println(" -summary Print only summary of the tables and status.");
Runtime.getRuntime().exit(-2);
}
@ -515,6 +887,10 @@ public class HBaseFsck extends HBaseAdmin {
printUsageAndExit();
}
i++;
} else if (cmd.equals("-fix")) {
fsck.setFixErrors();
} else if (cmd.equals("-summary")) {
fsck.setSummary();
} else {
String str = "Unknown command line option : " + cmd;
LOG.info(str);
@ -524,6 +900,14 @@ public class HBaseFsck extends HBaseAdmin {
}
// do the real work of fsck
int code = fsck.doWork();
// If we have changed the HBase state it is better to run fsck again
// to see if we haven't broken something else in the process.
// We run it only once more because otherwise we can easily fall into
// an infinite loop.
if (fsck.shouldRerun()) {
code = fsck.doWork();
}
Runtime.getRuntime().exit(code);
}
}

View File

@ -0,0 +1,121 @@
/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
/**
 * Static helpers used by hbck to repair region assignment problems.
 * Each repair is a sequence of best-known-good steps against the master,
 * ZooKeeper, the region servers and the catalog tables.
 */
public class HBaseFsckRepair {

  /**
   * Repairs a region that is deployed on more than one server: clears its
   * state in the master and ZooKeeper, closes it on every listed server and
   * then treats it as an unassigned region.
   *
   * @param conf cluster configuration used to obtain connections
   * @param region region to repair
   * @param servers servers the region is currently deployed on
   * @throws IOException if any of the remote operations fails; servers after
   * the failing one are not contacted
   */
  public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
      List<HServerAddress> servers)
  throws IOException {
    // Work on a copy -- presumably so that a plain HRegionInfo, not a
    // subclass handed in by the caller, is what gets sent over RPC.
    HRegionInfo actualRegion = new HRegionInfo(region);

    // Clear status in master and zk
    clearInMaster(conf, actualRegion);
    clearInZK(conf, actualRegion);

    // Close region on the servers
    for (HServerAddress server : servers) {
      closeRegion(conf, server, actualRegion);
    }

    // It's unassigned so fix it as such
    fixUnassigned(conf, actualRegion);
  }

  /**
   * Repairs a region that is not deployed anywhere: clears its state in the
   * master and ZooKeeper and removes its assignment from the catalog.
   *
   * @param conf cluster configuration used to obtain connections
   * @param region region to repair
   * @throws IOException if any of the remote operations fails
   */
  public static void fixUnassigned(Configuration conf, HRegionInfo region)
  throws IOException {
    HRegionInfo actualRegion = new HRegionInfo(region);

    // Clear status in master and zk
    clearInMaster(conf, actualRegion);
    clearInZK(conf, actualRegion);

    // Clear assignment in META or ROOT
    clearAssignment(conf, actualRegion);
  }

  /**
   * Contacts the master on behalf of the region. At present this only
   * queries and prints the master's protocol version; the actual clearing
   * call is still to be decided (see TODO below).
   */
  private static void clearInMaster(Configuration conf, HRegionInfo region)
  throws IOException {
    System.out.println("Region being cleared in master: " + region);
    HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
    // NOTE(review): the client version passed here is the literal 25 --
    // confirm whether it should track the RPC protocol's versionID.
    long masterVersion =
      master.getProtocolVersion(HMasterInterface.class.getName(), 25);
    System.out.println("Master protocol version: " + masterVersion);
    // TODO: Do we want to do it this way?
    // Better way is to tell master to fix the issue itself?
    // That way it can use in-memory state to determine best plan
    // master.clearFromTransition(region);
  }

  /**
   * Deletes the region's assignment node in ZooKeeper, if there is one.
   *
   * @throws IOException wrapping any {@link KeeperException}
   */
  private static void clearInZK(Configuration conf, HRegionInfo region)
  throws IOException {
    ZooKeeperWatcher zkw =
      HConnectionManager.getConnection(conf).getZooKeeperWatcher();
    try {
      ZKAssign.deleteNodeFailSilent(zkw, region);
    } catch (KeeperException e) {
      throw new IOException("Unexpected ZK exception", e);
    }
  }

  /**
   * Asks the given server to close the region, passing <code>false</code>
   * for the ZooKeeper flag of the close call.
   */
  private static void closeRegion(Configuration conf, HServerAddress server,
      HRegionInfo region)
  throws IOException {
    HRegionInterface rs =
      HConnectionManager.getConnection(conf).getHRegionConnection(server);
    rs.closeRegion(region, false);
  }

  /**
   * Removes the server and start-code columns from the region's catalog row:
   * in ROOT when {@link HRegionInfo#isMetaTable()} is true for the region,
   * in META otherwise.
   */
  private static void clearAssignment(Configuration conf,
      HRegionInfo region)
  throws IOException {
    byte [] catalogTable = region.isMetaTable()
      ? HConstants.ROOT_TABLE_NAME   // Clear assignment in ROOT
      : HConstants.META_TABLE_NAME;  // Clear assignment in META
    HTable ht = new HTable(conf, catalogTable);
    try {
      Delete del = new Delete(region.getRegionName());
      del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
      del.deleteColumns(HConstants.CATALOG_FAMILY,
          HConstants.STARTCODE_QUALIFIER);
      ht.delete(del);
    } finally {
      // Release the table's resources even when the delete fails.
      ht.close();
    }
  }
}

View File

@ -63,13 +63,14 @@ public class MetaScanner {
*
* @param configuration config
* @param visitor visitor object
* @param tableName table name
* @param userTableName User table name in meta table to start scan at. Pass
* null if not interested in a particular table.
* @throws IOException e
*/
public static void metaScan(Configuration configuration,
MetaScannerVisitor visitor, byte[] tableName)
MetaScannerVisitor visitor, byte [] userTableName)
throws IOException {
metaScan(configuration, visitor, tableName, null, Integer.MAX_VALUE);
metaScan(configuration, visitor, userTableName, null, Integer.MAX_VALUE);
}
/**
@ -79,7 +80,8 @@ public class MetaScanner {
*
* @param configuration HBase configuration.
* @param visitor Visitor object.
* @param tableName User table name.
* @param userTableName User table name in meta table to start scan at. Pass
* null if not interested in a particular table.
* @param row Name of the row at the user table. The scan will start from
* the region row where the row resides.
* @param rowLimit Max of processed rows. If it is less than 0, it
@ -87,8 +89,32 @@ public class MetaScanner {
* @throws IOException e
*/
public static void metaScan(Configuration configuration,
MetaScannerVisitor visitor, byte[] tableName, byte[] row,
MetaScannerVisitor visitor, byte [] userTableName, byte[] row,
int rowLimit)
throws IOException {
metaScan(configuration, visitor, userTableName, row, rowLimit,
HConstants.META_TABLE_NAME);
}
/**
* Scans the meta table and calls a visitor on each RowResult. Uses a table
* name and a row name to locate meta regions. And it only scans at most
* <code>rowLimit</code> of rows.
*
* @param configuration HBase configuration.
* @param visitor Visitor object.
* @param userTableName User table name in meta table to start scan at. Pass
* null if not interested in a particular table.
* @param row Name of the row at the user table. The scan will start from
* the region row where the row resides.
* @param rowLimit Max of processed rows. If it is less than 0, it
* will be set to default value <code>Integer.MAX_VALUE</code>.
* @param metaTableName Meta table to scan, root or meta.
* @throws IOException e
*/
public static void metaScan(Configuration configuration,
MetaScannerVisitor visitor, byte [] tableName, byte[] row,
int rowLimit, final byte [] metaTableName)
throws IOException {
int rowUpperLimit = rowLimit > 0 ? rowLimit: Integer.MAX_VALUE;
@ -136,8 +162,6 @@ public class MetaScanner {
configuration.getInt("hbase.meta.scanner.caching", 100));
do {
final Scan scan = new Scan(startRow).addFamily(HConstants.CATALOG_FAMILY);
byte [] metaTableName = Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)?
HConstants.ROOT_TABLE_NAME: HConstants.META_TABLE_NAME;
LOG.debug("Scanning " + Bytes.toString(metaTableName) +
" starting at row=" + Bytes.toString(startRow) + " for max=" +
rowUpperLimit + " rows");

View File

@ -80,5 +80,5 @@ public interface HBaseRPCProtocolVersion extends VersionedProtocol {
* <li>Version 26: New master and Increment, 0.90 version bump.</li>
* </ul>
*/
public static final long versionID = 25L; // Setting it to 25 temporarily to see if hudson passes. #1608 hudson failed because of version mismatch 25 vs 26.
public static final long versionID = 26L;
}

View File

@ -275,7 +275,7 @@ public interface HRegionInterface extends HBaseRPCProtocolVersion, Stoppable, Ab
* @return All regions online on this region server
* @throws IOException e
*/
public NavigableSet<HRegionInfo> getOnlineRegions();
public List<HRegionInfo> getOnlineRegions();
/**
* Method used when a master is taking the place of another failed one.
@ -334,6 +334,17 @@ public interface HRegionInterface extends HBaseRPCProtocolVersion, Stoppable, Ab
public boolean closeRegion(final HRegionInfo region)
throws IOException;
/**
* Closes the specified region and will use or not use ZK during the close
* according to the specified flag.
* @param region region to close
* @param zk true if transitions should be done in ZK, false if not
* @return true if closing region, false if not
* @throws IOException
*/
public boolean closeRegion(final HRegionInfo region, final boolean zk)
throws IOException;
// Region administrative methods
/**

View File

@ -1199,6 +1199,7 @@ public class AssignmentManager extends ZooKeeperListener {
for (Result result : results) {
Pair<HRegionInfo,HServerInfo> region =
MetaReader.metaRowToRegionPairWithInfo(result);
if (region == null) continue;
HServerInfo regionLocation = region.getSecond();
HRegionInfo regionInfo = region.getFirst();
if (regionLocation == null) {
@ -1325,6 +1326,34 @@ public class AssignmentManager extends ZooKeeperListener {
}
}
/**
 * Forgets everything this manager tracks about the given region: its
 * in-transition entry, its entry in the regions map, its region plan and
 * its first occurrence in each server's region list.
 * <p>
 * Used only by HBCK tool.
 * @param hri region to clear
 */
public void clearRegionFromTransition(HRegionInfo hri) {
  final String encodedName = hri.getEncodedName();
  synchronized (this.regionsInTransition) {
    this.regionsInTransition.remove(encodedName);
  }
  synchronized (this.regions) {
    this.regions.remove(hri);
  }
  synchronized (this.regionPlans) {
    this.regionPlans.remove(encodedName);
  }
  synchronized (this.servers) {
    for (List<HRegionInfo> hosted : this.servers.values()) {
      // Locate the first matching entry, if any, then drop it.
      int idx = 0;
      while (idx < hosted.size() && !hosted.get(idx).equals(hri)) {
        idx++;
      }
      if (idx < hosted.size()) {
        hosted.remove(idx);
      }
    }
  }
}
/**
* Checks if the specified table has been disabled by the user.
* @param tableName

View File

@ -862,6 +862,11 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
}
}
/**
 * Clears the region from the assignment manager's bookkeeping, but only
 * when the assignment manager reports it as being in transition.
 * @param hri region to clear
 */
public void clearFromTransition(HRegionInfo hri) {
  if (this.assignmentManager.isRegionInTransition(hri) == null) {
    // Not in transition: nothing to clear.
    return;
  }
  this.assignmentManager.clearRegionFromTransition(hri);
}
/**
* @return cluster status
*/

View File

@ -345,9 +345,9 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
@Override
public Integer apply(Writable from) {
if (from instanceof HBaseRPC.Invocation) {
HBaseRPC.Invocation inv = (HBaseRPC.Invocation) from;
if (!(from instanceof HBaseRPC.Invocation)) return NORMAL_QOS;
HBaseRPC.Invocation inv = (HBaseRPC.Invocation) from;
String methodName = inv.getMethodName();
// scanner methods...
@ -357,7 +357,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
try {
scannerId = (Long) inv.getParameters()[0];
} catch (ClassCastException ignored) {
//LOG.debug("Low priority: " + from);
// LOG.debug("Low priority: " + from);
return NORMAL_QOS; // doh.
}
String scannerIdString = Long.toString(scannerId);
@ -366,43 +366,46 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
HRegion.RegionScanner rs = (HRegion.RegionScanner) scanner;
HRegionInfo regionName = rs.getRegionName();
if (regionName.isMetaRegion()) {
//LOG.debug("High priority scanner request: " + scannerId);
// LOG.debug("High priority scanner request: " + scannerId);
return HIGH_QOS;
}
}
}
else if (methodName.equals("getHServerInfo") ||
methodName.equals("getRegionsAssignment") ||
methodName.equals("unlockRow") ||
methodName.equals("getProtocolVersion") ||
methodName.equals("getClosestRowBefore")) {
//LOG.debug("High priority method: " + methodName);
} else if (methodName.equals("getHServerInfo")
|| methodName.equals("getRegionsAssignment")
|| methodName.equals("unlockRow")
|| methodName.equals("getProtocolVersion")
|| methodName.equals("getClosestRowBefore")) {
// LOG.debug("High priority method: " + methodName);
return HIGH_QOS;
}
else if (inv.getParameterClasses()[0] == byte[].class) {
} else if (inv.getParameterClasses().length == 0) {
// Just let it through. This is getOnlineRegions, etc.
} else if (inv.getParameterClasses()[0] == byte[].class) {
// first arg is byte array, so assume this is a regionname:
if (isMetaRegion((byte[]) inv.getParameters()[0])) {
//LOG.debug("High priority with method: " + methodName + " and region: "
// LOG.debug("High priority with method: " + methodName +
// " and region: "
// + Bytes.toString((byte[]) inv.getParameters()[0]));
return HIGH_QOS;
}
}
else if (inv.getParameterClasses()[0] == MultiAction.class) {
} else if (inv.getParameterClasses()[0] == MultiAction.class) {
MultiAction ma = (MultiAction) inv.getParameters()[0];
Set<byte[]> regions = ma.getRegions();
// ok this sucks, but if any single of the actions touches a meta, the whole
// thing gets pingged high priority. This is a dangerous hack because people
// can get their multi action tagged high QOS by tossing a Get(.META.) AND this
// ok this sucks, but if any single of the actions touches a meta, the
// whole
// thing gets pingged high priority. This is a dangerous hack because
// people
// can get their multi action tagged high QOS by tossing a Get(.META.)
// AND this
// regionserver hosts META/-ROOT-
for (byte[] region: regions) {
for (byte[] region : regions) {
if (isMetaRegion(region)) {
//LOG.debug("High priority multi with region: " + Bytes.toString(region));
// LOG.debug("High priority multi with region: " +
// Bytes.toString(region));
return HIGH_QOS; // short circuit for the win.
}
}
}
}
//LOG.debug("Low priority: " + from.toString());
// LOG.debug("Low priority: " + from.toString());
return NORMAL_QOS;
}
}
@ -1973,17 +1976,21 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
/**
 * Closes the given region by delegating to
 * {@link #closeRegion(HRegionInfo, boolean)} with the ZooKeeper flag set to
 * <code>true</code>.
 */
@Override
public boolean closeRegion(HRegionInfo region)
throws NotServingRegionException {
return closeRegion(region, true);
}
@Override
public boolean closeRegion(HRegionInfo region, final boolean zk)
throws NotServingRegionException {
LOG.info("Received close region: " + region.getRegionNameAsString());
// TODO: Need to check if this is being served here but currently undergoing
// a split (so master needs to retry close after split is complete)
if (!onlineRegions.containsKey(region.getEncodedName())) {
LOG.warn("Received close for region we are not serving; " +
region.getEncodedName());
throw new NotServingRegionException("Received close for "
+ region.getRegionNameAsString() + " but we are not serving it");
}
return closeRegion(region, false, true);
return closeRegion(region, false, zk);
}
/**
@ -2066,14 +2073,14 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
}
@Override
public NavigableSet<HRegionInfo> getOnlineRegions() {
NavigableSet<HRegionInfo> sortedset = new TreeSet<HRegionInfo>();
public List<HRegionInfo> getOnlineRegions() {
List<HRegionInfo> list = new ArrayList<HRegionInfo>();
synchronized(this.onlineRegions) {
for (Map.Entry<String,HRegion> e: this.onlineRegions.entrySet()) {
sortedset.add(e.getValue().getRegionInfo());
list.add(e.getValue().getRegionInfo());
}
}
return sortedset;
return list;
}
public int getNumberOfOnlineRegions() {

View File

@ -85,6 +85,10 @@ public class MiniHBaseCluster {
init(numMasters, numRegionServers);
}
/**
 * @return the configuration held by this cluster instance
 */
public Configuration getConfiguration() {
return this.conf;
}
/**
* Override Master so can add inject behaviors testing.
*/

View File

@ -20,6 +20,7 @@
package org.apache.hadoop.hbase.client;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@ -45,8 +46,10 @@ import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
@ -81,6 +84,15 @@ public class TestAdmin {
this.admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
}
/**
 * Runs hbck with the full report enabled against the mini cluster and
 * expects it to return 0.
 */
@Test
public void testHBaseFsck() throws IOException {
  HBaseFsck checker =
    new HBaseFsck(TEST_UTIL.getMiniHBaseCluster().getConfiguration());
  checker.displayFullReport();
  assertEquals(0, checker.doWork());
}
@Test
public void testCreateTable() throws IOException {
HTableDescriptor [] tables = admin.listTables();