HBASE-2819 hbck should have the ability to repair basic problems
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1031694 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
92e0f47b8b
commit
b09838d4f4
|
@ -1102,7 +1102,7 @@ Release 0.21.0 - Unreleased
|
||||||
a minute
|
a minute
|
||||||
HBASE-3189 Stagger Major Compactions (Nicolas Spiegelberg via Stack)
|
HBASE-3189 Stagger Major Compactions (Nicolas Spiegelberg via Stack)
|
||||||
HBASE-2564 [rest] Tests use deprecated foundation
|
HBASE-2564 [rest] Tests use deprecated foundation
|
||||||
|
HBASE-2819 hbck should have the ability to repair basic problems
|
||||||
|
|
||||||
NEW FEATURES
|
NEW FEATURES
|
||||||
HBASE-1961 HBase EC2 scripts
|
HBASE-1961 HBase EC2 scripts
|
||||||
|
|
|
@ -325,9 +325,10 @@ public class MetaReader {
|
||||||
*/
|
*/
|
||||||
public static Pair<HRegionInfo, HServerInfo> metaRowToRegionPairWithInfo(
|
public static Pair<HRegionInfo, HServerInfo> metaRowToRegionPairWithInfo(
|
||||||
Result data) throws IOException {
|
Result data) throws IOException {
|
||||||
HRegionInfo info = Writables.getHRegionInfo(
|
byte [] bytes = data.getValue(HConstants.CATALOG_FAMILY,
|
||||||
data.getValue(HConstants.CATALOG_FAMILY,
|
HConstants.REGIONINFO_QUALIFIER);
|
||||||
HConstants.REGIONINFO_QUALIFIER));
|
if (bytes == null) return null;
|
||||||
|
HRegionInfo info = Writables.getHRegionInfo(bytes);
|
||||||
final byte[] value = data.getValue(HConstants.CATALOG_FAMILY,
|
final byte[] value = data.getValue(HConstants.CATALOG_FAMILY,
|
||||||
HConstants.SERVER_QUALIFIER);
|
HConstants.SERVER_QUALIFIER);
|
||||||
if (value != null && value.length > 0) {
|
if (value != null && value.length > 0) {
|
||||||
|
|
|
@ -737,12 +737,22 @@ public class HBaseAdmin implements Abortable {
|
||||||
HServerAddress hsa = new HServerAddress(hostAndPort);
|
HServerAddress hsa = new HServerAddress(hostAndPort);
|
||||||
Pair<HRegionInfo, HServerAddress> pair =
|
Pair<HRegionInfo, HServerAddress> pair =
|
||||||
MetaReader.getRegion(ct, regionname);
|
MetaReader.getRegion(ct, regionname);
|
||||||
|
if (pair == null || pair.getSecond() == null) {
|
||||||
|
LOG.info("No server in .META. for " +
|
||||||
|
Bytes.toString(regionname) + "; pair=" + pair);
|
||||||
|
} else {
|
||||||
closeRegion(hsa, pair.getFirst());
|
closeRegion(hsa, pair.getFirst());
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
Pair<HRegionInfo, HServerAddress> pair =
|
Pair<HRegionInfo, HServerAddress> pair =
|
||||||
MetaReader.getRegion(ct, regionname);
|
MetaReader.getRegion(ct, regionname);
|
||||||
|
if (pair == null || pair.getSecond() == null) {
|
||||||
|
LOG.info("No server in .META. for " +
|
||||||
|
Bytes.toString(regionname) + "; pair=" + pair);
|
||||||
|
} else {
|
||||||
closeRegion(pair.getSecond(), pair.getFirst());
|
closeRegion(pair.getSecond(), pair.getFirst());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
cleanupCatalogTracker(ct);
|
cleanupCatalogTracker(ct);
|
||||||
}
|
}
|
||||||
|
@ -783,12 +793,18 @@ public class HBaseAdmin implements Abortable {
|
||||||
if (isRegionName) {
|
if (isRegionName) {
|
||||||
Pair<HRegionInfo, HServerAddress> pair =
|
Pair<HRegionInfo, HServerAddress> pair =
|
||||||
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
|
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
|
||||||
|
if (pair == null || pair.getSecond() == null) {
|
||||||
|
LOG.info("No server in .META. for " +
|
||||||
|
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
|
||||||
|
} else {
|
||||||
flush(pair.getSecond(), pair.getFirst());
|
flush(pair.getSecond(), pair.getFirst());
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
List<Pair<HRegionInfo, HServerAddress>> pairs =
|
List<Pair<HRegionInfo, HServerAddress>> pairs =
|
||||||
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
|
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
|
||||||
Bytes.toString(tableNameOrRegionName));
|
Bytes.toString(tableNameOrRegionName));
|
||||||
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
|
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
|
||||||
|
if (pair.getSecond() == null) continue;
|
||||||
flush(pair.getSecond(), pair.getFirst());
|
flush(pair.getSecond(), pair.getFirst());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -871,12 +887,18 @@ public class HBaseAdmin implements Abortable {
|
||||||
if (isRegionName(tableNameOrRegionName)) {
|
if (isRegionName(tableNameOrRegionName)) {
|
||||||
Pair<HRegionInfo, HServerAddress> pair =
|
Pair<HRegionInfo, HServerAddress> pair =
|
||||||
MetaReader.getRegion(ct, tableNameOrRegionName);
|
MetaReader.getRegion(ct, tableNameOrRegionName);
|
||||||
|
if (pair == null || pair.getSecond() == null) {
|
||||||
|
LOG.info("No server in .META. for " +
|
||||||
|
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
|
||||||
|
} else {
|
||||||
compact(pair.getSecond(), pair.getFirst(), major);
|
compact(pair.getSecond(), pair.getFirst(), major);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
List<Pair<HRegionInfo, HServerAddress>> pairs =
|
List<Pair<HRegionInfo, HServerAddress>> pairs =
|
||||||
MetaReader.getTableRegionsAndLocations(ct,
|
MetaReader.getTableRegionsAndLocations(ct,
|
||||||
Bytes.toString(tableNameOrRegionName));
|
Bytes.toString(tableNameOrRegionName));
|
||||||
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
|
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
|
||||||
|
if (pair.getSecond() == null) continue;
|
||||||
compact(pair.getSecond(), pair.getFirst(), major);
|
compact(pair.getSecond(), pair.getFirst(), major);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -956,12 +978,19 @@ public class HBaseAdmin implements Abortable {
|
||||||
// It's a possible region name.
|
// It's a possible region name.
|
||||||
Pair<HRegionInfo, HServerAddress> pair =
|
Pair<HRegionInfo, HServerAddress> pair =
|
||||||
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
|
MetaReader.getRegion(getCatalogTracker(), tableNameOrRegionName);
|
||||||
|
if (pair == null || pair.getSecond() == null) {
|
||||||
|
LOG.info("No server in .META. for " +
|
||||||
|
Bytes.toString(tableNameOrRegionName) + "; pair=" + pair);
|
||||||
|
} else {
|
||||||
split(pair.getSecond(), pair.getFirst());
|
split(pair.getSecond(), pair.getFirst());
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
List<Pair<HRegionInfo, HServerAddress>> pairs =
|
List<Pair<HRegionInfo, HServerAddress>> pairs =
|
||||||
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
|
MetaReader.getTableRegionsAndLocations(getCatalogTracker(),
|
||||||
Bytes.toString(tableNameOrRegionName));
|
Bytes.toString(tableNameOrRegionName));
|
||||||
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
|
for (Pair<HRegionInfo, HServerAddress> pair: pairs) {
|
||||||
|
// May not be a server for a particular row
|
||||||
|
if (pair.getSecond() == null) continue;
|
||||||
split(pair.getSecond(), pair.getFirst());
|
split(pair.getSecond(), pair.getFirst());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.NavigableSet;
|
import java.util.List;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.ClusterStatus;
|
||||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.HRegionInfo;
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.HRegionLocation;
|
||||||
import org.apache.hadoop.hbase.HServerAddress;
|
import org.apache.hadoop.hbase.HServerAddress;
|
||||||
import org.apache.hadoop.hbase.HServerInfo;
|
import org.apache.hadoop.hbase.HServerInfo;
|
||||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||||
|
@ -45,30 +46,36 @@ import org.apache.hadoop.hbase.KeyValue;
|
||||||
import org.apache.hadoop.hbase.MasterNotRunningException;
|
import org.apache.hadoop.hbase.MasterNotRunningException;
|
||||||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||||
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
|
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
|
||||||
import org.apache.hadoop.hbase.ipc.HMasterInterface;
|
|
||||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.wal.HLog;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.Writables;
|
import org.apache.hadoop.hbase.util.Writables;
|
||||||
|
|
||||||
|
import com.google.common.base.Joiner;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check consistency among the in-memory states of the master and the
|
* Check consistency among the in-memory states of the master and the
|
||||||
* region server(s) and the state of data in HDFS.
|
* region server(s) and the state of data in HDFS.
|
||||||
*/
|
*/
|
||||||
public class HBaseFsck extends HBaseAdmin {
|
public class HBaseFsck {
|
||||||
public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
|
public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
|
||||||
|
|
||||||
private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
|
private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
|
||||||
private Configuration conf;
|
private Configuration conf;
|
||||||
private FileSystem fs;
|
|
||||||
private Path rootDir;
|
|
||||||
|
|
||||||
private ClusterStatus status;
|
private ClusterStatus status;
|
||||||
private HMasterInterface master;
|
|
||||||
private HConnection connection;
|
private HConnection connection;
|
||||||
private TreeMap<HRegionInfo, MetaEntry> metaEntries;
|
|
||||||
|
|
||||||
private boolean details = false; // do we display the full report?
|
private TreeMap<String, HbckInfo> regionInfo = new TreeMap<String, HbckInfo>();
|
||||||
|
private TreeMap<String, TInfo> tablesInfo = new TreeMap<String, TInfo>();
|
||||||
|
ErrorReporter errors = new PrintingErrorReporter();
|
||||||
|
|
||||||
|
private static boolean details = false; // do we display the full report
|
||||||
private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
|
private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
|
||||||
|
private boolean fix = false; // do we want to try fixing the errors?
|
||||||
|
private boolean rerun = false; // if we tried to fix something rerun hbck
|
||||||
|
private static boolean summary = false; // if we want to print less output
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor
|
* Constructor
|
||||||
|
@ -79,19 +86,11 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
*/
|
*/
|
||||||
public HBaseFsck(Configuration conf)
|
public HBaseFsck(Configuration conf)
|
||||||
throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
|
throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
|
||||||
super(conf);
|
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
|
|
||||||
// setup filesystem properties
|
HBaseAdmin admin = new HBaseAdmin(conf);
|
||||||
this.rootDir = new Path(conf.get(HConstants.HBASE_DIR));
|
status = admin.getMaster().getClusterStatus();
|
||||||
this.fs = rootDir.getFileSystem(conf);
|
connection = admin.getConnection();
|
||||||
|
|
||||||
|
|
||||||
// fetch information from master
|
|
||||||
master = getMaster();
|
|
||||||
status = master.getClusterStatus();
|
|
||||||
connection = getConnection();
|
|
||||||
this.metaEntries = new TreeMap<HRegionInfo, MetaEntry>();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -101,250 +100,416 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
*/
|
*/
|
||||||
int doWork() throws IOException {
|
int doWork() throws IOException {
|
||||||
// print hbase server version
|
// print hbase server version
|
||||||
System.out.println("Version: " + status.getHBaseVersion());
|
errors.print("Version: " + status.getHBaseVersion());
|
||||||
|
|
||||||
|
// Make sure regionInfo is empty before starting
|
||||||
|
regionInfo.clear();
|
||||||
|
tablesInfo.clear();
|
||||||
|
|
||||||
// get a list of all regions from the master. This involves
|
// get a list of all regions from the master. This involves
|
||||||
// scanning the META table
|
// scanning the META table
|
||||||
getMetaEntries(metaEntries);
|
if (!recordRootRegion()) {
|
||||||
|
// Will remove later if we can fix it
|
||||||
|
errors.reportError("Encountered fatal error. Exitting...");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
getMetaEntries();
|
||||||
|
|
||||||
|
// Check if .META. is found only once and on the right place
|
||||||
|
if (!checkMetaEntries()) {
|
||||||
|
// Will remove later if we can fix it
|
||||||
|
errors.reportError("Encountered fatal error. Exitting...");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
// get a list of all tables that have not changed recently.
|
// get a list of all tables that have not changed recently.
|
||||||
AtomicInteger numSkipped = new AtomicInteger(0);
|
AtomicInteger numSkipped = new AtomicInteger(0);
|
||||||
HTableDescriptor[] allTables = getTables(metaEntries, numSkipped);
|
HTableDescriptor[] allTables = getTables(numSkipped);
|
||||||
System.out.println("Number of Tables: " + allTables.length);
|
errors.print("Number of Tables: " + allTables.length);
|
||||||
if (details) {
|
if (details) {
|
||||||
if (numSkipped.get() > 0) {
|
if (numSkipped.get() > 0) {
|
||||||
System.out.println("\n Number of Tables in flux: " + numSkipped.get());
|
errors.detail("Number of Tables in flux: " + numSkipped.get());
|
||||||
}
|
}
|
||||||
for (HTableDescriptor td : allTables) {
|
for (HTableDescriptor td : allTables) {
|
||||||
String tableName = td.getNameAsString();
|
String tableName = td.getNameAsString();
|
||||||
System.out.println("\t Table: " + tableName + "\t" +
|
errors.detail(" Table: " + tableName + "\t" +
|
||||||
(td.isReadOnly() ? "ro" : "rw") + "\t" +
|
(td.isReadOnly() ? "ro" : "rw") + "\t" +
|
||||||
(td.isRootRegion() ? "ROOT" :
|
(td.isRootRegion() ? "ROOT" :
|
||||||
(td.isMetaRegion() ? "META" : " ")) + "\t" +
|
(td.isMetaRegion() ? "META" : " ")) + "\t" +
|
||||||
" families:" + td.getFamilies().size());
|
" families: " + td.getFamilies().size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// From the master, get a list of all known live region servers
|
// From the master, get a list of all known live region servers
|
||||||
Collection<HServerInfo> regionServers = status.getServerInfo();
|
Collection<HServerInfo> regionServers = status.getServerInfo();
|
||||||
System.out.println("Number of live region servers:" +
|
errors.print("Number of live region servers: " +
|
||||||
regionServers.size());
|
regionServers.size());
|
||||||
if (details) {
|
if (details) {
|
||||||
for (HServerInfo rsinfo: regionServers) {
|
for (HServerInfo rsinfo: regionServers) {
|
||||||
System.out.println("\t RegionServer:" + rsinfo.getServerName());
|
errors.print(" " + rsinfo.getServerName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// From the master, get a list of all dead region servers
|
// From the master, get a list of all dead region servers
|
||||||
Collection<String> deadRegionServers = status.getDeadServerNames();
|
Collection<String> deadRegionServers = status.getDeadServerNames();
|
||||||
System.out.println("Number of dead region servers:" +
|
errors.print("Number of dead region servers: " +
|
||||||
deadRegionServers.size());
|
deadRegionServers.size());
|
||||||
if (details) {
|
if (details) {
|
||||||
for (String name: deadRegionServers) {
|
for (String name: deadRegionServers) {
|
||||||
System.out.println("\t RegionServer(dead):" + name);
|
errors.print(" " + name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// process information from all region servers
|
// Determine what's deployed
|
||||||
boolean status1 = processRegionServers(regionServers);
|
processRegionServers(regionServers);
|
||||||
|
|
||||||
// match HDFS with META
|
// Determine what's on HDFS
|
||||||
boolean status2 = checkHdfs();
|
checkHdfs();
|
||||||
|
|
||||||
if (status1 == true && status2 == true) {
|
// Check consistency
|
||||||
System.out.println("\nRest easy, buddy! HBase is clean. ");
|
checkConsistency();
|
||||||
return 0;
|
|
||||||
|
// Check integrity
|
||||||
|
checkIntegrity();
|
||||||
|
|
||||||
|
// Print table summary
|
||||||
|
printTableSummary();
|
||||||
|
|
||||||
|
return errors.summarize();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scan HDFS for all regions, recording their information into
|
||||||
|
* regionInfo
|
||||||
|
*/
|
||||||
|
void checkHdfs() throws IOException {
|
||||||
|
Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
|
||||||
|
FileSystem fs = rootDir.getFileSystem(conf);
|
||||||
|
|
||||||
|
// list all tables from HDFS
|
||||||
|
List<FileStatus> tableDirs = Lists.newArrayList();
|
||||||
|
|
||||||
|
boolean foundVersionFile = false;
|
||||||
|
FileStatus[] files = fs.listStatus(rootDir);
|
||||||
|
for (FileStatus file : files) {
|
||||||
|
if (file.getPath().getName().equals(HConstants.VERSION_FILE_NAME)) {
|
||||||
|
foundVersionFile = true;
|
||||||
} else {
|
} else {
|
||||||
System.out.println("\nInconsistencies detected.");
|
tableDirs.add(file);
|
||||||
return -1;
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// verify that version file exists
|
||||||
|
if (!foundVersionFile) {
|
||||||
|
errors.reportError("Version file does not exist in root dir " + rootDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// level 1: <HBASE_DIR>/*
|
||||||
|
for (FileStatus tableDir : tableDirs) {
|
||||||
|
String tableName = tableDir.getPath().getName();
|
||||||
|
// ignore hidden files
|
||||||
|
if (tableName.startsWith(".") &&
|
||||||
|
!tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME)))
|
||||||
|
continue;
|
||||||
|
// level 2: <HBASE_DIR>/<table>/*
|
||||||
|
FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
|
||||||
|
for (FileStatus regionDir : regionDirs) {
|
||||||
|
String encodedName = regionDir.getPath().getName();
|
||||||
|
// ignore directories that aren't hexadecimal
|
||||||
|
if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue;
|
||||||
|
|
||||||
|
HbckInfo hbi = getOrCreateInfo(encodedName);
|
||||||
|
hbi.foundRegionDir = regionDir;
|
||||||
|
|
||||||
|
// Set a flag if this region contains only edits
|
||||||
|
// This is special case if a region is left after split
|
||||||
|
hbi.onlyEdits = true;
|
||||||
|
FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
|
||||||
|
Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath());
|
||||||
|
for (FileStatus subDir : subDirs) {
|
||||||
|
String sdName = subDir.getPath().getName();
|
||||||
|
if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
|
||||||
|
hbi.onlyEdits = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks HDFS and META
|
* Record the location of the ROOT region as found in ZooKeeper,
|
||||||
* @return true if there were no errors, otherwise return false
|
* as if it were in a META table. This is so that we can check
|
||||||
|
* deployment of ROOT.
|
||||||
*/
|
*/
|
||||||
boolean checkHdfs() throws IOException {
|
boolean recordRootRegion() throws IOException {
|
||||||
|
HRegionLocation rootLocation = connection.locateRegion(
|
||||||
|
HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
|
||||||
|
|
||||||
boolean status = true; // success
|
// Check if Root region is valid and existing
|
||||||
|
if (rootLocation == null || rootLocation.getRegionInfo() == null ||
|
||||||
// make a copy of all tables in META
|
rootLocation.getServerAddress() == null) {
|
||||||
TreeMap<String, MetaEntry> regions = new TreeMap<String, MetaEntry>();
|
errors.reportError("Root Region or some of its attributes is null.");
|
||||||
for (MetaEntry meta: metaEntries.values()) {
|
return false;
|
||||||
regions.put(meta.getTableDesc().getNameAsString(), meta);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// list all tables from HDFS
|
MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(),
|
||||||
TreeMap<Path, FileStatus> allTableDirs = new TreeMap<Path, FileStatus>();
|
rootLocation.getServerAddress(), null, System.currentTimeMillis());
|
||||||
FileStatus[] files = fs.listStatus(rootDir);
|
HbckInfo hbInfo = new HbckInfo(m);
|
||||||
for (int i = 0; files != null && i < files.length; i++) {
|
regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
|
||||||
allTableDirs.put(files[i].getPath(), files[i]);
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
// verify that -ROOT-, .META directories exists.
|
|
||||||
Path rdir = new Path(rootDir, Bytes.toString(HConstants.ROOT_TABLE_NAME));
|
|
||||||
FileStatus ignore = allTableDirs.remove(rdir);
|
|
||||||
if (ignore == null) {
|
|
||||||
status = false;
|
|
||||||
System.out.print("\nERROR: Path " + rdir + " for ROOT table does not exist.");
|
|
||||||
}
|
|
||||||
Path mdir = new Path(rootDir, Bytes.toString(HConstants.META_TABLE_NAME));
|
|
||||||
ignore = allTableDirs.remove(mdir);
|
|
||||||
if (ignore == null) {
|
|
||||||
status = false;
|
|
||||||
System.out.print("\nERROR: Path " + mdir + " for META table does not exist.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// verify that version file exists
|
|
||||||
Path vfile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
|
|
||||||
ignore = allTableDirs.remove(vfile);
|
|
||||||
if (ignore == null) {
|
|
||||||
status = false;
|
|
||||||
System.out.print("\nERROR: Version file " + vfile + " does not exist.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// filter out all valid regions found in the META
|
|
||||||
for (HRegionInfo rinfo: metaEntries.values()) {
|
|
||||||
Path tableDir = HTableDescriptor.getTableDir(rootDir,
|
|
||||||
rinfo.getTableDesc().getName());
|
|
||||||
// Path regionDir = HRegion.getRegionDir(tableDir, rinfo.getEncodedName());
|
|
||||||
// if the entry exists in allTableDirs, then remove it from allTableDirs as well
|
|
||||||
// as from the META tmp list
|
|
||||||
FileStatus found = allTableDirs.remove(tableDir);
|
|
||||||
if (found != null) {
|
|
||||||
regions.remove(tableDir.getName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The remaining entries in allTableDirs do not have entries in .META
|
|
||||||
// However, if the path name was modified in the last few milliseconds
|
|
||||||
// as specified by timelag, then do not flag it as an inconsistency.
|
|
||||||
long now = System.currentTimeMillis();
|
|
||||||
for (FileStatus region: allTableDirs.values()) {
|
|
||||||
if (region.getModificationTime() + timelag < now) {
|
|
||||||
String finalComponent = region.getPath().getName();
|
|
||||||
if (!finalComponent.startsWith(".")) {
|
|
||||||
// ignore .logs and .oldlogs directories
|
|
||||||
System.out.print("\nERROR: Path " + region.getPath() +
|
|
||||||
" does not have a corresponding entry in META.");
|
|
||||||
status = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// the remaining entries in tmp do not have entries in HDFS
|
|
||||||
for (HRegionInfo rinfo: regions.values()) {
|
|
||||||
System.out.println("\nERROR: Region " + rinfo.getRegionNameAsString() +
|
|
||||||
" does not have a corresponding entry in HDFS.");
|
|
||||||
status = false;
|
|
||||||
}
|
|
||||||
return status;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contacts each regionserver and fetches metadata about regions.
|
* Contacts each regionserver and fetches metadata about regions.
|
||||||
* @param regionServerList - the list of region servers to connect to
|
* @param regionServerList - the list of region servers to connect to
|
||||||
* @throws IOException if a remote or network exception occurs
|
* @throws IOException if a remote or network exception occurs
|
||||||
* @return true if there were no errors, otherwise return false
|
|
||||||
*/
|
*/
|
||||||
boolean processRegionServers(Collection<HServerInfo> regionServerList)
|
void processRegionServers(Collection<HServerInfo> regionServerList)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
// make a copy of all entries in META
|
|
||||||
TreeMap<HRegionInfo, MetaEntry> tmp =
|
|
||||||
new TreeMap<HRegionInfo, MetaEntry>(metaEntries);
|
|
||||||
long errorCount = 0; // number of inconsistencies detected
|
|
||||||
int showProgress = 0;
|
|
||||||
|
|
||||||
// loop to contact each region server
|
// loop to contact each region server
|
||||||
for (HServerInfo rsinfo: regionServerList) {
|
for (HServerInfo rsinfo: regionServerList) {
|
||||||
showProgress++; // one more server.
|
errors.progress();
|
||||||
try {
|
try {
|
||||||
HRegionInterface server = connection.getHRegionConnection(
|
HRegionInterface server = connection.getHRegionConnection(
|
||||||
rsinfo.getServerAddress());
|
rsinfo.getServerAddress());
|
||||||
|
|
||||||
// list all online regions from this region server
|
// list all online regions from this region server
|
||||||
NavigableSet<HRegionInfo> regions = server.getOnlineRegions();
|
List<HRegionInfo> regions = server.getOnlineRegions();
|
||||||
if (details) {
|
if (details) {
|
||||||
System.out.print("\nRegionServer:" + rsinfo.getServerName() +
|
errors.detail("RegionServer: " + rsinfo.getServerName() +
|
||||||
" number of regions:" + regions.size());
|
" number of regions: " + regions.size());
|
||||||
for (HRegionInfo rinfo: regions) {
|
for (HRegionInfo rinfo: regions) {
|
||||||
System.out.print("\n\t name:" + rinfo.getRegionNameAsString() +
|
errors.detail(" " + rinfo.getRegionNameAsString() +
|
||||||
" id:" + rinfo.getRegionId() +
|
" id: " + rinfo.getRegionId() +
|
||||||
" encoded name:" + rinfo.getEncodedName() +
|
" encoded_name: " + rinfo.getEncodedName() +
|
||||||
" start :" + Bytes.toStringBinary(rinfo.getStartKey()) +
|
" start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
|
||||||
" end :" + Bytes.toStringBinary(rinfo.getEndKey()));
|
" end: " + Bytes.toStringBinary(rinfo.getEndKey()));
|
||||||
}
|
}
|
||||||
showProgress = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check to see if the existence of this region matches the region in META
|
// check to see if the existence of this region matches the region in META
|
||||||
for (HRegionInfo r: regions) {
|
for (HRegionInfo r:regions) {
|
||||||
MetaEntry metaEntry = metaEntries.get(r);
|
HbckInfo hbi = getOrCreateInfo(r.getEncodedName());
|
||||||
|
hbi.deployedOn.add(rsinfo.getServerAddress());
|
||||||
// this entry exists in the region server but is not in the META
|
|
||||||
if (metaEntry == null) {
|
|
||||||
if (r.isMetaRegion()) {
|
|
||||||
continue; // this is ROOT or META region
|
|
||||||
}
|
|
||||||
System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
|
|
||||||
" found on server " + rsinfo.getServerAddress() +
|
|
||||||
" but is not listed in META.");
|
|
||||||
errorCount++;
|
|
||||||
showProgress = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!metaEntry.regionServer.equals(rsinfo.getServerAddress())) {
|
|
||||||
System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
|
|
||||||
" found on server " + rsinfo.getServerAddress() +
|
|
||||||
" but is listed in META to be on server " +
|
|
||||||
metaEntry.regionServer);
|
|
||||||
errorCount++;
|
|
||||||
showProgress = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The region server is indeed serving a valid region. Remove it from tmp
|
|
||||||
tmp.remove(r);
|
|
||||||
}
|
}
|
||||||
} catch (IOException e) { // unable to connect to the region server.
|
} catch (IOException e) { // unable to connect to the region server.
|
||||||
if (details) {
|
errors.reportError("\nRegionServer:" + rsinfo.getServerName() +
|
||||||
System.out.print("\nRegionServer:" + rsinfo.getServerName() +
|
|
||||||
" Unable to fetch region information. " + e);
|
" Unable to fetch region information. " + e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (showProgress % 10 == 0) {
|
|
||||||
System.out.print("."); // show progress to user
|
|
||||||
showProgress = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// all the region left in tmp are not found on any region server
|
|
||||||
for (MetaEntry metaEntry: tmp.values()) {
|
|
||||||
// An offlined region will not be present out on a regionserver. A region
|
|
||||||
// is offlined if table is offlined -- will still have an entry in .META.
|
|
||||||
// or a region is offlined because it's a parent region and its daughters
|
|
||||||
// still have references.
|
|
||||||
if (metaEntry.isOffline()) continue;
|
|
||||||
System.out.print("\nERROR: Region " + metaEntry.getRegionNameAsString() +
|
|
||||||
" is not served by any region server " +
|
|
||||||
" but is listed in META to be on server " +
|
|
||||||
metaEntry.regionServer);
|
|
||||||
errorCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (errorCount > 0) {
|
|
||||||
System.out.println("\nDetected " + errorCount + " inconsistencies. " +
|
|
||||||
"This might not indicate a real problem because these regions " +
|
|
||||||
"could be in the midst of a split. Consider re-running with a " +
|
|
||||||
"larger value of -timelag.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true; // no errors
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a list of table names whose metadata have not been modified in the
|
* Check consistency of all regions that have been found in previous phases.
|
||||||
* last few milliseconds specified by timelag
|
*/
|
||||||
|
void checkConsistency() throws IOException {
|
||||||
|
for (HbckInfo hbi : regionInfo.values()) {
|
||||||
|
doConsistencyCheck(hbi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check a single region for consistency and correct deployment.
|
||||||
|
*/
|
||||||
|
void doConsistencyCheck(HbckInfo hbi) throws IOException {
|
||||||
|
String descriptiveName = hbi.toString();
|
||||||
|
|
||||||
|
boolean inMeta = hbi.metaEntry != null;
|
||||||
|
boolean inHdfs = hbi.foundRegionDir != null;
|
||||||
|
boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
|
||||||
|
boolean isDeployed = !hbi.deployedOn.isEmpty();
|
||||||
|
boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
|
||||||
|
boolean deploymentMatchesMeta =
|
||||||
|
hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
|
||||||
|
hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
|
||||||
|
boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline();
|
||||||
|
boolean recentlyModified = hbi.foundRegionDir != null &&
|
||||||
|
hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();
|
||||||
|
|
||||||
|
// ========== First the healthy cases =============
|
||||||
|
if (hbi.onlyEdits) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
|
||||||
|
return;
|
||||||
|
} else if (inMeta && !shouldBeDeployed && !isDeployed) {
|
||||||
|
// offline regions shouldn't cause complaints
|
||||||
|
LOG.debug("Region " + descriptiveName + " offline, ignoring.");
|
||||||
|
return;
|
||||||
|
} else if (recentlyModified) {
|
||||||
|
LOG.info("Region " + descriptiveName + " was recently modified -- skipping");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// ========== Cases where the region is not in META =============
|
||||||
|
else if (!inMeta && !inHdfs && !isDeployed) {
|
||||||
|
// We shouldn't have record of this region at all then!
|
||||||
|
assert false : "Entry for region with no data";
|
||||||
|
} else if (!inMeta && !inHdfs && isDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " not on HDFS or in META but " +
|
||||||
|
"deployed on " + Joiner.on(", ").join(hbi.deployedOn));
|
||||||
|
} else if (!inMeta && inHdfs && !isDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " +
|
||||||
|
"or deployed on any region server.");
|
||||||
|
} else if (!inMeta && inHdfs && isDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " not in META, but deployed on " +
|
||||||
|
Joiner.on(", ").join(hbi.deployedOn));
|
||||||
|
|
||||||
|
// ========== Cases where the region is in META =============
|
||||||
|
} else if (inMeta && !inHdfs && !isDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS " +
|
||||||
|
"or deployed on any region server.");
|
||||||
|
} else if (inMeta && !inHdfs && isDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS, " +
|
||||||
|
"and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
|
||||||
|
} else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " not deployed on any region server.");
|
||||||
|
// If we are trying to fix the errors
|
||||||
|
if (shouldFix()) {
|
||||||
|
errors.print("Trying to fix unassigned region...");
|
||||||
|
setShouldRerun();
|
||||||
|
HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry);
|
||||||
|
}
|
||||||
|
} else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " has should not be deployed according " +
|
||||||
|
"to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
|
||||||
|
} else if (inMeta && inHdfs && isMultiplyDeployed) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " is listed in META on region server " +
|
||||||
|
hbi.metaEntry.regionServer + " but is multiply assigned to region servers " +
|
||||||
|
Joiner.on(", ").join(hbi.deployedOn));
|
||||||
|
// If we are trying to fix the errors
|
||||||
|
if (shouldFix()) {
|
||||||
|
errors.print("Trying to fix assignment error...");
|
||||||
|
setShouldRerun();
|
||||||
|
HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
|
||||||
|
}
|
||||||
|
} else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
|
||||||
|
errors.reportError("Region " + descriptiveName + " listed in META on region server " +
|
||||||
|
hbi.metaEntry.regionServer + " but found on region server " +
|
||||||
|
hbi.deployedOn.get(0));
|
||||||
|
// If we are trying to fix the errors
|
||||||
|
if (shouldFix()) {
|
||||||
|
errors.print("Trying to fix assignment error...");
|
||||||
|
setShouldRerun();
|
||||||
|
HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
errors.reportError("Region " + descriptiveName + " is in an unforeseen state:" +
|
||||||
|
" inMeta=" + inMeta +
|
||||||
|
" inHdfs=" + inHdfs +
|
||||||
|
" isDeployed=" + isDeployed +
|
||||||
|
" isMultiplyDeployed=" + isMultiplyDeployed +
|
||||||
|
" deploymentMatchesMeta=" + deploymentMatchesMeta +
|
||||||
|
" shouldBeDeployed=" + shouldBeDeployed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks tables integrity. Goes over all regions and scans the tables.
|
||||||
|
* Collects all the pieces for each table and checks if there are missing,
|
||||||
|
* repeated or overlapping ones.
|
||||||
|
*/
|
||||||
|
void checkIntegrity() {
|
||||||
|
for (HbckInfo hbi : regionInfo.values()) {
|
||||||
|
// Check only valid, working regions
|
||||||
|
if (hbi.metaEntry == null) continue;
|
||||||
|
if (hbi.metaEntry.regionServer == null) continue;
|
||||||
|
if (hbi.foundRegionDir == null) continue;
|
||||||
|
if (hbi.deployedOn.size() != 1) continue;
|
||||||
|
if (hbi.onlyEdits) continue;
|
||||||
|
|
||||||
|
// We should be safe here
|
||||||
|
String tableName = hbi.metaEntry.getTableDesc().getNameAsString();
|
||||||
|
TInfo modTInfo = tablesInfo.get(tableName);
|
||||||
|
if (modTInfo == null) {
|
||||||
|
modTInfo = new TInfo(tableName);
|
||||||
|
}
|
||||||
|
for (HServerAddress server : hbi.deployedOn) {
|
||||||
|
modTInfo.addServer(server);
|
||||||
|
}
|
||||||
|
modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey());
|
||||||
|
tablesInfo.put(tableName, modTInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (TInfo tInfo : tablesInfo.values()) {
|
||||||
|
if (!tInfo.check()) {
|
||||||
|
errors.reportError("Found inconsistency in table " + tInfo.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maintain information about a particular table.
|
||||||
|
*/
|
||||||
|
private class TInfo {
|
||||||
|
String tableName;
|
||||||
|
TreeMap <byte[], byte[]> edges;
|
||||||
|
TreeSet <HServerAddress> deployedOn;
|
||||||
|
|
||||||
|
TInfo(String name) {
|
||||||
|
this.tableName = name;
|
||||||
|
edges = new TreeMap <byte[], byte[]> (Bytes.BYTES_COMPARATOR);
|
||||||
|
deployedOn = new TreeSet <HServerAddress>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addEdge(byte[] fromNode, byte[] toNode) {
|
||||||
|
this.edges.put(fromNode, toNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addServer(HServerAddress server) {
|
||||||
|
this.deployedOn.add(server);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return tableName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getNumRegions() {
|
||||||
|
return edges.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean check() {
|
||||||
|
byte[] last = new byte[0];
|
||||||
|
byte[] next = new byte[0];
|
||||||
|
TreeSet <byte[]> visited = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
|
||||||
|
// Each table should start with a zero-length byte[] and end at a
|
||||||
|
// zero-length byte[]. Just follow the edges to see if this is true
|
||||||
|
while (true) {
|
||||||
|
// Check if chain is broken
|
||||||
|
if (!edges.containsKey(last)) {
|
||||||
|
errors.detail("Chain of regions in table " + tableName +
|
||||||
|
" is broken.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
next = edges.get(last);
|
||||||
|
// Found a cycle
|
||||||
|
if (visited.contains(next)) {
|
||||||
|
errors.detail("Chain of regions in table " + tableName +
|
||||||
|
" has a cycle.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Mark next node as visited
|
||||||
|
visited.add(next);
|
||||||
|
// If next is zero-length byte[] we are possibly at the end of the chain
|
||||||
|
if (next.length == 0) {
|
||||||
|
// If we have visited all elements we are fine
|
||||||
|
if (edges.size() != visited.size()) {
|
||||||
|
errors.detail("Chain of regions in table " + tableName +
|
||||||
|
" contains less elements than are listed in META.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
last = next;
|
||||||
|
}
|
||||||
|
// How did we get here?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a list of user-space table names whose metadata have not been
|
||||||
|
* modified in the last few milliseconds specified by timelag
|
||||||
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
|
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
|
||||||
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
|
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
|
||||||
* milliseconds specified by timelag, then the table is a candidate to be returned.
|
* milliseconds specified by timelag, then the table is a candidate to be returned.
|
||||||
|
@ -352,18 +517,17 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
* @return tables that have not been modified recently
|
* @return tables that have not been modified recently
|
||||||
* @throws IOException if an error is encountered
|
* @throws IOException if an error is encountered
|
||||||
*/
|
*/
|
||||||
HTableDescriptor[] getTables(final TreeMap<HRegionInfo, MetaEntry> regionList,
|
HTableDescriptor[] getTables(AtomicInteger numSkipped) {
|
||||||
AtomicInteger numSkipped) {
|
|
||||||
TreeSet<HTableDescriptor> uniqueTables = new TreeSet<HTableDescriptor>();
|
TreeSet<HTableDescriptor> uniqueTables = new TreeSet<HTableDescriptor>();
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
|
|
||||||
for (MetaEntry m: regionList.values()) {
|
for (HbckInfo hbi : regionInfo.values()) {
|
||||||
HRegionInfo info = m;
|
MetaEntry info = hbi.metaEntry;
|
||||||
|
|
||||||
// if the start key is zero, then we have found the first region of a table.
|
// if the start key is zero, then we have found the first region of a table.
|
||||||
// pick only those tables that were not modified in the last few milliseconds.
|
// pick only those tables that were not modified in the last few milliseconds.
|
||||||
if (info != null && info.getStartKey().length == 0) {
|
if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
|
||||||
if (m.modTime + timelag < now) {
|
if (info.modTime + timelag < now) {
|
||||||
uniqueTables.add(info.getTableDesc());
|
uniqueTables.add(info.getTableDesc());
|
||||||
} else {
|
} else {
|
||||||
numSkipped.incrementAndGet(); // one more in-flux table
|
numSkipped.incrementAndGet(); // one more in-flux table
|
||||||
|
@ -374,11 +538,77 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scan META. Returns a list of all regions of all known tables.
|
* Gets the entry in regionInfo corresponding to the given encoded
|
||||||
* @param regionList - fill up all entries found in .META
|
* region name. If the region has not been seen yet, a new entry is added
|
||||||
|
* and returned.
|
||||||
|
*/
|
||||||
|
private HbckInfo getOrCreateInfo(String name) {
|
||||||
|
HbckInfo hbi = regionInfo.get(name);
|
||||||
|
if (hbi == null) {
|
||||||
|
hbi = new HbckInfo(null);
|
||||||
|
regionInfo.put(name, hbi);
|
||||||
|
}
|
||||||
|
return hbi;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check values in regionInfo for .META.
|
||||||
|
* Check if zero or more than one regions with META are found.
|
||||||
|
* If there are inconsistencies (i.e. zero or more than one regions
|
||||||
|
* pretend to be holding the .META.) try to fix that and report an error.
|
||||||
|
* @throws IOException from HBaseFsckRepair functions
|
||||||
|
*/
|
||||||
|
boolean checkMetaEntries() throws IOException {
|
||||||
|
List <HbckInfo> metaRegions = Lists.newArrayList();
|
||||||
|
for (HbckInfo value : regionInfo.values()) {
|
||||||
|
if (value.metaEntry.isMetaTable()) {
|
||||||
|
metaRegions.add(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If something is wrong
|
||||||
|
if (metaRegions.size() != 1) {
|
||||||
|
HRegionLocation rootLocation = connection.locateRegion(
|
||||||
|
HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
|
||||||
|
HbckInfo root =
|
||||||
|
regionInfo.get(rootLocation.getRegionInfo().getEncodedName());
|
||||||
|
|
||||||
|
// If there is no region holding .META.
|
||||||
|
if (metaRegions.size() == 0) {
|
||||||
|
errors.reportError(".META. is not found on any region.");
|
||||||
|
if (shouldFix()) {
|
||||||
|
errors.print("Trying to fix a problem with .META...");
|
||||||
|
setShouldRerun();
|
||||||
|
// try to fix it (treat it as unassigned region)
|
||||||
|
HBaseFsckRepair.fixUnassigned(conf, root.metaEntry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If there are more than one regions pretending to hold the .META.
|
||||||
|
else if (metaRegions.size() > 1) {
|
||||||
|
errors.reportError(".META. is found on more than one region.");
|
||||||
|
if (shouldFix()) {
|
||||||
|
errors.print("Trying to fix a problem with .META...");
|
||||||
|
setShouldRerun();
|
||||||
|
// try fix it (treat is a dupe assignment)
|
||||||
|
List <HServerAddress> deployedOn = Lists.newArrayList();
|
||||||
|
for (HbckInfo mRegion : metaRegions) {
|
||||||
|
deployedOn.add(mRegion.metaEntry.regionServer);
|
||||||
|
}
|
||||||
|
HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// rerun hbck with hopefully fixed META
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// no errors, so continue normally
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
|
||||||
* @throws IOException if an error is encountered
|
* @throws IOException if an error is encountered
|
||||||
*/
|
*/
|
||||||
void getMetaEntries(final TreeMap<HRegionInfo,MetaEntry> regionList) throws IOException {
|
void getMetaEntries() throws IOException {
|
||||||
MetaScannerVisitor visitor = new MetaScannerVisitor() {
|
MetaScannerVisitor visitor = new MetaScannerVisitor() {
|
||||||
int countRecord = 1;
|
int countRecord = 1;
|
||||||
|
|
||||||
|
@ -420,14 +650,15 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
startCode = value;
|
startCode = value;
|
||||||
}
|
}
|
||||||
MetaEntry m = new MetaEntry(info, server, startCode, ts);
|
MetaEntry m = new MetaEntry(info, server, startCode, ts);
|
||||||
m = regionList.put(m ,m);
|
HbckInfo hbInfo = new HbckInfo(m);
|
||||||
if (m != null) {
|
HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo);
|
||||||
throw new IOException("Two entries in META are same " + m);
|
if (previous != null) {
|
||||||
|
throw new IOException("Two entries in META are same " + previous);
|
||||||
}
|
}
|
||||||
|
|
||||||
// show proof of progress to the user, once for every 100 records.
|
// show proof of progress to the user, once for every 100 records.
|
||||||
if (countRecord % 100 == 0) {
|
if (countRecord % 100 == 0) {
|
||||||
System.out.print(".");
|
errors.progress();
|
||||||
}
|
}
|
||||||
countRecord++;
|
countRecord++;
|
||||||
return true;
|
return true;
|
||||||
|
@ -437,8 +668,14 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Scan -ROOT- to pick up META regions
|
||||||
|
MetaScanner.metaScan(conf, visitor, null, null,
|
||||||
|
Integer.MAX_VALUE, HConstants.ROOT_TABLE_NAME);
|
||||||
|
|
||||||
|
// Scan .META. to pick up user regions
|
||||||
MetaScanner.metaScan(conf, visitor);
|
MetaScanner.metaScan(conf, visitor);
|
||||||
System.out.println("");
|
errors.print("");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -446,26 +683,158 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
*/
|
*/
|
||||||
private static class MetaEntry extends HRegionInfo {
|
private static class MetaEntry extends HRegionInfo {
|
||||||
HServerAddress regionServer; // server hosting this region
|
HServerAddress regionServer; // server hosting this region
|
||||||
byte[] startCode; // start value of region
|
|
||||||
long modTime; // timestamp of most recent modification metadata
|
long modTime; // timestamp of most recent modification metadata
|
||||||
|
|
||||||
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
|
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
|
||||||
byte[] startCode, long modTime) {
|
byte[] startCode, long modTime) {
|
||||||
super(rinfo);
|
super(rinfo);
|
||||||
this.regionServer = regionServer;
|
this.regionServer = regionServer;
|
||||||
this.startCode = startCode;
|
|
||||||
this.modTime = modTime;
|
this.modTime = modTime;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Display the full report from fsck. This displays all live and dead region servers ,
|
* Maintain information about a particular region.
|
||||||
* and all known regions.
|
*/
|
||||||
|
static class HbckInfo {
|
||||||
|
boolean onlyEdits = false;
|
||||||
|
MetaEntry metaEntry = null;
|
||||||
|
FileStatus foundRegionDir = null;
|
||||||
|
List<HServerAddress> deployedOn = Lists.newArrayList();
|
||||||
|
|
||||||
|
HbckInfo(MetaEntry metaEntry) {
|
||||||
|
this.metaEntry = metaEntry;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
if (metaEntry != null) {
|
||||||
|
return metaEntry.getRegionNameAsString();
|
||||||
|
} else if (foundRegionDir != null) {
|
||||||
|
return foundRegionDir.getPath().toString();
|
||||||
|
} else {
|
||||||
|
return "unknown region on " + Joiner.on(", ").join(deployedOn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prints summary of all tables found on the system.
|
||||||
|
*/
|
||||||
|
private void printTableSummary() {
|
||||||
|
System.out.println("Summary:");
|
||||||
|
for (TInfo tInfo : tablesInfo.values()) {
|
||||||
|
if (tInfo.check()) {
|
||||||
|
System.out.println(" " + tInfo.getName() + " is okay.");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
System.out.println("Table " + tInfo.getName() + " is inconsistent.");
|
||||||
|
}
|
||||||
|
System.out.println(" Number of regions: " + tInfo.getNumRegions());
|
||||||
|
System.out.print(" Deployed on: ");
|
||||||
|
for (HServerAddress server : tInfo.deployedOn) {
|
||||||
|
System.out.print(" " + server.toString());
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Sink for everything fsck wants to tell the user. Interface members are
 * implicitly public, so the modifier is omitted.
 */
interface ErrorReporter {
  /** Records one inconsistency described by the given message. */
  void reportError(String message);

  /** Produces the final verdict and returns it as an integer status. */
  int summarize();

  /** Emits additional detail about a finding. */
  void detail(String details);

  /** Signals that work is progressing. */
  void progress();

  /** Emits an informational message. */
  void print(String message);
}
|
||||||
|
|
||||||
|
private static class PrintingErrorReporter implements ErrorReporter {
|
||||||
|
public int errorCount = 0;
|
||||||
|
private int showProgress;
|
||||||
|
|
||||||
|
public void reportError(String message) {
|
||||||
|
if (!summary) {
|
||||||
|
System.out.println("ERROR: " + message);
|
||||||
|
}
|
||||||
|
errorCount++;
|
||||||
|
showProgress = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int summarize() {
|
||||||
|
System.out.println(Integer.toString(errorCount) +
|
||||||
|
" inconsistencies detected.");
|
||||||
|
if (errorCount == 0) {
|
||||||
|
System.out.println("Status: OK");
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
System.out.println("Status: INCONSISTENT");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void print(String message) {
|
||||||
|
if (!summary) {
|
||||||
|
System.out.println(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void detail(String message) {
|
||||||
|
if (details) {
|
||||||
|
System.out.println(message);
|
||||||
|
}
|
||||||
|
showProgress = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void progress() {
|
||||||
|
if (showProgress++ == 10) {
|
||||||
|
if (!summary) {
|
||||||
|
System.out.print(".");
|
||||||
|
}
|
||||||
|
showProgress = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Display the full report from fsck.
|
||||||
|
* This displays all live and dead region servers, and all known regions.
|
||||||
*/
|
*/
|
||||||
void displayFullReport() {
|
void displayFullReport() {
|
||||||
details = true;
|
details = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set summary mode.
|
||||||
|
* Print only summary of the tables and status (OK or INCONSISTENT)
|
||||||
|
*/
|
||||||
|
void setSummary() {
|
||||||
|
summary = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if we should rerun fsck again. This checks if we've tried to
|
||||||
|
* fix something and we should rerun fsck tool again.
|
||||||
|
* Display the full report from fsck. This displays all live and dead
|
||||||
|
* region servers, and all known regions.
|
||||||
|
*/
|
||||||
|
void setShouldRerun() {
|
||||||
|
rerun = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean shouldRerun() {
|
||||||
|
return rerun;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fix inconsistencies found by fsck. This should try to fix errors (if any)
|
||||||
|
* found by fsck utility.
|
||||||
|
*/
|
||||||
|
void setFixErrors() {
|
||||||
|
fix = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean shouldFix() {
|
||||||
|
return fix;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* We are interested in only those tables that have not changed their state in
|
* We are interested in only those tables that have not changed their state in
|
||||||
* META during the last few seconds specified by hbase.admin.fsck.timelag
|
* META during the last few seconds specified by hbase.admin.fsck.timelag
|
||||||
|
@ -482,6 +851,9 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
System.err.println(" -timelag {timeInSeconds} Process only regions that " +
|
System.err.println(" -timelag {timeInSeconds} Process only regions that " +
|
||||||
" have not experienced any metadata updates in the last " +
|
" have not experienced any metadata updates in the last " +
|
||||||
" {{timeInSeconds} seconds.");
|
" {{timeInSeconds} seconds.");
|
||||||
|
System.err.println(" -fix Try to fix some of the errors.");
|
||||||
|
System.err.println(" -summary Print only summary of the tables and status.");
|
||||||
|
|
||||||
Runtime.getRuntime().exit(-2);
|
Runtime.getRuntime().exit(-2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -515,6 +887,10 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
printUsageAndExit();
|
printUsageAndExit();
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
|
} else if (cmd.equals("-fix")) {
|
||||||
|
fsck.setFixErrors();
|
||||||
|
} else if (cmd.equals("-summary")) {
|
||||||
|
fsck.setSummary();
|
||||||
} else {
|
} else {
|
||||||
String str = "Unknown command line option : " + cmd;
|
String str = "Unknown command line option : " + cmd;
|
||||||
LOG.info(str);
|
LOG.info(str);
|
||||||
|
@ -524,6 +900,14 @@ public class HBaseFsck extends HBaseAdmin {
|
||||||
}
|
}
|
||||||
// do the real work of fsck
|
// do the real work of fsck
|
||||||
int code = fsck.doWork();
|
int code = fsck.doWork();
|
||||||
|
// If we have changed the HBase state it is better to run fsck again
|
||||||
|
// to see if we haven't broken something else in the process.
|
||||||
|
// We run it only once more because otherwise we can easily fall into
|
||||||
|
// an infinite loop.
|
||||||
|
if (fsck.shouldRerun()) {
|
||||||
|
code = fsck.doWork();
|
||||||
|
}
|
||||||
|
|
||||||
Runtime.getRuntime().exit(code);
|
Runtime.getRuntime().exit(code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,121 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2010 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.HServerAddress;
|
||||||
|
import org.apache.hadoop.hbase.ipc.HMasterInterface;
|
||||||
|
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||||
|
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||||
|
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||||
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
|
||||||
|
public class HBaseFsckRepair {
|
||||||
|
|
||||||
|
public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
|
||||||
|
List<HServerAddress> servers)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
HRegionInfo actualRegion = new HRegionInfo(region);
|
||||||
|
|
||||||
|
// Clear status in master and zk
|
||||||
|
clearInMaster(conf, actualRegion);
|
||||||
|
clearInZK(conf, actualRegion);
|
||||||
|
|
||||||
|
// Close region on the servers
|
||||||
|
for(HServerAddress server : servers) {
|
||||||
|
closeRegion(conf, server, actualRegion);
|
||||||
|
}
|
||||||
|
|
||||||
|
// It's unassigned so fix it as such
|
||||||
|
fixUnassigned(conf, actualRegion);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void fixUnassigned(Configuration conf, HRegionInfo region)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
HRegionInfo actualRegion = new HRegionInfo(region);
|
||||||
|
|
||||||
|
// Clear status in master and zk
|
||||||
|
clearInMaster(conf, actualRegion);
|
||||||
|
clearInZK(conf, actualRegion);
|
||||||
|
|
||||||
|
// Clear assignment in META or ROOT
|
||||||
|
clearAssignment(conf, actualRegion);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void clearInMaster(Configuration conf, HRegionInfo region)
|
||||||
|
throws IOException {
|
||||||
|
System.out.println("Region being cleared in master: " + region);
|
||||||
|
HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
|
||||||
|
long masterVersion =
|
||||||
|
master.getProtocolVersion("org.apache.hadoop.hbase.ipc.HMasterInterface", 25);
|
||||||
|
System.out.println("Master protocol version: " + masterVersion);
|
||||||
|
try {
|
||||||
|
// TODO: Do we want to do it this way?
|
||||||
|
// Better way is to tell master to fix the issue itself?
|
||||||
|
// That way it can use in-memory state to determine best plan
|
||||||
|
// master.clearFromTransition(region);
|
||||||
|
} catch (Exception e) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void clearInZK(Configuration conf, HRegionInfo region)
|
||||||
|
throws IOException {
|
||||||
|
ZooKeeperWatcher zkw =
|
||||||
|
HConnectionManager.getConnection(conf).getZooKeeperWatcher();
|
||||||
|
try {
|
||||||
|
ZKAssign.deleteNodeFailSilent(zkw, region);
|
||||||
|
} catch (KeeperException e) {
|
||||||
|
throw new IOException("Unexpected ZK exception", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void closeRegion(Configuration conf, HServerAddress server,
|
||||||
|
HRegionInfo region)
|
||||||
|
throws IOException {
|
||||||
|
HRegionInterface rs =
|
||||||
|
HConnectionManager.getConnection(conf).getHRegionConnection(server);
|
||||||
|
rs.closeRegion(region, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void clearAssignment(Configuration conf,
|
||||||
|
HRegionInfo region)
|
||||||
|
throws IOException {
|
||||||
|
HTable ht = null;
|
||||||
|
if (region.isMetaTable()) {
|
||||||
|
// Clear assignment in ROOT
|
||||||
|
ht = new HTable(conf, HConstants.ROOT_TABLE_NAME);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Clear assignment in META
|
||||||
|
ht = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
|
}
|
||||||
|
Delete del = new Delete(region.getRegionName());
|
||||||
|
del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
|
||||||
|
del.deleteColumns(HConstants.CATALOG_FAMILY,
|
||||||
|
HConstants.STARTCODE_QUALIFIER);
|
||||||
|
ht.delete(del);
|
||||||
|
}
|
||||||
|
}
|
|
@ -63,13 +63,14 @@ public class MetaScanner {
|
||||||
*
|
*
|
||||||
* @param configuration config
|
* @param configuration config
|
||||||
* @param visitor visitor object
|
* @param visitor visitor object
|
||||||
* @param tableName table name
|
* @param userTableName User table name in meta table to start scan at. Pass
|
||||||
|
* null if not interested in a particular table.
|
||||||
* @throws IOException e
|
* @throws IOException e
|
||||||
*/
|
*/
|
||||||
public static void metaScan(Configuration configuration,
|
public static void metaScan(Configuration configuration,
|
||||||
MetaScannerVisitor visitor, byte[] tableName)
|
MetaScannerVisitor visitor, byte [] userTableName)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
metaScan(configuration, visitor, tableName, null, Integer.MAX_VALUE);
|
metaScan(configuration, visitor, userTableName, null, Integer.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -79,7 +80,8 @@ public class MetaScanner {
|
||||||
*
|
*
|
||||||
* @param configuration HBase configuration.
|
* @param configuration HBase configuration.
|
||||||
* @param visitor Visitor object.
|
* @param visitor Visitor object.
|
||||||
* @param tableName User table name.
|
* @param userTableName User table name in meta table to start scan at. Pass
|
||||||
|
* null if not interested in a particular table.
|
||||||
* @param row Name of the row at the user table. The scan will start from
|
* @param row Name of the row at the user table. The scan will start from
|
||||||
* the region row where the row resides.
|
* the region row where the row resides.
|
||||||
* @param rowLimit Max of processed rows. If it is less than 0, it
|
* @param rowLimit Max of processed rows. If it is less than 0, it
|
||||||
|
@ -87,8 +89,32 @@ public class MetaScanner {
|
||||||
* @throws IOException e
|
* @throws IOException e
|
||||||
*/
|
*/
|
||||||
public static void metaScan(Configuration configuration,
|
public static void metaScan(Configuration configuration,
|
||||||
MetaScannerVisitor visitor, byte[] tableName, byte[] row,
|
MetaScannerVisitor visitor, byte [] userTableName, byte[] row,
|
||||||
int rowLimit)
|
int rowLimit)
|
||||||
|
throws IOException {
|
||||||
|
metaScan(configuration, visitor, userTableName, row, rowLimit,
|
||||||
|
HConstants.META_TABLE_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scans the meta table and calls a visitor on each RowResult. Uses a table
|
||||||
|
* name and a row name to locate meta regions. And it only scans at most
|
||||||
|
* <code>rowLimit</code> of rows.
|
||||||
|
*
|
||||||
|
* @param configuration HBase configuration.
|
||||||
|
* @param visitor Visitor object.
|
||||||
|
* @param userTableName User table name in meta table to start scan at. Pass
|
||||||
|
* null if not interested in a particular table.
|
||||||
|
* @param row Name of the row at the user table. The scan will start from
|
||||||
|
* the region row where the row resides.
|
||||||
|
* @param rowLimit Max of processed rows. If it is less than 0, it
|
||||||
|
* will be set to default value <code>Integer.MAX_VALUE</code>.
|
||||||
|
* @param metaTableName Meta table to scan, root or meta.
|
||||||
|
* @throws IOException e
|
||||||
|
*/
|
||||||
|
public static void metaScan(Configuration configuration,
|
||||||
|
MetaScannerVisitor visitor, byte [] tableName, byte[] row,
|
||||||
|
int rowLimit, final byte [] metaTableName)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int rowUpperLimit = rowLimit > 0 ? rowLimit: Integer.MAX_VALUE;
|
int rowUpperLimit = rowLimit > 0 ? rowLimit: Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
@ -136,8 +162,6 @@ public class MetaScanner {
|
||||||
configuration.getInt("hbase.meta.scanner.caching", 100));
|
configuration.getInt("hbase.meta.scanner.caching", 100));
|
||||||
do {
|
do {
|
||||||
final Scan scan = new Scan(startRow).addFamily(HConstants.CATALOG_FAMILY);
|
final Scan scan = new Scan(startRow).addFamily(HConstants.CATALOG_FAMILY);
|
||||||
byte [] metaTableName = Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)?
|
|
||||||
HConstants.ROOT_TABLE_NAME: HConstants.META_TABLE_NAME;
|
|
||||||
LOG.debug("Scanning " + Bytes.toString(metaTableName) +
|
LOG.debug("Scanning " + Bytes.toString(metaTableName) +
|
||||||
" starting at row=" + Bytes.toString(startRow) + " for max=" +
|
" starting at row=" + Bytes.toString(startRow) + " for max=" +
|
||||||
rowUpperLimit + " rows");
|
rowUpperLimit + " rows");
|
||||||
|
|
|
@ -80,5 +80,5 @@ public interface HBaseRPCProtocolVersion extends VersionedProtocol {
|
||||||
* <li>Version 26: New master and Increment, 0.90 version bump.</li>
|
* <li>Version 26: New master and Increment, 0.90 version bump.</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
public static final long versionID = 25L; // Setting it to 25 temporarily to see if hudson passes. #1608 hudson failed because of version mismatch 25 vs 26.
|
public static final long versionID = 26L;
|
||||||
}
|
}
|
||||||
|
|
|
@ -275,7 +275,7 @@ public interface HRegionInterface extends HBaseRPCProtocolVersion, Stoppable, Ab
|
||||||
* @return All regions online on this region server
|
* @return All regions online on this region server
|
||||||
* @throws IOException e
|
* @throws IOException e
|
||||||
*/
|
*/
|
||||||
public NavigableSet<HRegionInfo> getOnlineRegions();
|
public List<HRegionInfo> getOnlineRegions();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method used when a master is taking the place of another failed one.
|
* Method used when a master is taking the place of another failed one.
|
||||||
|
@ -334,6 +334,17 @@ public interface HRegionInterface extends HBaseRPCProtocolVersion, Stoppable, Ab
|
||||||
public boolean closeRegion(final HRegionInfo region)
|
public boolean closeRegion(final HRegionInfo region)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the specified region and will use or not use ZK during the close
|
||||||
|
* according to the specified flag.
|
||||||
|
* @param region region to close
|
||||||
|
* @param zk true if transitions should be done in ZK, false if not
|
||||||
|
* @return true if closing region, false if not
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public boolean closeRegion(final HRegionInfo region, final boolean zk)
|
||||||
|
throws IOException;
|
||||||
|
|
||||||
// Region administrative methods
|
// Region administrative methods
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1199,6 +1199,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
for (Result result : results) {
|
for (Result result : results) {
|
||||||
Pair<HRegionInfo,HServerInfo> region =
|
Pair<HRegionInfo,HServerInfo> region =
|
||||||
MetaReader.metaRowToRegionPairWithInfo(result);
|
MetaReader.metaRowToRegionPairWithInfo(result);
|
||||||
|
if (region == null) continue;
|
||||||
HServerInfo regionLocation = region.getSecond();
|
HServerInfo regionLocation = region.getSecond();
|
||||||
HRegionInfo regionInfo = region.getFirst();
|
HRegionInfo regionInfo = region.getFirst();
|
||||||
if (regionLocation == null) {
|
if (regionLocation == null) {
|
||||||
|
@ -1325,6 +1326,34 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clears the specified region from being in transition.
|
||||||
|
* <p>
|
||||||
|
* Used only by HBCK tool.
|
||||||
|
* @param hri
|
||||||
|
*/
|
||||||
|
public void clearRegionFromTransition(HRegionInfo hri) {
|
||||||
|
synchronized (this.regionsInTransition) {
|
||||||
|
this.regionsInTransition.remove(hri.getEncodedName());
|
||||||
|
}
|
||||||
|
synchronized (this.regions) {
|
||||||
|
this.regions.remove(hri);
|
||||||
|
}
|
||||||
|
synchronized (this.regionPlans) {
|
||||||
|
this.regionPlans.remove(hri.getEncodedName());
|
||||||
|
}
|
||||||
|
synchronized (this.servers) {
|
||||||
|
for (List<HRegionInfo> regions : this.servers.values()) {
|
||||||
|
for (int i=0;i<regions.size();i++) {
|
||||||
|
if (regions.get(i).equals(hri)) {
|
||||||
|
regions.remove(i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if the specified table has been disabled by the user.
|
* Checks if the specified table has been disabled by the user.
|
||||||
* @param tableName
|
* @param tableName
|
||||||
|
|
|
@ -862,6 +862,11 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void clearFromTransition(HRegionInfo hri) {
|
||||||
|
if (this.assignmentManager.isRegionInTransition(hri) != null) {
|
||||||
|
this.assignmentManager.clearRegionFromTransition(hri);
|
||||||
|
}
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* @return cluster status
|
* @return cluster status
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -345,9 +345,9 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Integer apply(Writable from) {
|
public Integer apply(Writable from) {
|
||||||
if (from instanceof HBaseRPC.Invocation) {
|
if (!(from instanceof HBaseRPC.Invocation)) return NORMAL_QOS;
|
||||||
HBaseRPC.Invocation inv = (HBaseRPC.Invocation) from;
|
|
||||||
|
|
||||||
|
HBaseRPC.Invocation inv = (HBaseRPC.Invocation) from;
|
||||||
String methodName = inv.getMethodName();
|
String methodName = inv.getMethodName();
|
||||||
|
|
||||||
// scanner methods...
|
// scanner methods...
|
||||||
|
@ -357,7 +357,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
||||||
try {
|
try {
|
||||||
scannerId = (Long) inv.getParameters()[0];
|
scannerId = (Long) inv.getParameters()[0];
|
||||||
} catch (ClassCastException ignored) {
|
} catch (ClassCastException ignored) {
|
||||||
//LOG.debug("Low priority: " + from);
|
// LOG.debug("Low priority: " + from);
|
||||||
return NORMAL_QOS; // doh.
|
return NORMAL_QOS; // doh.
|
||||||
}
|
}
|
||||||
String scannerIdString = Long.toString(scannerId);
|
String scannerIdString = Long.toString(scannerId);
|
||||||
|
@ -366,43 +366,46 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
||||||
HRegion.RegionScanner rs = (HRegion.RegionScanner) scanner;
|
HRegion.RegionScanner rs = (HRegion.RegionScanner) scanner;
|
||||||
HRegionInfo regionName = rs.getRegionName();
|
HRegionInfo regionName = rs.getRegionName();
|
||||||
if (regionName.isMetaRegion()) {
|
if (regionName.isMetaRegion()) {
|
||||||
//LOG.debug("High priority scanner request: " + scannerId);
|
// LOG.debug("High priority scanner request: " + scannerId);
|
||||||
return HIGH_QOS;
|
return HIGH_QOS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} else if (methodName.equals("getHServerInfo")
|
||||||
else if (methodName.equals("getHServerInfo") ||
|
|| methodName.equals("getRegionsAssignment")
|
||||||
methodName.equals("getRegionsAssignment") ||
|
|| methodName.equals("unlockRow")
|
||||||
methodName.equals("unlockRow") ||
|
|| methodName.equals("getProtocolVersion")
|
||||||
methodName.equals("getProtocolVersion") ||
|
|| methodName.equals("getClosestRowBefore")) {
|
||||||
methodName.equals("getClosestRowBefore")) {
|
// LOG.debug("High priority method: " + methodName);
|
||||||
//LOG.debug("High priority method: " + methodName);
|
|
||||||
return HIGH_QOS;
|
return HIGH_QOS;
|
||||||
}
|
} else if (inv.getParameterClasses().length == 0) {
|
||||||
else if (inv.getParameterClasses()[0] == byte[].class) {
|
// Just let it through. This is getOnlineRegions, etc.
|
||||||
|
} else if (inv.getParameterClasses()[0] == byte[].class) {
|
||||||
// first arg is byte array, so assume this is a regionname:
|
// first arg is byte array, so assume this is a regionname:
|
||||||
if (isMetaRegion((byte[]) inv.getParameters()[0])) {
|
if (isMetaRegion((byte[]) inv.getParameters()[0])) {
|
||||||
//LOG.debug("High priority with method: " + methodName + " and region: "
|
// LOG.debug("High priority with method: " + methodName +
|
||||||
|
// " and region: "
|
||||||
// + Bytes.toString((byte[]) inv.getParameters()[0]));
|
// + Bytes.toString((byte[]) inv.getParameters()[0]));
|
||||||
return HIGH_QOS;
|
return HIGH_QOS;
|
||||||
}
|
}
|
||||||
}
|
} else if (inv.getParameterClasses()[0] == MultiAction.class) {
|
||||||
else if (inv.getParameterClasses()[0] == MultiAction.class) {
|
|
||||||
MultiAction ma = (MultiAction) inv.getParameters()[0];
|
MultiAction ma = (MultiAction) inv.getParameters()[0];
|
||||||
Set<byte[]> regions = ma.getRegions();
|
Set<byte[]> regions = ma.getRegions();
|
||||||
// ok this sucks, but if any single of the actions touches a meta, the whole
|
// ok this sucks, but if any single of the actions touches a meta, the
|
||||||
// thing gets pingged high priority. This is a dangerous hack because people
|
// whole
|
||||||
// can get their multi action tagged high QOS by tossing a Get(.META.) AND this
|
// thing gets pingged high priority. This is a dangerous hack because
|
||||||
|
// people
|
||||||
|
// can get their multi action tagged high QOS by tossing a Get(.META.)
|
||||||
|
// AND this
|
||||||
// regionserver hosts META/-ROOT-
|
// regionserver hosts META/-ROOT-
|
||||||
for (byte[] region: regions) {
|
for (byte[] region : regions) {
|
||||||
if (isMetaRegion(region)) {
|
if (isMetaRegion(region)) {
|
||||||
//LOG.debug("High priority multi with region: " + Bytes.toString(region));
|
// LOG.debug("High priority multi with region: " +
|
||||||
|
// Bytes.toString(region));
|
||||||
return HIGH_QOS; // short circuit for the win.
|
return HIGH_QOS; // short circuit for the win.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
// LOG.debug("Low priority: " + from.toString());
|
||||||
//LOG.debug("Low priority: " + from.toString());
|
|
||||||
return NORMAL_QOS;
|
return NORMAL_QOS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1973,17 +1976,21 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean closeRegion(HRegionInfo region)
|
public boolean closeRegion(HRegionInfo region)
|
||||||
|
throws NotServingRegionException {
|
||||||
|
return closeRegion(region, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean closeRegion(HRegionInfo region, final boolean zk)
|
||||||
throws NotServingRegionException {
|
throws NotServingRegionException {
|
||||||
LOG.info("Received close region: " + region.getRegionNameAsString());
|
LOG.info("Received close region: " + region.getRegionNameAsString());
|
||||||
// TODO: Need to check if this is being served here but currently undergoing
|
|
||||||
// a split (so master needs to retry close after split is complete)
|
|
||||||
if (!onlineRegions.containsKey(region.getEncodedName())) {
|
if (!onlineRegions.containsKey(region.getEncodedName())) {
|
||||||
LOG.warn("Received close for region we are not serving; " +
|
LOG.warn("Received close for region we are not serving; " +
|
||||||
region.getEncodedName());
|
region.getEncodedName());
|
||||||
throw new NotServingRegionException("Received close for "
|
throw new NotServingRegionException("Received close for "
|
||||||
+ region.getRegionNameAsString() + " but we are not serving it");
|
+ region.getRegionNameAsString() + " but we are not serving it");
|
||||||
}
|
}
|
||||||
return closeRegion(region, false, true);
|
return closeRegion(region, false, zk);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2066,14 +2073,14 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public NavigableSet<HRegionInfo> getOnlineRegions() {
|
public List<HRegionInfo> getOnlineRegions() {
|
||||||
NavigableSet<HRegionInfo> sortedset = new TreeSet<HRegionInfo>();
|
List<HRegionInfo> list = new ArrayList<HRegionInfo>();
|
||||||
synchronized(this.onlineRegions) {
|
synchronized(this.onlineRegions) {
|
||||||
for (Map.Entry<String,HRegion> e: this.onlineRegions.entrySet()) {
|
for (Map.Entry<String,HRegion> e: this.onlineRegions.entrySet()) {
|
||||||
sortedset.add(e.getValue().getRegionInfo());
|
list.add(e.getValue().getRegionInfo());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return sortedset;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getNumberOfOnlineRegions() {
|
public int getNumberOfOnlineRegions() {
|
||||||
|
|
|
@ -85,6 +85,10 @@ public class MiniHBaseCluster {
|
||||||
init(numMasters, numRegionServers);
|
init(numMasters, numRegionServers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Configuration getConfiguration() {
|
||||||
|
return this.conf;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Override Master so can add inject behaviors testing.
|
* Override Master so can add inject behaviors testing.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
package org.apache.hadoop.hbase.client;
|
package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
@ -45,8 +46,10 @@ import org.apache.hadoop.hbase.TableNotFoundException;
|
||||||
import org.apache.hadoop.hbase.executor.EventHandler;
|
import org.apache.hadoop.hbase.executor.EventHandler;
|
||||||
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
|
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
|
||||||
import org.apache.hadoop.hbase.executor.ExecutorService;
|
import org.apache.hadoop.hbase.executor.ExecutorService;
|
||||||
|
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||||
import org.apache.hadoop.hbase.master.MasterServices;
|
import org.apache.hadoop.hbase.master.MasterServices;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.JVMClusterUtil;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
@ -81,6 +84,15 @@ public class TestAdmin {
|
||||||
this.admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
|
this.admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHBaseFsck() throws IOException {
|
||||||
|
HBaseFsck fsck =
|
||||||
|
new HBaseFsck(TEST_UTIL.getMiniHBaseCluster().getConfiguration());
|
||||||
|
fsck.displayFullReport();
|
||||||
|
int result = fsck.doWork();
|
||||||
|
assertEquals(0, result);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateTable() throws IOException {
|
public void testCreateTable() throws IOException {
|
||||||
HTableDescriptor [] tables = admin.listTables();
|
HTableDescriptor [] tables = admin.listTables();
|
||||||
|
|
Loading…
Reference in New Issue