HBASE-8627 HBCK can not fix meta not assigned issue

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1511081 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
anoopsamjohn 2013-08-06 19:18:51 +00:00
parent 23cd5cf78b
commit 2581ebb0b2
2 changed files with 104 additions and 52 deletions

View File

@ -390,17 +390,30 @@ public class HBaseFsck extends Configured implements Tool {
InterruptedException {
clearState();
// get regions according to what is online on each RegionServer
loadDeployedRegions();
// check whether .META. is deployed and online
if (!recordMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Fatal error: unable to get .META. region location. Exiting...");
return -2;
}
// Check if .META. is found only once and in the right place
if (!checkMetaRegion()) {
String errorMsg = ".META. table is not consistent. ";
if (shouldFixAssignments()) {
errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state.";
} else {
errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency.";
}
errors.reportError(errorMsg + " Exiting...");
return -2;
}
// Not going with further consistency check for tables when META itself is not consistent.
LOG.info("Loading regionsinfo from the .META. table");
boolean success = loadMetaEntries();
if (!success) return -1;
// Check if .META. is found only once and in the right place
if (!checkMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Encountered fatal error. Exiting...");
return -2;
}
// Empty cells in .META.?
reportEmptyMetaCells();
@ -414,9 +427,6 @@ public class HBaseFsck extends Configured implements Tool {
reportTablesInFlux();
}
// get regions according to what is online on each RegionServer
loadDeployedRegions();
// load regiondirs and regioninfos from HDFS
if (shouldCheckHdfs()) {
loadHdfsRegionDirs();
@ -1334,10 +1344,13 @@ public class HBaseFsck extends Configured implements Tool {
} catch (KeeperException e) {
throw new IOException(e);
}
MetaEntry m =
new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
HbckInfo hbInfo = new HbckInfo(m);
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo);
MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
if (hbckInfo == null) {
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
} else {
hbckInfo.metaEntry = m;
}
return true;
}
@ -2492,45 +2505,36 @@ public class HBaseFsck extends Configured implements Tool {
* @throws KeeperException
* @throws InterruptedException
*/
boolean checkMetaRegion()
throws IOException, KeeperException, InterruptedException {
List <HbckInfo> metaRegions = Lists.newArrayList();
boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
List<HbckInfo> metaRegions = Lists.newArrayList();
for (HbckInfo value : regionInfoMap.values()) {
if (value.metaEntry.isMetaRegion()) {
if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
metaRegions.add(value);
}
}
// If something is wrong
if (metaRegions.size() != 1) {
HRegionLocation rootLocation = connection.locateRegion(
HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
HbckInfo root =
regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
// If there is no region holding .META.
if (metaRegions.size() == 0) {
// There will be always one entry in regionInfoMap corresponding to .META.
// Check the deployed servers. It should be exactly one server.
HbckInfo metaHbckInfo = metaRegions.get(0);
List<ServerName> servers = metaHbckInfo.deployedOn;
if (servers.size() != 1) {
if (servers.size() == 0) {
errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
// try to fix it (treat it as unassigned region)
HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
}
}
// If there are more than one regions pretending to hold the .META.
else if (metaRegions.size() > 1) {
errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
} else if (servers.size() > 1) {
errors
.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
// try fix it (treat is a dupe assignment)
List <ServerName> deployedOn = Lists.newArrayList();
for (HbckInfo mRegion : metaRegions) {
deployedOn.add(mRegion.metaEntry.regionServer);
}
HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
}
}
// rerun hbck with hopefully fixed META
@ -2545,15 +2549,6 @@ public class HBaseFsck extends Configured implements Tool {
* @throws IOException if an error is encountered
*/
boolean loadMetaEntries() throws IOException {
// get a list of all regions from the master. This involves
// scanning the META table
if (!recordMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Fatal error: unable to get root region location. Exiting...");
return false;
}
MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
int countRecord = 1;
@ -2587,9 +2582,12 @@ public class HBaseFsck extends Configured implements Tool {
}
PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
HbckInfo hbInfo = new HbckInfo(m);
HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
if (previous != null) {
HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
if (previous == null) {
regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
} else if (previous.metaEntry == null) {
previous.metaEntry = m;
} else {
throw new IOException("Two entries in META are same " + previous);
}

View File

@ -60,15 +60,16 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.io.hfile.TestHFile;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
@ -231,7 +232,9 @@ public class TestHBaseFsck {
HRegionInfo hri) throws IOException, InterruptedException {
try {
HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
if (!hri.isMetaTable()) {
admin.offline(hri.getRegionName());
}
} catch (IOException ioe) {
LOG.warn("Got exception when attempting to offline region "
+ Bytes.toString(hri.getRegionName()), ioe);
@ -2000,6 +2003,57 @@ public class TestHBaseFsck {
writeLock.release(); // release for clean state
}
/**
 * Verifies that hbck notices an undeployed .META. region and that a run with fixing enabled
 * brings the cluster back to a state in which a later run reports no errors.
 */
@Test
public void testMetaOffline() throws Exception {
  // Baseline: nothing should be reported before .META. is touched.
  HBaseFsck baselineRun = doFsck(conf, false);
  assertNoErrors(baselineRun);

  // Undeploy .META. only: unassign=true, hdfs=false, regionInfoOnly=false.
  deleteMetaRegion(conf, true, false, false);

  // ERROR_CODE.UNKNOWN shows up next to NO_META_REGION because hbck also calls reportError
  // with a plain message describing the .META. inconsistency and whether it will be fixed.
  HBaseFsck detectRun = doFsck(conf, false);
  assertErrors(detectRun,
      new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });

  // Second pass is invoked with 'true' (presumably the fix flag); it is still expected to
  // report the same two codes.
  HBaseFsck repairRun = doFsck(conf, true);
  assertErrors(repairRun,
      new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });

  // A final pass with 'false' must come back clean.
  HBaseFsck verifyRun = doFsck(conf, false);
  assertNoErrors(verifyRun);
}
/**
 * Damages the .META. region in up to three independent ways so that hbck has something to
 * detect and repair.
 *
 * @param conf configuration used to locate .META., build the admin client and resolve the
 *          HBase root directory
 * @param unassign if true, undeploy the .META. region from the server currently hosting it
 * @param hdfs if true, recursively delete the .META. region directory from the filesystem
 * @param regionInfoOnly if true, delete only the region info file inside the region directory
 * @throws IOException if locating .META., closing the admin client or a filesystem call fails
 * @throws InterruptedException propagated from {@code undeployRegion}
 */
private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
    boolean regionInfoOnly) throws IOException, InterruptedException {
  HConnection connection = HConnectionManager.getConnection(conf);
  HRegionLocation metaLocation = connection.locateRegion(HConstants.META_TABLE_NAME,
    HConstants.EMPTY_START_ROW);
  ServerName hsa = new ServerName(metaLocation.getHostnamePort(), 0L);
  HRegionInfo hri = metaLocation.getRegionInfo();

  if (unassign) {
    LOG.info("Undeploying meta region " + hri + " from server " + hsa);
    // Own the admin client locally so it is always released; previously the instance
    // created inline for this call was never closed.
    HBaseAdmin metaAdmin = new HBaseAdmin(conf);
    try {
      undeployRegion(metaAdmin, hsa, hri);
    } finally {
      metaAdmin.close();
    }
  }

  if (regionInfoOnly) {
    LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
      hri.getEncodedName());
    Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
    fs.delete(hriPath, true);
  }

  if (hdfs) {
    LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
      hri.getEncodedName());
    // Directory listing is dumped before and after the delete for debugging.
    HBaseFsck.debugLsr(conf, p);
    boolean success = fs.delete(p, true);
    LOG.info("Deleted " + p + " sucessfully? " + success);
    HBaseFsck.debugLsr(conf, p);
  }
}
// JUnit rule field; JUnit requires @Rule fields to be public.
// NOTE(review): presumably org.junit.rules.TestName, which exposes the name of the running
// test method - the import is not visible in this excerpt, confirm.
@org.junit.Rule
public TestName name = new TestName();
}