HBASE-8627 HBCK can not fix meta not assigned issue

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1511081 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
anoopsamjohn 2013-08-06 19:18:51 +00:00
parent 23cd5cf78b
commit 2581ebb0b2
2 changed files with 104 additions and 52 deletions

View File

@ -390,17 +390,30 @@ public class HBaseFsck extends Configured implements Tool {
InterruptedException { InterruptedException {
clearState(); clearState();
// get regions according to what is online on each RegionServer
loadDeployedRegions();
// check whether .META. is deployed and online
if (!recordMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Fatal error: unable to get .META. region location. Exiting...");
return -2;
}
// Check if .META. is found only once and in the right place
if (!checkMetaRegion()) {
String errorMsg = ".META. table is not consistent. ";
if (shouldFixAssignments()) {
errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state.";
} else {
errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency.";
}
errors.reportError(errorMsg + " Exiting...");
return -2;
}
// Not going with further consistency check for tables when META itself is not consistent.
LOG.info("Loading regionsinfo from the .META. table"); LOG.info("Loading regionsinfo from the .META. table");
boolean success = loadMetaEntries(); boolean success = loadMetaEntries();
if (!success) return -1; if (!success) return -1;
// Check if .META. is found only once and in the right place
if (!checkMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Encountered fatal error. Exiting...");
return -2;
}
// Empty cells in .META.? // Empty cells in .META.?
reportEmptyMetaCells(); reportEmptyMetaCells();
@ -414,9 +427,6 @@ public class HBaseFsck extends Configured implements Tool {
reportTablesInFlux(); reportTablesInFlux();
} }
// get regions according to what is online on each RegionServer
loadDeployedRegions();
// load regiondirs and regioninfos from HDFS // load regiondirs and regioninfos from HDFS
if (shouldCheckHdfs()) { if (shouldCheckHdfs()) {
loadHdfsRegionDirs(); loadHdfsRegionDirs();
@ -1334,10 +1344,13 @@ public class HBaseFsck extends Configured implements Tool {
} catch (KeeperException e) { } catch (KeeperException e) {
throw new IOException(e); throw new IOException(e);
} }
MetaEntry m = MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis()); HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
HbckInfo hbInfo = new HbckInfo(m); if (hbckInfo == null) {
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo); regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
} else {
hbckInfo.metaEntry = m;
}
return true; return true;
} }
@ -2492,45 +2505,36 @@ public class HBaseFsck extends Configured implements Tool {
* @throws KeeperException * @throws KeeperException
* @throws InterruptedException * @throws InterruptedException
*/ */
boolean checkMetaRegion() boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
throws IOException, KeeperException, InterruptedException { List<HbckInfo> metaRegions = Lists.newArrayList();
List <HbckInfo> metaRegions = Lists.newArrayList();
for (HbckInfo value : regionInfoMap.values()) { for (HbckInfo value : regionInfoMap.values()) {
if (value.metaEntry.isMetaRegion()) { if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
metaRegions.add(value); metaRegions.add(value);
} }
} }
// If something is wrong // There will be always one entry in regionInfoMap corresponding to .META.
if (metaRegions.size() != 1) { // Check the deployed servers. It should be exactly one server.
HRegionLocation rootLocation = connection.locateRegion( HbckInfo metaHbckInfo = metaRegions.get(0);
HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW); List<ServerName> servers = metaHbckInfo.deployedOn;
HbckInfo root = if (servers.size() != 1) {
regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName()); if (servers.size() == 0) {
// If there is no region holding .META.
if (metaRegions.size() == 0) {
errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region."); errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
if (shouldFixAssignments()) { if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META..."); errors.print("Trying to fix a problem with .META...");
setShouldRerun(); setShouldRerun();
// try to fix it (treat it as unassigned region) // try to fix it (treat it as unassigned region)
HBaseFsckRepair.fixUnassigned(admin, root.metaEntry); HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI()); HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
} }
} } else if (servers.size() > 1) {
// If there are more than one regions pretending to hold the .META. errors
else if (metaRegions.size() > 1) { .reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
if (shouldFixAssignments()) { if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META..."); errors.print("Trying to fix a problem with .META...");
setShouldRerun(); setShouldRerun();
// try fix it (treat is a dupe assignment) // try fix it (treat is a dupe assignment)
List <ServerName> deployedOn = Lists.newArrayList(); HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
for (HbckInfo mRegion : metaRegions) {
deployedOn.add(mRegion.metaEntry.regionServer);
}
HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
} }
} }
// rerun hbck with hopefully fixed META // rerun hbck with hopefully fixed META
@ -2545,15 +2549,6 @@ public class HBaseFsck extends Configured implements Tool {
* @throws IOException if an error is encountered * @throws IOException if an error is encountered
*/ */
boolean loadMetaEntries() throws IOException { boolean loadMetaEntries() throws IOException {
// get a list of all regions from the master. This involves
// scanning the META table
if (!recordMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Fatal error: unable to get root region location. Exiting...");
return false;
}
MetaScannerVisitor visitor = new MetaScannerVisitorBase() { MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
int countRecord = 1; int countRecord = 1;
@ -2587,9 +2582,12 @@ public class HBaseFsck extends Configured implements Tool {
} }
PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result); PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond()); MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
HbckInfo hbInfo = new HbckInfo(m); HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo); if (previous == null) {
if (previous != null) { regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
} else if (previous.metaEntry == null) {
previous.metaEntry = m;
} else {
throw new IOException("Two entries in META are same " + previous); throw new IOException("Two entries in META are same " + previous);
} }

View File

@ -60,15 +60,16 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.io.hfile.TestHFile; import org.apache.hadoop.hbase.io.hfile.TestHFile;
import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.HMaster;
@ -231,7 +232,9 @@ public class TestHBaseFsck {
HRegionInfo hri) throws IOException, InterruptedException { HRegionInfo hri) throws IOException, InterruptedException {
try { try {
HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri); HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
admin.offline(hri.getRegionName()); if (!hri.isMetaTable()) {
admin.offline(hri.getRegionName());
}
} catch (IOException ioe) { } catch (IOException ioe) {
LOG.warn("Got exception when attempting to offline region " LOG.warn("Got exception when attempting to offline region "
+ Bytes.toString(hri.getRegionName()), ioe); + Bytes.toString(hri.getRegionName()), ioe);
@ -2000,6 +2003,57 @@ public class TestHBaseFsck {
writeLock.release(); // release for clean state writeLock.release(); // release for clean state
} }
/**
 * Verifies that hbck reports, and then recovers from, a .META. region that has been
 * undeployed: a clean baseline run, two runs that must report the problem, and a final
 * run that must be clean again.
 */
@Test
public void testMetaOffline() throws Exception {
  // Baseline: nothing should be reported before .META. is touched.
  HBaseFsck baselineRun = doFsck(conf, false);
  assertNoErrors(baselineRun);

  // Only the "unassign" step is requested; the HDFS data is left alone.
  deleteMetaRegion(conf, true, false, false);

  // NO_META_REGION is the real finding. ERROR_CODE.UNKNOWN accompanies it because hbck
  // also calls reportError with a plain message describing the .META. inconsistency and
  // whether it is going to be fixed.
  HBaseFsck afterUndeploy = doFsck(conf, false);
  assertErrors(afterUndeploy,
      new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });

  // Second flag set to true (presumably the "fix" switch of doFsck -- confirm against the
  // helper); this run is still expected to report the same two errors.
  HBaseFsck secondRun = doFsck(conf, true);
  assertErrors(secondRun,
      new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });

  // A final pass with the flag off must come back clean.
  HBaseFsck finalRun = doFsck(conf, false);
  assertNoErrors(finalRun);
}
/**
 * Test helper that damages the .META. region in up to three independent ways, selected by
 * the boolean flags. The region is looked up through the connection obtained for
 * {@code conf}.
 *
 * @param conf configuration used for the connection, the admin client and the filesystem
 * @param unassign if true, undeploy the meta region from the server it was located on
 * @param hdfs if true, recursively delete the meta region's directory under the root dir
 * @param regionInfoOnly if true, delete only the region info file inside that directory
 * @throws IOException if locating the region, closing the admin client or a filesystem
 *           operation fails
 * @throws InterruptedException if undeploying the region is interrupted
 */
private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
    boolean regionInfoOnly) throws IOException, InterruptedException {
  // NOTE(review): the connection obtained here is not released by this helper; confirm
  // whether HConnectionManager expects a matching close() for it.
  HConnection connection = HConnectionManager.getConnection(conf);
  HRegionLocation metaLocation = connection.locateRegion(HConstants.META_TABLE_NAME,
      HConstants.EMPTY_START_ROW);
  // Start code 0 is used here because only host and port are taken from the location.
  ServerName hsa = new ServerName(metaLocation.getHostnamePort(), 0L);
  HRegionInfo hri = metaLocation.getRegionInfo();

  if (unassign) {
    LOG.info("Undeploying meta region " + hri + " from server " + hsa);
    // The admin client is created only for this call, so release it when done instead of
    // leaking it for the rest of the test run.
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      undeployRegion(admin, hsa, hri);
    } finally {
      admin.close();
    }
  }

  if (regionInfoOnly) {
    LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
        hri.getEncodedName());
    Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
    fs.delete(hriPath, true);
  }

  if (hdfs) {
    LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
        hri.getEncodedName());
    // List the directory before and after the delete so the test log shows what was removed.
    HBaseFsck.debugLsr(conf, p);
    boolean success = fs.delete(p, true);
    LOG.info("Deleted " + p + " sucessfully? " + success);
    HBaseFsck.debugLsr(conf, p);
  }
}
@org.junit.Rule @org.junit.Rule
public TestName name = new TestName(); public TestName name = new TestName();
} }