HBASE-8627 HBCK can not fix meta not assigned issue
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1511081 13f79535-47bb-0310-9956-ffa450edef68
parent 23cd5cf78b
commit 2581ebb0b2
@@ -390,17 +390,30 @@ public class HBaseFsck extends Configured implements Tool {
InterruptedException {
clearState();

// get regions according to what is online on each RegionServer
loadDeployedRegions();
// check whether .META. is deployed and online
if (!recordMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Fatal error: unable to get .META. region location. Exiting...");
return -2;
}
// Check if .META. is found only once and in the right place
if (!checkMetaRegion()) {
String errorMsg = ".META. table is not consistent. ";
if (shouldFixAssignments()) {
errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state.";
} else {
errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency.";
}
errors.reportError(errorMsg + " Exiting...");
return -2;
}
// Not going with further consistency check for tables when META itself is not consistent.
LOG.info("Loading regionsinfo from the .META. table");
boolean success = loadMetaEntries();
if (!success) return -1;

// Check if .META. is found only once and in the right place
if (!checkMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Encountered fatal error. Exiting...");
return -2;
}

// Empty cells in .META.?
reportEmptyMetaCells();
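
The hunk above reorders HBCK's online consistency repair so that the .META. location and consistency are verified before any meta entries are loaded, and an inconsistent .META. stops the run early with a hint about fix options. The following sketch is not part of the patch; it is a minimal, self-contained Java illustration of that gating order, in which the boolean helpers are hypothetical stand-ins for HBCK's real recordMetaRegion(), checkMetaRegion(), loadMetaEntries() and shouldFixAssignments().

public class MetaFirstCheckSketch {
  // Hypothetical stand-ins for the real HBCK steps; each reports success or failure.
  static boolean recordMetaRegion()     { return true; }
  static boolean checkMetaRegion()      { return true; }
  static boolean loadMetaEntries()      { return true; }
  static boolean shouldFixAssignments() { return false; }

  // Mirrors the new ordering: fail fast with -2 when .META. cannot be located or is
  // inconsistent, return -1 when the meta scan itself fails, and only then move on to
  // the table-level consistency checks.
  static int onlineConsistencyRepair() {
    if (!recordMetaRegion()) {
      System.err.println("Fatal error: unable to get .META. region location. Exiting...");
      return -2;
    }
    if (!checkMetaRegion()) {
      String msg = ".META. table is not consistent. ";
      msg += shouldFixAssignments()
          ? "HBCK will try fixing it. Rerun once .META. is back to consistent state."
          : "Run HBCK with proper fix options to fix .META. inconsistency.";
      System.err.println(msg + " Exiting...");
      return -2;
    }
    if (!loadMetaEntries()) {
      return -1;
    }
    return 0; // continue with the remaining consistency checks
  }

  public static void main(String[] args) {
    System.out.println("exit code: " + onlineConsistencyRepair());
  }
}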
@@ -414,9 +427,6 @@ public class HBaseFsck extends Configured implements Tool {
reportTablesInFlux();
}

// get regions according to what is online on each RegionServer
loadDeployedRegions();

// load regiondirs and regioninfos from HDFS
if (shouldCheckHdfs()) {
loadHdfsRegionDirs();
@@ -1334,10 +1344,13 @@ public class HBaseFsck extends Configured implements Tool {
} catch (KeeperException e) {
throw new IOException(e);
}
MetaEntry m =
new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
HbckInfo hbInfo = new HbckInfo(m);
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo);
MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
if (hbckInfo == null) {
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
} else {
hbckInfo.metaEntry = m;
}
return true;
}
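
The change above makes recordMetaRegion() merge the .META. MetaEntry into whatever HbckInfo the earlier RegionServer scan may already have registered for that region, instead of replacing the whole record and losing its deployment information. As an illustration only (not part of the patch), here is a self-contained sketch of that merge-or-insert pattern on a plain HashMap; the nested MetaEntry and HbckInfo classes are simplified stand-ins for HBCK's internal types.

import java.util.HashMap;
import java.util.Map;

public class MergeMetaEntryExample {
  // Simplified stand-in for HBCK's MetaEntry: just remember which server .META. names.
  static class MetaEntry {
    final String regionServer;
    MetaEntry(String regionServer) { this.regionServer = regionServer; }
  }

  // Simplified stand-in for HbckInfo: one view from .META., one from the RS scan.
  static class HbckInfo {
    MetaEntry metaEntry;   // what the meta lookup says (may be attached later)
    String deployedOn;     // what the RegionServer scan observed
    HbckInfo(MetaEntry m) { this.metaEntry = m; }
  }

  static final Map<String, HbckInfo> regionInfoMap = new HashMap<String, HbckInfo>();

  // Merge-or-insert: keep the deployment info gathered earlier and only attach the
  // meta entry, rather than overwriting the record as the old code did.
  static void recordMetaRegion(String encodedName, MetaEntry m) {
    HbckInfo hbckInfo = regionInfoMap.get(encodedName);
    if (hbckInfo == null) {
      regionInfoMap.put(encodedName, new HbckInfo(m));
    } else {
      hbckInfo.metaEntry = m;   // preserves deployedOn from the RS scan
    }
  }

  public static void main(String[] args) {
    // The RegionServer scan already saw .META. deployed somewhere.
    HbckInfo fromRsScan = new HbckInfo(null);
    fromRsScan.deployedOn = "rs1.example.com,60020";
    regionInfoMap.put("meta-encoded-name", fromRsScan);

    // recordMetaRegion() now enriches that record instead of clobbering it.
    recordMetaRegion("meta-encoded-name", new MetaEntry("rs1.example.com,60020"));
    System.out.println("deployedOn preserved: " + regionInfoMap.get("meta-encoded-name").deployedOn);
  }
}

Keeping a single HbckInfo per encoded region name is what lets the reworked checkMetaRegion() below compare the view recorded in .META. against the list of servers the region is actually deployed on.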
@@ -2492,45 +2505,36 @@ public class HBaseFsck extends Configured implements Tool {
* @throws KeeperException
* @throws InterruptedException
*/
boolean checkMetaRegion()
throws IOException, KeeperException, InterruptedException {
List <HbckInfo> metaRegions = Lists.newArrayList();
boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
List<HbckInfo> metaRegions = Lists.newArrayList();
for (HbckInfo value : regionInfoMap.values()) {
if (value.metaEntry.isMetaRegion()) {
if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
metaRegions.add(value);
}
}

// If something is wrong
if (metaRegions.size() != 1) {
HRegionLocation rootLocation = connection.locateRegion(
HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
HbckInfo root =
regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());

// If there is no region holding .META.
if (metaRegions.size() == 0) {
// There will be always one entry in regionInfoMap corresponding to .META.
// Check the deployed servers. It should be exactly one server.
HbckInfo metaHbckInfo = metaRegions.get(0);
List<ServerName> servers = metaHbckInfo.deployedOn;
if (servers.size() != 1) {
if (servers.size() == 0) {
errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
// try to fix it (treat it as unassigned region)
HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
}
}
// If there are more than one regions pretending to hold the .META.
else if (metaRegions.size() > 1) {
errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
} else if (servers.size() > 1) {
errors
.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
// try fix it (treat is a dupe assignment)
List <ServerName> deployedOn = Lists.newArrayList();
for (HbckInfo mRegion : metaRegions) {
deployedOn.add(mRegion.metaEntry.regionServer);
}
HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
}
}
// rerun hbck with hopefully fixed META
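
With this hunk, checkMetaRegion() first guards against HbckInfo records that carry no meta entry at all (the "meta not assigned" case from the JIRA title) and then branches on how many servers actually report .META. deployed: zero deployments are repaired as an unassigned region, more than one as a duplicate assignment, and either way HBCK schedules a rerun. The sketch below is not the patch itself; it is a stand-alone skeleton of that branching in which reportError, fixUnassigned and fixMultiAssignment are hypothetical stand-ins for HBCK's error reporter and the HBaseFsckRepair helpers.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class CheckMetaRegionSketch {
  // Hypothetical stand-ins for HBCK's reporter and repair helpers.
  static void reportError(String code, String msg) { System.out.println(code + ": " + msg); }
  static void fixUnassigned(String region) { System.out.println("assigning " + region); }
  static void fixMultiAssignment(String region, List<String> servers) {
    System.out.println("closing duplicate deployments of " + region + " on " + servers);
  }

  // Returns true only when exactly one server hosts .META.; otherwise reports the
  // problem, optionally repairs it, and returns false so the caller reruns hbck.
  static boolean checkMetaRegion(List<String> serversHostingMeta, boolean fixAssignments) {
    if (serversHostingMeta.size() == 1) {
      return true;                               // healthy: exactly one deployment
    }
    if (serversHostingMeta.isEmpty()) {
      reportError("NO_META_REGION", ".META. is not found on any region.");
      if (fixAssignments) {
        fixUnassigned(".META.,,1");              // treat it as an unassigned region
      }
    } else {
      reportError("MULTI_META_REGION", ".META. is found on more than one region.");
      if (fixAssignments) {
        fixMultiAssignment(".META.,,1", serversHostingMeta);  // treat it as a dupe assignment
      }
    }
    return false;                                // rerun hbck with hopefully fixed META
  }

  public static void main(String[] args) {
    checkMetaRegion(Collections.<String>emptyList(), true);          // unassigned case
    checkMetaRegion(Arrays.asList("rs1:60020", "rs2:60020"), true);  // duplicate case
  }
}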
@@ -2545,15 +2549,6 @@ public class HBaseFsck extends Configured implements Tool {
* @throws IOException if an error is encountered
*/
boolean loadMetaEntries() throws IOException {

// get a list of all regions from the master. This involves
// scanning the META table
if (!recordMetaRegion()) {
// Will remove later if we can fix it
errors.reportError("Fatal error: unable to get root region location. Exiting...");
return false;
}

MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
int countRecord = 1;
@@ -2587,9 +2582,12 @@ public class HBaseFsck extends Configured implements Tool {
}
PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
HbckInfo hbInfo = new HbckInfo(m);
HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
if (previous != null) {
HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
if (previous == null) {
regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
} else if (previous.metaEntry == null) {
previous.metaEntry = m;
} else {
throw new IOException("Two entries in META are same " + previous);
}
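
The meta-scanner visitor in this hunk applies the same merge idea while reading .META. itself, with one extra guard: if a record already carries a meta entry, two .META. rows describe the same region and the scan aborts. The following compact, self-contained sketch (not part of the patch) shows that three-way branch; the nested Record class is a simplified stand-in for HbckInfo.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class MetaScanMergeSketch {
  // Simplified stand-in for HbckInfo: the meta entry is reduced to a server name.
  static class Record {
    String metaEntry;   // null until a .META. row has been seen for this region
    Record(String metaEntry) { this.metaEntry = metaEntry; }
  }

  static final Map<String, Record> regionInfoMap = new HashMap<String, Record>();

  // Called once per row read from .META.; mirrors the branch added in the hunk above.
  static void visit(String encodedName, String metaEntry) throws IOException {
    Record previous = regionInfoMap.get(encodedName);
    if (previous == null) {
      regionInfoMap.put(encodedName, new Record(metaEntry));   // first sighting
    } else if (previous.metaEntry == null) {
      previous.metaEntry = metaEntry;                          // known only from the RS scan
    } else {
      throw new IOException("Two entries in META are same " + encodedName);
    }
  }

  public static void main(String[] args) throws IOException {
    regionInfoMap.put("abc123", new Record(null));   // already known from the RS scan
    visit("abc123", "rs1:60020");                    // attaches the meta entry
    try {
      visit("abc123", "rs2:60020");                  // a genuine duplicate row
    } catch (IOException expected) {
      System.out.println("caught: " + expected.getMessage());
    }
  }
}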
@@ -60,15 +60,16 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.io.hfile.TestHFile;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
@@ -231,7 +232,9 @@ public class TestHBaseFsck {
HRegionInfo hri) throws IOException, InterruptedException {
try {
HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
if (!hri.isMetaTable()) {
admin.offline(hri.getRegionName());
}
} catch (IOException ioe) {
LOG.warn("Got exception when attempting to offline region "
+ Bytes.toString(hri.getRegionName()), ioe);
@@ -2000,6 +2003,57 @@ public class TestHBaseFsck {
writeLock.release(); // release for clean state
}

@Test
public void testMetaOffline() throws Exception {
// check no errors
HBaseFsck hbck = doFsck(conf, false);
assertNoErrors(hbck);
deleteMetaRegion(conf, true, false, false);
hbck = doFsck(conf, false);
// ERROR_CODE.UNKNOWN is coming because we reportError with a message for the .META.
// inconsistency and whether we will be fixing it or not.
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
hbck = doFsck(conf, true);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
hbck = doFsck(conf, false);
assertNoErrors(hbck);
}

private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
boolean regionInfoOnly) throws IOException, InterruptedException {
HConnection connection = HConnectionManager.getConnection(conf);
HRegionLocation metaLocation = connection.locateRegion(HConstants.META_TABLE_NAME,
HConstants.EMPTY_START_ROW);
ServerName hsa = new ServerName(metaLocation.getHostnamePort(), 0L);
HRegionInfo hri = metaLocation.getRegionInfo();
if (unassign) {
LOG.info("Undeploying meta region " + hri + " from server " + hsa);
undeployRegion(new HBaseAdmin(conf), hsa, hri);
}

if (regionInfoOnly) {
LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
Path rootDir = FSUtils.getRootDir(conf);
FileSystem fs = rootDir.getFileSystem(conf);
Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
hri.getEncodedName());
Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
fs.delete(hriPath, true);
}

if (hdfs) {
LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
Path rootDir = FSUtils.getRootDir(conf);
FileSystem fs = rootDir.getFileSystem(conf);
Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
hri.getEncodedName());
HBaseFsck.debugLsr(conf, p);
boolean success = fs.delete(p, true);
LOG.info("Deleted " + p + " sucessfully? " + success);
HBaseFsck.debugLsr(conf, p);
}
}

@org.junit.Rule
public TestName name = new TestName();
}