HBASE-8627 HBCK can not fix meta not assigned issue
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1511081 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
23cd5cf78b
commit
2581ebb0b2
|
@ -390,17 +390,30 @@ public class HBaseFsck extends Configured implements Tool {
|
||||||
InterruptedException {
|
InterruptedException {
|
||||||
clearState();
|
clearState();
|
||||||
|
|
||||||
|
// get regions according to what is online on each RegionServer
|
||||||
|
loadDeployedRegions();
|
||||||
|
// check whether .META. is deployed and online
|
||||||
|
if (!recordMetaRegion()) {
|
||||||
|
// Will remove later if we can fix it
|
||||||
|
errors.reportError("Fatal error: unable to get .META. region location. Exiting...");
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
// Check if .META. is found only once and in the right place
|
||||||
|
if (!checkMetaRegion()) {
|
||||||
|
String errorMsg = ".META. table is not consistent. ";
|
||||||
|
if (shouldFixAssignments()) {
|
||||||
|
errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state.";
|
||||||
|
} else {
|
||||||
|
errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency.";
|
||||||
|
}
|
||||||
|
errors.reportError(errorMsg + " Exiting...");
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
// Do not proceed with further table consistency checks when META itself is not consistent.
|
||||||
LOG.info("Loading regionsinfo from the .META. table");
|
LOG.info("Loading regionsinfo from the .META. table");
|
||||||
boolean success = loadMetaEntries();
|
boolean success = loadMetaEntries();
|
||||||
if (!success) return -1;
|
if (!success) return -1;
|
||||||
|
|
||||||
// Check if .META. is found only once and in the right place
|
|
||||||
if (!checkMetaRegion()) {
|
|
||||||
// Will remove later if we can fix it
|
|
||||||
errors.reportError("Encountered fatal error. Exiting...");
|
|
||||||
return -2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Empty cells in .META.?
|
// Empty cells in .META.?
|
||||||
reportEmptyMetaCells();
|
reportEmptyMetaCells();
|
||||||
|
|
||||||
|
@ -414,9 +427,6 @@ public class HBaseFsck extends Configured implements Tool {
|
||||||
reportTablesInFlux();
|
reportTablesInFlux();
|
||||||
}
|
}
|
||||||
|
|
||||||
// get regions according to what is online on each RegionServer
|
|
||||||
loadDeployedRegions();
|
|
||||||
|
|
||||||
// load regiondirs and regioninfos from HDFS
|
// load regiondirs and regioninfos from HDFS
|
||||||
if (shouldCheckHdfs()) {
|
if (shouldCheckHdfs()) {
|
||||||
loadHdfsRegionDirs();
|
loadHdfsRegionDirs();
|
||||||
|
@ -1334,10 +1344,13 @@ public class HBaseFsck extends Configured implements Tool {
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
throw new IOException(e);
|
throw new IOException(e);
|
||||||
}
|
}
|
||||||
MetaEntry m =
|
MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
|
||||||
new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
|
HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
|
||||||
HbckInfo hbInfo = new HbckInfo(m);
|
if (hbckInfo == null) {
|
||||||
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo);
|
regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
|
||||||
|
} else {
|
||||||
|
hbckInfo.metaEntry = m;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2492,45 +2505,36 @@ public class HBaseFsck extends Configured implements Tool {
|
||||||
* @throws KeeperException
|
* @throws KeeperException
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
*/
|
*/
|
||||||
boolean checkMetaRegion()
|
boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
|
||||||
throws IOException, KeeperException, InterruptedException {
|
List<HbckInfo> metaRegions = Lists.newArrayList();
|
||||||
List <HbckInfo> metaRegions = Lists.newArrayList();
|
|
||||||
for (HbckInfo value : regionInfoMap.values()) {
|
for (HbckInfo value : regionInfoMap.values()) {
|
||||||
if (value.metaEntry.isMetaRegion()) {
|
if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
|
||||||
metaRegions.add(value);
|
metaRegions.add(value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If something is wrong
|
// There will be always one entry in regionInfoMap corresponding to .META.
|
||||||
if (metaRegions.size() != 1) {
|
// Check the deployed servers. It should be exactly one server.
|
||||||
HRegionLocation rootLocation = connection.locateRegion(
|
HbckInfo metaHbckInfo = metaRegions.get(0);
|
||||||
HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
|
List<ServerName> servers = metaHbckInfo.deployedOn;
|
||||||
HbckInfo root =
|
if (servers.size() != 1) {
|
||||||
regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
|
if (servers.size() == 0) {
|
||||||
|
|
||||||
// If there is no region holding .META.
|
|
||||||
if (metaRegions.size() == 0) {
|
|
||||||
errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
|
errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
|
||||||
if (shouldFixAssignments()) {
|
if (shouldFixAssignments()) {
|
||||||
errors.print("Trying to fix a problem with .META...");
|
errors.print("Trying to fix a problem with .META...");
|
||||||
setShouldRerun();
|
setShouldRerun();
|
||||||
// try to fix it (treat it as unassigned region)
|
// try to fix it (treat it as unassigned region)
|
||||||
HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
|
HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
|
||||||
HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
|
HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
|
||||||
}
|
}
|
||||||
}
|
} else if (servers.size() > 1) {
|
||||||
// If more than one region is pretending to hold .META.
|
errors
|
||||||
else if (metaRegions.size() > 1) {
|
.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
|
||||||
errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
|
|
||||||
if (shouldFixAssignments()) {
|
if (shouldFixAssignments()) {
|
||||||
errors.print("Trying to fix a problem with .META...");
|
errors.print("Trying to fix a problem with .META...");
|
||||||
setShouldRerun();
|
setShouldRerun();
|
||||||
// try to fix it (treat it as a dupe assignment)
|
// try to fix it (treat it as a dupe assignment)
|
||||||
List <ServerName> deployedOn = Lists.newArrayList();
|
HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
|
||||||
for (HbckInfo mRegion : metaRegions) {
|
|
||||||
deployedOn.add(mRegion.metaEntry.regionServer);
|
|
||||||
}
|
|
||||||
HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// rerun hbck with hopefully fixed META
|
// rerun hbck with hopefully fixed META
|
||||||
|
@ -2545,15 +2549,6 @@ public class HBaseFsck extends Configured implements Tool {
|
||||||
* @throws IOException if an error is encountered
|
* @throws IOException if an error is encountered
|
||||||
*/
|
*/
|
||||||
boolean loadMetaEntries() throws IOException {
|
boolean loadMetaEntries() throws IOException {
|
||||||
|
|
||||||
// get a list of all regions from the master. This involves
|
|
||||||
// scanning the META table
|
|
||||||
if (!recordMetaRegion()) {
|
|
||||||
// Will remove later if we can fix it
|
|
||||||
errors.reportError("Fatal error: unable to get root region location. Exiting...");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
|
MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
|
||||||
int countRecord = 1;
|
int countRecord = 1;
|
||||||
|
|
||||||
|
@ -2587,9 +2582,12 @@ public class HBaseFsck extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
|
PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
|
||||||
MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
|
MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
|
||||||
HbckInfo hbInfo = new HbckInfo(m);
|
HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
|
||||||
HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
|
if (previous == null) {
|
||||||
if (previous != null) {
|
regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
|
||||||
|
} else if (previous.metaEntry == null) {
|
||||||
|
previous.metaEntry = m;
|
||||||
|
} else {
|
||||||
throw new IOException("Two entries in META are same " + previous);
|
throw new IOException("Two entries in META are same " + previous);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -60,15 +60,16 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||||
import org.apache.hadoop.hbase.client.Delete;
|
import org.apache.hadoop.hbase.client.Delete;
|
||||||
|
import org.apache.hadoop.hbase.client.Durability;
|
||||||
import org.apache.hadoop.hbase.client.Get;
|
import org.apache.hadoop.hbase.client.Get;
|
||||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||||
import org.apache.hadoop.hbase.client.HConnection;
|
import org.apache.hadoop.hbase.client.HConnection;
|
||||||
|
import org.apache.hadoop.hbase.client.HConnectionManager;
|
||||||
import org.apache.hadoop.hbase.client.HTable;
|
import org.apache.hadoop.hbase.client.HTable;
|
||||||
import org.apache.hadoop.hbase.client.Put;
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
import org.apache.hadoop.hbase.client.Result;
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||||
import org.apache.hadoop.hbase.client.Scan;
|
import org.apache.hadoop.hbase.client.Scan;
|
||||||
import org.apache.hadoop.hbase.client.Durability;
|
|
||||||
import org.apache.hadoop.hbase.io.hfile.TestHFile;
|
import org.apache.hadoop.hbase.io.hfile.TestHFile;
|
||||||
import org.apache.hadoop.hbase.master.AssignmentManager;
|
import org.apache.hadoop.hbase.master.AssignmentManager;
|
||||||
import org.apache.hadoop.hbase.master.HMaster;
|
import org.apache.hadoop.hbase.master.HMaster;
|
||||||
|
@ -231,7 +232,9 @@ public class TestHBaseFsck {
|
||||||
HRegionInfo hri) throws IOException, InterruptedException {
|
HRegionInfo hri) throws IOException, InterruptedException {
|
||||||
try {
|
try {
|
||||||
HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
|
HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
|
||||||
|
if (!hri.isMetaTable()) {
|
||||||
admin.offline(hri.getRegionName());
|
admin.offline(hri.getRegionName());
|
||||||
|
}
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
LOG.warn("Got exception when attempting to offline region "
|
LOG.warn("Got exception when attempting to offline region "
|
||||||
+ Bytes.toString(hri.getRegionName()), ioe);
|
+ Bytes.toString(hri.getRegionName()), ioe);
|
||||||
|
@ -2000,6 +2003,57 @@ public class TestHBaseFsck {
|
||||||
writeLock.release(); // release for clean state
|
writeLock.release(); // release for clean state
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMetaOffline() throws Exception {
|
||||||
|
// check no errors
|
||||||
|
HBaseFsck hbck = doFsck(conf, false);
|
||||||
|
assertNoErrors(hbck);
|
||||||
|
deleteMetaRegion(conf, true, false, false);
|
||||||
|
hbck = doFsck(conf, false);
|
||||||
|
// ERROR_CODE.UNKNOWN is reported because hbck calls reportError with a message describing the .META.
|
||||||
|
// inconsistency and whether or not it will be fixed.
|
||||||
|
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
|
||||||
|
hbck = doFsck(conf, true);
|
||||||
|
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
|
||||||
|
hbck = doFsck(conf, false);
|
||||||
|
assertNoErrors(hbck);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
|
||||||
|
boolean regionInfoOnly) throws IOException, InterruptedException {
|
||||||
|
HConnection connection = HConnectionManager.getConnection(conf);
|
||||||
|
HRegionLocation metaLocation = connection.locateRegion(HConstants.META_TABLE_NAME,
|
||||||
|
HConstants.EMPTY_START_ROW);
|
||||||
|
ServerName hsa = new ServerName(metaLocation.getHostnamePort(), 0L);
|
||||||
|
HRegionInfo hri = metaLocation.getRegionInfo();
|
||||||
|
if (unassign) {
|
||||||
|
LOG.info("Undeploying meta region " + hri + " from server " + hsa);
|
||||||
|
undeployRegion(new HBaseAdmin(conf), hsa, hri);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (regionInfoOnly) {
|
||||||
|
LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
|
||||||
|
Path rootDir = FSUtils.getRootDir(conf);
|
||||||
|
FileSystem fs = rootDir.getFileSystem(conf);
|
||||||
|
Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
|
||||||
|
hri.getEncodedName());
|
||||||
|
Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
|
||||||
|
fs.delete(hriPath, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hdfs) {
|
||||||
|
LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
|
||||||
|
Path rootDir = FSUtils.getRootDir(conf);
|
||||||
|
FileSystem fs = rootDir.getFileSystem(conf);
|
||||||
|
Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
|
||||||
|
hri.getEncodedName());
|
||||||
|
HBaseFsck.debugLsr(conf, p);
|
||||||
|
boolean success = fs.delete(p, true);
|
||||||
|
LOG.info("Deleted " + p + " sucessfully? " + success);
|
||||||
|
HBaseFsck.debugLsr(conf, p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@org.junit.Rule
|
@org.junit.Rule
|
||||||
public TestName name = new TestName();
|
public TestName name = new TestName();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue