HBASE-9151 HBCK cannot fix the cluster when the meta server znode has been deleted; this can happen if all region servers are stopped and there are no logs to split.
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1553056 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b6c898a138
commit
8d7ed731a9
|
@ -1864,12 +1864,9 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
} else {
|
||||
if (region.isMetaRegion()) {
|
||||
try {
|
||||
if (i != maximumAttempts) {
|
||||
Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
|
||||
continue;
|
||||
}
|
||||
// TODO : Ensure HBCK fixes this
|
||||
LOG.error("Unable to determine a plan to assign hbase:meta even after repeated attempts. Run HBCK to fix this");
|
||||
Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
|
||||
if (i == maximumAttempts) i = 1;
|
||||
continue;
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Got exception while waiting for hbase:meta assignment");
|
||||
Thread.currentThread().interrupt();
|
||||
|
|
|
@ -411,11 +411,7 @@ public class HBaseFsck extends Configured {
|
|||
// get regions according to what is online on each RegionServer
|
||||
loadDeployedRegions();
|
||||
// check whether hbase:meta is deployed and online
|
||||
if (!recordMetaRegion()) {
|
||||
// Will remove later if we can fix it
|
||||
errors.reportError("Fatal error: unable to get hbase:meta region location. Exiting...");
|
||||
return -2;
|
||||
}
|
||||
recordMetaRegion();
|
||||
// Check if hbase:meta is found only once and in the right place
|
||||
if (!checkMetaRegion()) {
|
||||
String errorMsg = "hbase:meta table is not consistent. ";
|
||||
|
@ -2577,8 +2573,12 @@ public class HBaseFsck extends Configured {
|
|||
|
||||
// There is normally one entry in regionInfoMap corresponding to hbase:meta, but metaRegions can be empty.
|
||||
// Check the deployed servers. It should be exactly one server.
|
||||
HbckInfo metaHbckInfo = metaRegions.get(0);
|
||||
List<ServerName> servers = metaHbckInfo.deployedOn;
|
||||
List<ServerName> servers = new ArrayList<ServerName>();
|
||||
HbckInfo metaHbckInfo = null;
|
||||
if (!metaRegions.isEmpty()) {
|
||||
metaHbckInfo = metaRegions.get(0);
|
||||
servers = metaHbckInfo.deployedOn;
|
||||
}
|
||||
if (servers.size() != 1) {
|
||||
if (servers.size() == 0) {
|
||||
errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not found on any region.");
|
||||
|
@ -2586,8 +2586,8 @@ public class HBaseFsck extends Configured {
|
|||
errors.print("Trying to fix a problem with hbase:meta..");
|
||||
setShouldRerun();
|
||||
// try to fix it (treat it as unassigned region)
|
||||
HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
|
||||
HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
|
||||
HBaseFsckRepair.fixUnassigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
|
||||
HBaseFsckRepair.waitUntilAssigned(admin, HRegionInfo.FIRST_META_REGIONINFO);
|
||||
}
|
||||
} else if (servers.size() > 1) {
|
||||
errors
|
||||
|
|
|
@ -19,10 +19,10 @@
|
|||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -50,6 +50,7 @@ public class TestRSKilledWhenInitializing {
|
|||
private static final Log LOG = LogFactory.getLog(TestRSKilledWhenInitializing.class);
|
||||
|
||||
private static boolean masterActive = false;
|
||||
private static AtomicBoolean firstRS = new AtomicBoolean(true);
|
||||
|
||||
/**
|
||||
* Test verifies whether a region server is removed from the online servers list in master if it went
|
||||
|
@ -60,7 +61,8 @@ public class TestRSKilledWhenInitializing {
|
|||
public void testRSTermnationAfterRegisteringToMasterBeforeCreatingEphemeralNod() throws Exception {
|
||||
|
||||
final int NUM_MASTERS = 1;
|
||||
final int NUM_RS = 1;
|
||||
final int NUM_RS = 2;
|
||||
firstRS.set(true);
|
||||
// Create config to use for this cluster
|
||||
Configuration conf = HBaseConfiguration.create();
|
||||
|
||||
|
@ -86,21 +88,20 @@ public class TestRSKilledWhenInitializing {
|
|||
}
|
||||
masterActive = true;
|
||||
cluster.getRegionServers().get(0).start();
|
||||
cluster.getRegionServers().get(1).start();
|
||||
Thread.sleep(10000);
|
||||
List<ServerName> onlineServersList =
|
||||
master.getMaster().getServerManager().getOnlineServersList();
|
||||
while (!onlineServersList.isEmpty()) {
|
||||
while (onlineServersList.size() != 1) {
|
||||
Thread.sleep(100);
|
||||
onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
|
||||
}
|
||||
assertTrue(onlineServersList.isEmpty());
|
||||
master.getMaster().stop("stopping master");
|
||||
master.join();
|
||||
assertEquals(onlineServersList.size(), 1);
|
||||
cluster.shutdown();
|
||||
} finally {
|
||||
masterActive = false;
|
||||
TEST_UTIL.shutdownMiniZKCluster();
|
||||
TEST_UTIL.shutdownMiniDFSCluster();
|
||||
TEST_UTIL.cleanupTestDir();
|
||||
firstRS.set(true);
|
||||
TEST_UTIL.shutdownMiniCluster();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -112,19 +113,23 @@ public class TestRSKilledWhenInitializing {
|
|||
|
||||
@Override
|
||||
protected void handleReportForDutyResponse(RegionServerStartupResponse c) throws IOException {
|
||||
for (NameStringPair e : c.getMapEntriesList()) {
|
||||
String key = e.getName();
|
||||
// The hostname the master sees us as.
|
||||
if (key.equals(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)) {
|
||||
String hostnameFromMasterPOV = e.getValue();
|
||||
assertEquals(super.getRpcServer().getListenerAddress().getHostName(),
|
||||
hostnameFromMasterPOV);
|
||||
if (firstRS.getAndSet(false)) {
|
||||
for (NameStringPair e : c.getMapEntriesList()) {
|
||||
String key = e.getName();
|
||||
// The hostname the master sees us as.
|
||||
if (key.equals(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)) {
|
||||
String hostnameFromMasterPOV = e.getValue();
|
||||
assertEquals(super.getRpcServer().getListenerAddress().getHostName(),
|
||||
hostnameFromMasterPOV);
|
||||
}
|
||||
}
|
||||
while (!masterActive) {
|
||||
Threads.sleep(100);
|
||||
}
|
||||
super.kill();
|
||||
} else {
|
||||
super.handleReportForDutyResponse(c);
|
||||
}
|
||||
while (!masterActive) {
|
||||
Threads.sleep(100);
|
||||
}
|
||||
super.kill();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -93,6 +93,7 @@ import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
|
|||
import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
|
||||
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
|
||||
import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
|
||||
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -210,6 +211,28 @@ public class TestHBaseFsck {
|
|||
meta.close();
|
||||
}
|
||||
|
||||
@Test(timeout=180000)
|
||||
public void testFixAssignmentsWhenMETAinTransition() throws Exception {
|
||||
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
|
||||
HBaseAdmin admin = null;
|
||||
try {
|
||||
admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
|
||||
admin.closeRegion(cluster.getServerHoldingMeta(),
|
||||
HRegionInfo.FIRST_META_REGIONINFO);
|
||||
} finally {
|
||||
if (admin != null) {
|
||||
admin.close();
|
||||
}
|
||||
}
|
||||
regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
|
||||
MetaRegionTracker.deleteMetaLocation(cluster.getMaster().getZooKeeper());
|
||||
assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
|
||||
HBaseFsck hbck = doFsck(conf, true);
|
||||
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
|
||||
ERROR_CODE.NULL_META_REGION });
|
||||
assertNoErrors(doFsck(conf, false));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new region in META.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue