HBASE-4511 There is data loss when master failovers

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1201104 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-11-11 23:36:47 +00:00
parent 7671b5d49d
commit 902cd7e192
4 changed files with 70 additions and 9 deletions

View File

@ -469,6 +469,7 @@ Release 0.92.0 - Unreleased
HBASE-4734 [bulk load] Warn if bulk load directory contained no files
HBASE-4723 Loads of NotAllMetaRegionsOnlineException traces when starting
the master
HBASE-4511 There is data loss when master failovers
TESTS
HBASE-4450 test for number of blocks read: to serve as baseline for expected

View File

@ -267,6 +267,21 @@ public class CatalogTracker {
return this.metaLocation;
}
/**
 * Resolves the server hosting <code>.META.</code>; used by the master on
 * startup while it figures out the state of the cluster.
 * The location already held by this tracker wins. Only when that location is
 * null (not yet set) do we fall back to
 * <code>MetaReader.getMetaRegionLocation</code>, which reads what is recorded
 * up in <code>-ROOT-</code>.
 * @return {@link ServerName} for the server hosting <code>.META.</code>. When
 * the fallback is taken, the value comes from the <code>-ROOT-</code> table
 * and so could be null or just plain stale.
 * @throws IOException
 */
public ServerName getMetaLocationOrReadLocationFromRoot() throws IOException {
  ServerName tracked = getMetaLocation();
  if (tracked == null) {
    // Nothing set on the tracker yet; go ask -ROOT- instead.
    return MetaReader.getMetaRegionLocation(this);
  }
  return tracked;
}
/**
* Waits indefinitely for availability of <code>-ROOT-</code>. Used during
* cluster startup.
@ -336,7 +351,7 @@ public class CatalogTracker {
* @return connection to server hosting root
* @throws NotAllMetaRegionsOnlineException if timed out waiting
* @throws IOException
* @deprecated Use {@link #getRootServerConnection(long)}
* @deprecated Use #getRootServerConnection(long)
*/
public HRegionInterface waitForRootServerConnectionDefault()
throws NotAllMetaRegionsOnlineException, IOException {
@ -381,8 +396,7 @@ public class CatalogTracker {
// Now read the current .META. content from -ROOT-. Note: This goes via
// an HConnection. It has its own way of figuring root and meta locations
// which we have to wait on.
ServerName newLocation =
MetaReader.readRegionLocation(this, META_REGION_NAME);
ServerName newLocation = MetaReader.getMetaRegionLocation(this);
if (newLocation == null) return null;
HRegionInterface newConnection = getCachedConnection(newLocation);

View File

@ -193,6 +193,8 @@ public class MetaReader {
throws IOException {
// Passing the CatalogTracker's connection configuration ensures this
// HTable instance uses the CatalogTracker's connection.
org.apache.hadoop.hbase.client.HConnection c = catalogTracker.getConnection();
if (c == null) throw new NullPointerException("No connection");
return new HTable(catalogTracker.getConnection().getConfiguration(), tableName);
}
@ -251,7 +253,7 @@ public class MetaReader {
* @param metaServer connection to server hosting ROOT
* @return location of META as recorded in ROOT, or null if not available
* @throws IOException
* @deprecated Does not retry; use {@link #readRegionLocation(CatalogTracker, byte[])}
* @deprecated Does not retry; use #getMetaRegionLocation(CatalogTracker)
*/
public static ServerName readMetaLocation(HRegionInterface metaServer)
throws IOException {
@ -259,6 +261,19 @@ public class MetaReader {
CatalogTracker.META_REGION_NAME);
}
/**
 * Looks up which server hosts the <code>.META.</code> region, going by the
 * content of <code>-ROOT-</code>.
 * Delegates to <code>readRegionLocation</code> using
 * <code>CatalogTracker.META_REGION_NAME</code> as the region to look up.
 * @param ct catalog tracker passed through to the region-location read
 * @return location of <code>.META.</code> region as a {@link ServerName} or
 * null if not found
 * @throws IOException
 */
static ServerName getMetaRegionLocation(final CatalogTracker ct)
throws IOException {
  final byte [] metaRegionName = CatalogTracker.META_REGION_NAME;
  return readRegionLocation(ct, metaRegionName);
}
/**
* Reads the location of the specified region
* @param catalogTracker
@ -266,7 +281,7 @@ public class MetaReader {
* @return location of region as a {@link ServerName} or null if not found
* @throws IOException
*/
public static ServerName readRegionLocation(CatalogTracker catalogTracker,
static ServerName readRegionLocation(CatalogTracker catalogTracker,
byte [] regionName)
throws IOException {
Pair<HRegionInfo, ServerName> pair = getRegion(catalogTracker, regionName);

View File

@ -462,7 +462,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
if (!this.serverManager.isServerOnline(sn)) {
// Not registered; add it.
LOG.info("Registering server found up in zk: " + sn);
LOG.info("Registering server found up in zk but who has not yet " +
"reported in: " + sn);
this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD);
}
}
@ -526,14 +527,23 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
status.setStatus("Assigning ROOT region");
boolean rit = this.assignmentManager.
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
ServerName expiredServer = null;
if (!catalogTracker.verifyRootRegionLocation(timeout)) {
this.assignmentManager.assignRoot();
ServerName currentRootServer = this.catalogTracker.getRootLocation();
if (expireIfOnline(currentRootServer)) {
// We are expiring this server. The processing of expiration will assign
// root so don't do it here.
expiredServer = currentRootServer;
} else {
// Root was not on an online server when we failed verification
this.assignmentManager.assignRoot();
}
this.catalogTracker.waitForRoot();
//This guarantees that the transition has completed
this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
assigned++;
} else {
// Region already assigned. We didnt' assign it. Add to in-memory state.
// Region already assigned. We didn't assign it. Add to in-memory state.
this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO,
this.catalogTracker.getRootLocation());
}
@ -545,7 +555,15 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
rit = this.assignmentManager.
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) {
this.assignmentManager.assignMeta();
ServerName currentMetaServer =
this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
if (currentMetaServer != null && currentMetaServer.equals(expiredServer)) {
// We are expiring the server that is carrying meta already.
// The expiration processing will take care of reassigning meta.
expireIfOnline(currentMetaServer);
} else {
this.assignmentManager.assignMeta();
}
this.catalogTracker.waitForMeta();
// Above check waits for general meta availability but this does not
// guarantee that the transition has completed
@ -562,6 +580,19 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
return assigned;
}
/**
 * Forces expiration of the passed server, but only if the server manager
 * currently reports it as online.
 * @param sn ServerName to check; a null is tolerated and treated the same as
 * a server that is not online.
 * @return True if the server was online and so we asked the server manager
 * to expire it; false if <code>sn</code> was null or not online.
 */
private boolean expireIfOnline(final ServerName sn) {
  // The null check short-circuits, so the server manager is never asked about a null.
  final boolean online = sn != null && this.serverManager.isServerOnline(sn);
  if (online) {
    LOG.info("Forcing expiration of " + sn);
    this.serverManager.expireServer(sn);
  }
  return online;
}
@Override
public ProtocolSignature getProtocolSignature(
String protocol, long version, int clientMethodsHashCode)