HBASE-4511 There is data loss when master failovers
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1201104 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7671b5d49d
commit
902cd7e192
|
@ -469,6 +469,7 @@ Release 0.92.0 - Unreleased
|
|||
HBASE-4734 [bulk load] Warn if bulk load directory contained no files
|
||||
HBASE-4723 Loads of NotAllMetaRegionsOnlineException traces when starting
|
||||
the master
|
||||
HBASE-4511 There is data loss when master failovers
|
||||
|
||||
TESTS
|
||||
HBASE-4450 test for number of blocks read: to serve as baseline for expected
|
||||
|
|
|
@ -267,6 +267,21 @@ public class CatalogTracker {
|
|||
return this.metaLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method used by master on startup trying to figure state of cluster.
|
||||
* Returns the current meta location unless its null. In this latter case,
|
||||
* it has not yet been set so go check whats up in <code>-ROOT-</code> and
|
||||
* return that.
|
||||
* @return{@link ServerName} for server hosting <code>.META.</code> or if null,
|
||||
* we'll read the location that is up in <code>-ROOT-</code> table (which
|
||||
* could be null or just plain stale).
|
||||
* @throws IOException
|
||||
*/
|
||||
public ServerName getMetaLocationOrReadLocationFromRoot() throws IOException {
|
||||
ServerName sn = getMetaLocation();
|
||||
return sn != null? sn: MetaReader.getMetaRegionLocation(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Waits indefinitely for availability of <code>-ROOT-</code>. Used during
|
||||
* cluster startup.
|
||||
|
@ -336,7 +351,7 @@ public class CatalogTracker {
|
|||
* @return connection to server hosting root
|
||||
* @throws NotAllMetaRegionsOnlineException if timed out waiting
|
||||
* @throws IOException
|
||||
* @deprecated Use {@link #getRootServerConnection(long)}
|
||||
* @deprecated Use #getRootServerConnection(long)
|
||||
*/
|
||||
public HRegionInterface waitForRootServerConnectionDefault()
|
||||
throws NotAllMetaRegionsOnlineException, IOException {
|
||||
|
@ -381,8 +396,7 @@ public class CatalogTracker {
|
|||
// Now read the current .META. content from -ROOT-. Note: This goes via
|
||||
// an HConnection. It has its own way of figuring root and meta locations
|
||||
// which we have to wait on.
|
||||
ServerName newLocation =
|
||||
MetaReader.readRegionLocation(this, META_REGION_NAME);
|
||||
ServerName newLocation = MetaReader.getMetaRegionLocation(this);
|
||||
if (newLocation == null) return null;
|
||||
|
||||
HRegionInterface newConnection = getCachedConnection(newLocation);
|
||||
|
|
|
@ -193,6 +193,8 @@ public class MetaReader {
|
|||
throws IOException {
|
||||
// Passing the CatalogTracker's connection configuration ensures this
|
||||
// HTable instance uses the CatalogTracker's connection.
|
||||
org.apache.hadoop.hbase.client.HConnection c = catalogTracker.getConnection();
|
||||
if (c == null) throw new NullPointerException("No connection");
|
||||
return new HTable(catalogTracker.getConnection().getConfiguration(), tableName);
|
||||
}
|
||||
|
||||
|
@ -251,7 +253,7 @@ public class MetaReader {
|
|||
* @param metaServer connection to server hosting ROOT
|
||||
* @return location of META as recorded in ROOT, or null if not available
|
||||
* @throws IOException
|
||||
* @deprecated Does not retry; use {@link #readRegionLocation(CatalogTracker, byte[])}
|
||||
* @deprecated Does not retry; use #getMetaRegionLocation(CatalogTracker)
|
||||
*/
|
||||
public static ServerName readMetaLocation(HRegionInterface metaServer)
|
||||
throws IOException {
|
||||
|
@ -259,6 +261,19 @@ public class MetaReader {
|
|||
CatalogTracker.META_REGION_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the location of <code>.META.</code> region by reading content of
|
||||
* <code>-ROOT-</code>.
|
||||
* @param ct
|
||||
* @return location of <code>.META.</code> region as a {@link ServerName} or
|
||||
* null if not found
|
||||
* @throws IOException
|
||||
*/
|
||||
static ServerName getMetaRegionLocation(final CatalogTracker ct)
|
||||
throws IOException {
|
||||
return MetaReader.readRegionLocation(ct, CatalogTracker.META_REGION_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the location of the specified region
|
||||
* @param catalogTracker
|
||||
|
@ -266,7 +281,7 @@ public class MetaReader {
|
|||
* @return location of region as a {@link ServerName} or null if not found
|
||||
* @throws IOException
|
||||
*/
|
||||
public static ServerName readRegionLocation(CatalogTracker catalogTracker,
|
||||
static ServerName readRegionLocation(CatalogTracker catalogTracker,
|
||||
byte [] regionName)
|
||||
throws IOException {
|
||||
Pair<HRegionInfo, ServerName> pair = getRegion(catalogTracker, regionName);
|
||||
|
|
|
@ -462,7 +462,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
|
||||
if (!this.serverManager.isServerOnline(sn)) {
|
||||
// Not registered; add it.
|
||||
LOG.info("Registering server found up in zk: " + sn);
|
||||
LOG.info("Registering server found up in zk but who has not yet " +
|
||||
"reported in: " + sn);
|
||||
this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD);
|
||||
}
|
||||
}
|
||||
|
@ -526,14 +527,23 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
status.setStatus("Assigning ROOT region");
|
||||
boolean rit = this.assignmentManager.
|
||||
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
|
||||
ServerName expiredServer = null;
|
||||
if (!catalogTracker.verifyRootRegionLocation(timeout)) {
|
||||
this.assignmentManager.assignRoot();
|
||||
ServerName currentRootServer = this.catalogTracker.getRootLocation();
|
||||
if (expireIfOnline(currentRootServer)) {
|
||||
// We are expiring this server. The processing of expiration will assign
|
||||
// root so don't do it here.
|
||||
expiredServer = currentRootServer;
|
||||
} else {
|
||||
// Root was not on an online server when we failed verification
|
||||
this.assignmentManager.assignRoot();
|
||||
}
|
||||
this.catalogTracker.waitForRoot();
|
||||
//This guarantees that the transition has completed
|
||||
this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
|
||||
assigned++;
|
||||
} else {
|
||||
// Region already assigned. We didnt' assign it. Add to in-memory state.
|
||||
// Region already assigned. We didn't assign it. Add to in-memory state.
|
||||
this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO,
|
||||
this.catalogTracker.getRootLocation());
|
||||
}
|
||||
|
@ -545,7 +555,15 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
rit = this.assignmentManager.
|
||||
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
|
||||
if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) {
|
||||
this.assignmentManager.assignMeta();
|
||||
ServerName currentMetaServer =
|
||||
this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
|
||||
if (currentMetaServer != null && currentMetaServer.equals(expiredServer)) {
|
||||
// We are expiring the server that is carrying meta already.
|
||||
// The expiration processing will take care of reassigning meta.
|
||||
expireIfOnline(currentMetaServer);
|
||||
} else {
|
||||
this.assignmentManager.assignMeta();
|
||||
}
|
||||
this.catalogTracker.waitForMeta();
|
||||
// Above check waits for general meta availability but this does not
|
||||
// guarantee that the transition has completed
|
||||
|
@ -562,6 +580,19 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
return assigned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expire a server if we find it is one of the online servers set.
|
||||
* @param sn ServerName to check.
|
||||
* @return True if server was online and so we expired it as unreachable.
|
||||
*/
|
||||
private boolean expireIfOnline(final ServerName sn) {
|
||||
if (sn == null) return false;
|
||||
if (!this.serverManager.isServerOnline(sn)) return false;
|
||||
LOG.info("Forcing expiration of " + sn);
|
||||
this.serverManager.expireServer(sn);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProtocolSignature getProtocolSignature(
|
||||
String protocol, long version, int clientMethodsHashCode)
|
||||
|
|
Loading…
Reference in New Issue