HBASE-4340  Hbase can't balance if ServerShutdownHandler encountered exception (Jinchao Gao)


git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1167452 13f79535-47bb-0310-9956-ffa450edef68
Zhihong Yu 2011-09-10 04:34:54 +00:00
parent 1145bc1471
commit 75bce302e2
2 changed files with 75 additions and 69 deletions

CHANGES.txt

@@ -258,7 +258,8 @@ Release 0.91.0 - Unreleased
    HBASE-4301 META migration from 0.90 to trunk fails (Subbu Iyer)
    HBASE-4331 Bypassing default actions in prePut fails sometimes with
               HTable client (Lars Hofhansl via garyh)
+   HBASE-4340 Hbase can't balance if ServerShutdownHandler encountered
+              exception (Jinchao Gao)
 
   IMPROVEMENTS
    HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java

@@ -166,83 +166,88 @@ public class ServerShutdownHandler extends EventHandler {
     final ServerName serverName = this.serverName;
     LOG.info("Splitting logs for " + serverName);
-    this.services.getMasterFileSystem().splitLog(serverName);
+    try {
+      this.services.getMasterFileSystem().splitLog(serverName);
 
-    // Clean out anything in regions in transition. Being conservative and
-    // doing after log splitting. Could do some states before -- OPENING?
-    // OFFLINE? -- and then others after like CLOSING that depend on log
-    // splitting.
-    List<RegionState> regionsInTransition =
-      this.services.getAssignmentManager().processServerShutdown(this.serverName);
+      // Clean out anything in regions in transition. Being conservative and
+      // doing after log splitting. Could do some states before -- OPENING?
+      // OFFLINE? -- and then others after like CLOSING that depend on log
+      // splitting.
+      List<RegionState> regionsInTransition =
+        this.services.getAssignmentManager()
+          .processServerShutdown(this.serverName);
 
-    // Assign root and meta if we were carrying them.
-    if (isCarryingRoot()) { // -ROOT-
-      LOG.info("Server " + serverName + " was carrying ROOT. Trying to assign.");
-      verifyAndAssignRootWithRetries();
-    }
+      // Assign root and meta if we were carrying them.
+      if (isCarryingRoot()) { // -ROOT-
+        LOG.info("Server " + serverName +
+          " was carrying ROOT. Trying to assign.");
+        verifyAndAssignRootWithRetries();
+      }
 
-    // Carrying meta?
-    if (isCarryingMeta()) {
-      LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
-      this.services.getAssignmentManager().assignMeta();
-    }
+      // Carrying meta?
+      if (isCarryingMeta()) {
+        LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
+        this.services.getAssignmentManager().assignMeta();
+      }
 
-    // Wait on meta to come online; we need it to progress.
-    // TODO: Best way to hold strictly here? We should build this retry logic
-    // into the MetaReader operations themselves.
-    // TODO: Is the reading of .META. necessary when the Master has state of
-    // cluster in its head? It should be possible to do without reading .META.
-    // in all but one case. On split, the RS updates the .META.
-    // table and THEN informs the master of the split via zk nodes in
-    // 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
-    // the regionserver dies, these nodes do not stick around and this server
-    // shutdown processing does fixup (see the fixupDaughters method below).
-    // If we wanted to skip the .META. scan, we'd have to change at least the
-    // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
-    // completed (zk is updated after edits to .META. have gone in). See
-    // {@link SplitTransaction}. We'd also have to be figure another way for
-    // doing the below .META. daughters fixup.
-    NavigableMap<HRegionInfo, Result> hris = null;
-    while (!this.server.isStopped()) {
-      try {
-        this.server.getCatalogTracker().waitForMeta();
-        hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
-          this.serverName);
-        break;
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        throw new IOException("Interrupted", e);
-      } catch (IOException ioe) {
-        LOG.info("Received exception accessing META during server shutdown of " +
-          serverName + ", retrying META read", ioe);
-      }
-    }
+      // Wait on meta to come online; we need it to progress.
+      // TODO: Best way to hold strictly here? We should build this retry logic
+      // into the MetaReader operations themselves.
+      // TODO: Is the reading of .META. necessary when the Master has state of
+      // cluster in its head? It should be possible to do without reading .META.
+      // in all but one case. On split, the RS updates the .META.
+      // table and THEN informs the master of the split via zk nodes in
+      // 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
+      // the regionserver dies, these nodes do not stick around and this server
+      // shutdown processing does fixup (see the fixupDaughters method below).
+      // If we wanted to skip the .META. scan, we'd have to change at least the
+      // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
+      // completed (zk is updated after edits to .META. have gone in). See
+      // {@link SplitTransaction}. We'd also have to be figure another way for
+      // doing the below .META. daughters fixup.
+      NavigableMap<HRegionInfo, Result> hris = null;
+      while (!this.server.isStopped()) {
+        try {
+          this.server.getCatalogTracker().waitForMeta();
+          hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
+            this.serverName);
+          break;
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          throw new IOException("Interrupted", e);
+        } catch (IOException ioe) {
+          LOG.info("Received exception accessing META during server shutdown of " +
+            serverName + ", retrying META read", ioe);
+        }
+      }
 
-    // Skip regions that were in transition unless CLOSING or PENDING_CLOSE
-    for (RegionState rit : regionsInTransition) {
-      if (!rit.isClosing() && !rit.isPendingClose()) {
-        LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() +
-          " from list of regions to assign because in RIT");
-        hris.remove(rit.getRegion());
-      }
-    }
+      // Skip regions that were in transition unless CLOSING or PENDING_CLOSE
+      for (RegionState rit : regionsInTransition) {
+        if (!rit.isClosing() && !rit.isPendingClose()) {
+          LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() +
+            " from list of regions to assign because in RIT");
+          hris.remove(rit.getRegion());
+        }
+      }
 
-    LOG.info("Reassigning " + (hris == null? 0: hris.size()) +
-      " region(s) that " + serverName +
-      " was carrying (skipping " + regionsInTransition.size() +
-      " regions(s) that are already in transition)");
+      LOG.info("Reassigning " + (hris == null? 0: hris.size()) +
+        " region(s) that " + serverName +
+        " was carrying (skipping " + regionsInTransition.size() +
+        " regions(s) that are already in transition)");
 
-    // Iterate regions that were on this server and assign them
-    if (hris != null) {
-      for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
-        if (processDeadRegion(e.getKey(), e.getValue(),
-            this.services.getAssignmentManager(),
-            this.server.getCatalogTracker())) {
-          this.services.getAssignmentManager().assign(e.getKey(), true);
-        }
-      }
-    }
-    this.deadServers.finish(serverName);
+      // Iterate regions that were on this server and assign them
+      if (hris != null) {
+        for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
+          if (processDeadRegion(e.getKey(), e.getValue(),
+              this.services.getAssignmentManager(),
+              this.server.getCatalogTracker())) {
+            this.services.getAssignmentManager().assign(e.getKey(), true);
+          }
+        }
+      }
+    } finally {
+      this.deadServers.finish(serverName);
+    }
     LOG.info("Finished processing of shutdown of " + serverName);
   }
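
Why the try/finally matters: before this patch, this.deadServers.finish(serverName) was the last statement of the method, so an exception thrown anywhere earlier (log splitting, the META read loop, region assignment) skipped it and the server stayed registered as a dead server still being processed; the master skips balancing while dead-server processing is in flight, so one failed ServerShutdownHandler run left the balancer disabled. Moving finish() into a finally block guarantees the bookkeeping completes on every exit path (the real handler adds no catch; the exception still propagates after finally runs). Below is a minimal, self-contained sketch of that invariant; DeadServerSketch and failingShutdownWork are illustrative stand-ins, not HBase API.

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class DeadServerSketch {
  private final Set<String> processing =
      Collections.synchronizedSet(new HashSet<String>());

  // Stand-in for the master-side check: balancing is skipped while
  // any dead server is still being processed.
  public boolean areDeadServersInProgress() {
    return !processing.isEmpty();
  }

  public void process(String serverName) {
    processing.add(serverName);
    try {
      failingShutdownWork(serverName);  // may throw, as splitLog() can
    } catch (IOException ioe) {
      System.out.println("shutdown work failed: " + ioe.getMessage());
    } finally {
      processing.remove(serverName);    // always runs; balancing can resume
    }
  }

  private void failingShutdownWork(String serverName) throws IOException {
    throw new IOException("simulated log split failure on " + serverName);
  }

  public static void main(String[] args) {
    DeadServerSketch d = new DeadServerSketch();
    d.process("regionserver-1,60020,1315000000000");
    // Without the finally, this would stay true forever after a failure.
    System.out.println("dead servers in progress: " + d.areDeadServersInProgress());
  }
}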