HBASE-4340 Hbase can't balance if ServerShutdownHandler encountered
exception (Jinchao Gao)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1167452 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1145bc1471
commit
75bce302e2
|
@ -258,7 +258,8 @@ Release 0.91.0 - Unreleased
|
|||
HBASE-4301 META migration from 0.90 to trunk fails (Subbu Iyer)
|
||||
HBASE-4331 Bypassing default actions in prePut fails sometimes with
|
||||
HTable client (Lars Hofhansl via garyh)
|
||||
|
||||
HBASE-4340 Hbase can't balance if ServerShutdownHandler encountered
|
||||
exception (Jinchao Gao)
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
|
||||
|
|
|
@ -166,83 +166,88 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
final ServerName serverName = this.serverName;
|
||||
|
||||
LOG.info("Splitting logs for " + serverName);
|
||||
this.services.getMasterFileSystem().splitLog(serverName);
|
||||
try {
|
||||
this.services.getMasterFileSystem().splitLog(serverName);
|
||||
|
||||
// Clean out anything in regions in transition. Being conservative and
|
||||
// doing after log splitting. Could do some states before -- OPENING?
|
||||
// OFFLINE? -- and then others after like CLOSING that depend on log
|
||||
// splitting.
|
||||
List<RegionState> regionsInTransition =
|
||||
this.services.getAssignmentManager().processServerShutdown(this.serverName);
|
||||
// Clean out anything in regions in transition. Being conservative and
|
||||
// doing after log splitting. Could do some states before -- OPENING?
|
||||
// OFFLINE? -- and then others after like CLOSING that depend on log
|
||||
// splitting.
|
||||
List<RegionState> regionsInTransition =
|
||||
this.services.getAssignmentManager()
|
||||
.processServerShutdown(this.serverName);
|
||||
|
||||
// Assign root and meta if we were carrying them.
|
||||
if (isCarryingRoot()) { // -ROOT-
|
||||
LOG.info("Server " + serverName + " was carrying ROOT. Trying to assign.");
|
||||
verifyAndAssignRootWithRetries();
|
||||
}
|
||||
|
||||
// Carrying meta?
|
||||
if (isCarryingMeta()) {
|
||||
LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
|
||||
this.services.getAssignmentManager().assignMeta();
|
||||
}
|
||||
|
||||
// Wait on meta to come online; we need it to progress.
|
||||
// TODO: Best way to hold strictly here? We should build this retry logic
|
||||
// into the MetaReader operations themselves.
|
||||
// TODO: Is the reading of .META. necessary when the Master has state of
|
||||
// cluster in its head? It should be possible to do without reading .META.
|
||||
// in all but one case. On split, the RS updates the .META.
|
||||
// table and THEN informs the master of the split via zk nodes in
|
||||
// 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
|
||||
// the regionserver dies, these nodes do not stick around and this server
|
||||
// shutdown processing does fixup (see the fixupDaughters method below).
|
||||
// If we wanted to skip the .META. scan, we'd have to change at least the
|
||||
// final SPLIT message to be permanent in zk so in here we'd know a SPLIT
|
||||
// completed (zk is updated after edits to .META. have gone in). See
|
||||
// {@link SplitTransaction}. We'd also have to figure out another way of
|
||||
// doing the below .META. daughters fixup.
|
||||
NavigableMap<HRegionInfo, Result> hris = null;
|
||||
while (!this.server.isStopped()) {
|
||||
try {
|
||||
this.server.getCatalogTracker().waitForMeta();
|
||||
hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
|
||||
this.serverName);
|
||||
break;
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new IOException("Interrupted", e);
|
||||
} catch (IOException ioe) {
|
||||
LOG.info("Received exception accessing META during server shutdown of " +
|
||||
serverName + ", retrying META read", ioe);
|
||||
// Assign root and meta if we were carrying them.
|
||||
if (isCarryingRoot()) { // -ROOT-
|
||||
LOG.info("Server " + serverName +
|
||||
" was carrying ROOT. Trying to assign.");
|
||||
verifyAndAssignRootWithRetries();
|
||||
}
|
||||
}
|
||||
|
||||
// Skip regions that were in transition unless CLOSING or PENDING_CLOSE
|
||||
for (RegionState rit : regionsInTransition) {
|
||||
if (!rit.isClosing() && !rit.isPendingClose()) {
|
||||
LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() +
|
||||
" from list of regions to assign because in RIT");
|
||||
hris.remove(rit.getRegion());
|
||||
// Carrying meta?
|
||||
if (isCarryingMeta()) {
|
||||
LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
|
||||
this.services.getAssignmentManager().assignMeta();
|
||||
}
|
||||
}
|
||||
|
||||
LOG.info("Reassigning " + (hris == null? 0: hris.size()) +
|
||||
" region(s) that " + serverName +
|
||||
" was carrying (skipping " + regionsInTransition.size() +
|
||||
" regions(s) that are already in transition)");
|
||||
|
||||
// Iterate regions that were on this server and assign them
|
||||
if (hris != null) {
|
||||
for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
|
||||
if (processDeadRegion(e.getKey(), e.getValue(),
|
||||
this.services.getAssignmentManager(),
|
||||
this.server.getCatalogTracker())) {
|
||||
this.services.getAssignmentManager().assign(e.getKey(), true);
|
||||
// Wait on meta to come online; we need it to progress.
|
||||
// TODO: Best way to hold strictly here? We should build this retry logic
|
||||
// into the MetaReader operations themselves.
|
||||
// TODO: Is the reading of .META. necessary when the Master has state of
|
||||
// cluster in its head? It should be possible to do without reading .META.
|
||||
// in all but one case. On split, the RS updates the .META.
|
||||
// table and THEN informs the master of the split via zk nodes in
|
||||
// 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
|
||||
// the regionserver dies, these nodes do not stick around and this server
|
||||
// shutdown processing does fixup (see the fixupDaughters method below).
|
||||
// If we wanted to skip the .META. scan, we'd have to change at least the
|
||||
// final SPLIT message to be permanent in zk so in here we'd know a SPLIT
|
||||
// completed (zk is updated after edits to .META. have gone in). See
|
||||
// {@link SplitTransaction}. We'd also have to figure out another way of
|
||||
// doing the below .META. daughters fixup.
|
||||
NavigableMap<HRegionInfo, Result> hris = null;
|
||||
while (!this.server.isStopped()) {
|
||||
try {
|
||||
this.server.getCatalogTracker().waitForMeta();
|
||||
hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
|
||||
this.serverName);
|
||||
break;
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new IOException("Interrupted", e);
|
||||
} catch (IOException ioe) {
|
||||
LOG.info("Received exception accessing META during server shutdown of " +
|
||||
serverName + ", retrying META read", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
// Skip regions that were in transition unless CLOSING or PENDING_CLOSE
|
||||
for (RegionState rit : regionsInTransition) {
|
||||
if (!rit.isClosing() && !rit.isPendingClose()) {
|
||||
LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() +
|
||||
" from list of regions to assign because in RIT");
|
||||
hris.remove(rit.getRegion());
|
||||
}
|
||||
}
|
||||
|
||||
LOG.info("Reassigning " + (hris == null? 0: hris.size()) +
|
||||
" region(s) that " + serverName +
|
||||
" was carrying (skipping " + regionsInTransition.size() +
|
||||
" regions(s) that are already in transition)");
|
||||
|
||||
// Iterate regions that were on this server and assign them
|
||||
if (hris != null) {
|
||||
for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
|
||||
if (processDeadRegion(e.getKey(), e.getValue(),
|
||||
this.services.getAssignmentManager(),
|
||||
this.server.getCatalogTracker())) {
|
||||
this.services.getAssignmentManager().assign(e.getKey(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
this.deadServers.finish(serverName);
|
||||
}
|
||||
this.deadServers.finish(serverName);
|
||||
LOG.info("Finished processing of shutdown of " + serverName);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue