HBASE-4340 Hbase can't balance if ServerShutdownHandler encountered exception (Jinchao Gao)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1167452 13f79535-47bb-0310-9956-ffa450edef68
parent 1145bc1471
commit 75bce302e2
CHANGES.txt
@@ -258,7 +258,8 @@ Release 0.91.0 - Unreleased
    HBASE-4301  META migration from 0.90 to trunk fails (Subbu Iyer)
    HBASE-4331  Bypassing default actions in prePut fails sometimes with
                HTable client (Lars Hofhansl via garyh)
+   HBASE-4340  Hbase can't balance if ServerShutdownHandler encountered
+               exception (Jinchao Gao)
 
  IMPROVEMENTS
    HBASE-3290  Max Compaction Size (Nicolas Spiegelberg via Stack)
ServerShutdownHandler.java
@@ -166,83 +166,88 @@ public class ServerShutdownHandler extends EventHandler {
     final ServerName serverName = this.serverName;
 
     LOG.info("Splitting logs for " + serverName);
-    this.services.getMasterFileSystem().splitLog(serverName);
-
-    // Clean out anything in regions in transition. Being conservative and
-    // doing after log splitting. Could do some states before -- OPENING?
-    // OFFLINE? -- and then others after like CLOSING that depend on log
-    // splitting.
-    List<RegionState> regionsInTransition =
-      this.services.getAssignmentManager().processServerShutdown(this.serverName);
-
-    // Assign root and meta if we were carrying them.
-    if (isCarryingRoot()) { // -ROOT-
-      LOG.info("Server " + serverName + " was carrying ROOT. Trying to assign.");
-      verifyAndAssignRootWithRetries();
-    }
-
-    // Carrying meta?
-    if (isCarryingMeta()) {
-      LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
-      this.services.getAssignmentManager().assignMeta();
-    }
-
-    // Wait on meta to come online; we need it to progress.
-    // TODO: Best way to hold strictly here? We should build this retry logic
-    // into the MetaReader operations themselves.
-    // TODO: Is the reading of .META. necessary when the Master has state of
-    // cluster in its head? It should be possible to do without reading .META.
-    // in all but one case. On split, the RS updates the .META.
-    // table and THEN informs the master of the split via zk nodes in
-    // 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
-    // the regionserver dies, these nodes do not stick around and this server
-    // shutdown processing does fixup (see the fixupDaughters method below).
-    // If we wanted to skip the .META. scan, we'd have to change at least the
-    // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
-    // completed (zk is updated after edits to .META. have gone in). See
-    // {@link SplitTransaction}. We'd also have to be figure another way for
-    // doing the below .META. daughters fixup.
-    NavigableMap<HRegionInfo, Result> hris = null;
-    while (!this.server.isStopped()) {
-      try {
-        this.server.getCatalogTracker().waitForMeta();
-        hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
-          this.serverName);
-        break;
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        throw new IOException("Interrupted", e);
-      } catch (IOException ioe) {
-        LOG.info("Received exception accessing META during server shutdown of " +
-          serverName + ", retrying META read", ioe);
-      }
-    }
-
-    // Skip regions that were in transition unless CLOSING or PENDING_CLOSE
-    for (RegionState rit : regionsInTransition) {
-      if (!rit.isClosing() && !rit.isPendingClose()) {
-        LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() +
-          " from list of regions to assign because in RIT");
-        hris.remove(rit.getRegion());
-      }
-    }
-
-    LOG.info("Reassigning " + (hris == null? 0: hris.size()) +
-      " region(s) that " + serverName +
-      " was carrying (skipping " + regionsInTransition.size() +
-      " regions(s) that are already in transition)");
-
-    // Iterate regions that were on this server and assign them
-    if (hris != null) {
-      for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
-        if (processDeadRegion(e.getKey(), e.getValue(),
-            this.services.getAssignmentManager(),
-            this.server.getCatalogTracker())) {
-          this.services.getAssignmentManager().assign(e.getKey(), true);
-        }
-      }
-    }
-    this.deadServers.finish(serverName);
+    try {
+      this.services.getMasterFileSystem().splitLog(serverName);
+
+      // Clean out anything in regions in transition. Being conservative and
+      // doing after log splitting. Could do some states before -- OPENING?
+      // OFFLINE? -- and then others after like CLOSING that depend on log
+      // splitting.
+      List<RegionState> regionsInTransition =
+        this.services.getAssignmentManager()
+        .processServerShutdown(this.serverName);
+
+      // Assign root and meta if we were carrying them.
+      if (isCarryingRoot()) { // -ROOT-
+        LOG.info("Server " + serverName +
+            " was carrying ROOT. Trying to assign.");
+        verifyAndAssignRootWithRetries();
+      }
+
+      // Carrying meta?
+      if (isCarryingMeta()) {
+        LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
+        this.services.getAssignmentManager().assignMeta();
+      }
+
+      // Wait on meta to come online; we need it to progress.
+      // TODO: Best way to hold strictly here? We should build this retry logic
+      // into the MetaReader operations themselves.
+      // TODO: Is the reading of .META. necessary when the Master has state of
+      // cluster in its head? It should be possible to do without reading .META.
+      // in all but one case. On split, the RS updates the .META.
+      // table and THEN informs the master of the split via zk nodes in
+      // 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
+      // the regionserver dies, these nodes do not stick around and this server
+      // shutdown processing does fixup (see the fixupDaughters method below).
+      // If we wanted to skip the .META. scan, we'd have to change at least the
+      // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
+      // completed (zk is updated after edits to .META. have gone in). See
+      // {@link SplitTransaction}. We'd also have to be figure another way for
+      // doing the below .META. daughters fixup.
+      NavigableMap<HRegionInfo, Result> hris = null;
+      while (!this.server.isStopped()) {
+        try {
+          this.server.getCatalogTracker().waitForMeta();
+          hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
+            this.serverName);
+          break;
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          throw new IOException("Interrupted", e);
+        } catch (IOException ioe) {
+          LOG.info("Received exception accessing META during server shutdown of " +
+            serverName + ", retrying META read", ioe);
+        }
+      }
+
+      // Skip regions that were in transition unless CLOSING or PENDING_CLOSE
+      for (RegionState rit : regionsInTransition) {
+        if (!rit.isClosing() && !rit.isPendingClose()) {
+          LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() +
+            " from list of regions to assign because in RIT");
+          hris.remove(rit.getRegion());
+        }
+      }
+
+      LOG.info("Reassigning " + (hris == null? 0: hris.size()) +
+        " region(s) that " + serverName +
+        " was carrying (skipping " + regionsInTransition.size() +
+        " regions(s) that are already in transition)");
+
+      // Iterate regions that were on this server and assign them
+      if (hris != null) {
+        for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
+          if (processDeadRegion(e.getKey(), e.getValue(),
+              this.services.getAssignmentManager(),
+              this.server.getCatalogTracker())) {
+            this.services.getAssignmentManager().assign(e.getKey(), true);
+          }
+        }
+      }
+    } finally {
+      this.deadServers.finish(serverName);
+    }
     LOG.info("Finished processing of shutdown of " + serverName);
   }
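Note on the change: the substance of the patch is the new try/finally in ServerShutdownHandler.process(). All of the shutdown work (log splitting, META reads, region reassignment) now runs inside the try, and this.deadServers.finish(serverName) moves into the finally block so it executes even when that work throws. Per the JIRA title, an exception that escaped before the finish call previously left the server marked as still being processed, and the master would then refuse to balance. The sketch below illustrates the pattern in isolation; it is a simplified model, not HBase code, and the names (DeadServerTracker, processShutdown, canBalance) are invented for the example.

import java.io.IOException;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

// Simplified, hypothetical model of the fix; DeadServerTracker, processShutdown
// and canBalance are invented for illustration and are not HBase APIs.
public class DeadServerTracker {
  // Servers whose shutdown is still being processed. While this set is
  // non-empty, a balancer-style check refuses to run.
  private final Set<String> serversInProgress = ConcurrentHashMap.newKeySet();

  public void process(String serverName) throws IOException {
    serversInProgress.add(serverName);
    try {
      processShutdown(serverName);   // may throw, e.g. while splitting logs
    } finally {
      // Mirrors moving deadServers.finish(serverName) into finally: the
      // bookkeeping is cleared even when the work above throws.
      serversInProgress.remove(serverName);
    }
  }

  public boolean canBalance() {
    return serversInProgress.isEmpty();
  }

  private void processShutdown(String serverName) throws IOException {
    // Stand-in for log splitting, META reads and region reassignment.
    throw new IOException("simulated failure while processing " + serverName);
  }

  public static void main(String[] args) {
    DeadServerTracker tracker = new DeadServerTracker();
    try {
      tracker.process("regionserver-1,60020,1315000000000");
    } catch (IOException expected) {
      // The exception still propagates, exactly as in the patched handler.
    }
    System.out.println("canBalance = " + tracker.canBalance());  // prints: canBalance = true
  }
}

Without the finally block, the remove call is skipped when the simulated failure is thrown and canBalance() stays false, which is the stuck-balancer symptom the issue describes.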