HBASE-954 Don't reassign root region until ProcessServerShutdown has split the former region server's log
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@707710 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9b91e0bfe4
commit
ae1653c001
|
@ -41,6 +41,8 @@ Release 0.19.0 - Unreleased
|
|||
HBASE-950 HTable.commit no longer works with existing RowLocks though it's still in API
|
||||
HBASE-728 Support for HLog appends
|
||||
HBASE-952 Deadlock in HRegion.batchUpdate
|
||||
HBASE-954 Don't reassign root region until ProcessServerShutdown has split
|
||||
the former region server's log
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-901 Add a limit to key length, check key and value length on client side
|
||||
|
|
|
@ -96,13 +96,8 @@ class ProcessRegionOpen extends ProcessRegionStatusChange {
|
|||
regionInfo.getRegionName(), regionInfo.getStartKey());
|
||||
if (!master.regionManager.isInitialMetaScanComplete()) {
|
||||
// Put it on the queue to be scanned for the first time.
|
||||
try {
|
||||
LOG.debug("Adding " + m.toString() + " to regions to scan");
|
||||
master.regionManager.addMetaRegionToScan(m);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(
|
||||
"Putting into metaRegionsToScan was interrupted.", e);
|
||||
}
|
||||
LOG.debug("Adding " + m.toString() + " to regions to scan");
|
||||
master.regionManager.addMetaRegionToScan(m);
|
||||
} else {
|
||||
// Add it to the online meta regions
|
||||
LOG.debug("Adding to onlineMetaRegions: " + m.toString());
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException; //TODO: remove
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
@ -45,11 +44,13 @@ import org.apache.hadoop.hbase.io.RowResult;
|
|||
* serving, and the regions need to get reassigned.
|
||||
*/
|
||||
class ProcessServerShutdown extends RegionServerOperation {
|
||||
private HServerAddress deadServer;
|
||||
private String deadServerName;
|
||||
private final HServerAddress deadServer;
|
||||
private final String deadServerName;
|
||||
private final boolean rootRegionServer;
|
||||
private Path oldLogDir;
|
||||
private boolean logSplit;
|
||||
private boolean rootRescanned;
|
||||
|
||||
|
||||
private class ToDoEntry {
|
||||
boolean regionOffline;
|
||||
|
@ -66,11 +67,14 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
/**
|
||||
* @param master
|
||||
* @param serverInfo
|
||||
* @param rootRegionServer
|
||||
*/
|
||||
public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) {
|
||||
public ProcessServerShutdown(HMaster master, HServerInfo serverInfo,
|
||||
boolean rootRegionServer) {
|
||||
super(master);
|
||||
this.deadServer = serverInfo.getServerAddress();
|
||||
this.deadServerName = this.deadServer.toString();
|
||||
this.rootRegionServer = rootRegionServer;
|
||||
this.logSplit = false;
|
||||
this.rootRescanned = false;
|
||||
StringBuilder dirName = new StringBuilder("log_");
|
||||
|
@ -253,6 +257,9 @@ class ProcessServerShutdown extends RegionServerOperation {
|
|||
}
|
||||
|
||||
if (!rootAvailable()) {
|
||||
// Get root region assigned now that log has been split
|
||||
master.regionManager.reassignRootRegion();
|
||||
|
||||
// Return true so that worker does not put this request back on the
|
||||
// toDoQueue.
|
||||
// rootAvailable() has already put it on the delayedToDoQueue
|
||||
|
|
|
@ -148,7 +148,7 @@ class RegionManager implements HConstants {
|
|||
// Scans the meta table
|
||||
metaScannerThread = new MetaScanner(master, this);
|
||||
|
||||
unassignRootRegion();
|
||||
reassignRootRegion();
|
||||
}
|
||||
|
||||
void start() {
|
||||
|
@ -158,15 +158,11 @@ class RegionManager implements HConstants {
|
|||
"RegionManager.metaScanner");
|
||||
}
|
||||
|
||||
/*
|
||||
* Unassign the root region.
|
||||
* This method would be used in case where root region server had died
|
||||
* without reporting in. Currently, we just flounder and never recover. We
|
||||
* could 'notice' dead region server in root scanner -- if we failed access
|
||||
* multiple times -- but reassigning root is catastrophic.
|
||||
*
|
||||
*/
|
||||
void unassignRootRegion() {
|
||||
void unsetRootRegion() {
|
||||
rootRegionLocation.set(null);
|
||||
}
|
||||
|
||||
void reassignRootRegion() {
|
||||
rootRegionLocation.set(null);
|
||||
if (!master.shutdownRequested) {
|
||||
unassignedRegions.put(HRegionInfo.ROOT_REGIONINFO, ZERO_L);
|
||||
|
@ -818,9 +814,8 @@ class RegionManager implements HConstants {
|
|||
/**
|
||||
* Add a meta region to the scan queue
|
||||
* @param m MetaRegion that needs to get scanned
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
public void addMetaRegionToScan(MetaRegion m) throws InterruptedException {
|
||||
public void addMetaRegionToScan(MetaRegion m) {
|
||||
metaScannerThread.addMetaRegionToScan(m);
|
||||
}
|
||||
|
||||
|
@ -937,6 +932,9 @@ class RegionManager implements HConstants {
|
|||
|
||||
/**
|
||||
* @param regionName
|
||||
* @param info
|
||||
* @param server
|
||||
* @param op
|
||||
*/
|
||||
public void startAction(byte[] regionName, HRegionInfo info,
|
||||
HServerAddress server, int op) {
|
||||
|
@ -956,6 +954,7 @@ class RegionManager implements HConstants {
|
|||
|
||||
/**
|
||||
* @param regionName
|
||||
* @param op
|
||||
*/
|
||||
public void endAction(byte[] regionName, int op) {
|
||||
switch (op) {
|
||||
|
|
|
@ -125,10 +125,17 @@ class ServerManager implements HConstants {
|
|||
// The startup message was from a known server with the same name.
|
||||
// Timeout the old one right away.
|
||||
HServerAddress root = master.getRootRegionLocation();
|
||||
boolean rootServer = false;
|
||||
if (root != null && root.equals(storedInfo.getServerAddress())) {
|
||||
master.regionManager.unassignRootRegion();
|
||||
master.regionManager.setRootRegionLocation(null);
|
||||
rootServer = true;
|
||||
}
|
||||
try {
|
||||
master.toDoQueue.put(
|
||||
new ProcessServerShutdown(master, storedInfo, rootServer));
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Insertion into toDoQueue was interrupted", e);
|
||||
}
|
||||
master.delayedToDoQueue.put(new ProcessServerShutdown(master, storedInfo));
|
||||
}
|
||||
|
||||
// record new server
|
||||
|
@ -254,7 +261,7 @@ class ServerManager implements HConstants {
|
|||
LOG.info("Processing " + msgs[i] + " from " + serverName);
|
||||
HRegionInfo info = msgs[i].getRegionInfo();
|
||||
if (info.isRootRegion()) {
|
||||
master.regionManager.unassignRootRegion();
|
||||
master.regionManager.reassignRootRegion();
|
||||
} else if (info.isMetaTable()) {
|
||||
master.regionManager.offlineMetaRegion(info.getStartKey());
|
||||
}
|
||||
|
@ -484,7 +491,7 @@ class ServerManager implements HConstants {
|
|||
master.shutdown();
|
||||
}
|
||||
master.connection.setRootRegionLocation(null);
|
||||
master.regionManager.unassignRootRegion();
|
||||
master.regionManager.reassignRootRegion();
|
||||
|
||||
} else {
|
||||
boolean reassignRegion = !region.isOffline();
|
||||
|
@ -537,7 +544,7 @@ class ServerManager implements HConstants {
|
|||
// This method can be called a couple of times during shutdown.
|
||||
if (master.getRootRegionLocation() != null &&
|
||||
info.getServerAddress().equals(master.getRootRegionLocation())) {
|
||||
master.regionManager.unassignRootRegion();
|
||||
master.regionManager.reassignRootRegion();
|
||||
}
|
||||
LOG.info("Cancelling lease for " + serverName);
|
||||
try {
|
||||
|
@ -682,10 +689,15 @@ class ServerManager implements HConstants {
|
|||
LOG.info(server + " lease expired");
|
||||
// Remove the server from the known servers list and update load info
|
||||
HServerInfo info = serversToServerInfo.remove(server);
|
||||
boolean rootServer = false;
|
||||
if (info != null) {
|
||||
HServerAddress root = master.getRootRegionLocation();
|
||||
if (root != null && root.equals(info.getServerAddress())) {
|
||||
master.regionManager.unassignRootRegion();
|
||||
// NOTE: If the server was serving the root region, we cannot reassign
|
||||
// it here because the new server will start serving the root region
|
||||
// before ProcessServerShutdown has a chance to split the log file.
|
||||
master.regionManager.unsetRootRegion();
|
||||
rootServer = true;
|
||||
}
|
||||
String serverName = info.getServerAddress().toString();
|
||||
HServerLoad load = serversToLoad.remove(serverName);
|
||||
|
@ -704,11 +716,13 @@ class ServerManager implements HConstants {
|
|||
serversToServerInfo.notifyAll();
|
||||
}
|
||||
|
||||
// NOTE: If the server was serving the root region, we cannot reassign it
|
||||
// here because the new server will start serving the root region before
|
||||
// the ProcessServerShutdown operation has a chance to split the log file.
|
||||
if (info != null) {
|
||||
master.delayedToDoQueue.put(new ProcessServerShutdown(master, info));
|
||||
try {
|
||||
master.toDoQueue.put(
|
||||
new ProcessServerShutdown(master, info, rootServer));
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Insertion into toDoQueue was interrupted", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -662,7 +662,7 @@ public class HLog extends Thread implements HConstants, Syncable {
|
|||
try {
|
||||
for (int i = 0; i < logfiles.length; i++) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Splitting " + i + " of " + logfiles.length + ": " +
|
||||
LOG.debug("Splitting " + (i + 1) + " of " + logfiles.length + ": " +
|
||||
logfiles[i].getPath());
|
||||
}
|
||||
// Check for empty file.
|
||||
|
@ -703,7 +703,7 @@ public class HLog extends Thread implements HConstants, Syncable {
|
|||
logWriters.put(regionName, w);
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Creating new log file writer for path " + logfile +
|
||||
" and region " + regionName);
|
||||
" and region " + Bytes.toString(regionName));
|
||||
}
|
||||
|
||||
if (old != null) {
|
||||
|
|
Loading…
Reference in New Issue