HBASE-3836 Add facility to track currently progressing actions and workflows.
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1098933 13f79535-47bb-0310-9956-ffa450edef68
Parent: 8341e4a3be
Commit: ca952ac00b
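The patch below threads a new MonitoredTask through master and regionserver operations so their progress can be inspected while they run. As a rough sketch only (not part of this commit; the class name and phase comments are made up for illustration), a caller of the facility introduced here follows this pattern:

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class ExampleTrackedAction {
  public void run() {
    // Register the action with the per-VM TaskMonitor singleton.
    MonitoredTask status = TaskMonitor.get().createStatus("Example long-running action");
    try {
      status.setStatus("Phase 1: preparing");
      // ... do phase 1 work ...
      status.setStatus("Phase 2: executing");
      // ... do phase 2 work ...
      status.markComplete("Example action finished");
    } finally {
      // cleanup() aborts the task if it is still RUNNING, so failures stay visible.
      status.cleanup();
    }
  }
}

The try/finally around cleanup() mirrors how HMaster.run(), HRegion.close() and the other call sites in the diff guard against leaking a RUNNING task.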
@@ -233,6 +233,8 @@ Release 0.91.0 - Unreleased
        (Subbu M. Iyer via Stack)
    HBASE-1364 [performance] Distributed splitting of regionserver commit logs
        (Prakash Khemani)
+   HBASE-3836 Add facility to track currently progressing actions and
+       workflows. (todd)

 Release 0.90.3 - Unreleased
@@ -25,6 +25,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;

@@ -119,17 +120,20 @@ class ActiveMasterManager extends ZooKeeperListener {
   *
   * This also makes sure that we are watching the master znode so will be
   * notified if another master dies.
+  * @param startupStatus
   * @return True if no issue becoming active master else false if another
   * master was running or if some other problem (zookeeper, stop flag has been
   * set on this Master)
   */
-  boolean blockUntilBecomingActiveMaster() {
+  boolean blockUntilBecomingActiveMaster(MonitoredTask startupStatus) {
+    startupStatus.setStatus("Trying to register in ZK as active master");
    boolean cleanSetOfActiveMaster = true;
    // Try to become the active master, watch if there is another master
    try {
      if (ZKUtil.createEphemeralNodeAndWatch(this.watcher,
          this.watcher.masterAddressZNode, Bytes.toBytes(this.sn.toString()))) {
        // We are the master, return
+        startupStatus.setStatus("Successfully registered as active master.");
        this.clusterHasActiveMaster.set(true);
        LOG.info("Master=" + this.sn);
        return cleanSetOfActiveMaster;

@@ -143,13 +147,17 @@ class ActiveMasterManager extends ZooKeeperListener {
        ZKUtil.getDataAndWatch(this.watcher, this.watcher.masterAddressZNode);
      ServerName currentMaster = new ServerName(Bytes.toString(bytes));
      if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) {
-        LOG.info("Current master has this master's address, " + currentMaster +
+        String msg = ("Current master has this master's address, " + currentMaster +
          "; master was restarted? Waiting on znode to expire...");
+        LOG.info(msg);
+        startupStatus.setStatus(msg);
        // Hurry along the expiration of the znode.
        ZKUtil.deleteNode(this.watcher, this.watcher.masterAddressZNode);
      } else {
-        LOG.info("Another master is the active master, " + currentMaster +
-          "; waiting to become the next active master");
+        String msg = "Another master is the active master, " + currentMaster +
+          "; waiting to become the next active master";
+        LOG.info(msg);
+        startupStatus.setStatus(msg);
      }
    } catch (KeeperException ke) {
      master.abort("Received an unexpected KeeperException, aborting", ke);

@@ -168,7 +176,7 @@ class ActiveMasterManager extends ZooKeeperListener {
        return cleanSetOfActiveMaster;
      }
      // Try to become active master again now that there is no active master
-      blockUntilBecomingActiveMaster();
+      blockUntilBecomingActiveMaster(startupStatus);
    }
    return cleanSetOfActiveMaster;
  }
@@ -71,6 +71,8 @@ import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
+import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.replication.regionserver.Replication;
 import org.apache.hadoop.hbase.security.User;

@@ -271,6 +273,9 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
   */
  @Override
  public void run() {
+    MonitoredTask startupStatus =
+      TaskMonitor.get().createStatus("Master startup");
+    startupStatus.setDescription("Master startup");
    try {
      /*
       * Block on becoming the active master.

@@ -282,16 +287,18 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
       * now wait until it dies to try and become the next active master. If we
       * do not succeed on our first attempt, this is no longer a cluster startup.
       */
-      becomeActiveMaster();
+      becomeActiveMaster(startupStatus);

      // We are either the active master or we were asked to shutdown
      if (!this.stopped) {
-        finishInitialization();
+        finishInitialization(startupStatus);
        loop();
      }
    } catch (Throwable t) {
      abort("Unhandled exception. Starting shutdown.", t);
    } finally {
+      startupStatus.cleanup();

      stopChores();
      // Wait for all the remaining region servers to report in IFF we were
      // running a cluster shutdown AND we were NOT aborting.

@@ -313,17 +320,19 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {

  /**
   * Try becoming active master.
+  * @param startupStatus
   * @return True if we could successfully become the active master.
   * @throws InterruptedException
   */
-  private boolean becomeActiveMaster() throws InterruptedException {
+  private boolean becomeActiveMaster(MonitoredTask startupStatus)
+  throws InterruptedException {
    // TODO: This is wrong!!!! Should have new servername if we restart ourselves,
    // if we come back to life.
    this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
        this);
    this.zooKeeper.registerListener(activeMasterManager);
    stallIfBackupMaster(this.conf, this.activeMasterManager);
-    return this.activeMasterManager.blockUntilBecomingActiveMaster();
+    return this.activeMasterManager.blockUntilBecomingActiveMaster(startupStatus);
  }

  /**

@@ -386,7 +395,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
   * @throws InterruptedException
   * @throws KeeperException
   */
-  private void finishInitialization()
+  private void finishInitialization(MonitoredTask status)
  throws IOException, InterruptedException, KeeperException {

    isActiveMaster = true;

@@ -397,9 +406,12 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
     * below after we determine if cluster startup or failover.
     */

+    status.setStatus("Initializing Master file system");
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
    this.fileSystemManager = new MasterFileSystem(this, metrics);

    // publish cluster ID
+    status.setStatus("Publishing Cluster ID in ZooKeeper");
    ClusterId.setClusterId(this.zooKeeper,
        fileSystemManager.getClusterId());

@@ -407,16 +419,19 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {

    this.serverManager = new ServerManager(this, this);

+    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();

    // initialize master side coprocessors before we start handling requests
+    status.setStatus("Initializing master coprocessors");
    this.cpHost = new MasterCoprocessorHost(this, this.conf);

    // start up all service threads.
+    status.setStatus("Initializing master service threads");
    startServiceThreads();

    // Wait for region servers to report in.
-    this.serverManager.waitForRegionServers();
+    this.serverManager.waitForRegionServers(status);
    // Check zk for regionservers that are up but didn't register
    for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
      if (!this.serverManager.isServerOnline(sn)) {

@@ -427,20 +442,25 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
    }

    // TODO: Should do this in background rather than block master startup
+    status.setStatus("Splitting logs after master startup");
    this.fileSystemManager.
      splitLogAfterStartup(this.serverManager.getOnlineServers().keySet());

    // Make sure root and meta assigned before proceeding.
-    assignRootAndMeta();
+    assignRootAndMeta(status);

    // Fixup assignment manager status
+    status.setStatus("Starting assignment manager");
    this.assignmentManager.joinCluster();

    // Start balancer and meta catalog janitor after meta and regions have
    // been assigned.
+    status.setStatus("Starting balancer and catalog janitor");
    this.balancerChore = getAndStartBalancerChore(this);
    this.catalogJanitorChore =
      Threads.setDaemonThreadRunning(new CatalogJanitor(this, this));

+    status.markComplete("Initialization successful");
    LOG.info("Master has completed initialization");
    initialized = true;
  }

@@ -453,12 +473,13 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
   * @throws KeeperException
   * @return Count of regions we assigned.
   */
-  int assignRootAndMeta()
+  int assignRootAndMeta(MonitoredTask status)
  throws InterruptedException, IOException, KeeperException {
    int assigned = 0;
    long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);

    // Work on ROOT region. Is it in zk in transition?
+    status.setStatus("Assigning ROOT region");
    boolean rit = this.assignmentManager.
      processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
    if (!catalogTracker.verifyRootRegionLocation(timeout)) {

@@ -474,6 +495,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
      ", location=" + catalogTracker.getRootLocation());

    // Work on meta region
+    status.setStatus("Assigning META region");
    rit = this.assignmentManager.
      processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
    if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) {

@@ -490,6 +512,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
    }
    LOG.info(".META. assigned=" + assigned + ", rit=" + rit +
      ", location=" + catalogTracker.getMetaLocation());
+    status.setStatus("META and ROOT assigned.");
    return assigned;
  }

@@ -1101,15 +1124,21 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
    this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":"
      + this.serverName.getPort(), this);

-    if (!becomeActiveMaster()) {
+    MonitoredTask status =
+      TaskMonitor.get().createStatus("Recovering expired ZK session");
+    try {
+      if (!becomeActiveMaster(status)) {
        return false;
      }
      initializeZKBasedSystemTrackers();
      // Update in-memory structures to reflect our earlier Root/Meta assignment.
-      assignRootAndMeta();
+      assignRootAndMeta(status);
      // process RIT if any
      this.assignmentManager.processRegionsInTransition();
      return true;
+    } finally {
+      status.cleanup();
+    }
  }

  /**
@@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.client.RetriesExhaustedException;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;

 /**
  * The ServerManager class manages info about region servers.

@@ -466,7 +467,7 @@ public class ServerManager {
   * Waits for the regionservers to report in.
   * @throws InterruptedException
   */
-  public void waitForRegionServers()
+  public void waitForRegionServers(MonitoredTask status)
  throws InterruptedException {
    long interval = this.master.getConfiguration().
      getLong("hbase.master.wait.on.regionservers.interval", 3000);

@@ -477,11 +478,15 @@ public class ServerManager {
      Thread.sleep(interval);
      count = countOfRegionServers();
      if (count == oldcount && count > 0) break;

+      String msg;
      if (count == 0) {
-        LOG.info("Waiting on regionserver(s) to checkin");
+        msg = "Waiting on regionserver(s) to checkin";
      } else {
-        LOG.info("Waiting on regionserver(s) count to settle; currently=" + count);
+        msg = "Waiting on regionserver(s) count to settle; currently=" + count;
      }
+      LOG.info(msg);
+      status.setStatus(msg);
      oldcount = count;
    }
  }
@@ -35,6 +35,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Chore;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.master.SplitLogManager.TaskFinisher.Status;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
+import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.regionserver.SplitLogWorker;
 import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
 import org.apache.hadoop.hbase.regionserver.wal.OrphanHLogAfterSplitException;

@@ -183,11 +185,18 @@ public class SplitLogManager extends ZooKeeperListener {
      LOG.warn(logDir + " doesn't exist. Nothing to do!");
      return 0;
    }

+    MonitoredTask status = TaskMonitor.get().createStatus(
+        "Doing distributed log split in " + logDir);
+
+    status.setStatus("Checking directory contents...");
    FileStatus[] logfiles = fs.listStatus(logDir); // TODO filter filenames?
    if (logfiles == null || logfiles.length == 0) {
      LOG.info(logDir + " is empty dir, no logs to split");
      return 0;
    }

+    status.setStatus("Scheduling batch of logs to split");
    tot_mgr_log_split_batch_start.incrementAndGet();
    LOG.info("started splitting logs in " + logDir);
    long t = EnvironmentEdgeManager.currentTimeMillis();

@@ -205,7 +214,7 @@ public class SplitLogManager extends ZooKeeperListener {
          + lf.getPath());
      }
    }
-    waitTasks(batch);
+    waitTasks(batch, status);
    if (batch.done != batch.installed) {
      stopTrackingTasks(batch);
      tot_mgr_log_split_batch_err.incrementAndGet();

@@ -214,6 +223,8 @@ public class SplitLogManager extends ZooKeeperListener {
      throw new IOException("error or interrupt while splitting logs in "
        + logDir + " Task = " + batch);
    }

+    status.setStatus("Checking for orphaned logs in log directory...");
    if (anyNewLogFiles(logDir, logfiles)) {
      tot_mgr_new_unexpected_hlogs.incrementAndGet();
      LOG.warn("new hlogs were produced while logs in " + logDir +

@@ -221,12 +232,18 @@ public class SplitLogManager extends ZooKeeperListener {
      throw new OrphanHLogAfterSplitException();
    }
    tot_mgr_log_split_batch_success.incrementAndGet();

+    status.setStatus("Cleaning up log directory...");
    if (!fs.delete(logDir, true)) {
      throw new IOException("Unable to delete src dir: " + logDir);
    }
-    LOG.info("finished splitting (more than or equal to) " + totalSize +
+    String msg = "finished splitting (more than or equal to) " + totalSize +
      " bytes in " + batch.installed + " log files in " + logDir + " in " +
-      (EnvironmentEdgeManager.currentTimeMillis() - t) + "ms");
+      (EnvironmentEdgeManager.currentTimeMillis() - t) + "ms";
+    status.markComplete(msg);
+    LOG.info(msg);

    return totalSize;
  }

@@ -244,10 +261,14 @@ public class SplitLogManager extends ZooKeeperListener {
      return false;
    }

-  private void waitTasks(TaskBatch batch) {
+  private void waitTasks(TaskBatch batch, MonitoredTask status) {
    synchronized (batch) {
      while ((batch.done + batch.error) != batch.installed) {
        try {
+          status.setStatus("Waiting for distributed tasks to finish. "
+              + " scheduled=" + batch.installed
+              + " done=" + batch.done
+              + " error=" + batch.error);
          batch.wait(100);
          if (stopper.isStopped()) {
            LOG.warn("Stopped while waiting for log splits to be completed");
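The waitTasks() change above shows the recurring pattern of refreshing a task's status from inside a polling loop. A standalone sketch of that pattern, assuming a hypothetical AtomicBoolean completion flag (none of these names come from the patch):

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class PollingStatusExample {
  /** Waits for a background flag to flip, publishing progress while it waits. */
  public void waitForCompletion(AtomicBoolean done) throws InterruptedException {
    MonitoredTask status = TaskMonitor.get().createStatus("Waiting on background work");
    try {
      while (!done.get()) {
        // Each call replaces the previous status string, like waitTasks() above.
        status.setStatus("Still waiting for background work to finish");
        Thread.sleep(100);  // same 100ms polling cadence as batch.wait(100) above
      }
      status.markComplete("Background work finished");
    } finally {
      status.cleanup();  // aborts the task if we exited the loop abnormally
    }
  }
}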
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+public interface MonitoredTask {
+  enum State {
+    RUNNING,
+    COMPLETE,
+    ABORTED;
+  }
+
+  public abstract long getStartTime();
+
+  public abstract String getDescription();
+
+  public abstract String getStatus();
+
+  public abstract State getState();
+
+  public abstract long getCompletionTimestamp();
+
+  public abstract void markComplete(String msg);
+  public abstract void abort(String msg);
+
+  public abstract void setStatus(String status);
+
+  public abstract void setDescription(String description);
+
+  /**
+   * Explicitly mark this status as able to be cleaned up,
+   * even though it might not be complete.
+   */
+  public abstract void cleanup();
+
+}
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * [standard Apache License, Version 2.0 file header, identical to the one in
+ *  the MonitoredTask interface above]
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+import com.google.common.annotations.VisibleForTesting;
+
+class MonitoredTaskImpl implements MonitoredTask {
+  private long startTime;
+  private long completionTimestamp = -1;
+
+  private String status;
+  private String description;
+
+  private State state = State.RUNNING;
+
+  public MonitoredTaskImpl() {
+    startTime = System.currentTimeMillis();
+  }
+
+  @Override
+  public long getStartTime() {
+    return startTime;
+  }
+
+  @Override
+  public String getDescription() {
+    return description;
+  }
+
+  @Override
+  public String getStatus() {
+    return status;
+  }
+
+  @Override
+  public State getState() {
+    return state;
+  }
+
+  @Override
+  public long getCompletionTimestamp() {
+    return completionTimestamp;
+  }
+
+  @Override
+  public void markComplete(String status) {
+    state = State.COMPLETE;
+    setStatus(status);
+    completionTimestamp = System.currentTimeMillis();
+  }
+
+  @Override
+  public void abort(String msg) {
+    setStatus(msg);
+    state = State.ABORTED;
+    completionTimestamp = System.currentTimeMillis();
+  }
+
+  @Override
+  public void setStatus(String status) {
+    this.status = status;
+  }
+
+  @Override
+  public void setDescription(String description) {
+    this.description = description;
+  }
+
+  @Override
+  public void cleanup() {
+    if (state == State.RUNNING) {
+      state = State.ABORTED;
+      completionTimestamp = System.currentTimeMillis();
+    }
+  }
+
+  /**
+   * Force the completion timestamp backwards so that
+   * it expires now.
+   */
+  @VisibleForTesting
+  void expireNow() {
+    completionTimestamp -= 180 * 1000;
+  }
+}
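MonitoredTaskImpl above is package-private, so its state transitions are easiest to exercise from the same package. A minimal JUnit 4 sketch (the test class is hypothetical, not part of this commit):

package org.apache.hadoop.hbase.monitoring;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.junit.Test;

public class TestMonitoredTaskImpl {
  @Test
  public void testLifecycle() {
    MonitoredTaskImpl task = new MonitoredTaskImpl();
    task.setDescription("example task");
    assertEquals(MonitoredTask.State.RUNNING, task.getState());
    assertEquals(-1L, task.getCompletionTimestamp());

    task.setStatus("working");
    assertEquals("working", task.getStatus());

    task.markComplete("done");
    assertEquals(MonitoredTask.State.COMPLETE, task.getState());
    assertTrue(task.getCompletionTimestamp() > 0);

    // cleanup() only aborts tasks that are still RUNNING, so a completed
    // task keeps its COMPLETE state.
    task.cleanup();
    assertEquals(MonitoredTask.State.COMPLETE, task.getState());
  }
}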
@@ -0,0 +1,176 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * [standard Apache License, Version 2.0 file header, identical to the one in
+ *  the MonitoredTask interface above]
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+import java.lang.ref.WeakReference;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
+/**
+ * Singleton which keeps track of tasks going on in this VM.
+ * A Task here is anything which takes more than a few seconds
+ * and the user might want to inquire about the status
+ */
+public class TaskMonitor {
+  private static final Log LOG = LogFactory.getLog(TaskMonitor.class);
+
+  // Don't keep around any tasks that have completed more than
+  // 60 seconds ago
+  private static final long EXPIRATION_TIME = 60*1000;
+
+  @VisibleForTesting
+  static final int MAX_TASKS = 1000;
+
+  private static TaskMonitor instance;
+  private List<TaskAndWeakRefPair> tasks =
+    Lists.newArrayList();
+
+  /**
+   * Get singleton instance.
+   * TODO this would be better off scoped to a single daemon
+   */
+  public static synchronized TaskMonitor get() {
+    if (instance == null) {
+      instance = new TaskMonitor();
+    }
+    return instance;
+  }
+
+  public MonitoredTask createStatus(String description) {
+    MonitoredTask stat = new MonitoredTaskImpl();
+    stat.setDescription(description);
+    MonitoredTask proxy = (MonitoredTask) Proxy.newProxyInstance(
+        stat.getClass().getClassLoader(),
+        new Class<?>[] { MonitoredTask.class },
+        new PassthroughInvocationHandler<MonitoredTask>(stat));
+
+    TaskAndWeakRefPair pair = new TaskAndWeakRefPair(stat, proxy);
+    tasks.add(pair);
+    return proxy;
+  }
+
+  private synchronized void purgeExpiredTasks() {
+    int size = 0;
+
+    for (Iterator<TaskAndWeakRefPair> it = tasks.iterator();
+         it.hasNext();) {
+      TaskAndWeakRefPair pair = it.next();
+      MonitoredTask stat = pair.get();
+
+      if (pair.isDead()) {
+        // The class who constructed this leaked it. So we can
+        // assume it's done.
+        if (stat.getState() == MonitoredTaskImpl.State.RUNNING) {
+          LOG.warn("Status " + stat + " appears to have been leaked");
+          stat.cleanup();
+        }
+      }
+
+      if (canPurge(stat)) {
+        it.remove();
+      } else {
+        size++;
+      }
+    }
+
+    if (size > MAX_TASKS) {
+      LOG.warn("Too many actions in action monitor! Purging some.");
+      tasks = tasks.subList(size - MAX_TASKS, size);
+    }
+  }
+
+  public synchronized List<MonitoredTask> getTasks() {
+    purgeExpiredTasks();
+    ArrayList<MonitoredTask> ret = Lists.newArrayListWithCapacity(tasks.size());
+    for (TaskAndWeakRefPair pair : tasks) {
+      ret.add(pair.get());
+    }
+    return ret;
+  }
+
+  private boolean canPurge(MonitoredTask stat) {
+    long cts = stat.getCompletionTimestamp();
+    return (cts > 0 && System.currentTimeMillis() - cts > EXPIRATION_TIME);
+  }
+
+  /**
+   * This class encapsulates an object as well as a weak reference to a proxy
+   * that passes through calls to that object. In art form:
+   * <code>
+   *     Proxy  <------------------
+   *       |                       \
+   *       v                        \
+   * PassthroughInvocationHandler   |  weak reference
+   *       |                       /
+   * MonitoredTaskImpl            /
+   *       |                     /
+   * StatAndWeakRefProxy  ------/
+   *
+   * Since we only return the Proxy to the creator of the MonitorableStatus,
+   * this means that they can leak that object, and we'll detect it
+   * since our weak reference will go null. But, we still have the actual
+   * object, so we can log it and display it as a leaked (incomplete) action.
+   */
+  private static class TaskAndWeakRefPair {
+    private MonitoredTask impl;
+    private WeakReference<MonitoredTask> weakProxy;
+
+    public TaskAndWeakRefPair(MonitoredTask stat,
+        MonitoredTask proxy) {
+      this.impl = stat;
+      this.weakProxy = new WeakReference<MonitoredTask>(proxy);
+    }
+
+    public MonitoredTask get() {
+      return impl;
+    }
+
+    public boolean isDead() {
+      return weakProxy.get() == null;
+    }
+  }
+
+  /**
+   * An InvocationHandler that simply passes through calls to the original object.
+   */
+  private static class PassthroughInvocationHandler<T> implements InvocationHandler {
+    private T delegatee;
+
+    public PassthroughInvocationHandler(T delegatee) {
+      this.delegatee = delegatee;
+    }
+
+    @Override
+    public Object invoke(Object proxy, Method method, Object[] args)
+        throws Throwable {
+      return method.invoke(delegatee, args);
+    }
+  }
+}
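TaskMonitor exposes the currently tracked tasks through getTasks(); a small sketch of how a diagnostic dump could enumerate them (the TaskDumpExample class is illustrative only, not part of this commit):

import java.util.List;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class TaskDumpExample {
  public static void dumpTasks() {
    List<MonitoredTask> tasks = TaskMonitor.get().getTasks();
    for (MonitoredTask t : tasks) {
      // Each entry reports its description, lifecycle state and latest status.
      System.out.println(t.getDescription()
          + " [" + t.getState() + "]: " + t.getStatus()
          + " (started at " + t.getStartTime() + ")");
    }
  }
}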
@@ -83,6 +83,8 @@ import org.apache.hadoop.hbase.io.TimeRange;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
 import org.apache.hadoop.hbase.ipc.HBaseRPC;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
+import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;

@@ -349,7 +351,12 @@ public class HRegion implements HeapSize { // , Writable{
   */
  public long initialize(final CancelableProgressable reporter)
  throws IOException {
+
+    MonitoredTask status = TaskMonitor.get().createStatus(
+        "Initializing region " + this);
+
    if (coprocessorHost != null) {
+      status.setStatus("Running coprocessor pre-open hook");
      coprocessorHost.preOpen();
    }
    // A region can be reopened if failed a split; reset flags

@@ -357,14 +364,17 @@ public class HRegion implements HeapSize { // , Writable{
    this.closed.set(false);

    // Write HRI to a file in case we need to recover .META.
+    status.setStatus("Writing region info on filesystem");
    checkRegioninfoOnFilesystem();

    // Remove temporary data left over from old regions
+    status.setStatus("Cleaning up temporary data from old regions");
    cleanupTmpDir();

    // Load in all the HStores.  Get maximum seqid.
    long maxSeqId = -1;
    for (HColumnDescriptor c : this.regionInfo.getTableDesc().getFamilies()) {
+      status.setStatus("Instantiating store for column family " + c);
      Store store = instantiateHStore(this.tableDir, c);
      this.stores.put(c.getName(), store);
      long storeSeqId = store.getMaxSequenceId();

@@ -373,8 +383,10 @@ public class HRegion implements HeapSize { // , Writable{
      }
    }
    // Recover any edits if available.
-    maxSeqId = replayRecoveredEditsIfAny(this.regiondir, maxSeqId, reporter);
+    maxSeqId = replayRecoveredEditsIfAny(
+        this.regiondir, maxSeqId, reporter, status);

+    status.setStatus("Cleaning up detritus from prior splits");
    // Get rid of any splits or merges that were lost in-progress.  Clean out
    // these directories here on open.  We may be opening a region that was
    // being split but we crashed in the middle of it all.

@@ -390,9 +402,12 @@ public class HRegion implements HeapSize { // , Writable{
    long nextSeqid = maxSeqId + 1;
    LOG.info("Onlined " + this.toString() + "; next sequenceid=" + nextSeqid);

    if (coprocessorHost != null) {
+      status.setStatus("Running coprocessor post-open hooks");
      coprocessorHost.postOpen();
    }
+    status.markComplete("Region opened successfully");
    return nextSeqid;
  }

@@ -556,12 +571,22 @@ public class HRegion implements HeapSize { // , Writable{
  public List<StoreFile> close(final boolean abort) throws IOException {
    // Only allow one thread to close at a time. Serialize them so dual
    // threads attempting to close will run up against each other.
+    MonitoredTask status = TaskMonitor.get().createStatus(
+        "Closing region " + this +
+        (abort ? " due to abort" : ""));
+
+    status.setStatus("Waiting for close lock");
+    try {
      synchronized (closeLock) {
-      return doClose(abort);
+        return doClose(abort, status);
+      }
+    } finally {
+      status.cleanup();
    }
  }

-  private List<StoreFile> doClose(final boolean abort)
+  private List<StoreFile> doClose(
+      final boolean abort, MonitoredTask status)
  throws IOException {
    if (isClosed()) {
      LOG.warn("Region " + this + " already closed");

@@ -569,9 +594,11 @@ public class HRegion implements HeapSize { // , Writable{
    }

    if (coprocessorHost != null) {
+      status.setStatus("Running coprocessor pre-close hooks");
      this.coprocessorHost.preClose(abort);
    }

+    status.setStatus("Disabling compacts and flushes for region");
    boolean wasFlushing = false;
    synchronized (writestate) {
      // Disable compacting and flushing by background threads for this

@@ -596,20 +623,24 @@ public class HRegion implements HeapSize { // , Writable{
    // that will clear out of the bulk of the memstore before we put up
    // the close flag?
    if (!abort && !wasFlushing && worthPreFlushing()) {
+      status.setStatus("Pre-flushing region before close");
      LOG.info("Running close preflush of " + this.getRegionNameAsString());
-      internalFlushcache();
+      internalFlushcache(status);
    }

    this.closing.set(true);
+    status.setStatus("Disabling writes for close");
    lock.writeLock().lock();
    try {
      if (this.isClosed()) {
+        status.abort("Already got closed by another process");
        // SplitTransaction handles the null
        return null;
      }
      LOG.debug("Updates disabled for region " + this);
      // Don't flush the cache if we are aborting
      if (!abort) {
-        internalFlushcache();
+        internalFlushcache(status);
      }

      List<StoreFile> result = new ArrayList<StoreFile>();

@@ -619,8 +650,10 @@ public class HRegion implements HeapSize { // , Writable{
      this.closed.set(true);

      if (coprocessorHost != null) {
+        status.setStatus("Running coprocessor post-close hooks");
        this.coprocessorHost.postClose(abort);
      }
+      status.markComplete("Closed");
      LOG.info("Closed " + this);
      return result;
    } finally {

@@ -824,6 +857,8 @@ public class HRegion implements HeapSize { // , Writable{
    lock.readLock().lock();
    this.lastCompactInfo = null;
    byte [] splitRow = null;
+    MonitoredTask status = TaskMonitor.get().createStatus(
+        "Compacting stores in " + this);
    try {
      if (this.closed.get()) {
        LOG.debug("Skipping compaction on " + this + " because closed");

@@ -833,6 +868,7 @@ public class HRegion implements HeapSize { // , Writable{
        return splitRow;
      }
      if (coprocessorHost != null) {
+        status.setStatus("Running coprocessor preCompact hooks");
        coprocessorHost.preCompact(false);
      }
      try {

@@ -840,9 +876,11 @@ public class HRegion implements HeapSize { // , Writable{
        if (!writestate.compacting && writestate.writesEnabled) {
          writestate.compacting = true;
        } else {
-          LOG.info("NOT compacting region " + this +
+          String msg = "NOT compacting region " + this +
            ": compacting=" + writestate.compacting + ", writesEnabled=" +
-            writestate.writesEnabled);
+            writestate.writesEnabled;
+          LOG.info(msg);
+          status.abort(msg);
          return splitRow;
        }
      }

@@ -852,6 +890,7 @@ public class HRegion implements HeapSize { // , Writable{
      long lastCompactSize = 0;
      boolean completed = false;
      try {
+        status.setStatus("Compacting store " + store);
        final Store.StoreSize ss = store.compact();
        lastCompactSize += store.getLastCompactSize();
        if (ss != null) {

@@ -868,6 +907,9 @@ public class HRegion implements HeapSize { // , Writable{
        if (completed) {
          this.lastCompactInfo =
            new Pair<Long,Long>((now - startTime) / 1000, lastCompactSize);
+          status.setStatus("Compaction complete: " +
+              StringUtils.humanReadableInt(lastCompactSize) + " in " +
+              (now - startTime) + "ms");
        }
      }
    } finally {

@@ -877,9 +919,13 @@ public class HRegion implements HeapSize { // , Writable{
        }
      }
      if (coprocessorHost != null) {
+        status.setStatus("Running coprocessor post-compact hooks");
        coprocessorHost.postCompact(splitRow != null);
      }

+      status.markComplete("Compaction complete");
    } finally {
+      status.cleanup();
      lock.readLock().unlock();
    }
    if (splitRow != null) {

@@ -915,13 +961,17 @@ public class HRegion implements HeapSize { // , Writable{
      LOG.debug("Skipping flush on " + this + " because closing");
      return false;
    }
+    MonitoredTask status = TaskMonitor.get().createStatus("Flushing " + this);
+    status.setStatus("Acquiring readlock on region");
    lock.readLock().lock();
    try {
      if (this.closed.get()) {
        LOG.debug("Skipping flush on " + this + " because closed");
+        status.abort("Skipped: closed");
        return false;
      }
      if (coprocessorHost != null) {
+        status.setStatus("Running coprocessor pre-flush hooks");
        coprocessorHost.preFlush();
      }
      try {

@@ -935,13 +985,19 @@ public class HRegion implements HeapSize { // , Writable{
            writestate.flushing + ", writesEnabled=" +
            writestate.writesEnabled);
        }
+        status.abort("Not flushing since " +
+            (writestate.flushing ? "already flushing" : "writes not enabled"));
        return false;
      }
    }
-    boolean result = internalFlushcache();
+    boolean result = internalFlushcache(status);

    if (coprocessorHost != null) {
+      status.setStatus("Running post-flush coprocessor hooks");
      coprocessorHost.postFlush();
    }

+    status.markComplete("Flush successful");
    return result;
    } finally {
      synchronized (writestate) {

@@ -952,6 +1008,7 @@ public class HRegion implements HeapSize { // , Writable{
      }
    } finally {
      lock.readLock().unlock();
+      status.cleanup();
    }
  }

@@ -982,6 +1039,7 @@ public class HRegion implements HeapSize { // , Writable{
   * routes.
   *
   * <p> This method may block for some time.
+  * @param status
   *
   * @return true if the region needs compacting
   *

@@ -989,19 +1047,21 @@ public class HRegion implements HeapSize { // , Writable{
   * @throws DroppedSnapshotException Thrown when replay of hlog is required
   * because a Snapshot was not properly persisted.
   */
-  protected boolean internalFlushcache() throws IOException {
-    return internalFlushcache(this.log, -1);
+  protected boolean internalFlushcache(MonitoredTask status) throws IOException {
+    return internalFlushcache(this.log, -1, status);
  }

  /**
   * @param wal Null if we're NOT to go via hlog/wal.
   * @param myseqid The seqid to use if <code>wal</code> is null writing out
   * flush file.
+  * @param status
   * @return true if the region needs compacting
   * @throws IOException
   * @see #internalFlushcache()
   */
-  protected boolean internalFlushcache(final HLog wal, final long myseqid)
+  protected boolean internalFlushcache(
+      final HLog wal, final long myseqid, MonitoredTask status)
  throws IOException {
    final long startTime = EnvironmentEdgeManager.currentTimeMillis();
    // Clear flush flag.

@@ -1031,7 +1091,9 @@ public class HRegion implements HeapSize { // , Writable{
    // We have to take a write lock during snapshot, or else a write could
    // end up in both snapshot and memstore (makes it difficult to do atomic
    // rows then)
+    status.setStatus("Obtaining lock to block concurrent updates");
    this.updatesLock.writeLock().lock();
+    status.setStatus("Preparing to flush by snapshotting stores");
    final long currentMemStoreSize = this.memstoreSize.get();
    List<StoreFlusher> storeFlushers = new ArrayList<StoreFlusher>(stores.size());
    try {

@@ -1049,6 +1111,7 @@ public class HRegion implements HeapSize { // , Writable{
    } finally {
      this.updatesLock.writeLock().unlock();
    }
+    status.setStatus("Flushing stores");

    LOG.debug("Finished snapshotting, commencing flushing stores");

@@ -1063,7 +1126,7 @@ public class HRegion implements HeapSize { // , Writable{
    // just-made new flush store file.

    for (StoreFlusher flusher : storeFlushers) {
-      flusher.flushCache();
+      flusher.flushCache(status);
    }
    // Switch snapshot (in memstore) -> new hfile (thus causing
    // all the store scanners to reset/reseek).

@@ -1088,6 +1151,7 @@ public class HRegion implements HeapSize { // , Writable{
      DroppedSnapshotException dse = new DroppedSnapshotException("region: " +
        Bytes.toStringBinary(getRegionName()));
      dse.initCause(t);
+      status.abort("Flush failed: " + StringUtils.stringifyException(t));
      throw dse;
    }

@@ -1111,13 +1175,13 @@ public class HRegion implements HeapSize { // , Writable{
    }

    long time = EnvironmentEdgeManager.currentTimeMillis() - startTime;
-    if (LOG.isDebugEnabled()) {
-      LOG.info("Finished memstore flush of ~" +
+    String msg = "Finished memstore flush of ~" +
      StringUtils.humanReadableInt(currentMemStoreSize) + " for region " +
      this + " in " + time + "ms, sequenceid=" + sequenceId +
      ", compaction requested=" + compactionRequested +
-      ((wal == null)? "; wal=null": ""));
-    }
+      ((wal == null)? "; wal=null": "");
+    LOG.info(msg);
+    status.setStatus(msg);
    this.recentFlushes.add(new Pair<Long,Long>(time/1000,currentMemStoreSize));

    return compactionRequested;

@@ -2020,7 +2084,8 @@ public class HRegion implements HeapSize { // , Writable{
   * @throws IOException
   */
  protected long replayRecoveredEditsIfAny(final Path regiondir,
-    final long minSeqId, final CancelableProgressable reporter)
+    final long minSeqId, final CancelableProgressable reporter,
+    final MonitoredTask status)
  throws UnsupportedEncodingException, IOException {
    long seqid = minSeqId;
    NavigableSet<Path> files = HLog.getSplitEditFilesSorted(this.fs, regiondir);

@@ -2046,7 +2111,7 @@ public class HRegion implements HeapSize { // , Writable{
    }
    if (seqid > minSeqId) {
      // Then we added some edits to memory. Flush and cleanup split edit files.
|
// Then we added some edits to memory. Flush and cleanup split edit files.
|
||||||
internalFlushcache(null, seqid);
|
internalFlushcache(null, seqid, status);
|
||||||
}
|
}
|
||||||
// Now delete the content of recovered edits. We're done w/ them.
|
// Now delete the content of recovered edits. We're done w/ them.
|
||||||
for (Path file: files) {
|
for (Path file: files) {
|
||||||
|
@ -2071,7 +2136,11 @@ public class HRegion implements HeapSize { // , Writable{
|
||||||
private long replayRecoveredEdits(final Path edits,
|
private long replayRecoveredEdits(final Path edits,
|
||||||
final long minSeqId, final CancelableProgressable reporter)
|
final long minSeqId, final CancelableProgressable reporter)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
LOG.info("Replaying edits from " + edits + "; minSequenceid=" + minSeqId);
|
String msg = "Replaying edits from " + edits + "; minSequenceid=" + minSeqId;
|
||||||
|
LOG.info(msg);
|
||||||
|
MonitoredTask status = TaskMonitor.get().createStatus(msg);
|
||||||
|
|
||||||
|
status.setStatus("Opening logs");
|
||||||
HLog.Reader reader = HLog.getReader(this.fs, edits, conf);
|
HLog.Reader reader = HLog.getReader(this.fs, edits, conf);
|
||||||
try {
|
try {
|
||||||
long currentEditSeqId = minSeqId;
|
long currentEditSeqId = minSeqId;
|
||||||
|
@ -2103,10 +2172,14 @@ public class HRegion implements HeapSize { // , Writable{
|
||||||
intervalEdits = 0;
|
intervalEdits = 0;
|
||||||
long cur = EnvironmentEdgeManager.currentTimeMillis();
|
long cur = EnvironmentEdgeManager.currentTimeMillis();
|
||||||
if (lastReport + period <= cur) {
|
if (lastReport + period <= cur) {
|
||||||
|
status.setStatus("Replaying edits..." +
|
||||||
|
" skipped=" + skippedEdits +
|
||||||
|
" edits=" + editsCount);
|
||||||
// Timeout reached
|
// Timeout reached
|
||||||
if(!reporter.progress()) {
|
if(!reporter.progress()) {
|
||||||
String msg = "Progressable reporter failed, stopping replay";
|
msg = "Progressable reporter failed, stopping replay";
|
||||||
LOG.warn(msg);
|
LOG.warn(msg);
|
||||||
|
status.abort(msg);
|
||||||
throw new IOException(msg);
|
throw new IOException(msg);
|
||||||
}
|
}
|
||||||
lastReport = cur;
|
lastReport = cur;
|
||||||
|
@ -2117,6 +2190,7 @@ public class HRegion implements HeapSize { // , Writable{
|
||||||
// Start coprocessor replay here. The coprocessor is for each WALEdit
|
// Start coprocessor replay here. The coprocessor is for each WALEdit
|
||||||
// instead of a KeyValue.
|
// instead of a KeyValue.
|
||||||
if (coprocessorHost != null) {
|
if (coprocessorHost != null) {
|
||||||
|
status.setStatus("Running pre-WAL-restore hook in coprocessors");
|
||||||
if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
|
if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
|
||||||
// if bypass this log entry, ignore it ...
|
// if bypass this log entry, ignore it ...
|
||||||
continue;
|
continue;
|
||||||
|
@ -2158,7 +2232,7 @@ public class HRegion implements HeapSize { // , Writable{
|
||||||
flush = restoreEdit(store, kv);
|
flush = restoreEdit(store, kv);
|
||||||
editsCount++;
|
editsCount++;
|
||||||
}
|
}
|
||||||
if (flush) internalFlushcache(null, currentEditSeqId);
|
if (flush) internalFlushcache(null, currentEditSeqId, status);
|
||||||
|
|
||||||
if (coprocessorHost != null) {
|
if (coprocessorHost != null) {
|
||||||
coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
|
coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
|
||||||
|
@ -2166,30 +2240,39 @@ public class HRegion implements HeapSize { // , Writable{
|
||||||
}
|
}
|
||||||
} catch (EOFException eof) {
|
} catch (EOFException eof) {
|
||||||
Path p = HLog.moveAsideBadEditsFile(fs, edits);
|
Path p = HLog.moveAsideBadEditsFile(fs, edits);
|
||||||
LOG.warn("Encountered EOF. Most likely due to Master failure during " +
|
msg = "Encountered EOF. Most likely due to Master failure during " +
|
||||||
"log spliting, so we have this data in another edit. " +
|
"log spliting, so we have this data in another edit. " +
|
||||||
"Continuing, but renaming " + edits + " as " + p, eof);
|
"Continuing, but renaming " + edits + " as " + p;
|
||||||
|
LOG.warn(msg, eof);
|
||||||
|
status.abort(msg);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
// If the IOE resulted from bad file format,
|
// If the IOE resulted from bad file format,
|
||||||
// then this problem is idempotent and retrying won't help
|
// then this problem is idempotent and retrying won't help
|
||||||
if (ioe.getCause() instanceof ParseException) {
|
if (ioe.getCause() instanceof ParseException) {
|
||||||
Path p = HLog.moveAsideBadEditsFile(fs, edits);
|
Path p = HLog.moveAsideBadEditsFile(fs, edits);
|
||||||
LOG.warn("File corruption encountered! " +
|
msg = "File corruption encountered! " +
|
||||||
"Continuing, but renaming " + edits + " as " + p, ioe);
|
"Continuing, but renaming " + edits + " as " + p;
|
||||||
|
LOG.warn(msg, ioe);
|
||||||
|
status.setStatus(msg);
|
||||||
} else {
|
} else {
|
||||||
|
status.abort(StringUtils.stringifyException(ioe));
|
||||||
// other IO errors may be transient (bad network connection,
|
// other IO errors may be transient (bad network connection,
|
||||||
// checksum exception on one datanode, etc). throw & retry
|
// checksum exception on one datanode, etc). throw & retry
|
||||||
throw ioe;
|
throw ioe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +
|
msg = "Applied " + editsCount + ", skipped " + skippedEdits +
|
||||||
", firstSequenceidInLog=" + firstSeqIdInLog +
|
", firstSequenceidInLog=" + firstSeqIdInLog +
|
||||||
", maxSequenceidInLog=" + currentEditSeqId);
|
", maxSequenceidInLog=" + currentEditSeqId;
|
||||||
|
status.markComplete(msg);
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug(msg);
|
||||||
}
|
}
|
||||||
return currentEditSeqId;
|
return currentEditSeqId;
|
||||||
} finally {
|
} finally {
|
||||||
reader.close();
|
reader.close();
|
||||||
|
status.cleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
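Taken together, the HRegion changes above follow one MonitoredTask lifecycle: create a status, narrate each phase with setStatus(), finish with markComplete() or abort(), and always cleanup() in a finally block. The following is an illustrative sketch of that pattern only, not part of this commit; the TaskMonitor/MonitoredTask calls are the ones visible in the diff, while the class name and runSomeWork() are hypothetical stand-ins for real region work.

// Illustrative sketch (not part of this patch) of the MonitoredTask lifecycle
// that internalFlushcache() and replayRecoveredEdits() follow above.
import java.io.IOException;

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class MonitoredWorkExample {
  public void runSomeWork() throws IOException {
    // Register the action with the global TaskMonitor so it shows up as in progress
    MonitoredTask status = TaskMonitor.get().createStatus("Doing some work");
    try {
      status.setStatus("Step 1 of 2");
      // ... do the first step ...
      status.setStatus("Step 2 of 2");
      // ... do the second step ...
      status.markComplete("Work finished");
    } catch (IOException ioe) {
      // Record the failure so the task shows as aborted rather than leaked
      status.abort("Work failed: " + ioe.getMessage());
      throw ioe;
    } finally {
      // Always release the task, mirroring the finally blocks added above
      status.cleanup();
    }
  }
}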
@@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.io.hfile.Compression;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -448,11 +449,13 @@ public class Store implements HeapSize {
    */
   private StoreFile flushCache(final long logCacheFlushId,
       SortedSet<KeyValue> snapshot,
-      TimeRangeTracker snapshotTimeRangeTracker) throws IOException {
+      TimeRangeTracker snapshotTimeRangeTracker,
+      MonitoredTask status) throws IOException {
     // If an exception happens flushing, we let it out without clearing
     // the memstore snapshot. The old snapshot will be returned when we say
     // 'snapshot', the next time flush comes around.
-    return internalFlushCache(snapshot, logCacheFlushId, snapshotTimeRangeTracker);
+    return internalFlushCache(
+        snapshot, logCacheFlushId, snapshotTimeRangeTracker, status);
   }

   /*
@@ -463,7 +466,8 @@ public class Store implements HeapSize {
    */
   private StoreFile internalFlushCache(final SortedSet<KeyValue> set,
       final long logCacheFlushId,
-      TimeRangeTracker snapshotTimeRangeTracker)
+      TimeRangeTracker snapshotTimeRangeTracker,
+      MonitoredTask status)
       throws IOException {
     StoreFile.Writer writer = null;
     long flushed = 0;
@@ -476,6 +480,7 @@ public class Store implements HeapSize {
     // flush to list of store files. Add cleanup of anything put on filesystem
     // if we fail.
     synchronized (flushLock) {
+      status.setStatus("Flushing " + this + ": creating writer");
       // A. Write the map out to the disk
       writer = createWriterInTmp(set.size());
       writer.setTimeRangeTracker(snapshotTimeRangeTracker);
@@ -491,18 +496,23 @@ public class Store implements HeapSize {
       } finally {
         // Write out the log sequence number that corresponds to this output
         // hfile. The hfile is current up to and including logCacheFlushId.
+        status.setStatus("Flushing " + this + ": appending metadata");
         writer.appendMetadata(logCacheFlushId, false);
+        status.setStatus("Flushing " + this + ": closing flushed file");
         writer.close();
       }
     }

     // Write-out finished successfully, move into the right spot
     Path dstPath = StoreFile.getUniqueFile(fs, homedir);
-    LOG.info("Renaming flushed file at " + writer.getPath() + " to " + dstPath);
+    String msg = "Renaming flushed file at " + writer.getPath() + " to " + dstPath;
+    LOG.info(msg);
+    status.setStatus("Flushing " + this + ": " + msg);
     if (!fs.rename(writer.getPath(), dstPath)) {
       LOG.warn("Unable to rename " + writer.getPath() + " to " + dstPath);
     }

+    status.setStatus("Flushing " + this + ": reopening flushed file");
     StoreFile sf = new StoreFile(this.fs, dstPath, blockcache,
         this.conf, this.family.getBloomFilterType(), this.inMemory);
     StoreFile.Reader r = sf.createReader();
@@ -1593,8 +1603,9 @@ public class Store implements HeapSize {
     }

     @Override
-    public void flushCache() throws IOException {
-      storeFile = Store.this.flushCache(cacheFlushId, snapshot, snapshotTimeRangeTracker);
+    public void flushCache(MonitoredTask status) throws IOException {
+      storeFile = Store.this.flushCache(
+          cacheFlushId, snapshot, snapshotTimeRangeTracker, status);
     }

     @Override

@@ -21,6 +21,8 @@ package org.apache.hadoop.hbase.regionserver;

 import java.io.IOException;

+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
+
 /**
  * A package protected interface for a store flushing.
  * A store flusher carries the state required to prepare/flush/commit the
@@ -45,7 +47,7 @@ interface StoreFlusher {
   *
   * @throws IOException in case the flush fails
   */
-  void flushCache() throws IOException;
+  void flushCache(MonitoredTask status) throws IOException;

  /**
   * Commit the flush - add the store file to the store and clear the

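The interface change above means every store flusher now receives the task it should report into. A hedged sketch of the caller side follows, modeled on the prepare/flushCache/commit sequence used by HRegion.internalFlushcache() and by TestStore.flushStore() later in this diff; the class name and method are illustrative, and it is placed in the regionserver package because StoreFlusher is package protected.

package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

// Sketch only: drives the three-phase flush while threading a MonitoredTask
// through the middle phase so the UI can show which store is flushing and why.
public class StoreFlushExample {
  static void flushWithStatus(Store store, long flushSeqId) throws IOException {
    MonitoredTask status = TaskMonitor.get().createStatus("Flushing " + store);
    try {
      StoreFlusher flusher = store.getStoreFlusher(flushSeqId);
      flusher.prepare();           // snapshot the memstore
      flusher.flushCache(status);  // write the snapshot out, reporting progress
      flusher.commit();            // swap the new store file into the store
      status.markComplete("Flush complete");
    } finally {
      status.cleanup();
    }
  }
}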
@@ -46,6 +46,8 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.master.SplitLogManager.TaskFinisher.Status;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
+import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
@@ -70,7 +72,6 @@ import com.google.common.collect.Lists;
  * region to replay on startup. Delete the old log files when finished.
  */
 public class HLogSplitter {
-
   private static final String LOG_SPLITTER_IMPL = "hbase.hlog.splitter.impl";

   /**
@@ -107,6 +108,8 @@ public class HLogSplitter {
   // consumed by the reader thread, or an exception occurred
   Object dataAvailable = new Object();

+  private MonitoredTask status;
+

   /**
    * Create a new HLogSplitter using the given {@link Configuration} and the
@@ -179,10 +182,16 @@
       "An HLogSplitter instance may only be used once");
     hasSplit = true;

+    status = TaskMonitor.get().createStatus(
+        "Splitting logs in " + srcDir);
+
     long startTime = EnvironmentEdgeManager.currentTimeMillis();
+
+    status.setStatus("Determining files to split...");
     List<Path> splits = null;
     if (!fs.exists(srcDir)) {
       // Nothing to do
+      status.markComplete("No log directory existed to split.");
       return splits;
     }
     FileStatus[] logfiles = fs.listStatus(srcDir);
@@ -190,16 +199,21 @@
       // Nothing to do
       return splits;
     }
-    LOG.info("Splitting " + logfiles.length + " hlog(s) in "
+    logAndReport("Splitting " + logfiles.length + " hlog(s) in "
       + srcDir.toString());
     splits = splitLog(logfiles);

     splitTime = EnvironmentEdgeManager.currentTimeMillis() - startTime;
-    LOG.info("hlog file splitting completed in " + splitTime +
+    logAndReport("hlog file splitting completed in " + splitTime +
         " ms for " + srcDir.toString());
     return splits;
   }

+  private void logAndReport(String msg) {
+    status.setStatus(msg);
+    LOG.info(msg);
+  }
+
   /**
    * @return time that this split took
    */
@@ -252,6 +266,7 @@

     boolean skipErrors = conf.getBoolean("hbase.hlog.split.skip.errors", true);

+    long totalBytesToSplit = countTotalBytes(logfiles);
     splitSize = 0;

     outputSink.startWriterThreads(entryBuffers);
@@ -262,7 +277,7 @@
       Path logPath = log.getPath();
       long logLength = log.getLen();
       splitSize += logLength;
-      LOG.debug("Splitting hlog " + (i++ + 1) + " of " + logfiles.length
+      logAndReport("Splitting hlog " + (i++ + 1) + " of " + logfiles.length
           + ": " + logPath + ", length=" + logLength);
       Reader in;
       try {
@@ -284,19 +299,35 @@
           continue;
         }
       }
+      status.setStatus("Log splits complete. Checking for orphaned logs.");
+
       if (fs.listStatus(srcDir).length > processedLogs.size()
           + corruptedLogs.size()) {
         throw new OrphanHLogAfterSplitException(
             "Discovered orphan hlog after split. Maybe the "
             + "HRegionServer was not dead when we started");
       }
+
+      status.setStatus("Archiving logs after completed split");
       archiveLogs(srcDir, corruptedLogs, processedLogs, oldLogDir, fs, conf);
     } finally {
+      status.setStatus("Finishing writing output logs and closing down.");
       splits = outputSink.finishWritingAndClose();
     }
     return splits;
   }

+  /**
+   * @return the total size of the passed list of files.
+   */
+  private static long countTotalBytes(FileStatus[] logfiles) {
+    long ret = 0;
+    for (FileStatus stat : logfiles) {
+      ret += stat.getLen();
+    }
+    return ret;
+  }
+
   /**
    * Splits a HLog file into a temporary staging area. tmpname is used to build
    * the name of the staging area where the recovered-edits will be separated
@@ -329,6 +360,11 @@
       synchronizedMap(new TreeMap<byte[], Object>(Bytes.BYTES_COMPARATOR));
     boolean isCorrupted = false;

+    Preconditions.checkState(status == null);
+    status = TaskMonitor.get().createStatus(
+        "Splitting log file " + logfile.getPath() +
+        "into a temporary staging area.");
+
     Object BAD_WRITER = new Object();

     boolean progress_failed = false;
@@ -342,6 +378,7 @@
     Path logPath = logfile.getPath();
     long logLength = logfile.getLen();
     LOG.info("Splitting hlog: " + logPath + ", length=" + logLength);
+    status.setStatus("Opening log file");
     Reader in = null;
     try {
       in = getReader(fs, logfile, conf, skipErrors);
@@ -351,12 +388,14 @@
       isCorrupted = true;
     }
     if (in == null) {
+      status.markComplete("Was nothing to split in log file");
       LOG.warn("Nothing to split in log file " + logPath);
       return true;
     }
     long t = EnvironmentEdgeManager.currentTimeMillis();
     long last_report_at = t;
     if (reporter != null && reporter.progress() == false) {
+      status.markComplete("Failed: reporter.progress asked us to terminate");
       return false;
     }
     int editsCount = 0;
@@ -380,10 +419,12 @@
         wap.w.append(entry);
         editsCount++;
         if (editsCount % interval == 0) {
+          status.setStatus("Split " + editsCount + " edits");
           long t1 = EnvironmentEdgeManager.currentTimeMillis();
           if ((t1 - last_report_at) > period) {
             last_report_at = t;
             if (reporter != null && reporter.progress() == false) {
+              status.markComplete("Failed: reporter.progress asked us to terminate");
               progress_failed = true;
               return false;
             }
@@ -416,10 +457,12 @@
         wap.w.close();
         LOG.debug("Closed " + wap.p);
       }
-      LOG.info("processed " + editsCount + " edits across " + n + " regions" +
+      String msg = ("processed " + editsCount + " edits across " + n + " regions" +
           " threw away edits for " + (logWriters.size() - n) + " regions" +
           " log file = " + logPath +
           " is corrupted = " + isCorrupted);
+      LOG.info(msg);
+      status.markComplete(msg);
     }
     return true;
   }

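The splitter changes above boil down to a "log and report" idiom: every operator-visible message goes both to the log and to the MonitoredTask that represents the split. The sketch below illustrates that idiom only; it is not part of this commit, and the class name, constructor argument, and split(int) method are hypothetical stand-ins for the real HLogSplitter plumbing.

// Sketch of the logAndReport idiom adopted by HLogSplitter above (illustrative only).
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class SplitProgressExample {
  private static final Log LOG = LogFactory.getLog(SplitProgressExample.class);
  private final MonitoredTask status;

  public SplitProgressExample(String srcDir) {
    // Register the whole split as a task, as splitLog() now does
    this.status = TaskMonitor.get().createStatus("Splitting logs in " + srcDir);
  }

  // Mirror of the private logAndReport(String) helper added above
  private void logAndReport(String msg) {
    status.setStatus(msg);
    LOG.info(msg);
  }

  public void split(int totalFiles) {
    for (int i = 0; i < totalFiles; i++) {
      logAndReport("Splitting hlog " + (i + 1) + " of " + totalFiles);
      // ... split one file here ...
    }
    status.markComplete("hlog file splitting completed");
  }
}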
@@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
@@ -41,6 +42,7 @@ import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.mockito.Mockito;

 /**
  * Test the {@link ActiveMasterManager}.
@@ -77,7 +79,8 @@ public class TestActiveMasterManager {
     assertFalse(activeMasterManager.clusterHasActiveMaster.get());

     // First test becoming the active master uninterrupted
-    activeMasterManager.blockUntilBecomingActiveMaster();
+    MonitoredTask status = Mockito.mock(MonitoredTask.class);
+    activeMasterManager.blockUntilBecomingActiveMaster(status);
     assertTrue(activeMasterManager.clusterHasActiveMaster.get());
     assertMaster(zk, master);

@@ -87,7 +90,7 @@ public class TestActiveMasterManager {
       master, secondDummyMaster);
     zk.registerListener(secondActiveMasterManager);
     assertFalse(secondActiveMasterManager.clusterHasActiveMaster.get());
-    activeMasterManager.blockUntilBecomingActiveMaster();
+    activeMasterManager.blockUntilBecomingActiveMaster(status);
     assertTrue(activeMasterManager.clusterHasActiveMaster.get());
     assertMaster(zk, master);
   }
@@ -120,7 +123,8 @@ public class TestActiveMasterManager {
     assertFalse(activeMasterManager.clusterHasActiveMaster.get());

     // First test becoming the active master uninterrupted
-    activeMasterManager.blockUntilBecomingActiveMaster();
+    activeMasterManager.blockUntilBecomingActiveMaster(
+        Mockito.mock(MonitoredTask.class));
     assertTrue(activeMasterManager.clusterHasActiveMaster.get());
     assertMaster(zk, firstMasterAddress);

@@ -201,7 +205,8 @@ public class TestActiveMasterManager {

     @Override
     public void run() {
-      manager.blockUntilBecomingActiveMaster();
+      manager.blockUntilBecomingActiveMaster(
+          Mockito.mock(MonitoredTask.class));
       LOG.info("Second master has become the active master!");
       isActiveMaster = true;
     }

@@ -0,0 +1,101 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.monitoring;
+
+import static org.junit.Assert.*;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.junit.Test;
+
+public class TestTaskMonitor {
+
+  @Test
+  public void testTaskMonitorBasics() {
+    TaskMonitor tm = new TaskMonitor();
+    assertTrue("Task monitor should start empty",
+        tm.getTasks().isEmpty());
+
+    // Make a task and fetch it back out
+    MonitoredTask task = tm.createStatus("Test task");
+    MonitoredTask taskFromTm = tm.getTasks().get(0);
+
+    // Make sure the state is reasonable.
+    assertEquals(task.getDescription(), taskFromTm.getDescription());
+    assertEquals(-1, taskFromTm.getCompletionTimestamp());
+    assertEquals(MonitoredTask.State.RUNNING, taskFromTm.getState());
+
+    // Mark it as finished
+    task.markComplete("Finished!");
+    assertEquals(MonitoredTask.State.COMPLETE, taskFromTm.getState());
+
+    // It should still show up in the TaskMonitor list
+    assertEquals(1, tm.getTasks().size());
+
+    // If we mark its completion time back a few minutes, it should get gced
+    ((MonitoredTaskImpl)taskFromTm).expireNow();
+    assertEquals(0, tm.getTasks().size());
+  }
+
+  @Test
+  public void testTasksGetAbortedOnLeak() throws InterruptedException {
+    final TaskMonitor tm = new TaskMonitor();
+    assertTrue("Task monitor should start empty",
+        tm.getTasks().isEmpty());
+
+    final AtomicBoolean threadSuccess = new AtomicBoolean(false);
+    // Make a task in some other thread and leak it
+    Thread t = new Thread() {
+      @Override
+      public void run() {
+        MonitoredTask task = tm.createStatus("Test task");
+        assertEquals(MonitoredTask.State.RUNNING, task.getState());
+        threadSuccess.set(true);
+      }
+    };
+    t.start();
+    t.join();
+    // Make sure the thread saw the correct state
+    assertTrue(threadSuccess.get());
+
+    // Make sure the leaked reference gets cleared
+    System.gc();
+    System.gc();
+    System.gc();
+
+    // Now it should be aborted
+    MonitoredTask taskFromTm = tm.getTasks().get(0);
+    assertEquals(MonitoredTask.State.ABORTED, taskFromTm.getState());
+  }
+
+  @Test
+  public void testTaskLimit() throws Exception {
+    TaskMonitor tm = new TaskMonitor();
+    for (int i = 0; i < TaskMonitor.MAX_TASKS + 10; i++) {
+      tm.createStatus("task " + i);
+    }
+    // Make sure it was limited correctly
+    assertEquals(TaskMonitor.MAX_TASKS, tm.getTasks().size());
+    // Make sure we culled the earlier tasks, not later
+    // (i.e. tasks 0 through 9 should have been deleted)
+    assertEquals("task 10", tm.getTasks().get(0).getDescription());
+  }
+
+}
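The new test file above exercises TaskMonitor directly; code that merely accepts a MonitoredTask is instead tested with a Mockito mock, as the updated TestActiveMasterManager, TestStore, and TestWALReplay do below. A hedged sketch of that style follows; it is not part of this commit, doStep() is a hypothetical method under test, and Mockito's verify() is standard mocking usage rather than something this patch adds.

// Illustrative sketch: unit-testing a status-reporting code path with a mocked task.
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.junit.Test;

public class TestMonitoredCodePath {
  // Hypothetical method under test that reports its progress.
  static void doStep(MonitoredTask status) {
    status.setStatus("Doing the step");
    status.markComplete("Step done");
  }

  @Test
  public void testStatusIsReported() {
    MonitoredTask status = mock(MonitoredTask.class);
    doStep(status);
    // The mock records the interactions, so the reported messages can be asserted.
    verify(status).setStatus("Doing the step");
    verify(status).markComplete("Step done");
  }
}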
@@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -57,6 +58,7 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
 import org.apache.hadoop.hbase.util.IncrementingEnvironmentEdge;
 import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
+import org.mockito.Mockito;

 import com.google.common.base.Joiner;

@@ -600,7 +602,7 @@ public class TestStore extends TestCase {
   private static void flushStore(Store store, long id) throws IOException {
     StoreFlusher storeFlusher = store.getStoreFlusher(id);
     storeFlusher.prepare();
-    storeFlusher.flushCache();
+    storeFlusher.flushCache(Mockito.mock(MonitoredTask.class));
     storeFlusher.commit();
   }

@@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.regionserver.FlushRequester;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Store;
@@ -55,6 +56,7 @@ import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.mockito.Mockito;

 /**
  * Test replay of edits out of a WAL split.
@@ -394,7 +396,8 @@ public class TestWALReplay {
         null) {
       protected boolean internalFlushcache(HLog wal, long myseqid)
       throws IOException {
-        boolean b = super.internalFlushcache(wal, myseqid);
+        boolean b = super.internalFlushcache(wal, myseqid,
+            Mockito.mock(MonitoredTask.class));
         flushcount.incrementAndGet();
         return b;
       };