HBASE-21521 Expose master startup status via web UI (#4788) (#5021) (#5026)

Signed-off-by: Bryan Beaudreault <bbeaudreault@apache.org>
Author: Xiaolin Ha, 2023-02-16 09:31:32 +08:00 (committed by GitHub)
parent ee618d5fdf
commit 4b9214c75f
12 changed files with 367 additions and 47 deletions

File: MasterStatusTmpl.jamon

@ -177,6 +177,7 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
<%if HBaseConfiguration.isShowConfInServlet()%>
<li><a href="/conf">HBase Configuration</a></li>
</%if>
<li><a href="/startupProgress.jsp">Startup Progress</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>

File: ActiveMasterManager.java

@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskGroup;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKListener;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@ -219,15 +220,17 @@ public class ActiveMasterManager extends ZKListener {
* and our attempt to become the new active master is successful. This also makes sure that we are
* watching the master znode so will be notified if another master dies.
* @param checkInterval the interval to check if the master is stopped
* @param startupStatus the monitor status to track the progress
* @param startupTaskGroup the task group for master startup to track the progress
* @return True if no issue becoming active master else false if another master was running or if
* some other problem (zookeeper, stop flag has been set on this Master)
*/
boolean blockUntilBecomingActiveMaster(int checkInterval, MonitoredTask startupStatus) {
boolean blockUntilBecomingActiveMaster(int checkInterval, TaskGroup startupTaskGroup) {
MonitoredTask blockUntilActive =
startupTaskGroup.addTask("Blocking until becoming active master");
String backupZNode = ZNodePaths
.joinZNode(this.watcher.getZNodePaths().backupMasterAddressesZNode, this.sn.toString());
while (!(master.isAborted() || master.isStopped())) {
startupStatus.setStatus("Trying to register in ZK as active master");
blockUntilActive.setStatus("Trying to register in ZK as active master");
// Try to become the active master, watch if there is another master.
// Write out our ServerName as versioned bytes.
try {
@ -246,7 +249,7 @@ public class ActiveMasterManager extends ZKListener {
ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString());
// We are the master, return
startupStatus.setStatus("Successfully registered as active master.");
blockUntilActive.setStatus("Successfully registered as active master.");
this.clusterHasActiveMaster.set(true);
activeMasterServerName = sn;
LOG.info("Registered as active master=" + this.sn);
@ -291,7 +294,7 @@ public class ActiveMasterManager extends ZKListener {
}
}
LOG.info(msg);
startupStatus.setStatus(msg);
blockUntilActive.setStatus(msg);
} catch (KeeperException ke) {
master.abort("Received an unexpected KeeperException, aborting", ke);
return false;

File: HMaster.java

@ -183,6 +183,7 @@ import org.apache.hadoop.hbase.mob.MobFileCleanerChore;
import org.apache.hadoop.hbase.mob.MobFileCompactionChore;
import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskGroup;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
@ -447,6 +448,8 @@ public class HMaster extends HRegionServer implements MasterServices {
public static final String WARMUP_BEFORE_MOVE = "hbase.master.warmup.before.move";
private static final boolean DEFAULT_WARMUP_BEFORE_MOVE = true;
private TaskGroup startupTaskGroup;
/**
* Initializes the HMaster. The steps are as follows:
* <p>
@ -455,9 +458,8 @@ public class HMaster extends HRegionServer implements MasterServices {
* <li>Start the ActiveMasterManager.
* </ol>
* <p>
* Remaining steps of initialization occur in
* {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the active
* one.
* Remaining steps of initialization occur in {@link #finishActiveMasterInitialization()} after
* the master becomes the active one.
*/
public HMaster(final Configuration conf) throws IOException {
super(conf);
@ -887,12 +889,12 @@ public class HMaster extends HRegionServer implements MasterServices {
* Notice that now we will not schedule a special procedure to make meta online(unless the first
* time where meta has not been created yet), we will rely on SCP to bring meta online.
*/
private void finishActiveMasterInitialization(MonitoredTask status) throws IOException,
InterruptedException, KeeperException, ReplicationException, DeserializationException {
private void finishActiveMasterInitialization() throws IOException, InterruptedException,
KeeperException, ReplicationException, DeserializationException {
/*
* We are active master now... go initialize components we need to run.
*/
status.setStatus("Initializing Master file system");
startupTaskGroup.addTask("Initializing Master file system");
this.masterActiveTime = EnvironmentEdgeManager.currentTime();
// TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
@ -905,7 +907,7 @@ public class HMaster extends HRegionServer implements MasterServices {
// warm-up HTDs cache on master initialization
if (preLoadTableDescriptors) {
status.setStatus("Pre-loading table descriptors");
startupTaskGroup.addTask("Pre-loading table descriptors");
this.tableDescriptors.getAll();
}
@ -913,7 +915,7 @@ public class HMaster extends HRegionServer implements MasterServices {
// only after it has checked in with the Master. At least a few tests ask Master for clusterId
// before it has called its run method and before RegionServer has done the reportForDuty.
ClusterId clusterId = fileSystemManager.getClusterId();
status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper");
startupTaskGroup.addTask("Publishing Cluster ID " + clusterId + " in ZooKeeper");
ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
this.clusterId = clusterId.toString();
@ -932,7 +934,7 @@ public class HMaster extends HRegionServer implements MasterServices {
}
}
status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
startupTaskGroup.addTask("Initialize ServerManager and schedule SCP for crash servers");
// The below two managers must be created before loading procedures, as they will be used during
// loading.
// initialize master local region
@ -982,8 +984,9 @@ public class HMaster extends HRegionServer implements MasterServices {
? new MirroringTableStateManager(this)
: new TableStateManager(this);
status.setStatus("Initializing ZK system trackers");
startupTaskGroup.addTask("Initializing ZK system trackers");
initializeZKBasedSystemTrackers();
// Set ourselves as active Master now our claim has succeeded up in zk.
this.activeMaster = true;
@ -995,19 +998,19 @@ public class HMaster extends HRegionServer implements MasterServices {
// This is for backwards compatibility
// See HBASE-11393
status.setStatus("Update TableCFs node in ZNode");
startupTaskGroup.addTask("Update TableCFs node in ZNode");
ReplicationPeerConfigUpgrader tableCFsUpdater =
new ReplicationPeerConfigUpgrader(zooKeeper, conf);
tableCFsUpdater.copyTableCFs();
if (!maintenanceMode) {
status.setStatus("Initializing master coprocessors");
startupTaskGroup.addTask("Initializing master coprocessors");
setQuotasObserver(conf);
initializeCoprocessorHost(conf);
}
// Checking if meta needs initializing.
status.setStatus("Initializing meta table if this is a new deploy");
startupTaskGroup.addTask("Initializing meta table if this is a new deploy");
InitMetaProcedure initMetaProc = null;
// Print out state of hbase:meta on startup; helps debugging.
if (!this.assignmentManager.getRegionStates().hasTableRegionStates(TableName.META_TABLE_NAME)) {
@ -1030,7 +1033,7 @@ public class HMaster extends HRegionServer implements MasterServices {
this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
// start up all service threads.
status.setStatus("Initializing master service threads");
startupTaskGroup.addTask("Initializing master service threads");
startServiceThreads();
// wait meta to be initialized after we start procedure executor
if (initMetaProc != null) {
@ -1046,16 +1049,16 @@ public class HMaster extends HRegionServer implements MasterServices {
// With this as part of master initialization, it precludes our being able to start a single
// server that is both Master and RegionServer. Needs more thought. TODO.
String statusStr = "Wait for region servers to report in";
status.setStatus(statusStr);
LOG.info(Objects.toString(status));
waitForRegionServers(status);
MonitoredTask waitRegionServer = startupTaskGroup.addTask(statusStr);
LOG.info(Objects.toString(waitRegionServer));
waitForRegionServers(waitRegionServer);
// Check if master is shutting down because issue initializing regionservers or balancer.
if (isStopped()) {
return;
}
status.setStatus("Starting assignment manager");
startupTaskGroup.addTask("Starting assignment manager");
// FIRST HBASE:META READ!!!!
// The below cannot make progress w/o hbase:meta being online.
// This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background
@ -1132,7 +1135,7 @@ public class HMaster extends HRegionServer implements MasterServices {
this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
// Start balancer and meta catalog janitor after meta and regions have been assigned.
status.setStatus("Starting balancer and catalog janitor");
startupTaskGroup.addTask("Starting balancer and catalog janitor");
this.clusterStatusChore = new ClusterStatusChore(this, balancer);
getChoreService().scheduleChore(clusterStatusChore);
this.balancerChore = new BalancerChore(this);
@ -1154,7 +1157,7 @@ public class HMaster extends HRegionServer implements MasterServices {
if (!waitForNamespaceOnline()) {
return;
}
status.setStatus("Starting cluster schema service");
startupTaskGroup.addTask("Starting cluster schema service");
try {
initClusterSchemaService();
} catch (IllegalStateException e) {
@ -1177,7 +1180,6 @@ public class HMaster extends HRegionServer implements MasterServices {
}
}
status.markComplete("Initialization successful");
LOG.info(String.format("Master has completed initialization %.3fsec",
(EnvironmentEdgeManager.currentTime() - masterActiveTime) / 1000.0f));
this.masterFinishedInitializationTime = EnvironmentEdgeManager.currentTime();
@ -1196,6 +1198,9 @@ public class HMaster extends HRegionServer implements MasterServices {
}
// Set master as 'initialized'.
setInitialized(true);
startupTaskGroup.markComplete("Initialization successful");
MonitoredTask status =
TaskMonitor.get().createStatus("Progress after master initialized", false, true);
if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
// create missing CFs in meta table after master is set to 'initialized'.
@ -1284,6 +1289,7 @@ public class HMaster extends HRegionServer implements MasterServices {
this.rollingUpgradeChore = new RollingUpgradeChore(this);
getChoreService().scheduleChore(rollingUpgradeChore);
status.markComplete("Progress after master initialized complete");
}
private void createMissingCFsInMetaDuringUpgrade(TableDescriptor metaDescriptor)
@ -2385,14 +2391,19 @@ public class HMaster extends HRegionServer implements MasterServices {
Threads.sleep(timeout);
}
}
MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
status.setDescription("Master startup");
// Here for the master startup process, we use TaskGroup to monitor the whole progress.
// The UI is similar to how Hadoop designed the startup page for the NameNode.
// See HBASE-21521 for more details.
// We do not clean up the startupTaskGroup; the startup progress information is
// kept permanently in memory.
startupTaskGroup = TaskMonitor.createTaskGroup(true, "Master startup");
try {
if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
finishActiveMasterInitialization(status);
if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, startupTaskGroup)) {
finishActiveMasterInitialization();
}
} catch (Throwable t) {
status.setStatus("Failed to become active: " + t.getMessage());
startupTaskGroup.abort("Failed to become active master due to:" + t.getMessage());
LOG.error(HBaseMarkers.FATAL, "Failed to become active master", t);
// HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
if (
@ -2406,8 +2417,6 @@ public class HMaster extends HRegionServer implements MasterServices {
} else {
abort("Unhandled exception. Starting shutdown.", t);
}
} finally {
status.cleanup();
}
}
@ -3097,6 +3106,10 @@ public class HMaster extends HRegionServer implements MasterServices {
return rsFatals;
}
public TaskGroup getStartupProgress() {
return startupTaskGroup;
}
/**
* Shutdown the cluster. Master runs a coordinated stop of all RegionServers and then itself.
*/

File: TakeSnapshotHandler.java

@ -125,8 +125,8 @@ public abstract class TakeSnapshotHandler extends EventHandler
// prepare the verify
this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, workingDirFs);
// update the running tasks
this.status = TaskMonitor.get()
.createStatus("Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable, true);
this.status = TaskMonitor.get().createStatus(
"Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable, false, true);
this.snapshotManifest =
SnapshotManifest.create(conf, rootFs, workingDir, snapshot, monitor, status);
}

File: TaskGroup.java (new file)

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.monitoring;
import java.util.Collection;
import java.util.Collections;
import java.util.concurrent.ConcurrentLinkedDeque;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The {@link TaskGroup} can be seen as a big {@link MonitoredTask}, which contains a list of sub
* monitored tasks. The monitored tasks in the group are still managed by the
* {@link TaskMonitor}, but whether to clear/expire the monitored tasks in a task group is optional.
* Although a monitored task already has a journal that marks the phases within the task, we still
* need a task group to monitor a big task/process: the journal entries within a task are serial,
* while the tasks in a task group can run in parallel, giving us more flexibility in monitoring the
* process. Grouping the tasks is not strictly necessary but it is cleaner for presentation to
* operators. We might want to display the tasks in a group in a list view where each task can be
* collapsed (probably by default) or expanded.
*/
@InterfaceAudience.Private
public class TaskGroup extends MonitoredTaskImpl {
private static final Logger LOG = LoggerFactory.getLogger(TaskGroup.class);
/** Sub-tasks in the group */
private final ConcurrentLinkedDeque<MonitoredTask> tasks = new ConcurrentLinkedDeque<>();
/** Whether to skip tracking (e.g. show/clear/expire) the sub-tasks in the singleton {@link TaskMonitor} */
private boolean ignoreSubTasksInTaskMonitor;
/** Used to track this task group in {@link TaskMonitor} */
private final MonitoredTask delegate;
public TaskGroup(boolean ignoreSubTasksInTaskMonitor, String description) {
super(true, description);
this.ignoreSubTasksInTaskMonitor = ignoreSubTasksInTaskMonitor;
this.delegate = TaskMonitor.get().createStatus(description, false, true);
}
public synchronized MonitoredTask addTask(String description) {
return addTask(description, true);
}
/**
* Add a new task to the group, optionally marking the previous task in the group complete first
* @param description the description of the new task
* @param withCompleteLast whether to complete the last task in the group
* @return the added new task
*/
public synchronized MonitoredTask addTask(String description, boolean withCompleteLast) {
if (withCompleteLast) {
MonitoredTask previousTask = this.tasks.peekLast();
if (
previousTask != null && previousTask.getState() != State.COMPLETE
&& previousTask.getState() != State.ABORTED
) {
previousTask.markComplete("Completed");
}
}
MonitoredTask task =
TaskMonitor.get().createStatus(description, ignoreSubTasksInTaskMonitor, true);
this.setStatus(description);
this.tasks.addLast(task);
delegate.setStatus(description);
return task;
}
public synchronized Collection<MonitoredTask> getTasks() {
return Collections.unmodifiableCollection(this.tasks);
}
@Override
public synchronized void abort(String msg) {
setStatus(msg);
setState(State.ABORTED);
for (MonitoredTask task : tasks) {
if (task.getState() != State.COMPLETE && task.getState() != State.ABORTED) {
task.abort(msg);
}
}
delegate.abort(msg);
}
@Override
public synchronized void markComplete(String msg) {
setState(State.COMPLETE);
setStatus(msg);
if (tasks.getLast() != null) {
tasks.getLast().markComplete(msg);
}
delegate.markComplete(msg);
}
@Override
public synchronized void cleanup() {
this.tasks.clear();
}
}
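
For orientation, a minimal usage sketch of the TaskGroup API introduced above (not part of this commit). It uses only methods visible in this diff and assumes the hbase-server monitoring classes are on the classpath; the class name and task descriptions are made up for illustration:

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskGroup;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class TaskGroupSketch {
  public static void main(String[] args) {
    // First argument = true keeps the sub-tasks out of the global TaskMonitor list,
    // mirroring how the master startup group is created in HMaster in this change.
    TaskGroup group = TaskMonitor.createTaskGroup(true, "Example startup");

    // Adding a new task marks the previous one complete (withCompleteLast defaults to true).
    group.addTask("Step 1");
    MonitoredTask step2 = group.addTask("Step 2");
    step2.setStatus("half way"); // recorded in the journal, since sub-tasks enable journalling

    group.markComplete("All steps finished");

    // startupProgress.jsp renders the group the same way: one row per sub-task.
    for (MonitoredTask task : group.getTasks()) {
      System.out.println(task.getDescription() + " -> " + task.getState());
    }
  }
}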

File: TaskMonitor.java

@ -88,7 +88,20 @@ public class TaskMonitor {
return createStatus(description, false);
}
public synchronized MonitoredTask createStatus(String description, boolean enableJournal) {
public MonitoredTask createStatus(String description, boolean ignore) {
return createStatus(description, ignore, false);
}
/**
* Create a monitored task for users to inquire about the status
* @param description description of the status
* @param ignore whether to skip tracking (e.g. show/clear/expire) the task in the
* {@link TaskMonitor}
* @param enableJournal whether to record a journal of the task's status changes
* @return a monitored task
*/
public synchronized MonitoredTask createStatus(String description, boolean ignore,
boolean enableJournal) {
MonitoredTask stat = new MonitoredTaskImpl(enableJournal, description);
MonitoredTask proxy = (MonitoredTask) Proxy.newProxyInstance(stat.getClass().getClassLoader(),
new Class<?>[] { MonitoredTask.class }, new PassthroughInvocationHandler<>(stat));
@ -96,10 +109,24 @@ public class TaskMonitor {
if (tasks.isFull()) {
purgeExpiredTasks();
}
if (!ignore) {
tasks.add(pair);
}
return proxy;
}
/**
* Create a task group which contains a series of monitored tasks for users to inquire about the
* status
* @param ignoreSubTasksInTaskMonitor whether to skip tracking (e.g. show/clear/expire) the
* sub-tasks in the {@link TaskMonitor}
* @param description description of the status
* @return a group of monitored tasks
*/
public static TaskGroup createTaskGroup(boolean ignoreSubTasksInTaskMonitor, String description) {
return new TaskGroup(ignoreSubTasksInTaskMonitor, description);
}
public synchronized MonitoredRPCHandler createRPCStatus(String description) {
MonitoredRPCHandler stat = new MonitoredRPCHandlerImpl(description);
MonitoredRPCHandler proxy =

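A short sketch (not part of this commit) of the new three-argument createStatus overload; only methods shown in this diff are used, and the class name and task descriptions are hypothetical:

import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;

public class CreateStatusSketch {
  public static void main(String[] args) {
    // ignore = false: the task is tracked (shown/cleared/expired) by the TaskMonitor;
    // enableJournal = true: each setStatus() call is also appended to the task's journal.
    MonitoredTask tracked = TaskMonitor.get().createStatus("Example task", false, true);
    tracked.setStatus("phase 1");
    tracked.markComplete("done");

    // ignore = true: the task keeps its journal but is never listed by the TaskMonitor,
    // which is how TaskGroup keeps its sub-tasks off the global task list.
    MonitoredTask untracked = TaskMonitor.get().createStatus("Hidden sub-step", true, true);
    untracked.setStatus("internal phase");
    untracked.markComplete("done");
  }
}
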
File: WALSplitter.java

@ -287,8 +287,8 @@ public class WALSplitter {
boolean cancelled = false;
int editsCount = 0;
int editsSkipped = 0;
MonitoredTask status =
TaskMonitor.get().createStatus("Splitting " + wal + " to temporary staging area.", true);
MonitoredTask status = TaskMonitor.get()
.createStatus("Splitting " + wal + " to temporary staging area.", false, true);
Reader walReader = null;
this.fileBeingSplit = walStatus;
long startTS = EnvironmentEdgeManager.currentTime();

File: hbase-webapps/master/header.jsp

@ -84,6 +84,7 @@
<% if (HBaseConfiguration.isShowConfInServlet()) { %>
<li><a href="/conf">HBase Configuration</a></li>
<% } %>
<li><a href="/startupProgress.jsp">Startup Progress</a></li>
</ul>
</div><!--/.nav-collapse -->
</div>

File: hbase-webapps/master/startupProgress.jsp (new file)

@ -0,0 +1,124 @@
<%--
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
--%>
<%@ page contentType="text/html;charset=UTF-8"
import="java.util.Date"
import="java.util.Iterator"
import="java.util.List"
%>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.monitoring.MonitoredTask" %>
<%@ page import="org.apache.hadoop.hbase.monitoring.TaskGroup" %>
<%
final HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
%>
<jsp:include page="header.jsp">
<jsp:param name="pageTitle" value="${pageTitle}"/>
</jsp:include>
<div class="container-fluid content">
<div class="row inner_header">
<div class="page-header">
<h1>Startup Progress (
<% TaskGroup startupTaskGroup = master.getStartupProgress();
if(startupTaskGroup != null){ %>
<%= getStartupStatusString(startupTaskGroup) %>
<% } else { %>
<%= ""%>
<% } %>
)</h1>
</div>
</div>
<table class="table table-striped">
<tr>
<th>Task</th>
<th>Current State</th>
<th>Start Time</th>
<th>Last status Time</th>
<th>Elapsed Time(ms)</th>
<th>Journals</th>
</tr>
<%
if(startupTaskGroup != null){
for (MonitoredTask task : startupTaskGroup.getTasks()) { %>
<tr>
<td><%= task.getDescription() %></td>
<td><%= task.getState().name() %></td>
<td><%= new Date(task.getStartTime()) %></td>
<td><%= new Date(task.getStatusTime()) %></td>
<td><%= task.getStatusTime() - task.getStartTime() %></td>
<td><%= printLatestJournals(task, 30) %></td>
</tr>
<% }
} %>
</table>
</div>
<jsp:include page="footer.jsp"/>
<%!
private static String printLatestJournals(MonitoredTask task, int count) {
List<MonitoredTask.StatusJournalEntry> journal = task.getStatusJournal();
if (journal == null) {
return "";
}
int journalSize = journal.size();
StringBuilder sb = new StringBuilder();
int skips = journalSize - count;
if (skips > 0) {
sb.append("Current journal size is ").append(journalSize).append(", ");
sb.append("skip the previous ones and show the latest ").append(count).append(" journals...");
sb.append(" </br>");
}
Iterator<MonitoredTask.StatusJournalEntry> iter = journal.iterator();
MonitoredTask.StatusJournalEntry previousEntry = null;
int i = 0;
while (iter.hasNext()) {
MonitoredTask.StatusJournalEntry entry = iter.next();
if (i >= skips) {
sb.append(entry);
if (previousEntry != null) {
long delta = entry.getTimeStamp() - previousEntry.getTimeStamp();
if (delta != 0) {
sb.append(" (+").append(delta).append(" ms)");
}
}
sb.append(" </br>");
previousEntry = entry;
}
i++;
}
return sb.toString();
}
private static String getStartupStatusString(TaskGroup startupTaskGroup) {
MonitoredTask.State currentState = startupTaskGroup.getState();
if (currentState.equals(MonitoredTask.State.COMPLETE)) {
return "Master initialized";
} else if (currentState.equals(MonitoredTask.State.RUNNING) ||
currentState.equals(MonitoredTask.State.WAITING)) {
return "Master initialize in progress";
} else {
return currentState.toString();
}
}
%>

File: AlwaysStandByHMaster.java

@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskGroup;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
@ -53,9 +54,11 @@ public class AlwaysStandByHMaster extends HMaster {
/**
* An implementation that never transitions to an active master.
*/
boolean blockUntilBecomingActiveMaster(int checkInterval, MonitoredTask startupStatus) {
@Override
boolean blockUntilBecomingActiveMaster(int checkInterval, TaskGroup startupTaskGroup) {
MonitoredTask loopTask = startupTaskGroup.addTask("Stay as a standby master.");
while (!(master.isAborted() || master.isStopped())) {
startupStatus.setStatus("Forever looping to stay as a standby master.");
loopTask.setStatus("Forever looping to stay as a standby master.");
try {
activeMasterServerName = null;
try {

File: TestActiveMasterManager.java

@ -21,6 +21,8 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.io.InterruptedIOException;
@ -38,6 +40,7 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskGroup;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@ -100,7 +103,7 @@ public class TestActiveMasterManager {
assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
// First test becoming the active master uninterrupted
MonitoredTask status = Mockito.mock(MonitoredTask.class);
TaskGroup status = mockTaskGroup();
clusterStatusTracker.setClusterUp();
activeMasterManager.blockUntilBecomingActiveMaster(100, status);
@ -149,7 +152,8 @@ public class TestActiveMasterManager {
// First test becoming the active master uninterrupted
ClusterStatusTracker clusterStatusTracker = ms1.getClusterStatusTracker();
clusterStatusTracker.setClusterUp();
activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class));
activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
assertTrue(activeMasterManager.clusterHasActiveMaster.get());
assertMaster(zk, firstMasterAddress);
assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
@ -215,7 +219,7 @@ public class TestActiveMasterManager {
ServerName sn1 = ServerName.valueOf("localhost", 1, -1);
DummyMaster master1 = new DummyMaster(zk, sn1);
ActiveMasterManager activeMasterManager = master1.getActiveMasterManager();
activeMasterManager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class));
activeMasterManager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
assertEquals(sn1, activeMasterManager.getActiveMasterServerName().get());
assertEquals(0, activeMasterManager.getBackupMasters().size());
// Add backup masters
@ -268,12 +272,19 @@ public class TestActiveMasterManager {
@Override
public void run() {
manager.blockUntilBecomingActiveMaster(100, Mockito.mock(MonitoredTask.class));
manager.blockUntilBecomingActiveMaster(100, mockTaskGroup());
LOG.info("Second master has become the active master!");
isActiveMaster = true;
}
}
private static TaskGroup mockTaskGroup() {
TaskGroup taskGroup = Mockito.mock(TaskGroup.class);
MonitoredTask task = Mockito.mock(MonitoredTask.class);
when(taskGroup.addTask(any())).thenReturn(task);
return taskGroup;
}
public static class NodeDeletionListener extends ZKListener {
private static final Logger LOG = LoggerFactory.getLogger(NodeDeletionListener.class);

File: TestTaskMonitor.java

@ -19,10 +19,12 @@ package org.apache.hadoop.hbase.monitoring;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
@ -218,7 +220,7 @@ public class TestTaskMonitor {
task.setStatus("status1");
// journal should be empty since it is disabled
assertTrue(task.getStatusJournal().isEmpty());
task = tm.createStatus("Test task with journal", true);
task = tm.createStatus("Test task with journal", false, true);
task.setStatus("status2");
assertEquals(1, task.getStatusJournal().size());
assertEquals("status2", task.getStatusJournal().get(0).getStatus());
@ -229,6 +231,26 @@ public class TestTaskMonitor {
tm.shutdown();
}
@Test
public void testTaskGroup() {
TaskGroup group = TaskMonitor.createTaskGroup(true, "test task group");
group.addTask("task1");
MonitoredTask task2 = group.addTask("task2");
task2.setStatus("task2 status2");
task2.setStatus("task2 status3");
group.addTask("task3");
group.markComplete("group complete");
Collection<MonitoredTask> tasks = group.getTasks();
assertNotNull(tasks);
assertEquals(tasks.size(), 3);
for (MonitoredTask task : tasks) {
if (task.getDescription().equals("task2")) {
assertEquals(task.getStatusJournal().size(), 3);
task.prettyPrintJournal();
}
}
}
@Test
public void testClone() throws Exception {
MonitoredRPCHandlerImpl monitor = new MonitoredRPCHandlerImpl("test");