HBASE-625 Metrics support for cluster load history: emissions and graphs

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@709008 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2008-10-29 21:06:29 +00:00
parent 4807901194
commit e95b049d33
11 changed files with 402 additions and 26 deletions

CHANGES.txt

@@ -65,7 +65,6 @@ Release 0.19.0 - Unreleased
(Doğacan Güney via Stack)
HBASE-908 Add approximate counting to CountingBloomFilter
(Andrzej Bialecki via Stack)
HBASE-576 Investigate IPC performance
HBASE-920 Make region balancing sloppier
HBASE-902 Add force compaction and force split operations to UI and Admin
HBASE-942 Add convenience methods to RowFilterSet
@@ -74,9 +73,6 @@ Release 0.19.0 - Unreleased
SubString operator (Clint Morgan via Stack)
HBASE-937 Thrift getRow does not support specifying columns
(Doğacan Güney via Stack)
HBASE-940 Make the TableOutputFormat batching-aware
HBASE-967 [Optimization] Cache cell maximum length (HCD.getMaxValueLength);
it's used when checking batch size
HBASE-959 Be able to get multiple RowResult at one time from client side
(Sishen Freecity via Stack)
HBASE-936 REST Interface: enable get number of rows from scanner interface
@@ -88,10 +84,15 @@ Release 0.19.0 - Unreleased
NEW FEATURES
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]
(Andrzej Bialecki via Stack)
HBASE-625 Metrics support for cluster load history: emissions and graphs
OPTIMIZATIONS
HBASE-748 Add an efficient way to batch update many rows
HBASE-887 Fix a hotspot in scanners
HBASE-967 [Optimization] Cache cell maximum length (HCD.getMaxValueLength);
it's used when checking batch size
HBASE-940 Make the TableOutputFormat batching-aware
HBASE-576 Investigate IPC performance
Release 0.18.0 - September 21st, 2008

hadoop-metrics.properties

@@ -0,0 +1,44 @@
# See http://wiki.apache.org/hadoop/GangliaMetrics
# And, yes, this file is named hadoop-metrics.properties rather than
# hbase-metrics.properties because we're leveraging the hadoop metrics
# package and hadoop-metrics.properties is a hardcoded name, at least
# for the moment.
# Configuration of the "hbase" context for null
hbase.class=org.apache.hadoop.metrics.spi.NullContext
# Configuration of the "hbase" context for file
# hbase.class=org.apache.hadoop.metrics.file.FileContext
# hbase.period=10
# hbase.fileName=/tmp/metrics_hbase.log
# Configuration of the "hbase" context for ganglia
# hbase.class=org.apache.hadoop.metrics.ganglia.GangliaContext
# hbase.period=10
# hbase.servers=GMETADHOST_IP:8649
# Configuration of the "jvm" context for null
jvm.class=org.apache.hadoop.metrics.spi.NullContext
# Configuration of the "jvm" context for file
# jvm.class=org.apache.hadoop.metrics.file.FileContext
# jvm.period=10
# jvm.fileName=/tmp/metrics_jvm.log
# Configuration of the "jvm" context for ganglia
# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
# jvm.period=10
# jvm.servers=GMETADHOST_IP:8649
# Configuration of the "rpc" context for null
rpc.class=org.apache.hadoop.metrics.spi.NullContext
# Configuration of the "rpc" context for file
# rpc.class=org.apache.hadoop.metrics.file.FileContext
# rpc.period=10
# rpc.fileName=/tmp/metrics_rpc.log
# Configuration of the "rpc" context for ganglia
# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
# rpc.period=10
# rpc.servers=GMETADHOST_IP:8649
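
The context names in this file ("hbase", "jvm", "rpc") are the handles the code looks up through the Hadoop metrics API; the new MasterMetrics and RegionServerMetrics classes below do exactly this. A minimal sketch of that lookup-and-publish cycle follows (the wrapper class, record name, tag and metric values are illustrative, not part of this commit):

import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.MetricsUtil;

public class MetricsConfigSketch {
  public static void main(String[] args) {
    // Looks up the "hbase" context configured above; with the default
    // NullContext the values are accepted but never emitted anywhere.
    MetricsContext context = MetricsUtil.getContext("hbase");
    MetricsRecord record = MetricsUtil.createRecord(context, "example");
    record.setTag("Host", "localhost");   // illustrative tag
    record.setMetric("sample_metric", 1); // illustrative metric
    record.update();                      // hand the record to the context
  }
}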

HServerLoad.java

@@ -31,6 +31,15 @@ import org.apache.hadoop.io.WritableComparable;
public class HServerLoad implements WritableComparable {
private int numberOfRequests; // number of requests since last report
private int numberOfRegions; // number of regions being served
/*
* Number of storefiles on the regionserver
*/
private int storefiles;
/*
* Size of the memcaches on this machine in MB.
*/
private int memcacheSizeMB;
/*
* TODO: Other metrics that might be considered when the master is actually
@@ -44,18 +53,28 @@ public class HServerLoad implements WritableComparable {
* <li>server death rate (maybe there is something wrong with this server)</li>
* </ul>
*/
/** Default constructor (used by Writable) */
public HServerLoad() {}
public HServerLoad() {
super();
}
/**
* Constructor
* @param numberOfRequests requests since the last report
* @param numberOfRegions regions being served
* @param storefiles count of storefiles on this server
* @param memcacheSizeMB total size of this server's memcaches, in MB
*/
public HServerLoad(int numberOfRequests, int numberOfRegions) {
public HServerLoad(final int numberOfRequests, final int numberOfRegions,
final int storefiles, final int memcacheSizeMB) {
this.numberOfRequests = numberOfRequests;
this.numberOfRegions = numberOfRegions;
this.storefiles = storefiles;
this.memcacheSizeMB = memcacheSizeMB;
}
/**
* Copy constructor
* @param hsl load to copy
*/
public HServerLoad(final HServerLoad hsl) {
this(hsl.numberOfRequests, hsl.numberOfRegions, hsl.storefiles,
hsl.memcacheSizeMB);
}
/**
@@ -85,7 +104,8 @@ public class HServerLoad implements WritableComparable {
* @return The load as a String
*/
public String toString(int msgInterval) {
return "requests: " + numberOfRequests/msgInterval + " regions: " + numberOfRegions;
return "requests: " + numberOfRequests/msgInterval +
" regions: " + numberOfRegions;
}
@Override
@@ -116,8 +136,34 @@ public class HServerLoad implements WritableComparable {
return numberOfRequests;
}
// Setters
/**
* @return Count of storefiles on this regionserver
*/
public int getStorefiles() {
return this.storefiles;
}
/**
* @return Total size of the memcaches on this server, in MB
*/
public int getMemcacheSizeInMB() {
return this.memcacheSizeMB;
}
/**
* @param storefiles Count of storefiles on this server.
*/
public void setStorefiles(int storefiles) {
this.storefiles = storefiles;
}
/**
* @param memcacheSizeInMB Total memcache size on this server, in MB
*/
public void setMemcacheSizeInMB(int memcacheSizeInMB) {
this.memcacheSizeMB = memcacheSizeInMB;
}
/**
* @param numberOfRegions the numberOfRegions to set
*/
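
To make the expanded load report concrete, here is a minimal sketch of building and reading an HServerLoad with the two new fields; the wrapper class and literal values are made up for illustration.

import org.apache.hadoop.hbase.HServerLoad;

public class HServerLoadSketch {
  public static void main(String[] args) {
    // requests since last report, regions served, storefiles, memcache MB
    HServerLoad load = new HServerLoad(120, 12, 34, 256);
    HServerLoad copy = new HServerLoad(load); // new copy constructor
    System.out.println("regions=" + copy.getNumberOfRegions()
        + ", storefiles=" + copy.getStorefiles());
  }
}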

HMaster.java

@@ -63,6 +63,7 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.ipc.HbaseRPC;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
@@ -147,6 +148,8 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
ServerManager serverManager;
RegionManager regionManager;
private MasterMetrics metrics;
/** Build the HMaster out of a raw configuration item.
*
* @param conf - Configuration object
@@ -357,7 +360,6 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
startShutdown();
break;
}
// work on the TodoQueue. If that fails, we should shut down.
if (!processToDoQueue()) {
break;
@@ -484,6 +486,8 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
* need to install an unexpected exception handler.
*/
private void startServiceThreads() {
// Do after main thread name has been set
this.metrics = new MasterMetrics();
try {
regionManager.start();
serverManager.start();
@@ -802,6 +806,13 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
return rootServer;
}
/**
* @return Server metrics
*/
public MasterMetrics getMetrics() {
return this.metrics;
}
/*
* Managing leases
*/

RegionManager.java

@@ -242,11 +242,9 @@ class RegionManager implements HConstants {
nregions = 0;
// Advance past any less-loaded servers
for (HServerLoad load =
new HServerLoad(thisServersLoad.getNumberOfRequests(),
thisServersLoad.getNumberOfRegions());
load.compareTo(heavierLoad) <= 0 && nregions < nRegionsToAssign;
load.setNumberOfRegions(load.getNumberOfRegions() + 1), nregions++) {
for (HServerLoad load = new HServerLoad(thisServersLoad);
load.compareTo(heavierLoad) <= 0 && nregions < nRegionsToAssign;
load.setNumberOfRegions(load.getNumberOfRegions() + 1), nregions++) {
// continue;
}
@@ -310,9 +308,7 @@ class RegionManager implements HConstants {
// unassigned. That is how many regions we should assign to this server.
int nRegions = 0;
for (Map.Entry<HServerLoad, Set<String>> e : lightServers.entrySet()) {
HServerLoad lightLoad = new HServerLoad(e.getKey().getNumberOfRequests(),
e.getKey().getNumberOfRegions());
HServerLoad lightLoad = new HServerLoad(e.getKey());
do {
lightLoad.setNumberOfRegions(lightLoad.getNumberOfRegions() + 1);
nRegions += 1;

ServerManager.java

@@ -293,6 +293,7 @@ class ServerManager implements HConstants {
serversToServerInfo.put(serverName, serverInfo);
HServerLoad load = serversToLoad.get(serverName);
if (load != null) {
this.master.getMetrics().incrementRequests(load.getNumberOfRequests());
}
if (load != null && !load.equals(serverInfo.getLoad())) {
// We have previous information about the load on this server
// and the load on this server has changed

MasterMetrics.java

@@ -0,0 +1,95 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.metrics;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater;
import org.apache.hadoop.metrics.jvm.JvmMetrics;
import org.apache.hadoop.metrics.util.MetricsIntValue;
/**
* This class is for maintaining the various master statistics
* and publishing them through the metrics interfaces.
* <p>
* This class has a number of metrics variables that are publicly accessible;
* these variables (objects) have methods to update their values.
*/
public class MasterMetrics implements Updater {
private final Log LOG = LogFactory.getLog(this.getClass());
private final MetricsRecord metricsRecord;
/*
* Count of requests to the cluster since last call to metrics update
*/
private final MetricsIntValue cluster_requests =
new MetricsIntValue("cluster_requests");
public MasterMetrics() {
MetricsContext context = MetricsUtil.getContext("hbase");
metricsRecord = MetricsUtil.createRecord(context, "master");
String name = Thread.currentThread().getName();
metricsRecord.setTag("Master", name);
context.registerUpdater(this);
JvmMetrics.init("Master", name);
LOG.info("Initialized");
}
public void shutdown() {
// nought to do.
}
/**
* Since this object is a registered updater, this method will be called
* periodically, e.g. every 5 seconds.
*/
public void doUpdates(@SuppressWarnings("unused") MetricsContext unused) {
synchronized (this) {
synchronized(this.cluster_requests) {
this.cluster_requests.pushMetric(metricsRecord);
// Set requests down to zero again.
this.cluster_requests.set(0);
}
}
this.metricsRecord.update();
}
public void resetAllMinMax() {
// Nothing to do
}
/**
* @return Count of requests.
*/
public int getRequests() {
return this.cluster_requests.get();
}
/**
* @param inc How much to add to requests.
*/
public void incrementRequests(final int inc) {
synchronized(this.cluster_requests) {
this.cluster_requests.inc(inc);
}
}
}
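
A rough sketch of how this class is meant to be driven (the wrapper class and numbers are illustrative; in the commit itself HMaster creates the instance in startServiceThreads() and ServerManager feeds it each regionserver's request count):

import org.apache.hadoop.hbase.master.metrics.MasterMetrics;

public class MasterMetricsSketch {
  public static void main(String[] args) {
    MasterMetrics metrics = new MasterMetrics(); // registers itself as an Updater
    // Each regionserver report adds its request count to the cluster total.
    metrics.incrementRequests(150);
    metrics.incrementRequests(90);
    System.out.println("cluster_requests=" + metrics.getRequests()); // 240
    // The metrics framework periodically calls doUpdates(), which pushes
    // cluster_requests to the "hbase" context and resets it to zero.
    metrics.shutdown();
  }
}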

HRegionServer.java

@@ -84,6 +84,7 @@ import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.ipc.HbaseRPC;
import org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.InfoServer;
@@ -171,6 +172,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
*/
private final LinkedList<byte[]> reservedSpace = new LinkedList<byte []>();
private RegionServerMetrics metrics;
/**
* Thread to shutdown the region server in an orderly manner. This thread
* is registered as a shutdown hook in the HRegionServer constructor and is
@@ -236,7 +239,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.connection = ServerConnectionManager.getConnection(conf);
this.isOnline = false;
// Config'ed params
this.numRetries = conf.getInt("hbase.client.retries.number", 2);
this.threadWakeFrequency = conf.getInt(THREAD_WAKE_FREQUENCY, 10 * 1000);
@@ -296,8 +299,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public void run() {
boolean quiesceRequested = false;
// A sleeper that sleeps for msgInterval.
Sleeper sleeper =
new Sleeper(this.msgInterval, this.stopRequested);
Sleeper sleeper = new Sleeper(this.msgInterval, this.stopRequested);
try {
init(reportForDuty(sleeper));
long lastMsg = 0;
@@ -317,8 +319,10 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.outboundMsgs.clear();
}
try {
doMetrics();
this.serverInfo.setLoad(new HServerLoad(requestCount.get(),
onlineRegions.size()));
onlineRegions.size(), this.metrics.storefiles.get(),
this.metrics.memcacheSizeMB.get()));
this.requestCount.set(0);
HMsg msgs[] = hbaseMaster.regionServerReport(
serverInfo, outboundArray, getMostLoadedRegions());
@@ -531,6 +535,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.rootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
this.log = setupHLog();
this.logFlusher.setHLog(log);
// Init here rather than in the constructor so it runs after the thread name has been set
this.metrics = new RegionServerMetrics();
startServiceThreads();
isOnline = true;
} catch (IOException e) {
@@ -543,7 +549,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
throw ex;
}
}
/**
* Report the status of the server. A server is online once all the startup
* is completed (setting up filesystem, starting service threads, etc.). This
@@ -573,6 +579,39 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
return newlog;
}
/*
* Update this regionserver's metrics from its current state.
*/
protected void doMetrics() {
this.metrics.regions.set(this.onlineRegions.size());
this.metrics.incrementRequests(this.requestCount.get());
// Is it too expensive, every three seconds, to take the lock on onlineRegions
// and then on each store it carries? Could the metrics be made sloppier and
// avoid these synchronizations?
int storefiles = 0;
long memcacheSize = 0;
synchronized (this.onlineRegions) {
for (Map.Entry<Integer, HRegion> e: this.onlineRegions.entrySet()) {
HRegion r = e.getValue();
memcacheSize += r.memcacheSize.get();
synchronized(r.stores) {
for(Map.Entry<Integer, HStore> ee: r.stores.entrySet()) {
storefiles += ee.getValue().getStorefilesCount();
}
}
}
}
this.metrics.storefiles.set(storefiles);
this.metrics.memcacheSizeMB.set((int)(memcacheSize/(1024*1024)));
}
/**
* @return Region server metrics instance.
*/
public RegionServerMetrics getMetrics() {
return this.metrics;
}
/*
* Start maintenance threads, Server, Worker and lease checker threads.
* Install an UncaughtExceptionHandler that calls abort of RegionServer if we
@@ -748,7 +787,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
while(!stopRequested.get()) {
try {
this.requestCount.set(0);
this.serverInfo.setLoad(new HServerLoad(0, onlineRegions.size()));
this.serverInfo.setLoad(new HServerLoad(0, onlineRegions.size(), 0, 0));
lastMsg = System.currentTimeMillis();
result = this.hbaseMaster.regionServerStartup(serverInfo);
break;

HStore.java

@@ -1960,6 +1960,13 @@ public class HStore implements HConstants {
}
}
/**
* @return Count of store files
*/
int getStorefilesCount() {
return this.storefiles.size();
}
class StoreSize {
private final long size;
private final byte[] key;

RegionServerMetrics.java

@@ -0,0 +1,134 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver.metrics;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.MetricsUtil;
import org.apache.hadoop.metrics.Updater;
import org.apache.hadoop.metrics.jvm.JvmMetrics;
import org.apache.hadoop.metrics.util.MetricsIntValue;
/**
* This class is for maintaining the various regionserver statistics
* and publishing them through the metrics interfaces.
* <p>
* This class has a number of metrics variables that are publicly accessible;
* these variables (objects) have methods to update their values.
*/
public class RegionServerMetrics implements Updater {
private final Log LOG = LogFactory.getLog(this.getClass());
private final MetricsRecord metricsRecord;
private long lastUpdate = System.currentTimeMillis();
/**
* Count of regions carried by this regionserver
*/
public final MetricsIntValue regions = new MetricsIntValue("regions");
/*
* Count of requests to this regionserver since the last metrics update
*/
private final MetricsIntValue requests = new MetricsIntValue("requests");
/**
* Count of storefiles open on the regionserver.
*/
public final MetricsIntValue storefiles = new MetricsIntValue("storefiles");
/**
* Sum of all the memcache sizes in this regionserver in MB
*/
public final MetricsIntValue memcacheSizeMB =
new MetricsIntValue("memcacheSizeMB");
public RegionServerMetrics() {
MetricsContext context = MetricsUtil.getContext("hbase");
metricsRecord = MetricsUtil.createRecord(context, "regionserver");
String name = Thread.currentThread().getName();
metricsRecord.setTag("RegionServer", name);
context.registerUpdater(this);
JvmMetrics.init("RegionServer", name);
LOG.info("Initialized");
}
public void shutdown() {
// nought to do.
}
/**
* Since this object is a registered updater, this method will be called
* periodically, e.g. every 5 seconds.
*/
public void doUpdates(@SuppressWarnings("unused") MetricsContext unused) {
synchronized (this) {
this.storefiles.pushMetric(this.metricsRecord);
this.memcacheSizeMB.pushMetric(this.metricsRecord);
this.regions.pushMetric(this.metricsRecord);
synchronized(this.requests) {
this.requests.pushMetric(this.metricsRecord);
// Set requests down to zero again.
this.requests.set(0);
}
}
this.metricsRecord.update();
this.lastUpdate = System.currentTimeMillis();
}
public void resetAllMinMax() {
// Nothing to do
}
/**
* @return Count of requests.
*/
public int getRequests() {
return this.requests.get();
}
/**
* @param inc How much to add to requests.
*/
public void incrementRequests(final int inc) {
synchronized(this.requests) {
this.requests.inc(inc);
}
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("requests=");
int seconds = (int)((System.currentTimeMillis() - this.lastUpdate)/1000);
if (seconds == 0) {
seconds = 1;
}
sb.append(this.requests.get()/seconds);
sb.append(", regions=");
sb.append(this.regions.get());
sb.append(", storefiles=");
sb.append(this.storefiles.get());
sb.append(", memcacheSize=");
sb.append(this.memcacheSizeMB.get());
sb.append("MB");
return sb.toString();
}
}
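
The regionserver-side class is driven the same way, with HRegionServer.doMetrics() setting the gauges from its online regions; its toString() backs the new row on the regionserver status page. A rough usage sketch with illustrative numbers:

import org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics;

public class RegionServerMetricsSketch {
  public static void main(String[] args) {
    RegionServerMetrics metrics = new RegionServerMetrics();
    metrics.regions.set(12);         // regions currently online
    metrics.storefiles.set(34);      // storefiles across all stores
    metrics.memcacheSizeMB.set(256); // summed memcache size, in MB
    metrics.incrementRequests(450);  // requests since the last update
    // Prints something like:
    // requests=450, regions=12, storefiles=34, memcacheSize=256MB
    System.out.println(metrics);
  }
}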

regionserver.jsp

@@ -3,12 +3,14 @@
import="org.apache.hadoop.io.Text"
import="org.apache.hadoop.hbase.regionserver.HRegionServer"
import="org.apache.hadoop.hbase.regionserver.HRegion"
import="org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics"
import="org.apache.hadoop.hbase.util.Bytes"
import="org.apache.hadoop.hbase.HConstants"
import="org.apache.hadoop.hbase.HServerInfo"
import="org.apache.hadoop.hbase.HRegionInfo" %><%
HRegionServer regionServer = (HRegionServer)getServletContext().getAttribute(HRegionServer.REGIONSERVER);
HServerInfo serverInfo = regionServer.getServerInfo();
RegionServerMetrics metrics = regionServer.getMetrics();
Collection<HRegionInfo> onlineRegions = regionServer.getSortedOnlineRegionInfos();
int interval = regionServer.getConfiguration().getInt("hbase.regionserver.msginterval", 3000)/1000;
%><?xml version="1.0" encoding="UTF-8" ?>
@@ -32,7 +34,7 @@
<tr><th>Attribute Name</th><th>Value</th><th>Description</th></tr>
<tr><td>HBase Version</td><td><%= org.apache.hadoop.hbase.util.VersionInfo.getVersion() %>, r<%= org.apache.hadoop.hbase.util.VersionInfo.getRevision() %></td><td>HBase version and svn revision</td></tr>
<tr><td>HBase Compiled</td><td><%= org.apache.hadoop.hbase.util.VersionInfo.getDate() %>, <%= org.apache.hadoop.hbase.util.VersionInfo.getUser() %></td><td>When HBase version was compiled and by whom</td></tr>
<tr><td>Load</td><td><%= serverInfo.getLoad().toString(interval) %></td><td>Requests per second and count of loaded regions</td></tr>
<tr><td>Metrics</td><td><%= metrics.toString() %></td><td>RegionServer Metrics</td></tr>
</table>
<h2>Online Regions</h2>