HBASE-20943 Add offline/online region count into metrics

This commit is contained in:
Jinghan Xu 2018-08-22 16:12:12 -07:00 committed by Huaxiang Sun
parent 87949c9ff5
commit 2d911fdc2f
5 changed files with 121 additions and 2 deletions

View File

@ -63,6 +63,8 @@ public interface MetricsMasterSource extends BaseSource {
String IS_ACTIVE_MASTER_NAME = "isActiveMaster";
String SPLIT_PLAN_COUNT_NAME = "splitPlanCount";
String MERGE_PLAN_COUNT_NAME = "mergePlanCount";
// Metric names for the online / offline region count gauges.
String ONLINE_REGION_COUNT_NAME = "onlineRegionCount";
String OFFLINE_REGION_COUNT_NAME = "offlineRegionCount";
String CLUSTER_REQUESTS_NAME = "clusterRequests";
String MASTER_ACTIVE_TIME_DESC = "Master Active Time";
@ -80,6 +82,8 @@ public interface MetricsMasterSource extends BaseSource {
String IS_ACTIVE_MASTER_DESC = "Is Active Master";
String SPLIT_PLAN_COUNT_DESC = "Number of Region Split Plans executed";
String MERGE_PLAN_COUNT_DESC = "Number of Region Merge Plans executed";
// Human-readable descriptions paired with the online / offline region count metric names.
String ONLINE_REGION_COUNT_DESC = "Number of Online Regions";
String OFFLINE_REGION_COUNT_DESC = "Number of Offline Regions";
String SERVER_CRASH_METRIC_PREFIX = "serverCrash";

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.yetus.audience.InterfaceAudience;
/**
@ -141,4 +142,11 @@ public interface MetricsMasterWrapper {
* Get the time in Millis when the master finished initializing/becoming the active master
*/
long getMasterInitializationTime();
/**
 * Get the online and offline region counts.
 *
 * @return pair of region counts: the first element is the number of online regions and the
 *         second element is the number of offline regions
 */
PairOfSameType<Integer> getRegionCounts();
}

View File

@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.metrics.BaseSourceImpl;
import org.apache.hadoop.hbase.metrics.Interns;
import org.apache.hadoop.hbase.metrics.OperationMetrics;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.lib.MutableFastCounter;
@ -83,6 +84,10 @@ public class MetricsMasterSourceImpl
// masterWrapper can be null because this function is called inside of init.
if (masterWrapper != null) {
// Pair<online region number, offline region number>
PairOfSameType<Integer> regionNumberPair = masterWrapper.getRegionCounts();
metricsRecordBuilder
.addGauge(Interns.info(MERGE_PLAN_COUNT_NAME, MERGE_PLAN_COUNT_DESC),
masterWrapper.getMergePlanCount())
@ -97,6 +102,10 @@ public class MetricsMasterSourceImpl
masterWrapper.getMasterInitializationTime())
.addGauge(Interns.info(AVERAGE_LOAD_NAME, AVERAGE_LOAD_DESC),
masterWrapper.getAverageLoad())
.addGauge(Interns.info(ONLINE_REGION_COUNT_NAME, ONLINE_REGION_COUNT_DESC),
regionNumberPair.getFirst())
.addGauge(Interns.info(OFFLINE_REGION_COUNT_NAME, OFFLINE_REGION_COUNT_DESC),
regionNumberPair.getSecond())
.tag(Interns.info(LIVE_REGION_SERVERS_NAME, LIVE_REGION_SERVERS_DESC),
masterWrapper.getRegionServers())
.addGauge(Interns.info(NUM_REGION_SERVERS_NAME,
@ -110,8 +119,7 @@ public class MetricsMasterSourceImpl
masterWrapper.getZookeeperQuorum())
.tag(Interns.info(SERVER_NAME_NAME, SERVER_NAME_DESC), masterWrapper.getServerName())
.tag(Interns.info(CLUSTER_ID_NAME, CLUSTER_ID_DESC), masterWrapper.getClusterId())
.tag(Interns.info(IS_ACTIVE_MASTER_NAME,
IS_ACTIVE_MASTER_DESC),
.tag(Interns.info(IS_ACTIVE_MASTER_NAME, IS_ACTIVE_MASTER_DESC),
String.valueOf(masterWrapper.getIsActiveMaster()));
}

View File

@ -17,18 +17,23 @@
*/
package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
/**
@ -179,4 +184,30 @@ public class MetricsMasterWrapperImpl implements MetricsMasterWrapper {
Entry<Long,Long> convertSnapshot(SpaceQuotaSnapshot snapshot) {
return new SimpleImmutableEntry<Long,Long>(snapshot.getUsage(), snapshot.getLimit());
}
/**
 * Counts the regions in the {@code OPEN} and {@code OFFLINE} states across every table returned
 * by {@code master.listTableDescriptors(null, null, null, false)}.
 *
 * @return pair whose first element is the number of {@code OPEN} (online) regions and whose
 *         second element is the number of {@code OFFLINE} regions; {@code (0, 0)} when the
 *         master is not initialized yet or an {@link IOException} is raised while gathering
 *         the counts
 */
@Override
public PairOfSameType<Integer> getRegionCounts() {
  try {
    if (!master.isInitialized()) {
      return new PairOfSameType<>(0, 0);
    }
    // Accumulate in primitives so the loop does not unbox/re-box an Integer on every addition.
    int onlineRegionCount = 0;
    int offlineRegionCount = 0;
    List<TableDescriptor> descriptors = master.listTableDescriptors(null, null, null, false);
    for (TableDescriptor htDesc : descriptors) {
      TableName tableName = htDesc.getTableName();
      Map<RegionState.State, List<RegionInfo>> tableRegions =
          master.getAssignmentManager().getRegionStates().getRegionByStateOfTable(tableName);
      // A state that is missing from the map contributes zero instead of triggering an NPE.
      onlineRegionCount +=
          tableRegions.getOrDefault(RegionState.State.OPEN, Collections.emptyList()).size();
      offlineRegionCount +=
          tableRegions.getOrDefault(RegionState.State.OFFLINE, Collections.emptyList()).size();
    }
    return new PairOfSameType<>(onlineRegionCount, offlineRegionCount);
  } catch (IOException ignored) {
    // Deliberately best-effort: a failure while collecting counts is reported as (0, 0)
    // rather than propagated to the caller.
    return new PairOfSameType<>(0, 0);
  }
}
}

View File

@ -20,13 +20,22 @@ package org.apache.hadoop.hbase.master;
import static org.junit.Assert.*;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.List;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot.SpaceQuotaStatus;
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -103,4 +112,63 @@ public class TestMasterMetricsWrapper {
info.convertSnapshot(new SpaceQuotaSnapshot(
new SpaceQuotaStatus(SpaceViolationPolicy.NO_INSERTS), 4096L, 2048L)));
}
/**
 * Verifies the online and offline region counts reported by
 * {@link MetricsMasterWrapperImpl#getRegionCounts()}: creates a pre-split table, checks that all
 * of its regions are counted as online, takes one region offline, then checks that the counts
 * shift by exactly one.
 */
@Test (timeout=30000)
public void testOfflineRegion() throws Exception {
  HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
  MetricsMasterWrapperImpl info = new MetricsMasterWrapperImpl(master);
  TableName table = TableName.valueOf("testRegionNumber");
  // Single source of truth for the region count used by createTable and the assertions below.
  final int regionCount = 5;
  try {
    RegionInfo hri;
    HTableDescriptor desc = new HTableDescriptor(table);
    byte[] family = Bytes.toBytes("FAMILY");
    desc.addFamily(new HColumnDescriptor(family));
    TEST_UTIL.getHBaseAdmin().createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"),
        regionCount);

    // Wait until every region of the table is known to the assignment manager; the assertions
    // below expect all of them, so stopping earlier could make the test flaky.
    long timeoutTime = System.currentTimeMillis() + 1000;
    while (true) {
      List<RegionInfo> regions = master.getAssignmentManager().
          getRegionStates().getRegionsOfTable(table);
      if (regions.size() >= regionCount) {
        hri = regions.get(2);
        break;
      }
      long now = System.currentTimeMillis();
      if (now > timeoutTime) {
        fail("Could not find an online region");
      }
      Thread.sleep(10);
    }

    PairOfSameType<Integer> regionNumberPair = info.getRegionCounts();
    assertEquals(regionCount, regionNumberPair.getFirst().intValue());
    assertEquals(0, regionNumberPair.getSecond().intValue());

    TEST_UTIL.getHBaseAdmin().offline(hri.getRegionName());

    // Wait until the region states list the chosen region as OFFLINE.
    timeoutTime = System.currentTimeMillis() + 800;
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    while (true) {
      if (regionStates.getRegionByStateOfTable(table)
          .get(RegionState.State.OFFLINE).contains(hri)) {
        break;
      }
      long now = System.currentTimeMillis();
      if (now > timeoutTime) {
        // fail() throws, so no explicit loop exit is needed after it.
        fail("Failed to offline the region in time");
      }
      Thread.sleep(10);
    }

    regionNumberPair = info.getRegionCounts();
    assertEquals(regionCount - 1, regionNumberPair.getFirst().intValue());
    assertEquals(1, regionNumberPair.getSecond().intValue());
  } finally {
    TEST_UTIL.deleteTable(table);
  }
}
}