HBASE-20943 Add offline/online region count into metrics
This commit is contained in:
parent
87949c9ff5
commit
2d911fdc2f
|
@ -63,6 +63,8 @@ public interface MetricsMasterSource extends BaseSource {
|
|||
String IS_ACTIVE_MASTER_NAME = "isActiveMaster";
|
||||
String SPLIT_PLAN_COUNT_NAME = "splitPlanCount";
|
||||
String MERGE_PLAN_COUNT_NAME = "mergePlanCount";
|
||||
String ONLINE_REGION_COUNT_NAME = "onlineRegionCount";
|
||||
String OFFLINE_REGION_COUNT_NAME = "offlineRegionCount";
|
||||
|
||||
String CLUSTER_REQUESTS_NAME = "clusterRequests";
|
||||
String MASTER_ACTIVE_TIME_DESC = "Master Active Time";
|
||||
|
@ -80,6 +82,8 @@ public interface MetricsMasterSource extends BaseSource {
|
|||
String IS_ACTIVE_MASTER_DESC = "Is Active Master";
|
||||
String SPLIT_PLAN_COUNT_DESC = "Number of Region Split Plans executed";
|
||||
String MERGE_PLAN_COUNT_DESC = "Number of Region Merge Plans executed";
|
||||
String ONLINE_REGION_COUNT_DESC = "Number of Online Regions";
|
||||
String OFFLINE_REGION_COUNT_DESC = "Number of Offline Regions";
|
||||
|
||||
String SERVER_CRASH_METRIC_PREFIX = "serverCrash";
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master;
|
|||
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import org.apache.hadoop.hbase.util.PairOfSameType;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
||||
/**
|
||||
|
@ -141,4 +142,11 @@ public interface MetricsMasterWrapper {
|
|||
* Get the time in Millis when the master finished initializing/becoming the active master
|
||||
*/
|
||||
long getMasterInitializationTime();
|
||||
|
||||
/**
|
||||
* Get the online and offline region counts
|
||||
*
|
||||
* @return pair whose first element is the online region count and whose second element is the offline region count
|
||||
*/
|
||||
PairOfSameType<Integer> getRegionCounts();
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.master;
|
|||
import org.apache.hadoop.hbase.metrics.BaseSourceImpl;
|
||||
import org.apache.hadoop.hbase.metrics.Interns;
|
||||
import org.apache.hadoop.hbase.metrics.OperationMetrics;
|
||||
import org.apache.hadoop.hbase.util.PairOfSameType;
|
||||
import org.apache.hadoop.metrics2.MetricsCollector;
|
||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||
import org.apache.hadoop.metrics2.lib.MutableFastCounter;
|
||||
|
@ -83,6 +84,10 @@ public class MetricsMasterSourceImpl
|
|||
|
||||
// masterWrapper can be null because this function is called inside of init.
|
||||
if (masterWrapper != null) {
|
||||
|
||||
// Pair<online region number, offline region number>
|
||||
PairOfSameType<Integer> regionNumberPair = masterWrapper.getRegionCounts();
|
||||
|
||||
metricsRecordBuilder
|
||||
.addGauge(Interns.info(MERGE_PLAN_COUNT_NAME, MERGE_PLAN_COUNT_DESC),
|
||||
masterWrapper.getMergePlanCount())
|
||||
|
@ -97,6 +102,10 @@ public class MetricsMasterSourceImpl
|
|||
masterWrapper.getMasterInitializationTime())
|
||||
.addGauge(Interns.info(AVERAGE_LOAD_NAME, AVERAGE_LOAD_DESC),
|
||||
masterWrapper.getAverageLoad())
|
||||
.addGauge(Interns.info(ONLINE_REGION_COUNT_NAME, ONLINE_REGION_COUNT_DESC),
|
||||
regionNumberPair.getFirst())
|
||||
.addGauge(Interns.info(OFFLINE_REGION_COUNT_NAME, OFFLINE_REGION_COUNT_DESC),
|
||||
regionNumberPair.getSecond())
|
||||
.tag(Interns.info(LIVE_REGION_SERVERS_NAME, LIVE_REGION_SERVERS_DESC),
|
||||
masterWrapper.getRegionServers())
|
||||
.addGauge(Interns.info(NUM_REGION_SERVERS_NAME,
|
||||
|
@ -110,8 +119,7 @@ public class MetricsMasterSourceImpl
|
|||
masterWrapper.getZookeeperQuorum())
|
||||
.tag(Interns.info(SERVER_NAME_NAME, SERVER_NAME_DESC), masterWrapper.getServerName())
|
||||
.tag(Interns.info(CLUSTER_ID_NAME, CLUSTER_ID_DESC), masterWrapper.getClusterId())
|
||||
.tag(Interns.info(IS_ACTIVE_MASTER_NAME,
|
||||
IS_ACTIVE_MASTER_DESC),
|
||||
.tag(Interns.info(IS_ACTIVE_MASTER_NAME, IS_ACTIVE_MASTER_DESC),
|
||||
String.valueOf(masterWrapper.getIsActiveMaster()));
|
||||
}
|
||||
|
||||
|
|
|
@ -17,18 +17,23 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.AbstractMap.SimpleImmutableEntry;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
|
||||
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
|
||||
import org.apache.hadoop.hbase.util.PairOfSameType;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
|
||||
|
||||
/**
|
||||
|
@ -179,4 +184,30 @@ public class MetricsMasterWrapperImpl implements MetricsMasterWrapper {
|
|||
Entry<Long,Long> convertSnapshot(SpaceQuotaSnapshot snapshot) {
|
||||
return new SimpleImmutableEntry<Long,Long>(snapshot.getUsage(), snapshot.getLimit());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PairOfSameType<Integer> getRegionCounts() {
|
||||
try {
|
||||
if (!master.isInitialized()) {
|
||||
return new PairOfSameType<>(0, 0);
|
||||
}
|
||||
Integer onlineRegionCount = 0;
|
||||
Integer offlineRegionCount = 0;
|
||||
|
||||
List<TableDescriptor> descriptors = master.listTableDescriptors(null, null,
|
||||
null, false);
|
||||
|
||||
for (TableDescriptor htDesc : descriptors) {
|
||||
TableName tableName = htDesc.getTableName();
|
||||
Map<RegionState.State, List<RegionInfo>> tableRegions =
|
||||
master.getAssignmentManager().getRegionStates()
|
||||
.getRegionByStateOfTable(tableName);
|
||||
onlineRegionCount += tableRegions.get(RegionState.State.OPEN).size();
|
||||
offlineRegionCount += tableRegions.get(RegionState.State.OFFLINE).size();
|
||||
}
|
||||
return new PairOfSameType<>(onlineRegionCount, offlineRegionCount);
|
||||
} catch (IOException e) {
|
||||
return new PairOfSameType<>(0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,13 +20,22 @@ package org.apache.hadoop.hbase.master;
|
|||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.AbstractMap.SimpleImmutableEntry;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStates;
|
||||
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
|
||||
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot.SpaceQuotaStatus;
|
||||
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.PairOfSameType;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -103,4 +112,63 @@ public class TestMasterMetricsWrapper {
|
|||
info.convertSnapshot(new SpaceQuotaSnapshot(
|
||||
new SpaceQuotaStatus(SpaceViolationPolicy.NO_INSERTS), 4096L, 2048L)));
|
||||
}
|
||||
|
||||
/**
|
||||
* tests online and offline region number
|
||||
*/
|
||||
@Test (timeout=30000)
|
||||
public void testOfflineRegion() throws Exception {
|
||||
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||
MetricsMasterWrapperImpl info = new MetricsMasterWrapperImpl(master);
|
||||
TableName table = TableName.valueOf("testRegionNumber");
|
||||
try {
|
||||
RegionInfo hri;
|
||||
HTableDescriptor desc = new HTableDescriptor(table);
|
||||
byte[] FAMILY = Bytes.toBytes("FAMILY");
|
||||
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||
TEST_UTIL.getHBaseAdmin().createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
|
||||
|
||||
// wait till the table is assigned
|
||||
long timeoutTime = System.currentTimeMillis() + 1000;
|
||||
while (true) {
|
||||
List<RegionInfo> regions = master.getAssignmentManager().
|
||||
getRegionStates().getRegionsOfTable(table);
|
||||
if (regions.size() > 3) {
|
||||
hri = regions.get(2);
|
||||
break;
|
||||
}
|
||||
long now = System.currentTimeMillis();
|
||||
if (now > timeoutTime) {
|
||||
fail("Could not find an online region");
|
||||
}
|
||||
Thread.sleep(10);
|
||||
}
|
||||
|
||||
PairOfSameType<Integer> regionNumberPair = info.getRegionCounts();
|
||||
assertEquals(5, regionNumberPair.getFirst().intValue());
|
||||
assertEquals(0, regionNumberPair.getSecond().intValue());
|
||||
|
||||
TEST_UTIL.getHBaseAdmin().offline(hri.getRegionName());
|
||||
|
||||
timeoutTime = System.currentTimeMillis() + 800;
|
||||
RegionStates regionStates = master.getAssignmentManager().getRegionStates();
|
||||
while (true) {
|
||||
if (regionStates.getRegionByStateOfTable(table)
|
||||
.get(RegionState.State.OFFLINE).contains(hri)) {
|
||||
break;
|
||||
}
|
||||
long now = System.currentTimeMillis();
|
||||
if (now > timeoutTime) {
|
||||
fail("Failed to offline the region in time");
|
||||
break;
|
||||
}
|
||||
Thread.sleep(10);
|
||||
}
|
||||
regionNumberPair = info.getRegionCounts();
|
||||
assertEquals(4, regionNumberPair.getFirst().intValue());
|
||||
assertEquals(1, regionNumberPair.getSecond().intValue());
|
||||
} finally {
|
||||
TEST_UTIL.deleteTable(table);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue