HBASE-16549 Added new metrics for AMv2 procedures

The following AMv2 procedures are modified to override the onSubmit() and onFinish() hooks provided by HBASE-17888 to do
metrics calculations when procedures are submitted and finished:
* AssignProcedure
* UnassignProcedure
* MergeTableRegionsProcedure
* SplitTableRegionProcedure
* ServerCrashProcedure

The following metrics are collected for each of the above procedures during the lifetime of the process:
* Total number of requests submitted per procedure type
* Histogram of runtime in milliseconds for successfully completed procedures
* Total number of failed procedures

As we are moving away from Hadoop's metrics2, the hbase-metrics-api module is used for the newly added metrics.

Modified existing tests to verify procedure counts.
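For illustration, a minimal sketch of reading the new metrics back from a running master; the accessor chain mirrors the test changes below, while the master handle is assumed to be obtained elsewhere (e.g. from a mini cluster in tests):

// Sketch: reading the new per-procedure metrics from a running master.
// Assumes `master` is an HMaster handle obtained elsewhere.
ProcedureMetrics assignMetrics =
    master.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics();
long submitted = assignMetrics.getSubmittedCounter().getCount();
long failed = assignMetrics.getFailedCounter().getCount();
// Server crash metrics hang off the master metrics instead:
ProcedureMetrics scpMetrics = master.getMasterMetrics().getServerCrashProcMetrics();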

Signed-off-by: Michael Stack <stack@apache.org>
Umesh Agashe 2017-06-02 10:40:08 -07:00 committed by Michael Stack
parent e65d8653e5
commit 07c38e7165
23 changed files with 592 additions and 61 deletions

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.metrics.BaseSource;
import org.apache.hadoop.hbase.metrics.OperationMetrics;
public interface MetricsAssignmentManagerSource extends BaseSource {
@ -42,13 +43,11 @@ public interface MetricsAssignmentManagerSource extends BaseSource {
*/
String METRICS_DESCRIPTION = "Metrics about HBase master assignment manager.";
// RIT metrics
String RIT_COUNT_NAME = "ritCount";
String RIT_COUNT_OVER_THRESHOLD_NAME = "ritCountOverThreshold";
String RIT_OLDEST_AGE_NAME = "ritOldestAge";
String RIT_DURATION_NAME = "ritDuration";
String ASSIGN_TIME_NAME = "assign";
String UNASSIGN_TIME_NAME = "unassign";
String BULK_ASSIGN_TIME_NAME = "bulkAssign";
String RIT_COUNT_DESC = "Current number of Regions In Transition (Gauge).";
String RIT_COUNT_OVER_THRESHOLD_DESC =
@ -57,6 +56,11 @@ public interface MetricsAssignmentManagerSource extends BaseSource {
String RIT_DURATION_DESC =
"Total durations in milliseconds for all Regions in Transition (Histogram).";
String ASSIGN_METRIC_PREFIX = "assign";
String UNASSIGN_METRIC_PREFIX = "unassign";
String SPLIT_METRIC_PREFIX = "split";
String MERGE_METRIC_PREFIX = "merge";
String OPERATION_COUNT_NAME = "operationCount";
/**
@ -83,17 +87,28 @@ public interface MetricsAssignmentManagerSource extends BaseSource {
void updateRitDuration(long duration);
/**
* Increment the count of assignment operation (assign/unassign).
* TODO: Remove. This may not be needed now as assign and unassign counts are tracked separately
* Increment the count of operations (assign/unassign).
*/
void incrementOperationCounter();
/**
* Add the time took to perform the last assign operation
* @return {@link OperationMetrics} containing common metrics for assign operation
*/
void updateAssignTime(long time);
OperationMetrics getAssignMetrics();
/**
* Add the time took to perform the last unassign operation
* @return {@link OperationMetrics} containing common metrics for unassign operation
*/
void updateUnassignTime(long time);
OperationMetrics getUnassignMetrics();
/**
* @return {@link OperationMetrics} containing common metrics for split operation
*/
OperationMetrics getSplitMetrics();
/**
* @return {@link OperationMetrics} containing common metrics for merge operation
*/
OperationMetrics getMergeMetrics();
}

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.metrics.BaseSource;
import org.apache.hadoop.hbase.metrics.OperationMetrics;
/**
* Interface that classes that expose metrics about the master will implement.
@ -75,6 +76,7 @@ public interface MetricsMasterSource extends BaseSource {
String SPLIT_PLAN_COUNT_DESC = "Number of Region Split Plans executed";
String MERGE_PLAN_COUNT_DESC = "Number of Region Merge Plans executed";
String SERVER_CRASH_METRIC_PREFIX = "serverCrash";
/**
* Increment the number of requests the cluster has seen.
@ -83,7 +85,8 @@ public interface MetricsMasterSource extends BaseSource {
*/
void incRequests(final long inc);
/**
* @return {@link OperationMetrics} containing common metrics for server crash operation
*/
OperationMetrics getServerCrashMetrics();
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.metrics;
import com.google.common.base.Preconditions;
/**
* Container class for commonly collected metrics for most operations. Instantiate this class to
* collect submitted count, failed count and time histogram for an operation.
*/
public class OperationMetrics {
private static final String SUBMITTED_COUNT = "SubmittedCount";
private static final String TIME = "Time";
private static final String FAILED_COUNT = "FailedCount";
private final Counter submittedCounter;
private final Histogram timeHisto;
private final Counter failedCounter;
public OperationMetrics(final MetricRegistry registry, final String metricNamePrefix) {
Preconditions.checkNotNull(registry);
Preconditions.checkNotNull(metricNamePrefix);
/**
* TODO: As of now, metric descriptions cannot be added/registered with
* {@link MetricRegistry}. As metric names are unambiguous but concise, descriptions of
* metrics need to be made available someplace for users.
*/
submittedCounter = registry.counter(metricNamePrefix + SUBMITTED_COUNT);
timeHisto = registry.histogram(metricNamePrefix + TIME);
failedCounter = registry.counter(metricNamePrefix + FAILED_COUNT);
}
public Counter getSubmittedCounter() {
return submittedCounter;
}
public Histogram getTimeHisto() {
return timeHisto;
}
public Counter getFailedCounter() {
return failedCounter;
}
}
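A minimal usage sketch, assuming a MetricRegistry obtained from the enclosing metrics source (this patch passes BaseSourceImpl#registry); runtimeMs is a hypothetical runtime value:

// Sketch: creating and updating an OperationMetrics container.
OperationMetrics assign = new OperationMetrics(registry, "assign");
assign.getSubmittedCounter().increment();  // updates the "assignSubmittedCount" counter
assign.getTimeHisto().update(runtimeMs);   // updates the "assignTime" histogram
assign.getFailedCounter().increment();     // updates the "assignFailedCount" counter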

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.metrics.BaseSourceImpl;
import org.apache.hadoop.hbase.metrics.OperationMetrics;
import org.apache.hadoop.metrics2.MetricHistogram;
import org.apache.hadoop.metrics2.lib.MutableFastCounter;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
@ -35,8 +36,11 @@ public class MetricsAssignmentManagerSourceImpl
private MetricHistogram ritDurationHisto;
private MutableFastCounter operationCounter;
private MetricHistogram assignTimeHisto;
private MetricHistogram unassignTimeHisto;
private OperationMetrics assignMetrics;
private OperationMetrics unassignMetrics;
private OperationMetrics splitMetrics;
private OperationMetrics mergeMetrics;
public MetricsAssignmentManagerSourceImpl() {
this(METRICS_NAME, METRICS_DESCRIPTION, METRICS_CONTEXT, METRICS_JMX_CONTEXT);
@ -53,10 +57,18 @@ public class MetricsAssignmentManagerSourceImpl
ritCountOverThresholdGauge = metricsRegistry.newGauge(RIT_COUNT_OVER_THRESHOLD_NAME,
RIT_COUNT_OVER_THRESHOLD_DESC,0l);
ritOldestAgeGauge = metricsRegistry.newGauge(RIT_OLDEST_AGE_NAME, RIT_OLDEST_AGE_DESC, 0l);
assignTimeHisto = metricsRegistry.newTimeHistogram(ASSIGN_TIME_NAME);
unassignTimeHisto = metricsRegistry.newTimeHistogram(UNASSIGN_TIME_NAME);
ritDurationHisto = metricsRegistry.newTimeHistogram(RIT_DURATION_NAME, RIT_DURATION_DESC);
operationCounter = metricsRegistry.getCounter(OPERATION_COUNT_NAME, 0l);
/**
* NOTE: Please refer to HBASE-9774 and HBASE-14282. Based on these two issues, HBase is
* moving away from using Hadoop's metrics2 to having independent HBase-specific metrics. Use
* {@link BaseSourceImpl#registry} to register the new metrics.
*/
assignMetrics = new OperationMetrics(registry, ASSIGN_METRIC_PREFIX);
unassignMetrics = new OperationMetrics(registry, UNASSIGN_METRIC_PREFIX);
splitMetrics = new OperationMetrics(registry, SPLIT_METRIC_PREFIX);
mergeMetrics = new OperationMetrics(registry, MERGE_METRIC_PREFIX);
}
@Override
@ -79,18 +91,28 @@ public class MetricsAssignmentManagerSourceImpl
operationCounter.incr();
}
@Override
public void updateAssignTime(final long time) {
assignTimeHisto.add(time);
}
@Override
public void updateUnassignTime(final long time) {
unassignTimeHisto.add(time);
}
@Override
public void updateRitDuration(long duration) {
ritDurationHisto.add(duration);
}
@Override
public OperationMetrics getAssignMetrics() {
return assignMetrics;
}
@Override
public OperationMetrics getUnassignMetrics() {
return unassignMetrics;
}
@Override
public OperationMetrics getSplitMetrics() {
return splitMetrics;
}
@Override
public OperationMetrics getMergeMetrics() {
return mergeMetrics;
}
}

View File

@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.metrics.BaseSourceImpl;
import org.apache.hadoop.hbase.metrics.Interns;
import org.apache.hadoop.hbase.metrics.OperationMetrics;
import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.lib.MutableFastCounter;
@ -37,6 +38,8 @@ public class MetricsMasterSourceImpl
private final MetricsMasterWrapper masterWrapper;
private MutableFastCounter clusterRequestsCounter;
private OperationMetrics serverCrashMetrics;
public MetricsMasterSourceImpl(MetricsMasterWrapper masterWrapper) {
this(METRICS_NAME,
METRICS_DESCRIPTION,
@ -59,6 +62,13 @@ public class MetricsMasterSourceImpl
public void init() {
super.init();
clusterRequestsCounter = metricsRegistry.newCounter(CLUSTER_REQUESTS_NAME, "", 0l);
/**
* NOTE: Please refer to HBASE-9774 and HBASE-14282. Based on these two issues, HBase is
* moving away from using Hadoop's metrics2 to having independent HBase-specific metrics. Use
* {@link BaseSourceImpl#registry} to register the new metrics.
*/
serverCrashMetrics = new OperationMetrics(registry, SERVER_CRASH_METRIC_PREFIX);
}
@Override
@ -105,4 +115,8 @@ public class MetricsMasterSourceImpl
metricsRegistry.snapshot(metricsRecordBuilder, all);
}
@Override
public OperationMetrics getServerCrashMetrics() {
return serverCrashMetrics;
}
}

View File

@ -86,6 +86,10 @@
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-metrics-api</artifactId>
</dependency>
</dependencies>
<profiles>

View File

@ -30,6 +30,8 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.exceptions.TimeoutIOException;
import org.apache.hadoop.hbase.metrics.Counter;
import org.apache.hadoop.hbase.metrics.Histogram;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
import org.apache.hadoop.hbase.security.User;
@ -288,25 +290,67 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
}
/**
* This function will be called just when procedure is submitted for execution. Override this
* method to update the metrics at the beginning of the procedure
* Override this method to provide procedure-specific counters for submitted count, failed
* count and time histogram.
* @param env The environment passed to the procedure executor
* @return Container object for procedure-related metrics
*/
protected void updateMetricsOnSubmit(final TEnvironment env) {}
protected ProcedureMetrics getProcedureMetrics(final TEnvironment env) {
return null;
}
/**
* This function will be called when the procedure is submitted for execution. Override this
* method to update the metrics at the beginning of the procedure. The default implementation
* updates the submitted counter if {@link #getProcedureMetrics(Object)} returns a non-null
* {@link ProcedureMetrics}.
*/
protected void updateMetricsOnSubmit(final TEnvironment env) {
ProcedureMetrics metrics = getProcedureMetrics(env);
if (metrics == null) {
return;
}
Counter submittedCounter = metrics.getSubmittedCounter();
if (submittedCounter != null) {
submittedCounter.increment();
}
}
/**
* This function will be called just after procedure execution is finished. Override this method
* to update metrics at the end of the procedure
* to update metrics at the end of the procedure. If {@link #getProcedureMetrics(Object)}
* returns a non-null {@link ProcedureMetrics}, the default implementation adds the runtime of
* a successfully completed procedure to the time histogram, and increments the failed counter
* for failed procedures.
*
* TODO: As a failure in any sub-procedure rolls back all procedures in the stack, including
* successfully finished siblings, this function may get called twice in certain cases for
* certain procedures. Explore further whether it can be called exactly once.
*
* @param env
* @param runtime - Runtime of the procedure in milliseconds
* @param success - true if procedure is completed successfully
* @param env The environment passed to the procedure executor
* @param runtime Runtime of the procedure in milliseconds
* @param success true if procedure is completed successfully
*/
protected void updateMetricsOnFinish(final TEnvironment env, final long runtime,
boolean success) {}
boolean success) {
ProcedureMetrics metrics = getProcedureMetrics(env);
if (metrics == null) {
return;
}
if (success) {
Histogram timeHisto = metrics.getTimeHisto();
if (timeHisto != null) {
timeHisto.update(runtime);
}
} else {
Counter failedCounter = metrics.getFailedCounter();
if (failedCounter != null) {
failedCounter.increment();
}
}
}
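The two hooks above are driven by the procedure executor. Roughly, the call shape looks like the following sketch; these are hypothetical helpers for illustration, not the actual ProcedureExecutor code:

// Hypothetical executor-side helpers showing when the hooks above fire.
void submit(final Procedure<TEnvironment> proc, final TEnvironment env) {
  proc.updateMetricsOnSubmit(env);  // bumps the submitted counter, if metrics exist
  // ... hand the procedure to the scheduler ...
}
void complete(final Procedure<TEnvironment> proc, final TEnvironment env,
    final long startTime, final boolean success) {
  final long runtimeMs = System.currentTimeMillis() - startTime;
  proc.updateMetricsOnFinish(env, runtimeMs, success);  // time histogram or failed counter
}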
@Override
public String toString() {

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.procedure2;
import org.apache.hadoop.hbase.metrics.Counter;
import org.apache.hadoop.hbase.metrics.Histogram;
/**
* With this interface, the procedure framework provides a means to collect the following
* metrics per procedure type, for all procedures:
* <ul>
* <li>Count of submitted procedure instances</li>
* <li>Time histogram for successfully completed procedure instances</li>
* <li>Count of failed procedure instances</li>
* </ul>
*
* Please implement this interface to return appropriate metrics.
*/
public interface ProcedureMetrics {
/**
* @return Total number of instances submitted for a given procedure type
*/
Counter getSubmittedCounter();
/**
* @return Histogram of runtimes for all successfully completed instances of a given
* procedure type
*/
Histogram getTimeHisto();
/**
* @return Total number of failed instances for a given procedure type
*/
Counter getFailedCounter();
}
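A minimal sketch of an implementation backed by the hbase-metrics-api types, assuming a MetricRegistry is available; on the source side, OperationMetrics in this patch plays essentially this role:

import org.apache.hadoop.hbase.metrics.Counter;
import org.apache.hadoop.hbase.metrics.Histogram;
import org.apache.hadoop.hbase.metrics.MetricRegistry;

// Sketch: a straightforward ProcedureMetrics backed by a MetricRegistry.
class SimpleProcedureMetrics implements ProcedureMetrics {
  private final Counter submitted;
  private final Histogram time;
  private final Counter failed;

  SimpleProcedureMetrics(final MetricRegistry registry, final String prefix) {
    this.submitted = registry.counter(prefix + "SubmittedCount");
    this.time = registry.histogram(prefix + "Time");
    this.failed = registry.counter(prefix + "FailedCount");
  }

  @Override public Counter getSubmittedCounter() { return submitted; }
  @Override public Histogram getTimeHisto() { return time; }
  @Override public Counter getFailedCounter() { return failed; }
}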

View File

@ -648,7 +648,8 @@ public class HMaster extends HRegionServer implements MasterServices {
return MasterDumpServlet.class;
}
MetricsMaster getMasterMetrics() {
@Override
public MetricsMaster getMasterMetrics() {
return metricsMaster;
}

View File

@ -133,6 +133,11 @@ public interface MasterServices extends Server {
@VisibleForTesting
public ProcedureEvent getInitializedEvent();
/**
* @return Master's instance of {@link MetricsMaster}
*/
MetricsMaster getMasterMetrics();
/**
* Check table is modifiable; i.e. exists and is offline.
* @param tableName Name of table to check.

View File

@ -19,13 +19,26 @@
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import static org.apache.hadoop.hbase.master.MetricsMaster.convertToProcedureMetrics;
public class MetricsAssignmentManager {
private final MetricsAssignmentManagerSource assignmentManagerSource;
private final ProcedureMetrics assignProcMetrics;
private final ProcedureMetrics unassignProcMetrics;
private final ProcedureMetrics splitProcMetrics;
private final ProcedureMetrics mergeProcMetrics;
public MetricsAssignmentManager() {
assignmentManagerSource = CompatibilitySingletonFactory.getInstance(
MetricsAssignmentManagerSource.class);
assignProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getAssignMetrics());
unassignProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getUnassignMetrics());
splitProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getSplitMetrics());
mergeProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getMergeMetrics());
}
public MetricsAssignmentManagerSource getMetricsProcSource() {
@ -66,6 +79,7 @@ public class MetricsAssignmentManager {
}
/*
* TODO: Remove. This may not be required as assign and unassign operations are tracked separately
* Increment the count of assignment operation (assign/unassign).
*/
public void incrementOperationCounter() {
@ -73,18 +87,30 @@ public class MetricsAssignmentManager {
}
/**
* Add the time took to perform the last assign operation
* @param time
* @return Set of common metrics for assign procedure
*/
public void updateAssignTime(final long time) {
assignmentManagerSource.updateAssignTime(time);
public ProcedureMetrics getAssignProcMetrics() {
return assignProcMetrics;
}
/**
* Add the time took to perform the last unassign operation
* @param time
* @return Set of common metrics for unassign procedure
*/
public void updateUnassignTime(final long time) {
assignmentManagerSource.updateUnassignTime(time);
public ProcedureMetrics getUnassignProcMetrics() {
return unassignProcMetrics;
}
/**
* @return Set of common metrics for split procedure
*/
public ProcedureMetrics getSplitProcMetrics() {
return splitProcMetrics;
}
/**
* @return Set of common metrics for merge procedure
*/
public ProcedureMetrics getMergeProcMetrics() {
return mergeProcMetrics;
}
}

View File

@ -23,6 +23,10 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.metrics.Counter;
import org.apache.hadoop.hbase.metrics.Histogram;
import org.apache.hadoop.hbase.metrics.OperationMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
/**
* This class is for maintaining the various master statistics
@ -39,12 +43,16 @@ public class MetricsMaster {
private MetricsMasterProcSource masterProcSource;
private MetricsMasterQuotaSource masterQuotaSource;
private ProcedureMetrics serverCrashProcMetrics;
public MetricsMaster(MetricsMasterWrapper masterWrapper) {
masterSource = CompatibilitySingletonFactory.getInstance(MetricsMasterSourceFactory.class).create(masterWrapper);
masterProcSource =
CompatibilitySingletonFactory.getInstance(MetricsMasterProcSourceFactory.class).create(masterWrapper);
masterQuotaSource =
CompatibilitySingletonFactory.getInstance(MetricsMasterQuotaSourceFactory.class).create(masterWrapper);
serverCrashProcMetrics = convertToProcedureMetrics(masterSource.getServerCrashMetrics());
}
// for unit-test usage
@ -112,4 +120,40 @@ public class MetricsMaster {
public void incrementQuotaObserverTime(final long executionTime) {
masterQuotaSource.incrementSpaceQuotaObserverChoreTime(executionTime);
}
/**
* @return Set of metrics for server crash procedure
*/
public ProcedureMetrics getServerCrashProcMetrics() {
return serverCrashProcMetrics;
}
/**
* This is a utility function that converts {@link OperationMetrics} to
* {@link ProcedureMetrics}.
*
* NOTE: The procedure framework in the hbase-procedure module accesses metrics common to most
* procedures through the {@link ProcedureMetrics} interface. Metrics source classes in the
* hbase-hadoop-compat module provide a similar interface, {@link OperationMetrics}, containing
* metrics common to most operations. As both hbase-procedure and hbase-hadoop-compat are
* lower-level modules used by the hbase-server (this) module, and there is no dependency
* between them, this method does the required conversion.
*/
public static ProcedureMetrics convertToProcedureMetrics(final OperationMetrics metrics) {
return new ProcedureMetrics() {
@Override
public Counter getSubmittedCounter() {
return metrics.getSubmittedCounter();
}
@Override
public Histogram getTimeHisto() {
return metrics.getTimeHisto();
}
@Override
public Counter getFailedCounter() {
return metrics.getFailedCounter();
}
};
}
}

View File

@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
@ -338,4 +339,9 @@ public class AssignProcedure extends RegionTransitionProcedure {
if (node == null) return null;
return node.getRegionLocation();
}
}
@Override
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics();
}
}

View File

@ -1370,10 +1370,6 @@ public class AssignmentManager implements ServerListener {
regionNode.getRegionLocation(), regionNode.getLastHost(), regionNode.getOpenSeqNum(),
regionNode.getProcedure().getProcId());
sendRegionOpenedNotification(hri, regionNode.getRegionLocation());
// update assignment metrics
if (regionNode.getProcedure() != null) {
metrics.updateAssignTime(regionNode.getProcedure().elapsedTime());
}
}
}
@ -1411,10 +1407,6 @@ public class AssignmentManager implements ServerListener {
regionNode.getRegionLocation()/*null*/, regionNode.getLastHost(),
HConstants.NO_SEQNUM, regionNode.getProcedure().getProcId());
sendRegionClosedNotification(hri);
// Update assignment metrics
if (regionNode.getProcedure() != null) {
metrics.updateUnassignTime(regionNode.getProcedure().elapsedTime());
}
}
}

View File

@ -54,6 +54,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
@ -65,7 +66,6 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import com.google.common.annotations.VisibleForTesting;
import com.lmax.disruptor.YieldingWaitStrategy;
/**
* The procedure to Merge a region in a table.
@ -430,6 +430,11 @@ public class MergeTableRegionsProcedure
return TableOperationType.REGION_MERGE;
}
@Override
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getAssignmentManager().getAssignmentManagerMetrics().getMergeProcMetrics();
}
/**
* Prepare merge and do some check
* @param env MasterProcedureEnv
@ -773,4 +778,4 @@ public class MergeTableRegionsProcedure
public HRegionInfo getMergedRegion() {
return this.mergedRegion;
}
}
}

View File

@ -57,13 +57,14 @@ import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
@ -323,6 +324,11 @@ public class SplitTableRegionProcedure
return TableOperationType.REGION_SPLIT;
}
@Override
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics();
}
private byte[] getSplitRow() {
return daughter_2_HRI.getStartKey();
}

View File

@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.ServerCrashException;
import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
@ -258,4 +259,9 @@ public class UnassignProcedure extends RegionTransitionProcedure {
public ServerName getServer(final MasterProcedureEnv env) {
return this.hostingServer;
}
@Override
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getAssignmentManager().getAssignmentManagerMetrics().getUnassignProcMetrics();
}
}

View File

@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.master.MasterWalManager;
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
@ -436,4 +437,9 @@ implements ServerProcedureInterface {
}
}
}
}
@Override
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics();
}
}

View File

@ -54,6 +54,7 @@ import com.google.protobuf.Service;
public class MockNoopMasterServices implements MasterServices, Server {
private final Configuration conf;
private final MetricsMaster metricsMaster;
public MockNoopMasterServices() {
this(null);
@ -61,6 +62,7 @@ public class MockNoopMasterServices implements MasterServices, Server {
public MockNoopMasterServices(final Configuration conf) {
this.conf = conf;
this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(null));
}
@Override
@ -133,6 +135,11 @@ public class MockNoopMasterServices implements MasterServices, Server {
return null;
}
@Override
public MetricsMaster getMasterMetrics() {
return metricsMaster;
}
@Override
public ServerManager getServerManager() {
return null;

View File

@ -57,6 +57,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
@ -115,6 +116,14 @@ public class TestAssignmentManager {
// Simple executor to run some simple tasks.
private ScheduledExecutorService executor;
private ProcedureMetrics assignProcMetrics;
private ProcedureMetrics unassignProcMetrics;
private long assignSubmittedCount = 0;
private long assignFailedCount = 0;
private long unassignSubmittedCount = 0;
private long unassignFailedCount = 0;
private void setupConfiguration(Configuration conf) throws Exception {
FSUtils.setRootDir(conf, UTIL.getDataTestDir());
conf.setBoolean(WALProcedureStore.USE_HSYNC_CONF_KEY, false);
@ -133,6 +142,8 @@ public class TestAssignmentManager {
rsDispatcher = new MockRSProcedureDispatcher(master);
master.start(NSERVERS, rsDispatcher);
am = master.getAssignmentManager();
assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
setUpMeta();
}
@ -182,7 +193,14 @@ public class TestAssignmentManager {
@Test
public void testAssignWithGoodExec() throws Exception {
// collect AM metrics before test
collectAssignmentManagerMetrics();
testAssign(new GoodRsExecutor());
assertEquals(assignSubmittedCount + NREGIONS,
assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
}
@Test
@ -227,11 +245,19 @@ public class TestAssignmentManager {
final TableName tableName = TableName.valueOf(this.name.getMethodName());
final HRegionInfo hri = createRegionInfo(tableName, 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 3));
waitOnFuture(submitProcedure(am.createAssignProcedure(hri, false)));
rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 3));
waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false)));
assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
assertEquals(unassignSubmittedCount + 1, unassignProcMetrics.getSubmittedCounter().getCount());
assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
}
@Test
@ -244,6 +270,9 @@ public class TestAssignmentManager {
final MockRSExecutor executor) throws Exception {
final HRegionInfo hri = createRegionInfo(tableName, 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Test Assign operation failure
rsDispatcher.setMockRsExecutor(executor);
try {
@ -264,20 +293,40 @@ public class TestAssignmentManager {
// Test Unassign operation failure
rsDispatcher.setMockRsExecutor(executor);
waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false)));
assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
assertEquals(unassignSubmittedCount + 1, unassignProcMetrics.getSubmittedCounter().getCount());
// TODO: We supposed to have 1 failed assign, 1 successful assign and a failed unassign
// operation. But ProcV2 framework marks aborted unassign operation as success. Fix it!
assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
*/
}
@Test
public void testIOExceptionOnAssignment() throws Exception {
// collect AM metrics before test
collectAssignmentManagerMetrics();
testFailedOpen(TableName.valueOf("testExceptionOnAssignment"),
new FaultyRsExecutor(new IOException("test fault")));
assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
}
@Test
public void testDoNotRetryExceptionOnAssignment() throws Exception {
// collect AM metrics before test
collectAssignmentManagerMetrics();
testFailedOpen(TableName.valueOf("testDoNotRetryExceptionOnAssignment"),
new FaultyRsExecutor(new DoNotRetryIOException("test do not retry fault")));
assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount());
}
private void testFailedOpen(final TableName tableName,
@ -325,6 +374,9 @@ public class TestAssignmentManager {
final TableName tableName = TableName.valueOf("testAssignAnAssignedRegion");
final HRegionInfo hri = createRegionInfo(tableName, 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
final Future<byte[]> futureA = submitProcedure(am.createAssignProcedure(hri, false));
@ -339,6 +391,12 @@ public class TestAssignmentManager {
waitOnFuture(futureB);
am.getRegionStates().isRegionInState(hri, State.OPEN);
// TODO: What else can we do to ensure just a noop.
// TODO: Though second assign is noop, it's considered success, can noop be handled in a
// better way?
assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
}
@Test
@ -346,6 +404,9 @@ public class TestAssignmentManager {
final TableName tableName = TableName.valueOf("testUnassignAnUnassignedRegion");
final HRegionInfo hri = createRegionInfo(tableName, 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
// assign the region first
@ -365,6 +426,13 @@ public class TestAssignmentManager {
// Ensure we are still CLOSED.
am.getRegionStates().isRegionInState(hri, State.CLOSED);
// TODO: What else can we do to ensure just a noop.
assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
// TODO: Though second unassign is noop, it's considered success, can noop be handled in a
// better way?
assertEquals(unassignSubmittedCount + 2, unassignProcMetrics.getSubmittedCounter().getCount());
assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
}
private Future<byte[]> submitProcedure(final Procedure proc) {
@ -747,4 +815,11 @@ public class TestAssignmentManager {
}*/
}
}
private void collectAssignmentManagerMetrics() {
assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
}
}

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
@ -70,6 +71,17 @@ public class TestMergeTableRegionsProcedure {
final static Configuration conf = UTIL.getConfiguration();
private static Admin admin;
private AssignmentManager am;
private ProcedureMetrics mergeProcMetrics;
private ProcedureMetrics assignProcMetrics;
private ProcedureMetrics unassignProcMetrics;
private long mergeSubmittedCount = 0;
private long mergeFailedCount = 0;
private long assignSubmittedCount = 0;
private long assignFailedCount = 0;
private long unassignSubmittedCount = 0;
private long unassignFailedCount = 0;
private static void setupConf(Configuration conf) {
// Reduce the maximum attempts to speed up the test
conf.setInt("hbase.assignment.maximum.attempts", 3);
@ -105,6 +117,10 @@ public class TestMergeTableRegionsProcedure {
// Turn off the meta scanner so it doesn't remove the parent on us.
UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
resetProcExecutorTestingKillFlag();
am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics();
assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
}
@After
@ -135,12 +151,24 @@ public class TestMergeTableRegionsProcedure {
HRegionInfo[] regionsToMerge = new HRegionInfo[2];
regionsToMerge[0] = tableRegions.get(0);
regionsToMerge[1] = tableRegions.get(1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
MergeTableRegionsProcedure proc =
new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true);
long procId = procExec.submitProcedure(proc);
ProcedureTestingUtility.waitProcedure(procExec, procId);
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
assertRegionCount(tableName, initialRegionCount - 1);
assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount());
assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
assertEquals(unassignSubmittedCount + 2, unassignProcMetrics.getSubmittedCounter().getCount());
assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
Pair<HRegionInfo, HRegionInfo> pair =
MetaTableAccessor.getRegionsFromMergeQualifier(UTIL.getConnection(),
proc.getMergedRegion().getRegionName());
@ -175,6 +203,9 @@ public class TestMergeTableRegionsProcedure {
regionsToMerge2[0] = tableRegions.get(2);
regionsToMerge2[1] = tableRegions.get(3);
// collect AM metrics before test
collectAssignmentManagerMetrics();
long procId1 = procExec.submitProcedure(new MergeTableRegionsProcedure(
procExec.getEnvironment(), regionsToMerge1, true));
long procId2 = procExec.submitProcedure(new MergeTableRegionsProcedure(
@ -184,6 +215,13 @@ public class TestMergeTableRegionsProcedure {
ProcedureTestingUtility.assertProcNotFailed(procExec, procId1);
ProcedureTestingUtility.assertProcNotFailed(procExec, procId2);
assertRegionCount(tableName, initialRegionCount - 2);
assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount());
assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount());
assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
}
@Test
@ -257,4 +295,14 @@ public class TestMergeTableRegionsProcedure {
private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
}
}
private void collectAssignmentManagerMetrics() {
mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount();
mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount();
assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
}
}

View File

@ -45,10 +45,10 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.testclassification.MasterTests;
@ -78,6 +78,19 @@ public class TestSplitTableRegionProcedure {
private static final int startRowNum = 11;
private static final int rowCount = 60;
private AssignmentManager am;
private ProcedureMetrics splitProcMetrics;
private ProcedureMetrics assignProcMetrics;
private ProcedureMetrics unassignProcMetrics;
private long splitSubmittedCount = 0;
private long splitFailedCount = 0;
private long assignSubmittedCount = 0;
private long assignFailedCount = 0;
private long unassignSubmittedCount = 0;
private long unassignFailedCount = 0;
@Rule
public TestName name = new TestName();
@ -109,6 +122,10 @@ public class TestSplitTableRegionProcedure {
UTIL.getAdmin().setBalancerRunning(false, true);
// Turn off the meta scanner so it doesn't remove the parent on us.
UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
splitProcMetrics = am.getAssignmentManagerMetrics().getSplitProcMetrics();
assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
}
@After
@ -133,6 +150,9 @@ public class TestSplitTableRegionProcedure {
assertTrue("not able to find a splittable region", regions != null);
assertTrue("not able to find a splittable region", regions.length == 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table
long procId = procExec.submitProcedure(
new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@ -141,7 +161,14 @@ public class TestSplitTableRegionProcedure {
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
verify(tableName, splitRowNum);
}
assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
assertEquals(unassignSubmittedCount + 1, unassignProcMetrics.getSubmittedCounter().getCount());
assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
}
@Test
public void testSplitTableRegionNoStoreFile() throws Exception {
@ -156,6 +183,9 @@ public class TestSplitTableRegionProcedure {
assertTrue("not able to find a splittable region", regions != null);
assertTrue("not able to find a splittable region", regions.length == 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table
long procId = procExec.submitProcedure(
new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@ -165,6 +195,9 @@ public class TestSplitTableRegionProcedure {
assertTrue(UTIL.getMiniHBaseCluster().getRegions(tableName).size() == 2);
assertTrue(UTIL.countRows(tableName) == 0);
assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
}
@Test
@ -182,6 +215,9 @@ public class TestSplitTableRegionProcedure {
assertTrue("not able to find a splittable region", regions != null);
assertTrue("not able to find a splittable region", regions.length == 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table
long procId = procExec.submitProcedure(
new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@ -190,6 +226,9 @@ public class TestSplitTableRegionProcedure {
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
verify(tableName, splitRowNum);
assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
}
@Test
@ -207,6 +246,9 @@ public class TestSplitTableRegionProcedure {
assertTrue("not able to find a splittable region", regions != null);
assertTrue("not able to find a splittable region", regions.length == 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table
long procId = procExec.submitProcedure(
new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@ -219,6 +261,10 @@ public class TestSplitTableRegionProcedure {
assertTrue(daughters.size() == 2);
assertTrue(UTIL.countRows(tableName) == rowCount);
assertTrue(UTIL.countRows(daughters.get(0)) == 0 || UTIL.countRows(daughters.get(1)) == 0);
assertEquals(splitSubmittedCount + 1,
splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
}
@Test
@ -237,6 +283,9 @@ public class TestSplitTableRegionProcedure {
assertTrue("not able to find a splittable region", regions != null);
assertTrue("not able to find a splittable region", regions.length == 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table
long procId = procExec.submitProcedure(
new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@ -259,6 +308,9 @@ public class TestSplitTableRegionProcedure {
final int currentRowCount = splitRowNum - startRowNum;
assertTrue(UTIL.countRows(tableName) == currentRowCount);
assertTrue(UTIL.countRows(daughters.get(0)) == 0 || UTIL.countRows(daughters.get(1)) == 0);
assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
}
@Test
@ -273,6 +325,9 @@ public class TestSplitTableRegionProcedure {
assertTrue("not able to find a splittable region", regions != null);
assertTrue("not able to find a splittable region", regions.length == 1);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table with null split key
try {
long procId1 = procExec.submitProcedure(
@ -282,6 +337,9 @@ public class TestSplitTableRegionProcedure {
} catch (DoNotRetryIOException e) {
LOG.debug("Expected Split procedure construction failure: " + e.getMessage());
}
assertEquals(splitSubmittedCount, splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
}
@Test
@ -300,6 +358,9 @@ public class TestSplitTableRegionProcedure {
ProcedureTestingUtility.waitNoProcedureRunning(procExec);
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table
long procId = procExec.submitProcedure(
new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@ -316,6 +377,9 @@ public class TestSplitTableRegionProcedure {
assertEquals(1, daughters.size());
verifyData(daughters.get(0), startRowNum, rowCount,
Bytes.toBytes(ColumnFamilyName1), Bytes.toBytes(ColumnFamilyName2));
assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount + 1, splitProcMetrics.getFailedCounter().getCount());
}
@Test
@ -334,6 +398,9 @@ public class TestSplitTableRegionProcedure {
ProcedureTestingUtility.waitNoProcedureRunning(procExec);
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
// collect AM metrics before test
collectAssignmentManagerMetrics();
// Split region of the table
long procId = procExec.submitProcedure(
new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@ -343,6 +410,9 @@ public class TestSplitTableRegionProcedure {
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
verify(tableName, splitRowNum);
assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
}
private void insertData(final TableName tableName) throws IOException, InterruptedException {
@ -425,4 +495,13 @@ public class TestSplitTableRegionProcedure {
private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
}
}
private void collectAssignmentManagerMetrics() {
splitSubmittedCount = splitProcMetrics.getSubmittedCounter().getCount();
splitFailedCount = splitProcMetrics.getFailedCounter().getCount();
assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
}
}

View File

@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
@ -48,6 +49,10 @@ public class TestServerCrashProcedure {
private HBaseTestingUtility util;
private ProcedureMetrics serverCrashProcMetrics;
private long serverCrashSubmittedCount = 0;
private long serverCrashFailedCount = 0;
private void setupConf(Configuration conf) {
conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
conf.set("hbase.balancer.tablesOnMaster", "none");
@ -61,6 +66,8 @@ public class TestServerCrashProcedure {
this.util.startMiniCluster(3);
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(
this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false);
serverCrashProcMetrics = this.util.getHBaseCluster().getMaster().getMasterMetrics()
.getServerCrashProcMetrics();
}
@After
@ -141,4 +148,9 @@ public class TestServerCrashProcedure {
t.close();
}
}
private void collectMasterMetrics() {
serverCrashSubmittedCount = serverCrashProcMetrics.getSubmittedCounter().getCount();
serverCrashFailedCount = serverCrashProcMetrics.getFailedCounter().getCount();
}
}