HBASE-25790 NamedQueue 'BalancerRejection' for recent history of balancer skipping (#3182) (#3245)

Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
GeorryHuang 2021-05-09 02:39:23 +08:00 committed by GitHub
parent 90dc150b1b
commit 63d49cb7ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 744 additions and 19 deletions

View File

@ -0,0 +1,116 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.hadoop.hbase.util.GsonUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.apache.hbase.thirdparty.com.google.gson.Gson;
import org.apache.hbase.thirdparty.com.google.gson.JsonSerializer;
/**
* History of detail information that balancer movements was rejected
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
final public class BalancerRejection extends LogEntry {
//The reason why balancer was rejected
private final String reason;
private final List<String> costFuncInfoList;
// used to convert object to pretty printed format
// used by toJsonPrettyPrint()
private static final Gson GSON = GsonUtil.createGson()
.setPrettyPrinting()
.disableHtmlEscaping()
.registerTypeAdapter(BalancerRejection.class, (JsonSerializer<BalancerRejection>)
(balancerRejection, type, jsonSerializationContext) -> {
Gson gson = new Gson();
return gson.toJsonTree(balancerRejection);
}).create();
private BalancerRejection(String reason, List<String> costFuncInfoList) {
this.reason = reason;
if(costFuncInfoList == null){
this.costFuncInfoList = Collections.emptyList();
}
else {
this.costFuncInfoList = costFuncInfoList;
}
}
public String getReason() {
return reason;
}
public List<String> getCostFuncInfoList() {
return costFuncInfoList;
}
@Override
public String toString() {
return new ToStringBuilder(this)
.append("reason", reason)
.append("costFuncInfoList", costFuncInfoList.toString())
.toString();
}
@Override
public String toJsonPrettyPrint() {
return GSON.toJson(this);
}
public static class Builder {
private String reason;
private List<String> costFuncInfoList;
public Builder setReason(String reason) {
this.reason = reason;
return this;
}
public void addCostFuncInfo(String funcName, double cost, float multiplier){
if(costFuncInfoList == null){
costFuncInfoList = new ArrayList<>();
}
costFuncInfoList.add(
new StringBuilder()
.append(funcName)
.append(" cost:").append(cost)
.append(" multiplier:").append(multiplier)
.toString());
}
public Builder setCostFuncInfoList(List<String> costFuncInfoList){
this.costFuncInfoList = costFuncInfoList;
return this;
}
public BalancerRejection build() {
return new BalancerRejection(reason, costFuncInfoList);
}
}
}

View File

@ -4434,6 +4434,12 @@ public class HBaseAdmin implements Admin {
"Balancer Decision logs are not maintained by HRegionServer"); "Balancer Decision logs are not maintained by HRegionServer");
} }
return getBalancerDecisions(limit); return getBalancerDecisions(limit);
} else if (logType.equals("BALANCER_REJECTION")) {
if (ServerType.REGION_SERVER.equals(serverType)) {
throw new IllegalArgumentException(
"Balancer Rejection logs are not maintained by HRegionServer");
}
return getBalancerRejections(limit);
} }
return Collections.emptyList(); return Collections.emptyList();
} }
@ -4450,6 +4456,18 @@ public class HBaseAdmin implements Admin {
}); });
} }
private List<LogEntry> getBalancerRejections(final int limit) throws IOException {
return executeCallable(new MasterCallable<List<LogEntry>>(getConnection(),
getRpcControllerFactory()) {
@Override
protected List<LogEntry> rpcCall() throws Exception {
HBaseProtos.LogEntry logEntry =
master.getLogEntries(getRpcController(), ProtobufUtil.toBalancerRejectionRequest(limit));
return ProtobufUtil.toBalancerRejectionResponse(logEntry);
}
});
}
private Boolean clearSlowLogsResponses(final ServerName serverName) throws IOException { private Boolean clearSlowLogsResponses(final ServerName serverName) throws IOException {
AdminService.BlockingInterface admin = this.connection.getAdmin(serverName); AdminService.BlockingInterface admin = this.connection.getAdmin(serverName);
return executeCallable(new RpcRetryingCallable<Boolean>() { return executeCallable(new RpcRetryingCallable<Boolean>() {

View File

@ -3968,6 +3968,15 @@ class RawAsyncHBaseAdmin implements AsyncAdmin {
.call(); .call();
} }
private CompletableFuture<List<LogEntry>> getBalancerRejections(final int limit) {
return this.<List<LogEntry>>newMasterCaller()
.action((controller, stub) ->
this.call(controller, stub,
ProtobufUtil.toBalancerRejectionRequest(limit),
MasterService.Interface::getLogEntries, ProtobufUtil::toBalancerRejectionResponse))
.call();
}
@Override @Override
public CompletableFuture<List<LogEntry>> getLogEntries(Set<ServerName> serverNames, public CompletableFuture<List<LogEntry>> getLogEntries(Set<ServerName> serverNames,
String logType, ServerType serverType, int limit, String logType, ServerType serverType, int limit,
@ -3975,18 +3984,27 @@ class RawAsyncHBaseAdmin implements AsyncAdmin {
if (logType == null || serverType == null) { if (logType == null || serverType == null) {
throw new IllegalArgumentException("logType and/or serverType cannot be empty"); throw new IllegalArgumentException("logType and/or serverType cannot be empty");
} }
if (logType.equals("SLOW_LOG") || logType.equals("LARGE_LOG")) { switch (logType){
if (ServerType.MASTER.equals(serverType)) { case "SLOW_LOG":
throw new IllegalArgumentException("Slow/Large logs are not maintained by HMaster"); case "LARGE_LOG":
} if (ServerType.MASTER.equals(serverType)) {
return getSlowLogResponses(filterParams, serverNames, limit, logType); throw new IllegalArgumentException("Slow/Large logs are not maintained by HMaster");
} else if (logType.equals("BALANCER_DECISION")) { }
if (ServerType.REGION_SERVER.equals(serverType)) { return getSlowLogResponses(filterParams, serverNames, limit, logType);
throw new IllegalArgumentException( case "BALANCER_DECISION":
"Balancer Decision logs are not maintained by HRegionServer"); if (ServerType.REGION_SERVER.equals(serverType)) {
} throw new IllegalArgumentException(
return getBalancerDecisions(limit); "Balancer Decision logs are not maintained by HRegionServer");
}
return getBalancerDecisions(limit);
case "BALANCER_REJECTION":
if (ServerType.REGION_SERVER.equals(serverType)) {
throw new IllegalArgumentException(
"Balancer Rejection logs are not maintained by HRegionServer");
}
return getBalancerRejections(limit);
default:
return CompletableFuture.completedFuture(Collections.emptyList());
} }
return CompletableFuture.completedFuture(Collections.emptyList());
} }
} }

View File

@ -67,6 +67,7 @@ import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.BalancerRejection;
import org.apache.hadoop.hbase.client.BalancerDecision; import org.apache.hadoop.hbase.client.BalancerDecision;
import org.apache.hadoop.hbase.client.CheckAndMutate; import org.apache.hadoop.hbase.client.CheckAndMutate;
import org.apache.hadoop.hbase.client.ClientUtil; import org.apache.hadoop.hbase.client.ClientUtil;
@ -3686,6 +3687,25 @@ public final class ProtobufUtil {
throw new RuntimeException("Invalid response from server"); throw new RuntimeException("Invalid response from server");
} }
public static List<LogEntry> toBalancerRejectionResponse(
HBaseProtos.LogEntry logEntry) {
try {
final String logClassName = logEntry.getLogClassName();
Class<?> logClass = Class.forName(logClassName).asSubclass(Message.class);
Method method = logClass.getMethod("parseFrom", ByteString.class);
if (logClassName.contains("BalancerRejectionsResponse")) {
MasterProtos.BalancerRejectionsResponse response =
(MasterProtos.BalancerRejectionsResponse) method
.invoke(null, logEntry.getLogMessage());
return getBalancerRejectionEntries(response);
}
} catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException
| InvocationTargetException e) {
throw new RuntimeException("Error while retrieving response from server");
}
throw new RuntimeException("Invalid response from server");
}
public static List<LogEntry> getBalancerDecisionEntries( public static List<LogEntry> getBalancerDecisionEntries(
MasterProtos.BalancerDecisionsResponse response) { MasterProtos.BalancerDecisionsResponse response) {
List<RecentLogs.BalancerDecision> balancerDecisions = response.getBalancerDecisionList(); List<RecentLogs.BalancerDecision> balancerDecisions = response.getBalancerDecisionList();
@ -3702,6 +3722,19 @@ public final class ProtobufUtil {
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public static List<LogEntry> getBalancerRejectionEntries(
MasterProtos.BalancerRejectionsResponse response) {
List<RecentLogs.BalancerRejection> balancerRejections = response.getBalancerRejectionList();
if (CollectionUtils.isEmpty(balancerRejections)) {
return Collections.emptyList();
}
return balancerRejections.stream().map(balancerRejection -> new BalancerRejection.Builder()
.setReason(balancerRejection.getReason())
.setCostFuncInfoList(balancerRejection.getCostFuncInfoList())
.build())
.collect(Collectors.toList());
}
public static HBaseProtos.LogRequest toBalancerDecisionRequest(int limit) { public static HBaseProtos.LogRequest toBalancerDecisionRequest(int limit) {
MasterProtos.BalancerDecisionsRequest balancerDecisionsRequest = MasterProtos.BalancerDecisionsRequest balancerDecisionsRequest =
MasterProtos.BalancerDecisionsRequest.newBuilder().setLimit(limit).build(); MasterProtos.BalancerDecisionsRequest.newBuilder().setLimit(limit).build();
@ -3711,4 +3744,13 @@ public final class ProtobufUtil {
.build(); .build();
} }
public static HBaseProtos.LogRequest toBalancerRejectionRequest(int limit) {
MasterProtos.BalancerRejectionsRequest balancerRejectionsRequest =
MasterProtos.BalancerRejectionsRequest.newBuilder().setLimit(limit).build();
return HBaseProtos.LogRequest.newBuilder()
.setLogClassName(balancerRejectionsRequest.getClass().getName())
.setLogMessage(balancerRejectionsRequest.toByteString())
.build();
}
} }

View File

@ -2011,7 +2011,7 @@ possible configurations would overwhelm and obscure the important.
</property> </property>
<property> <property>
<name>hbase.namedqueue.provider.classes</name> <name>hbase.namedqueue.provider.classes</name>
<value>org.apache.hadoop.hbase.namequeues.impl.SlowLogQueueService,org.apache.hadoop.hbase.namequeues.impl.BalancerDecisionQueueService</value> <value>org.apache.hadoop.hbase.namequeues.impl.SlowLogQueueService,org.apache.hadoop.hbase.namequeues.impl.BalancerDecisionQueueService,org.apache.hadoop.hbase.namequeues.impl.BalancerRejectionQueueService</value>
<description> <description>
Default values for NamedQueueService implementors. This comma separated full class names Default values for NamedQueueService implementors. This comma separated full class names
represent all implementors of NamedQueueService that we would like to be invoked by represent all implementors of NamedQueueService that we would like to be invoked by
@ -2030,4 +2030,13 @@ possible configurations would overwhelm and obscure the important.
the ring buffer is indicated by config: hbase.master.balancer.decision.queue.size the ring buffer is indicated by config: hbase.master.balancer.decision.queue.size
</description> </description>
</property> </property>
<property>
<name>hbase.master.balancer.rejection.buffer.enabled</name>
<value>false</value>
<description>
Indicates whether active HMaster has ring buffer running for storing
balancer rejection in FIFO manner with limited entries. The size of
the ring buffer is indicated by config: hbase.master.balancer.rejection.queue.size
</description>
</property>
</configuration> </configuration>

View File

@ -705,6 +705,13 @@ message BalancerDecisionsRequest {
optional uint32 limit = 1; optional uint32 limit = 1;
} }
/**
* Same as BalancerDecision but used for BalancerRejection
*/
message BalancerRejectionsRequest {
optional uint32 limit = 1;
}
/** /**
* BalancerDecision (LogEntry) use-case specific RPC response. This response payload will be * BalancerDecision (LogEntry) use-case specific RPC response. This response payload will be
* converted in bytes by servers and sent as response to generic RPC API: GetLogEntries * converted in bytes by servers and sent as response to generic RPC API: GetLogEntries
@ -716,6 +723,10 @@ message BalancerDecisionsResponse {
repeated BalancerDecision balancer_decision = 1; repeated BalancerDecision balancer_decision = 1;
} }
message BalancerRejectionsResponse {
repeated BalancerRejection balancer_rejection = 1;
}
service MasterService { service MasterService {
/** Used by the client to get the number of regions that have received the updated schema */ /** Used by the client to get the number of regions that have received the updated schema */
rpc GetSchemaAlterStatus(GetSchemaAlterStatusRequest) rpc GetSchemaAlterStatus(GetSchemaAlterStatusRequest)

View File

@ -37,3 +37,8 @@ message BalancerDecision {
repeated string region_plans = 6; repeated string region_plans = 6;
} }
message BalancerRejection {
required string reason = 1;
repeated string cost_func_info = 2;
}

View File

@ -85,6 +85,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedu
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.mob.MobUtils; import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.namequeues.BalancerDecisionDetails; import org.apache.hadoop.hbase.namequeues.BalancerDecisionDetails;
import org.apache.hadoop.hbase.namequeues.BalancerRejectionDetails;
import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder; import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder;
import org.apache.hadoop.hbase.namequeues.request.NamedQueueGetRequest; import org.apache.hadoop.hbase.namequeues.request.NamedQueueGetRequest;
import org.apache.hadoop.hbase.namequeues.response.NamedQueueGetResponse; import org.apache.hadoop.hbase.namequeues.response.NamedQueueGetResponse;
@ -3048,6 +3049,16 @@ public class MasterRpcServices extends RSRpcServices implements
.setLogClassName(balancerDecisionsResponse.getClass().getName()) .setLogClassName(balancerDecisionsResponse.getClass().getName())
.setLogMessage(balancerDecisionsResponse.toByteString()) .setLogMessage(balancerDecisionsResponse.toByteString())
.build(); .build();
}else if (logClassName.contains("BalancerRejectionsRequest")){
MasterProtos.BalancerRejectionsRequest balancerRejectionsRequest =
(MasterProtos.BalancerRejectionsRequest) method
.invoke(null, request.getLogMessage());
MasterProtos.BalancerRejectionsResponse balancerRejectionsResponse =
getBalancerRejections(balancerRejectionsRequest);
return HBaseProtos.LogEntry.newBuilder()
.setLogClassName(balancerRejectionsResponse.getClass().getName())
.setLogMessage(balancerRejectionsResponse.toByteString())
.build();
} }
} catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException
| InvocationTargetException e) { | InvocationTargetException e) {
@ -3075,4 +3086,22 @@ public class MasterRpcServices extends RSRpcServices implements
.addAllBalancerDecision(balancerDecisions).build(); .addAllBalancerDecision(balancerDecisions).build();
} }
private MasterProtos.BalancerRejectionsResponse getBalancerRejections(
MasterProtos.BalancerRejectionsRequest request) {
final NamedQueueRecorder namedQueueRecorder = this.regionServer.getNamedQueueRecorder();
if (namedQueueRecorder == null) {
return MasterProtos.BalancerRejectionsResponse.newBuilder()
.addAllBalancerRejection(Collections.emptyList()).build();
}
final NamedQueueGetRequest namedQueueGetRequest = new NamedQueueGetRequest();
namedQueueGetRequest.setNamedQueueEvent(BalancerRejectionDetails.BALANCER_REJECTION_EVENT);
namedQueueGetRequest.setBalancerRejectionsRequest(request);
NamedQueueGetResponse namedQueueGetResponse =
namedQueueRecorder.getNamedQueueRecords(namedQueueGetRequest);
List<RecentLogs.BalancerRejection> balancerRejections =
namedQueueGetResponse.getBalancerRejections();
return MasterProtos.BalancerRejectionsResponse.newBuilder()
.addAllBalancerRejection(balancerRejections).build();
}
} }

View File

@ -71,6 +71,10 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
"hbase.master.balancer.decision.buffer.enabled"; "hbase.master.balancer.decision.buffer.enabled";
public static final boolean DEFAULT_BALANCER_DECISION_BUFFER_ENABLED = false; public static final boolean DEFAULT_BALANCER_DECISION_BUFFER_ENABLED = false;
public static final String BALANCER_REJECTION_BUFFER_ENABLED =
"hbase.master.balancer.rejection.buffer.enabled";
public static final boolean DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED = false;
protected static final int MIN_SERVER_BALANCE = 2; protected static final int MIN_SERVER_BALANCE = 2;
private volatile boolean stopped = false; private volatile boolean stopped = false;

View File

@ -35,9 +35,11 @@ import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BalancerDecision; import org.apache.hadoop.hbase.client.BalancerDecision;
import org.apache.hadoop.hbase.client.BalancerRejection;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.namequeues.BalancerDecisionDetails; import org.apache.hadoop.hbase.namequeues.BalancerDecisionDetails;
import org.apache.hadoop.hbase.namequeues.BalancerRejectionDetails;
import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder; import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.hbase.util.ReflectionUtils;
@ -129,6 +131,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
private long maxRunningTime = 30 * 1000 * 1; // 30 seconds. private long maxRunningTime = 30 * 1000 * 1; // 30 seconds.
private int numRegionLoadsToRemember = 15; private int numRegionLoadsToRemember = 15;
private float minCostNeedBalance = 0.05f; private float minCostNeedBalance = 0.05f;
private boolean isBalancerDecisionRecording = false;
private boolean isBalancerRejectionRecording = false;
private List<CandidateGenerator> candidateGenerators; private List<CandidateGenerator> candidateGenerators;
private CostFromRegionLoadFunction[] regionLoadFunctions; private CostFromRegionLoadFunction[] regionLoadFunctions;
@ -215,11 +219,16 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
curFunctionCosts = new double[costFunctions.size()]; curFunctionCosts = new double[costFunctions.size()];
tempFunctionCosts = new double[costFunctions.size()]; tempFunctionCosts = new double[costFunctions.size()];
boolean isBalancerDecisionRecording = getConf() isBalancerDecisionRecording = getConf()
.getBoolean(BaseLoadBalancer.BALANCER_DECISION_BUFFER_ENABLED, .getBoolean(BaseLoadBalancer.BALANCER_DECISION_BUFFER_ENABLED,
BaseLoadBalancer.DEFAULT_BALANCER_DECISION_BUFFER_ENABLED); BaseLoadBalancer.DEFAULT_BALANCER_DECISION_BUFFER_ENABLED);
if (this.namedQueueRecorder == null && isBalancerDecisionRecording) { isBalancerRejectionRecording = getConf()
this.namedQueueRecorder = NamedQueueRecorder.getInstance(conf); .getBoolean(BaseLoadBalancer.BALANCER_REJECTION_BUFFER_ENABLED,
BaseLoadBalancer.DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED);
if (this.namedQueueRecorder == null && (isBalancerDecisionRecording
|| isBalancerRejectionRecording)) {
this.namedQueueRecorder = NamedQueueRecorder.getInstance(getConf());
} }
LOG.info("Loaded config; maxSteps=" + maxSteps + ", stepsPerRegion=" + stepsPerRegion + LOG.info("Loaded config; maxSteps=" + maxSteps + ", stepsPerRegion=" + stepsPerRegion +
@ -325,6 +334,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
LOG.debug("Not running balancer because only " + cs.getNumServers() LOG.debug("Not running balancer because only " + cs.getNumServers()
+ " active regionserver(s)"); + " active regionserver(s)");
} }
if (this.isBalancerRejectionRecording) {
sendRejectionReasonToRingBuffer("The number of RegionServers " +
cs.getNumServers() + " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
}
return false; return false;
} }
if (areSomeRegionReplicasColocated(cluster)) { if (areSomeRegionReplicasColocated(cluster)) {
@ -353,6 +366,19 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
boolean balanced = total <= 0 || sumMultiplier <= 0 || boolean balanced = total <= 0 || sumMultiplier <= 0 ||
(sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance); (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
if(balanced && isBalancerRejectionRecording){
String reason = "";
if (total <= 0) {
reason = "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total + " <= 0";
} else if (sumMultiplier <= 0) {
reason = "sumMultiplier = " + sumMultiplier + " <= 0";
} else if ((total / sumMultiplier) < minCostNeedBalance) {
reason =
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " + (total
/ sumMultiplier) + " <= minCostNeedBalance(" + minCostNeedBalance + ")";
}
sendRejectionReasonToRingBuffer(reason, costFunctions);
}
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}", LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}",
balanced ? "Skipping load balancing because balanced" : "We need to load balance", balanced ? "Skipping load balancing because balanced" : "We need to load balance",
@ -499,9 +525,27 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
return null; return null;
} }
private void sendRejectionReasonToRingBuffer(String reason, List<CostFunction> costFunctions){
if (this.isBalancerRejectionRecording){
BalancerRejection.Builder builder =
new BalancerRejection.Builder()
.setReason(reason);
if (costFunctions != null) {
for (CostFunction c : costFunctions) {
float multiplier = c.getMultiplier();
if (multiplier <= 0 || !c.isNeeded()) {
continue;
}
builder.addCostFuncInfo(c.getClass().getName(), c.cost(), c.getMultiplier());
}
}
namedQueueRecorder.addRecord(new BalancerRejectionDetails(builder.build()));
}
}
private void sendRegionPlansToRingBuffer(List<RegionPlan> plans, double currentCost, private void sendRegionPlansToRingBuffer(List<RegionPlan> plans, double currentCost,
double initCost, String initFunctionTotalCosts, long step) { double initCost, String initFunctionTotalCosts, long step) {
if (this.namedQueueRecorder != null) { if (this.isBalancerDecisionRecording) {
List<String> regionPlans = new ArrayList<>(); List<String> regionPlans = new ArrayList<>();
for (RegionPlan plan : plans) { for (RegionPlan plan : plans) {
regionPlans.add( regionPlans.add(

View File

@ -0,0 +1,51 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.namequeues;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.hadoop.hbase.client.BalancerRejection;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Balancer rejection details that would be passed on to ring buffer for history
*/
@InterfaceAudience.Private
public class BalancerRejectionDetails extends NamedQueuePayload {
public static final int BALANCER_REJECTION_EVENT = 2;
private final BalancerRejection balancerRejection;
public BalancerRejectionDetails(BalancerRejection balancerRejection) {
super(BALANCER_REJECTION_EVENT);
this.balancerRejection = balancerRejection;
}
public BalancerRejection getBalancerRejection() {
return balancerRejection;
}
@Override
public String toString() {
return new ToStringBuilder(this)
.append("balancerRejection", balancerRejection)
.toString();
}
}

View File

@ -31,7 +31,8 @@ public class NamedQueuePayload {
public enum NamedQueueEvent { public enum NamedQueueEvent {
SLOW_LOG(0), SLOW_LOG(0),
BALANCE_DECISION(1); BALANCE_DECISION(1),
BALANCE_REJECTION(2);
private final int value; private final int value;
@ -47,6 +48,9 @@ public class NamedQueuePayload {
case 1: { case 1: {
return BALANCE_DECISION; return BALANCE_DECISION;
} }
case 2: {
return BALANCE_REJECTION;
}
default: { default: {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"NamedQueue event with ordinal " + value + " not defined"); "NamedQueue event with ordinal " + value + " not defined");

View File

@ -0,0 +1,133 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.namequeues.impl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.BalancerRejection;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.namequeues.BalancerRejectionDetails;
import org.apache.hadoop.hbase.namequeues.NamedQueuePayload;
import org.apache.hadoop.hbase.namequeues.NamedQueueService;
import org.apache.hadoop.hbase.namequeues.request.NamedQueueGetRequest;
import org.apache.hadoop.hbase.namequeues.response.NamedQueueGetResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RecentLogs;
import org.apache.hbase.thirdparty.com.google.common.collect.EvictingQueue;
import org.apache.hbase.thirdparty.com.google.common.collect.Queues;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Queue;
import java.util.stream.Collectors;
/**
* In-memory Queue service provider for Balancer Rejection events
*/
@InterfaceAudience.Private
public class BalancerRejectionQueueService implements NamedQueueService {
private static final Logger LOG = LoggerFactory.getLogger(BalancerRejectionQueueService.class);
private final boolean isBalancerRejectionRecording;
private static final String BALANCER_REJECTION_QUEUE_SIZE =
"hbase.master.balancer.rejection.queue.size";
private static final int DEFAULT_BALANCER_REJECTION_QUEUE_SIZE = 250;
private final Queue<RecentLogs.BalancerRejection> balancerRejectionQueue;
public BalancerRejectionQueueService(Configuration conf) {
isBalancerRejectionRecording = conf.getBoolean(BaseLoadBalancer.BALANCER_REJECTION_BUFFER_ENABLED,
BaseLoadBalancer.DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED);
if (!isBalancerRejectionRecording) {
balancerRejectionQueue = null;
return;
}
final int queueSize =
conf.getInt(BALANCER_REJECTION_QUEUE_SIZE, DEFAULT_BALANCER_REJECTION_QUEUE_SIZE);
final EvictingQueue<RecentLogs.BalancerRejection> evictingQueue =
EvictingQueue.create(queueSize);
balancerRejectionQueue = Queues.synchronizedQueue(evictingQueue);
}
@Override
public NamedQueuePayload.NamedQueueEvent getEvent() {
return NamedQueuePayload.NamedQueueEvent.BALANCE_REJECTION;
}
@Override
public void consumeEventFromDisruptor(NamedQueuePayload namedQueuePayload) {
if (!isBalancerRejectionRecording) {
return;
}
if (!(namedQueuePayload instanceof BalancerRejectionDetails)) {
LOG.warn(
"BalancerRejectionQueueService: NamedQueuePayload is not of type BalancerRejectionDetails.");
return;
}
BalancerRejectionDetails balancerRejectionDetails = (BalancerRejectionDetails) namedQueuePayload;
BalancerRejection balancerRejectionRecord =
balancerRejectionDetails.getBalancerRejection();
RecentLogs.BalancerRejection BalancerRejection = RecentLogs.BalancerRejection.newBuilder()
.setReason(balancerRejectionRecord.getReason())
.addAllCostFuncInfo(balancerRejectionRecord.getCostFuncInfoList())
.build();
balancerRejectionQueue.add(BalancerRejection);
}
@Override
public boolean clearNamedQueue() {
if (!isBalancerRejectionRecording) {
return false;
}
LOG.debug("Received request to clean up balancer rejection queue.");
balancerRejectionQueue.clear();
return true;
}
@Override
public NamedQueueGetResponse getNamedQueueRecords(NamedQueueGetRequest request) {
if (!isBalancerRejectionRecording) {
return null;
}
List<RecentLogs.BalancerRejection> balancerRejections =
Arrays.stream(balancerRejectionQueue.toArray(new RecentLogs.BalancerRejection[0]))
.collect(Collectors.toList());
// latest records should be displayed first, hence reverse order sorting
Collections.reverse(balancerRejections);
int limit = balancerRejections.size();
if (request.getBalancerRejectionsRequest().hasLimit()) {
limit = Math.min(request.getBalancerRejectionsRequest().getLimit(), balancerRejections.size());
}
// filter limit if provided
balancerRejections = balancerRejections.subList(0, limit);
final NamedQueueGetResponse namedQueueGetResponse = new NamedQueueGetResponse();
namedQueueGetResponse.setNamedQueueEvent(BalancerRejectionDetails.BALANCER_REJECTION_EVENT);
namedQueueGetResponse.setBalancerRejections(balancerRejections);
return namedQueueGetResponse;
}
@Override
public void persistAll() {
// no-op for now
}
}

View File

@ -38,6 +38,7 @@ public class NamedQueueGetRequest {
private AdminProtos.SlowLogResponseRequest slowLogResponseRequest; private AdminProtos.SlowLogResponseRequest slowLogResponseRequest;
private NamedQueuePayload.NamedQueueEvent namedQueueEvent; private NamedQueuePayload.NamedQueueEvent namedQueueEvent;
private MasterProtos.BalancerDecisionsRequest balancerDecisionsRequest; private MasterProtos.BalancerDecisionsRequest balancerDecisionsRequest;
private MasterProtos.BalancerRejectionsRequest balancerRejectionsRequest;
public AdminProtos.SlowLogResponseRequest getSlowLogResponseRequest() { public AdminProtos.SlowLogResponseRequest getSlowLogResponseRequest() {
return slowLogResponseRequest; return slowLogResponseRequest;
@ -52,11 +53,20 @@ public class NamedQueueGetRequest {
return balancerDecisionsRequest; return balancerDecisionsRequest;
} }
public MasterProtos.BalancerRejectionsRequest getBalancerRejectionsRequest() {
return balancerRejectionsRequest;
}
public void setBalancerDecisionsRequest( public void setBalancerDecisionsRequest(
MasterProtos.BalancerDecisionsRequest balancerDecisionsRequest) { MasterProtos.BalancerDecisionsRequest balancerDecisionsRequest) {
this.balancerDecisionsRequest = balancerDecisionsRequest; this.balancerDecisionsRequest = balancerDecisionsRequest;
} }
public void setBalancerRejectionsRequest(
MasterProtos.BalancerRejectionsRequest balancerRejectionsRequest) {
this.balancerRejectionsRequest = balancerRejectionsRequest;
}
public NamedQueuePayload.NamedQueueEvent getNamedQueueEvent() { public NamedQueuePayload.NamedQueueEvent getNamedQueueEvent() {
return namedQueueEvent; return namedQueueEvent;
} }
@ -71,6 +81,7 @@ public class NamedQueueGetRequest {
.append("slowLogResponseRequest", slowLogResponseRequest) .append("slowLogResponseRequest", slowLogResponseRequest)
.append("namedQueueEvent", namedQueueEvent) .append("namedQueueEvent", namedQueueEvent)
.append("balancerDecisionsRequest", balancerDecisionsRequest) .append("balancerDecisionsRequest", balancerDecisionsRequest)
.append("balancerRejectionsRequest", balancerRejectionsRequest)
.toString(); .toString();
} }
} }

View File

@ -34,6 +34,7 @@ public class NamedQueueGetResponse {
private List<TooSlowLog.SlowLogPayload> slowLogPayloads; private List<TooSlowLog.SlowLogPayload> slowLogPayloads;
private List<RecentLogs.BalancerDecision> balancerDecisions; private List<RecentLogs.BalancerDecision> balancerDecisions;
private List<RecentLogs.BalancerRejection> balancerRejections;
private NamedQueuePayload.NamedQueueEvent namedQueueEvent; private NamedQueuePayload.NamedQueueEvent namedQueueEvent;
public List<TooSlowLog.SlowLogPayload> getSlowLogPayloads() { public List<TooSlowLog.SlowLogPayload> getSlowLogPayloads() {
@ -52,6 +53,14 @@ public class NamedQueueGetResponse {
this.balancerDecisions = balancerDecisions; this.balancerDecisions = balancerDecisions;
} }
public List<RecentLogs.BalancerRejection> getBalancerRejections() {
return balancerRejections;
}
public void setBalancerRejections(List<RecentLogs.BalancerRejection> balancerRejections) {
this.balancerRejections = balancerRejections;
}
public NamedQueuePayload.NamedQueueEvent getNamedQueueEvent() { public NamedQueuePayload.NamedQueueEvent getNamedQueueEvent() {
return namedQueueEvent; return namedQueueEvent;
} }
@ -65,6 +74,7 @@ public class NamedQueueGetResponse {
return new ToStringBuilder(this) return new ToStringBuilder(this)
.append("slowLogPayloads", slowLogPayloads) .append("slowLogPayloads", slowLogPayloads)
.append("balancerDecisions", balancerDecisions) .append("balancerDecisions", balancerDecisions)
.append("balancerRejections", balancerRejections)
.append("namedQueueEvent", namedQueueEvent) .append("namedQueueEvent", namedQueueEvent)
.toString(); .toString();
} }

View File

@ -719,7 +719,10 @@ public class HRegionServer extends Thread implements
final boolean isBalancerDecisionRecording = conf final boolean isBalancerDecisionRecording = conf
.getBoolean(BaseLoadBalancer.BALANCER_DECISION_BUFFER_ENABLED, .getBoolean(BaseLoadBalancer.BALANCER_DECISION_BUFFER_ENABLED,
BaseLoadBalancer.DEFAULT_BALANCER_DECISION_BUFFER_ENABLED); BaseLoadBalancer.DEFAULT_BALANCER_DECISION_BUFFER_ENABLED);
if (isBalancerDecisionRecording) { final boolean isBalancerRejectionRecording = conf
.getBoolean(BaseLoadBalancer.BALANCER_REJECTION_BUFFER_ENABLED,
BaseLoadBalancer.DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED);
if (isBalancerDecisionRecording || isBalancerRejectionRecording) {
this.namedQueueRecorder = NamedQueueRecorder.getInstance(this.conf); this.namedQueueRecorder = NamedQueueRecorder.getInstance(this.conf);
} }
} }

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.LogEntry;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.namequeues.BalancerRejectionDetails;
import org.apache.hadoop.hbase.namequeues.request.NamedQueueGetRequest;
import org.apache.hadoop.hbase.namequeues.response.NamedQueueGetResponse;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RecentLogs;
/**
* Test BalancerRejection ring buffer using namedQueue interface
*/
@Category({ MasterTests.class, MediumTests.class })
public class TestBalancerRejection extends BalancerTestBase {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestBalancerRejection.class);
static class MockCostFunction extends CostFunction{
public static double mockCost;
public MockCostFunction(Configuration c) {
}
@Override
protected double cost() {
return mockCost;
}
@Override
boolean isNeeded() {
return super.isNeeded();
}
@Override
float getMultiplier() {
return 1;
}
}
@Test
public void testBalancerRejections() throws Exception{
try {
//enabled balancer rejection recording
conf.setBoolean(BaseLoadBalancer.BALANCER_REJECTION_BUFFER_ENABLED, true);
conf.set(StochasticLoadBalancer.COST_FUNCTIONS_COST_FUNCTIONS_KEY, MockCostFunction.class.getName());
loadBalancer.setConf(conf);
//Simulate 2 servers with 5 regions.
Map<ServerName, List<RegionInfo>> servers = mockClusterServers(new int[] { 5, 5 });
Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable = (Map) mockClusterServersWithTables(servers);
//Reject case 1: Total cost < 0
MockCostFunction.mockCost = -Double.MAX_VALUE;
//Since the Balancer was rejected, there should not be any plans
Assert.assertNull(loadBalancer.balanceCluster(LoadOfAllTable));
//Reject case 2: Cost < minCostNeedBalance
MockCostFunction.mockCost = 1;
conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", Float.MAX_VALUE);
loadBalancer.setConf(conf);
Assert.assertNull(loadBalancer.balanceCluster(LoadOfAllTable));
//NamedQueue is an async Producer-consumer Pattern, waiting here until it completed
int maxWaitingCount = 10;
while (maxWaitingCount-- > 0 && getBalancerRejectionLogEntries().size() != 2) {
Thread.sleep(1000);
}
//There are two cases, should be 2 logEntries
List<LogEntry> logEntries = getBalancerRejectionLogEntries();
Assert.assertEquals(2, logEntries.size());
Assert.assertTrue(
logEntries.get(0).toJsonPrettyPrint().contains("minCostNeedBalance"));
Assert.assertTrue(
logEntries.get(1).toJsonPrettyPrint().contains("cost1*multiplier1"));
}finally {
conf.unset(StochasticLoadBalancer.COST_FUNCTIONS_COST_FUNCTIONS_KEY);
conf.unset(BaseLoadBalancer.BALANCER_REJECTION_BUFFER_ENABLED);
loadBalancer.setConf(conf);
}
}
private List<LogEntry> getBalancerRejectionLogEntries(){
NamedQueueGetRequest namedQueueGetRequest = new NamedQueueGetRequest();
namedQueueGetRequest.setNamedQueueEvent(BalancerRejectionDetails.BALANCER_REJECTION_EVENT);
namedQueueGetRequest.setBalancerRejectionsRequest(MasterProtos.BalancerRejectionsRequest.getDefaultInstance());
NamedQueueGetResponse namedQueueGetResponse =
loadBalancer.namedQueueRecorder.getNamedQueueRecords(namedQueueGetRequest);
List<RecentLogs.BalancerRejection> balancerRejections = namedQueueGetResponse.getBalancerRejections();
MasterProtos.BalancerRejectionsResponse response =
MasterProtos.BalancerRejectionsResponse.newBuilder()
.addAllBalancerRejection(balancerRejections)
.build();
List<LogEntry> balancerRejectionRecords =
ProtobufUtil.getBalancerRejectionEntries(response);
return balancerRejectionRecords;
}
}

View File

@ -1695,6 +1695,25 @@ module Hbase
balancer_decisions_resp_arr balancer_decisions_resp_arr
end end
#----------------------------------------------------------------------------------------------
# Retrieve latest balancer rejections made by LoadBalancers
def get_balancer_rejections(args)
if args.key? 'LIMIT'
limit = args['LIMIT']
else
limit = 250
end
log_type = 'BALANCER_REJECTION'
log_dest = org.apache.hadoop.hbase.client.ServerType::MASTER
balancer_rejections_responses = @admin.getLogEntries(nil, log_type, log_dest, limit, nil)
balancer_rejections_resp_arr = []
balancer_rejections_responses.each { |balancer_dec_resp|
balancer_rejections_resp_arr << balancer_dec_resp.toJsonPrettyPrint
}
balancer_rejections_resp_arr
end
#---------------------------------------------------------------------------------------------- #----------------------------------------------------------------------------------------------
# Stop the active Master # Stop the active Master
def stop_master def stop_master

View File

@ -455,6 +455,7 @@ Shell.load_command_group(
compaction_switch compaction_switch
flush flush
get_balancer_decisions get_balancer_decisions
get_balancer_rejections
get_slowlog_responses get_slowlog_responses
get_largelog_responses get_largelog_responses
major_compact major_compact

View File

@ -0,0 +1,49 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with this
# work for additional information regarding copyright ownership. The ASF
# licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Retrieve latest balancer rejections maintained in memory by HMaster
module Shell
module Commands
# Retrieve latest large log responses
class GetBalancerRejections < Command
def help
<<-EOF
Retrieve latest balancer rejections made by LoadBalancers.
Examples:
hbase> get_balancer_rejections => Retrieve recent balancer rejections with
region plans
hbase> get_balancer_rejections LIMIT => 10 => Retrieve 10 most recent balancer rejections
with region plans
EOF
end
def command(args = {})
unless args.is_a? Hash
raise 'Filter parameters are not Hash'
end
balancer_rejections_resp_arr = admin.get_balancer_rejections(args)
puts 'Retrieved BalancerRejection Responses'
puts balancer_rejections_resp_arr
end
end
end
end

View File

@ -2334,3 +2334,17 @@ The percent of region server RPC threads failed to abort RS.
.Default .Default
`false` `false`
[[hbase.master.balancer.rejection.buffer.enabled]]
*`hbase.master.balancer.rejection.buffer.enabled`*::
+
.Description
Indicates whether active HMaster has ring buffer running for storing
balancer rejection in FIFO manner with limited entries. The size of
the ring buffer is indicated by config:
hbase.master.balancer.rejection.queue.size
+
.Default
`false`