HDFS-16302. RBF: RouterRpcFairnessPolicyController record requests accepted by each nameservice (#3621)
This commit is contained in:
parent
e5cee76785
commit
d2b8d6b534
|
@ -132,4 +132,10 @@ public interface FederationRPCMBean {
|
||||||
* @return Number of operations rejected due to lack of permits of each namespace.
|
* @return Number of operations rejected due to lack of permits of each namespace.
|
||||||
*/
|
*/
|
||||||
String getProxyOpPermitRejectedPerNs();
|
String getProxyOpPermitRejectedPerNs();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of operations accepted of each namespace.
|
||||||
|
* @return Number of operations accepted of each namespace.
|
||||||
|
*/
|
||||||
|
String getProxyOpPermitAcceptedPerNs();
|
||||||
}
|
}
|
||||||
|
|
|
@ -297,4 +297,9 @@ public class FederationRPCMetrics implements FederationRPCMBean {
|
||||||
public String getProxyOpPermitRejectedPerNs() {
|
public String getProxyOpPermitRejectedPerNs() {
|
||||||
return rpcServer.getRPCClient().getRejectedPermitsPerNsJSON();
|
return rpcServer.getRPCClient().getRejectedPermitsPerNsJSON();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getProxyOpPermitAcceptedPerNs() {
|
||||||
|
return rpcServer.getRPCClient().getAcceptedPermitsPerNsJSON();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -136,6 +136,7 @@ public class RouterRpcClient {
|
||||||
/** Fairness manager to control handlers assigned per NS. */
|
/** Fairness manager to control handlers assigned per NS. */
|
||||||
private RouterRpcFairnessPolicyController routerRpcFairnessPolicyController;
|
private RouterRpcFairnessPolicyController routerRpcFairnessPolicyController;
|
||||||
private Map<String, LongAdder> rejectedPermitsPerNs = new ConcurrentHashMap<>();
|
private Map<String, LongAdder> rejectedPermitsPerNs = new ConcurrentHashMap<>();
|
||||||
|
private Map<String, LongAdder> acceptedPermitsPerNs = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a router RPC client to manage remote procedure calls to NNs.
|
* Create a router RPC client to manage remote procedure calls to NNs.
|
||||||
|
@ -330,6 +331,14 @@ public class RouterRpcClient {
|
||||||
return JSON.toString(rejectedPermitsPerNs);
|
return JSON.toString(rejectedPermitsPerNs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* JSON representation of the accepted permits for each nameservice.
|
||||||
|
*
|
||||||
|
* @return String representation of the accepted permits for each nameservice.
|
||||||
|
*/
|
||||||
|
public String getAcceptedPermitsPerNsJSON() {
|
||||||
|
return JSON.toString(acceptedPermitsPerNs);
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Get ClientProtocol proxy client for a NameNode. Each combination of user +
|
* Get ClientProtocol proxy client for a NameNode. Each combination of user +
|
||||||
* NN must use a unique proxy client. Previously created clients are cached
|
* NN must use a unique proxy client. Previously created clients are cached
|
||||||
|
@ -1548,20 +1557,22 @@ public class RouterRpcClient {
|
||||||
private void acquirePermit(
|
private void acquirePermit(
|
||||||
final String nsId, final UserGroupInformation ugi, final RemoteMethod m)
|
final String nsId, final UserGroupInformation ugi, final RemoteMethod m)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (routerRpcFairnessPolicyController != null
|
if (routerRpcFairnessPolicyController != null) {
|
||||||
&& !routerRpcFairnessPolicyController.acquirePermit(nsId)) {
|
if (!routerRpcFairnessPolicyController.acquirePermit(nsId)) {
|
||||||
// Throw StandByException,
|
// Throw StandByException,
|
||||||
// Clients could fail over and try another router.
|
// Clients could fail over and try another router.
|
||||||
if (rpcMonitor != null) {
|
if (rpcMonitor != null) {
|
||||||
rpcMonitor.getRPCMetrics().incrProxyOpPermitRejected();
|
rpcMonitor.getRPCMetrics().incrProxyOpPermitRejected();
|
||||||
|
}
|
||||||
|
incrRejectedPermitForNs(nsId);
|
||||||
|
LOG.debug("Permit denied for ugi: {} for method: {}",
|
||||||
|
ugi, m.getMethodName());
|
||||||
|
String msg =
|
||||||
|
"Router " + router.getRouterId() +
|
||||||
|
" is overloaded for NS: " + nsId;
|
||||||
|
throw new StandbyException(msg);
|
||||||
}
|
}
|
||||||
incrRejectedPermitForNs(nsId);
|
incrAcceptedPermitForNs(nsId);
|
||||||
LOG.debug("Permit denied for ugi: {} for method: {}",
|
|
||||||
ugi, m.getMethodName());
|
|
||||||
String msg =
|
|
||||||
"Router " + router.getRouterId() +
|
|
||||||
" is overloaded for NS: " + nsId;
|
|
||||||
throw new StandbyException(msg);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1596,4 +1607,13 @@ public class RouterRpcClient {
|
||||||
return rejectedPermitsPerNs.containsKey(ns) ?
|
return rejectedPermitsPerNs.containsKey(ns) ?
|
||||||
rejectedPermitsPerNs.get(ns).longValue() : 0L;
|
rejectedPermitsPerNs.get(ns).longValue() : 0L;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void incrAcceptedPermitForNs(String ns) {
|
||||||
|
acceptedPermitsPerNs.computeIfAbsent(ns, k -> new LongAdder()).increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long getAcceptedPermitForNs(String ns) {
|
||||||
|
return acceptedPermitsPerNs.containsKey(ns) ?
|
||||||
|
acceptedPermitsPerNs.get(ns).longValue() : 0L;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,8 +41,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the Router handlers fairness control rejects
|
* Test the Router handlers fairness control rejects and accepts requests.
|
||||||
* requests when the handlers are overloaded.
|
|
||||||
*/
|
*/
|
||||||
public class TestRouterHandlersFairness {
|
public class TestRouterHandlersFairness {
|
||||||
|
|
||||||
|
@ -126,6 +125,12 @@ public class TestRouterHandlersFairness {
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
|
||||||
RouterContext routerContext = cluster.getRandomRouter();
|
RouterContext routerContext = cluster.getRandomRouter();
|
||||||
|
URI address = routerContext.getFileSystemURI();
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
final int numOps = 10;
|
||||||
|
AtomicInteger overloadException = new AtomicInteger();
|
||||||
|
|
||||||
|
// Test when handlers are overloaded
|
||||||
if (fairness) {
|
if (fairness) {
|
||||||
if (isConcurrent) {
|
if (isConcurrent) {
|
||||||
LOG.info("Taking fanout lock first");
|
LOG.info("Taking fanout lock first");
|
||||||
|
@ -142,12 +147,80 @@ public class TestRouterHandlersFairness {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
URI address = routerContext.getFileSystemURI();
|
|
||||||
Configuration conf = new HdfsConfiguration();
|
|
||||||
final int numOps = 10;
|
|
||||||
final AtomicInteger overloadException = new AtomicInteger();
|
|
||||||
int originalRejectedPermits = getTotalRejectedPermits(routerContext);
|
int originalRejectedPermits = getTotalRejectedPermits(routerContext);
|
||||||
|
|
||||||
|
// |- All calls should fail since permits not released
|
||||||
|
innerCalls(address, numOps, isConcurrent, conf, overloadException);
|
||||||
|
|
||||||
|
int latestRejectedPermits = getTotalRejectedPermits(routerContext);
|
||||||
|
assertEquals(latestRejectedPermits - originalRejectedPermits,
|
||||||
|
overloadException.get());
|
||||||
|
|
||||||
|
if (fairness) {
|
||||||
|
assertTrue(overloadException.get() > 0);
|
||||||
|
if (isConcurrent) {
|
||||||
|
LOG.info("Release fanout lock that was taken before test");
|
||||||
|
// take the lock for concurrent NS to block fanout calls
|
||||||
|
routerContext.getRouter().getRpcServer()
|
||||||
|
.getRPCClient().getRouterRpcFairnessPolicyController()
|
||||||
|
.releasePermit(RouterRpcFairnessConstants.CONCURRENT_NS);
|
||||||
|
} else {
|
||||||
|
for (String ns : cluster.getNameservices()) {
|
||||||
|
routerContext.getRouter().getRpcServer()
|
||||||
|
.getRPCClient().getRouterRpcFairnessPolicyController()
|
||||||
|
.releasePermit(ns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assertEquals("Number of failed RPCs without fairness configured",
|
||||||
|
0, overloadException.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test when handlers are not overloaded
|
||||||
|
int originalAcceptedPermits = getTotalAcceptedPermits(routerContext);
|
||||||
|
overloadException = new AtomicInteger();
|
||||||
|
|
||||||
|
// |- All calls should succeed since permits not acquired
|
||||||
|
innerCalls(address, numOps, isConcurrent, conf, overloadException);
|
||||||
|
|
||||||
|
int latestAcceptedPermits = getTotalAcceptedPermits(routerContext);
|
||||||
|
assertEquals(latestAcceptedPermits - originalAcceptedPermits, numOps);
|
||||||
|
assertEquals(overloadException.get(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void invokeSequential(ClientProtocol routerProto) throws IOException {
|
||||||
|
routerProto.getFileInfo("/test.txt");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void invokeConcurrent(ClientProtocol routerProto, String clientName)
|
||||||
|
throws IOException {
|
||||||
|
routerProto.renewLease(clientName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getTotalRejectedPermits(RouterContext routerContext) {
|
||||||
|
int totalRejectedPermits = 0;
|
||||||
|
for (String ns : cluster.getNameservices()) {
|
||||||
|
totalRejectedPermits += routerContext.getRouterRpcClient()
|
||||||
|
.getRejectedPermitForNs(ns);
|
||||||
|
}
|
||||||
|
totalRejectedPermits += routerContext.getRouterRpcClient()
|
||||||
|
.getRejectedPermitForNs(RouterRpcFairnessConstants.CONCURRENT_NS);
|
||||||
|
return totalRejectedPermits;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getTotalAcceptedPermits(RouterContext routerContext) {
|
||||||
|
int totalAcceptedPermits = 0;
|
||||||
|
for (String ns : cluster.getNameservices()) {
|
||||||
|
totalAcceptedPermits += routerContext.getRouterRpcClient()
|
||||||
|
.getAcceptedPermitForNs(ns);
|
||||||
|
}
|
||||||
|
totalAcceptedPermits += routerContext.getRouterRpcClient()
|
||||||
|
.getAcceptedPermitForNs(RouterRpcFairnessConstants.CONCURRENT_NS);
|
||||||
|
return totalAcceptedPermits;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void innerCalls(URI address, int numOps, boolean isConcurrent,
|
||||||
|
Configuration conf, AtomicInteger overloadException) throws IOException {
|
||||||
for (int i = 0; i < numOps; i++) {
|
for (int i = 0; i < numOps; i++) {
|
||||||
DFSClient routerClient = null;
|
DFSClient routerClient = null;
|
||||||
try {
|
try {
|
||||||
|
@ -178,48 +251,5 @@ public class TestRouterHandlersFairness {
|
||||||
}
|
}
|
||||||
overloadException.get();
|
overloadException.get();
|
||||||
}
|
}
|
||||||
int latestRejectedPermits = getTotalRejectedPermits(routerContext);
|
|
||||||
assertEquals(latestRejectedPermits - originalRejectedPermits,
|
|
||||||
overloadException.get());
|
|
||||||
|
|
||||||
if (fairness) {
|
|
||||||
assertTrue(overloadException.get() > 0);
|
|
||||||
if (isConcurrent) {
|
|
||||||
LOG.info("Release fanout lock that was taken before test");
|
|
||||||
// take the lock for concurrent NS to block fanout calls
|
|
||||||
routerContext.getRouter().getRpcServer()
|
|
||||||
.getRPCClient().getRouterRpcFairnessPolicyController()
|
|
||||||
.releasePermit(RouterRpcFairnessConstants.CONCURRENT_NS);
|
|
||||||
} else {
|
|
||||||
for (String ns : cluster.getNameservices()) {
|
|
||||||
routerContext.getRouter().getRpcServer()
|
|
||||||
.getRPCClient().getRouterRpcFairnessPolicyController()
|
|
||||||
.releasePermit(ns);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
assertEquals("Number of failed RPCs without fairness configured",
|
|
||||||
0, overloadException.get());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void invokeSequential(ClientProtocol routerProto) throws IOException {
|
|
||||||
routerProto.getFileInfo("/test.txt");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void invokeConcurrent(ClientProtocol routerProto, String clientName)
|
|
||||||
throws IOException {
|
|
||||||
routerProto.renewLease(clientName);
|
|
||||||
}
|
|
||||||
|
|
||||||
private int getTotalRejectedPermits(RouterContext routerContext) {
|
|
||||||
int totalRejectedPermits = 0;
|
|
||||||
for (String ns : cluster.getNameservices()) {
|
|
||||||
totalRejectedPermits += routerContext.getRouterRpcClient()
|
|
||||||
.getRejectedPermitForNs(ns);
|
|
||||||
}
|
|
||||||
totalRejectedPermits += routerContext.getRouterRpcClient()
|
|
||||||
.getRejectedPermitForNs(RouterRpcFairnessConstants.CONCURRENT_NS);
|
|
||||||
return totalRejectedPermits;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue