YARN-11446. [Federation] Add updateSchedulerConfiguration, getSchedulerConfiguration REST APIs for Router. (#5476)

This commit is contained in:
slfan1989 2023-03-29 00:33:19 +08:00 committed by GitHub
parent aa602381c5
commit 5bc8f25327
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 430 additions and 6 deletions

View File

@ -33,6 +33,8 @@ public class ConfInfo {
private ArrayList<ConfItem> property = new ArrayList<>(); private ArrayList<ConfItem> property = new ArrayList<>();
private String subClusterId;
public ConfInfo() { public ConfInfo() {
} // JAXB needs this } // JAXB needs this
@ -74,5 +76,14 @@ public class ConfInfo {
public String getValue() { public String getValue() {
return value; return value;
} }
}
public String getSubClusterId() {
return subClusterId;
}
public void setSubClusterId(String subClusterId) {
this.subClusterId = subClusterId;
} }
} }

View File

@ -44,6 +44,9 @@ public class SchedConfUpdateInfo {
@XmlElement(name = "update-queue") @XmlElement(name = "update-queue")
private ArrayList<QueueConfigInfo> updateQueueInfo = new ArrayList<>(); private ArrayList<QueueConfigInfo> updateQueueInfo = new ArrayList<>();
@XmlElement(name = "subClusterId")
private String subClusterId = "";
private HashMap<String, String> global = new HashMap<>(); private HashMap<String, String> global = new HashMap<>();
public SchedConfUpdateInfo() { public SchedConfUpdateInfo() {
@ -82,4 +85,12 @@ public class SchedConfUpdateInfo {
public void setGlobalParams(HashMap<String, String> globalInfo) { public void setGlobalParams(HashMap<String, String> globalInfo) {
this.global = globalInfo; this.global = globalInfo;
} }
public String getSubClusterId() {
return subClusterId;
}
public void setSubClusterId(String subClusterId) {
this.subClusterId = subClusterId;
}
} }

View File

@ -159,6 +159,10 @@ public final class RouterMetrics {
private MutableGaugeInt numAddToClusterNodeLabelsFailedRetrieved; private MutableGaugeInt numAddToClusterNodeLabelsFailedRetrieved;
@Metric("# of removeFromClusterNodeLabels failed to be retrieved") @Metric("# of removeFromClusterNodeLabels failed to be retrieved")
private MutableGaugeInt numRemoveFromClusterNodeLabelsFailedRetrieved; private MutableGaugeInt numRemoveFromClusterNodeLabelsFailedRetrieved;
@Metric("# of numUpdateSchedulerConfiguration failed to be retrieved")
private MutableGaugeInt numUpdateSchedulerConfigurationFailedRetrieved;
@Metric("# of numGetSchedulerConfiguration failed to be retrieved")
private MutableGaugeInt numGetSchedulerConfigurationFailedRetrieved;
@Metric("# of getClusterInfo failed to be retrieved") @Metric("# of getClusterInfo failed to be retrieved")
private MutableGaugeInt numGetClusterInfoFailedRetrieved; private MutableGaugeInt numGetClusterInfoFailedRetrieved;
@Metric("# of getClusterUserInfo failed to be retrieved") @Metric("# of getClusterUserInfo failed to be retrieved")
@ -287,6 +291,10 @@ public final class RouterMetrics {
private MutableRate totalSucceededAddToClusterNodeLabelsRetrieved; private MutableRate totalSucceededAddToClusterNodeLabelsRetrieved;
@Metric("Total number of successful Retrieved RemoveFromClusterNodeLabels and latency(ms)") @Metric("Total number of successful Retrieved RemoveFromClusterNodeLabels and latency(ms)")
private MutableRate totalSucceededRemoveFromClusterNodeLabelsRetrieved; private MutableRate totalSucceededRemoveFromClusterNodeLabelsRetrieved;
@Metric("Total number of successful Retrieved updateSchedulerConfiguration and latency(ms)")
private MutableRate totalSucceededUpdateSchedulerConfigurationRetrieved;
@Metric("Total number of successful Retrieved getSchedulerConfiguration and latency(ms)")
private MutableRate totalSucceededGetSchedulerConfigurationRetrieved;
@Metric("Total number of successful Retrieved GetClusterInfoRetrieved and latency(ms)") @Metric("Total number of successful Retrieved GetClusterInfoRetrieved and latency(ms)")
private MutableRate totalSucceededGetClusterInfoRetrieved; private MutableRate totalSucceededGetClusterInfoRetrieved;
@Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)") @Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)")
@ -358,6 +366,8 @@ public final class RouterMetrics {
private MutableQuantiles replaceLabelsOnNodeLatency; private MutableQuantiles replaceLabelsOnNodeLatency;
private MutableQuantiles addToClusterNodeLabelsLatency; private MutableQuantiles addToClusterNodeLabelsLatency;
private MutableQuantiles removeFromClusterNodeLabelsLatency; private MutableQuantiles removeFromClusterNodeLabelsLatency;
private MutableQuantiles updateSchedulerConfigLatency;
private MutableQuantiles getSchedulerConfigurationLatency;
private MutableQuantiles getClusterInfoLatency; private MutableQuantiles getClusterInfoLatency;
private MutableQuantiles getClusterUserInfoLatency; private MutableQuantiles getClusterUserInfoLatency;
private MutableQuantiles updateNodeResourceLatency; private MutableQuantiles updateNodeResourceLatency;
@ -572,6 +582,12 @@ public final class RouterMetrics {
removeFromClusterNodeLabelsLatency = registry.newQuantiles("removeFromClusterNodeLabelsLatency", removeFromClusterNodeLabelsLatency = registry.newQuantiles("removeFromClusterNodeLabelsLatency",
"latency of remove cluster nodelabels timeouts", "ops", "latency", 10); "latency of remove cluster nodelabels timeouts", "ops", "latency", 10);
updateSchedulerConfigLatency = registry.newQuantiles("updateSchedulerConfigurationLatency",
"latency of update scheduler configuration timeouts", "ops", "latency", 10);
getSchedulerConfigurationLatency = registry.newQuantiles("getSchedulerConfigurationLatency",
"latency of get scheduler configuration timeouts", "ops", "latency", 10);
getClusterInfoLatency = registry.newQuantiles("getClusterInfoLatency", getClusterInfoLatency = registry.newQuantiles("getClusterInfoLatency",
"latency of get cluster info timeouts", "ops", "latency", 10); "latency of get cluster info timeouts", "ops", "latency", 10);
@ -879,6 +895,16 @@ public final class RouterMetrics {
return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().numSamples(); return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().numSamples();
} }
@VisibleForTesting
public long getNumSucceededUpdateSchedulerConfigurationRetrieved() {
return totalSucceededUpdateSchedulerConfigurationRetrieved.lastStat().numSamples();
}
@VisibleForTesting
public long getNumSucceededGetSchedulerConfigurationRetrieved() {
return totalSucceededGetSchedulerConfigurationRetrieved.lastStat().numSamples();
}
@VisibleForTesting @VisibleForTesting
public long getNumSucceededGetClusterInfoRetrieved() { public long getNumSucceededGetClusterInfoRetrieved() {
return totalSucceededGetClusterInfoRetrieved.lastStat().numSamples(); return totalSucceededGetClusterInfoRetrieved.lastStat().numSamples();
@ -1189,6 +1215,16 @@ public final class RouterMetrics {
return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().mean(); return totalSucceededRemoveFromClusterNodeLabelsRetrieved.lastStat().mean();
} }
@VisibleForTesting
public double getLatencySucceededUpdateSchedulerConfigurationRetrieved() {
return totalSucceededUpdateSchedulerConfigurationRetrieved.lastStat().mean();
}
@VisibleForTesting
public double getLatencySucceededGetSchedulerConfigurationRetrieved() {
return totalSucceededGetSchedulerConfigurationRetrieved.lastStat().mean();
}
@VisibleForTesting @VisibleForTesting
public double getLatencySucceededGetClusterInfoRetrieved() { public double getLatencySucceededGetClusterInfoRetrieved() {
return totalSucceededGetClusterInfoRetrieved.lastStat().mean(); return totalSucceededGetClusterInfoRetrieved.lastStat().mean();
@ -1454,6 +1490,14 @@ public final class RouterMetrics {
return numRemoveFromClusterNodeLabelsFailedRetrieved.value(); return numRemoveFromClusterNodeLabelsFailedRetrieved.value();
} }
public int getUpdateSchedulerConfigurationFailedRetrieved() {
return numUpdateSchedulerConfigurationFailedRetrieved.value();
}
public int getSchedulerConfigurationFailedRetrieved() {
return numGetSchedulerConfigurationFailedRetrieved.value();
}
public int getClusterInfoFailedRetrieved() { public int getClusterInfoFailedRetrieved() {
return numGetClusterInfoFailedRetrieved.value(); return numGetClusterInfoFailedRetrieved.value();
} }
@ -1773,6 +1817,16 @@ public final class RouterMetrics {
removeFromClusterNodeLabelsLatency.add(duration); removeFromClusterNodeLabelsLatency.add(duration);
} }
public void succeededUpdateSchedulerConfigurationRetrieved(long duration) {
totalSucceededUpdateSchedulerConfigurationRetrieved.add(duration);
updateSchedulerConfigLatency.add(duration);
}
public void succeededGetSchedulerConfigurationRetrieved(long duration) {
totalSucceededGetSchedulerConfigurationRetrieved.add(duration);
getSchedulerConfigurationLatency.add(duration);
}
public void succeededGetClusterInfoRetrieved(long duration) { public void succeededGetClusterInfoRetrieved(long duration) {
totalSucceededGetClusterInfoRetrieved.add(duration); totalSucceededGetClusterInfoRetrieved.add(duration);
getClusterInfoLatency.add(duration); getClusterInfoLatency.add(duration);
@ -2013,6 +2067,14 @@ public final class RouterMetrics {
numRemoveFromClusterNodeLabelsFailedRetrieved.incr(); numRemoveFromClusterNodeLabelsFailedRetrieved.incr();
} }
public void incrUpdateSchedulerConfigurationFailedRetrieved() {
numUpdateSchedulerConfigurationFailedRetrieved.incr();
}
public void incrGetSchedulerConfigurationFailedRetrieved() {
numGetSchedulerConfigurationFailedRetrieved.incr();
}
public void incrGetClusterInfoFailedRetrieved() { public void incrGetClusterInfoFailedRetrieved() {
numGetClusterInfoFailedRetrieved.incr(); numGetClusterInfoFailedRetrieved.incr();
} }

View File

@ -44,7 +44,6 @@ import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status; import javax.ws.rs.core.Response.Status;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.impl.prefetch.Validate; import org.apache.hadoop.fs.impl.prefetch.Validate;
@ -129,6 +128,7 @@ import org.apache.hadoop.yarn.server.router.webapp.dao.FederationBulkActivitiesI
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.SubClusterResult; import org.apache.hadoop.yarn.server.router.webapp.dao.SubClusterResult;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationSchedulerTypeInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationSchedulerTypeInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationConfInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterUserInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterUserInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationClusterInfo;
import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@ -136,6 +136,7 @@ import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
import org.apache.hadoop.yarn.util.LRUCacheHashMap; import org.apache.hadoop.yarn.util.LRUCacheHashMap;
import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo; import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.MonotonicClock; import org.apache.hadoop.yarn.util.MonotonicClock;
@ -848,6 +849,29 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
} }
} }
/**
* Get the active subcluster in the federation.
*
* @param subClusterId subClusterId.
* @return subClusterInfo.
* @throws NotFoundException If the subclusters cannot be found.
*/
private SubClusterInfo getActiveSubCluster(String subClusterId)
throws NotFoundException {
try {
SubClusterId pSubClusterId = SubClusterId.newInstance(subClusterId);
Map<SubClusterId, SubClusterInfo> subClusterInfoMap =
federationFacade.getSubClusters(true);
SubClusterInfo subClusterInfo = subClusterInfoMap.get(pSubClusterId);
if (subClusterInfo == null) {
throw new NotFoundException(subClusterId + " not found.");
}
return subClusterInfo;
} catch (YarnException e) {
throw new NotFoundException(e.getMessage());
}
}
/** /**
* The YARN Router will forward to the request to all the SubClusters to find * The YARN Router will forward to the request to all the SubClusters to find
* where the node is running. * where the node is running.
@ -2906,17 +2930,117 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
throw new RuntimeException("getContainer Failed."); throw new RuntimeException("getContainer Failed.");
} }
/**
* This method updates the Scheduler configuration, and it is reachable by
* using {@link RMWSConsts#SCHEDULER_CONF}.
*
* @param mutationInfo th information for making scheduler configuration
* changes (supports adding, removing, or updating a queue, as well
* as global scheduler conf changes)
* @param hsr the servlet request
* @return Response containing the status code
* @throws AuthorizationException if the user is not authorized to invoke this
* method
* @throws InterruptedException if interrupted
*/
@Override @Override
public Response updateSchedulerConfiguration(SchedConfUpdateInfo mutationInfo, public Response updateSchedulerConfiguration(SchedConfUpdateInfo mutationInfo,
HttpServletRequest hsr) HttpServletRequest hsr) throws AuthorizationException, InterruptedException {
throws AuthorizationException, InterruptedException {
throw new NotImplementedException("Code is not implemented"); // Make Sure mutationInfo is not null.
if (mutationInfo == null) {
routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
throw new IllegalArgumentException(
"Parameter error, the schedConfUpdateInfo is empty or null.");
}
// In federated mode, we may have a mix of multiple schedulers.
// In order to ensure accurate update scheduler configuration,
// we need users to explicitly set subClusterId.
String pSubClusterId = mutationInfo.getSubClusterId();
if (StringUtils.isBlank(pSubClusterId)) {
routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
throw new IllegalArgumentException("Parameter error, " +
"the subClusterId is empty or null.");
}
// Get the subClusterInfo , then update the scheduler configuration.
try {
long startTime = clock.getTime();
SubClusterInfo subClusterInfo = getActiveSubCluster(pSubClusterId);
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
Response response = interceptor.updateSchedulerConfiguration(mutationInfo, hsr);
if (response != null) {
long endTime = clock.getTime();
routerMetrics.succeededUpdateSchedulerConfigurationRetrieved(endTime - startTime);
return Response.status(response.getStatus()).entity(response.getEntity()).build();
}
} catch (NotFoundException e) {
routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
RouterServerUtil.logAndThrowRunTimeException(e,
"Get subCluster error. subClusterId = %s", pSubClusterId);
} catch (Exception e) {
routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
RouterServerUtil.logAndThrowRunTimeException(e,
"UpdateSchedulerConfiguration error. subClusterId = %s", pSubClusterId);
}
routerMetrics.incrUpdateSchedulerConfigurationFailedRetrieved();
throw new RuntimeException("UpdateSchedulerConfiguration error. subClusterId = "
+ pSubClusterId);
} }
/**
* This method retrieves all the Scheduler configuration, and it is reachable
* by using {@link RMWSConsts#SCHEDULER_CONF}.
*
* @param hsr the servlet request
* @return Response containing the status code
* @throws AuthorizationException if the user is not authorized to invoke this
* method.
*/
@Override @Override
public Response getSchedulerConfiguration(HttpServletRequest hsr) public Response getSchedulerConfiguration(HttpServletRequest hsr)
throws AuthorizationException { throws AuthorizationException {
throw new NotImplementedException("Code is not implemented"); try {
long startTime = clock.getTime();
FederationConfInfo federationConfInfo = new FederationConfInfo();
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
final HttpServletRequest hsrCopy = clone(hsr);
Class[] argsClasses = new Class[]{HttpServletRequest.class};
Object[] args = new Object[]{hsrCopy};
ClientMethod remoteMethod = new ClientMethod("getSchedulerConfiguration", argsClasses, args);
Map<SubClusterInfo, Response> responseMap =
invokeConcurrent(subClustersActive.values(), remoteMethod, Response.class);
responseMap.forEach((subClusterInfo, response) -> {
SubClusterId subClusterId = subClusterInfo.getSubClusterId();
if (response == null) {
String errorMsg = subClusterId + " Can't getSchedulerConfiguration.";
federationConfInfo.getErrorMsgs().add(errorMsg);
} else if (response.getStatus() == Status.BAD_REQUEST.getStatusCode()) {
String errorMsg = String.valueOf(response.getEntity());
federationConfInfo.getErrorMsgs().add(errorMsg);
} else if (response.getStatus() == Status.OK.getStatusCode()) {
ConfInfo fedConfInfo = ConfInfo.class.cast(response.getEntity());
fedConfInfo.setSubClusterId(subClusterId.getId());
federationConfInfo.getList().add(fedConfInfo);
}
});
long endTime = clock.getTime();
routerMetrics.succeededGetSchedulerConfigurationRetrieved(endTime - startTime);
return Response.status(Status.OK).entity(federationConfInfo).build();
} catch (NotFoundException e) {
RouterServerUtil.logAndThrowRunTimeException("get all active sub cluster(s) error.", e);
routerMetrics.incrGetSchedulerConfigurationFailedRetrieved();
} catch (Exception e) {
routerMetrics.incrGetSchedulerConfigurationFailedRetrieved();
RouterServerUtil.logAndThrowRunTimeException("getSchedulerConfiguration error.", e);
return Response.status(Status.BAD_REQUEST).entity("getSchedulerConfiguration error.").build();
}
routerMetrics.incrGetSchedulerConfigurationFailedRetrieved();
throw new RuntimeException("getSchedulerConfiguration error.");
} }
@Override @Override

View File

@ -0,0 +1,55 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.router.webapp.dao;
import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import java.util.ArrayList;
import java.util.List;
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class FederationConfInfo extends ConfInfo {
@XmlElement(name = "subCluster")
private List<ConfInfo> list = new ArrayList<>();
@XmlElement(name = "errorMsgs")
private List<String> errorMsgs = new ArrayList<>();
public FederationConfInfo() {
} // JAXB needs this
public List<ConfInfo> getList() {
return list;
}
public void setList(List<ConfInfo> list) {
this.list = list;
}
public List<String> getErrorMsgs() {
return errorMsgs;
}
public void setErrorMsgs(List<String> errorMsgs) {
this.errorMsgs = errorMsgs;
}
}

View File

@ -569,6 +569,16 @@ public class TestRouterMetrics {
metrics.incrGetBulkActivitiesFailedRetrieved(); metrics.incrGetBulkActivitiesFailedRetrieved();
} }
public void getSchedulerConfigurationFailed() {
LOG.info("Mocked: failed getSchedulerConfiguration call");
metrics.incrGetSchedulerConfigurationFailedRetrieved();
}
public void updateSchedulerConfigurationFailedRetrieved() {
LOG.info("Mocked: failed updateSchedulerConfiguration call");
metrics.incrUpdateSchedulerConfigurationFailedRetrieved();
}
public void getClusterInfoFailed() { public void getClusterInfoFailed() {
LOG.info("Mocked: failed getClusterInfo call"); LOG.info("Mocked: failed getClusterInfo call");
metrics.incrGetClusterInfoFailedRetrieved(); metrics.incrGetClusterInfoFailedRetrieved();
@ -859,6 +869,16 @@ public class TestRouterMetrics {
metrics.succeededAddToClusterNodeLabelsRetrieved(duration); metrics.succeededAddToClusterNodeLabelsRetrieved(duration);
} }
public void getSchedulerConfigurationRetrieved(long duration) {
LOG.info("Mocked: successful GetSchedulerConfiguration call with duration {}", duration);
metrics.succeededGetSchedulerConfigurationRetrieved(duration);
}
public void getUpdateSchedulerConfigurationRetrieved(long duration) {
LOG.info("Mocked: successful UpdateSchedulerConfiguration call with duration {}", duration);
metrics.succeededUpdateSchedulerConfigurationRetrieved(duration);
}
public void getClusterInfoRetrieved(long duration) { public void getClusterInfoRetrieved(long duration) {
LOG.info("Mocked: successful GetClusterInfoRetrieved call with duration {}", duration); LOG.info("Mocked: successful GetClusterInfoRetrieved call with duration {}", duration);
metrics.succeededGetClusterInfoRetrieved(duration); metrics.succeededGetClusterInfoRetrieved(duration);
@ -1889,6 +1909,52 @@ public class TestRouterMetrics {
metrics.getLatencySucceededAddToClusterNodeLabelsRetrieved(), ASSERT_DOUBLE_DELTA); metrics.getLatencySucceededAddToClusterNodeLabelsRetrieved(), ASSERT_DOUBLE_DELTA);
} }
@Test
public void testGetSchedulerConfigurationRetrievedFailed() {
long totalBadBefore = metrics.getSchedulerConfigurationFailedRetrieved();
badSubCluster.getSchedulerConfigurationFailed();
Assert.assertEquals(totalBadBefore + 1,
metrics.getSchedulerConfigurationFailedRetrieved());
}
@Test
public void testGetSchedulerConfigurationRetrieved() {
long totalGoodBefore = metrics.getNumSucceededGetSchedulerConfigurationRetrieved();
goodSubCluster.getSchedulerConfigurationRetrieved(150);
Assert.assertEquals(totalGoodBefore + 1,
metrics.getNumSucceededGetSchedulerConfigurationRetrieved());
Assert.assertEquals(150,
metrics.getLatencySucceededGetSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
goodSubCluster.getSchedulerConfigurationRetrieved(300);
Assert.assertEquals(totalGoodBefore + 2,
metrics.getNumSucceededGetSchedulerConfigurationRetrieved());
Assert.assertEquals(225,
metrics.getLatencySucceededGetSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
}
@Test
public void testUpdateSchedulerConfigurationRetrievedFailed() {
long totalBadBefore = metrics.getUpdateSchedulerConfigurationFailedRetrieved();
badSubCluster.updateSchedulerConfigurationFailedRetrieved();
Assert.assertEquals(totalBadBefore + 1,
metrics.getUpdateSchedulerConfigurationFailedRetrieved());
}
@Test
public void testUpdateSchedulerConfigurationRetrieved() {
long totalGoodBefore = metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved();
goodSubCluster.getUpdateSchedulerConfigurationRetrieved(150);
Assert.assertEquals(totalGoodBefore + 1,
metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved());
Assert.assertEquals(150,
metrics.getLatencySucceededUpdateSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
goodSubCluster.getUpdateSchedulerConfigurationRetrieved(300);
Assert.assertEquals(totalGoodBefore + 2,
metrics.getNumSucceededUpdateSchedulerConfigurationRetrieved());
Assert.assertEquals(225,
metrics.getLatencySucceededUpdateSchedulerConfigurationRetrieved(), ASSERT_DOUBLE_DELTA);
}
@Test @Test
public void testGetClusterInfoRetrievedFailed() { public void testGetClusterInfoRetrievedFailed() {
long totalBadBefore = metrics.getClusterInfoFailedRetrieved(); long totalBadBefore = metrics.getClusterInfoFailedRetrieved();

View File

@ -102,6 +102,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueu
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerTestUtilities; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerTestUtilities;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf.MutableCSConfigurationProvider;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo;
@ -159,6 +160,8 @@ import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.BadRequestException;
import org.apache.hadoop.yarn.webapp.ForbiddenException; import org.apache.hadoop.yarn.webapp.ForbiddenException;
import org.apache.hadoop.yarn.webapp.NotFoundException; import org.apache.hadoop.yarn.webapp.NotFoundException;
import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.mockito.Mockito; import org.mockito.Mockito;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -1007,7 +1010,7 @@ public class MockDefaultRequestInterceptorREST
} }
if (resContext.getReservationId() == null) { if (resContext.getReservationId() == null) {
throw new BadRequestException("Update operations must specify an existing ReservaitonId"); throw new BadRequestException("Update operations must specify an existing ReservationId");
} }
ReservationRequestInterpreter[] values = ReservationRequestInterpreter.values(); ReservationRequestInterpreter[] values = ReservationRequestInterpreter.values();
@ -1366,6 +1369,29 @@ public class MockDefaultRequestInterceptorREST
} }
@Override @Override
public Response updateSchedulerConfiguration(SchedConfUpdateInfo mutationInfo,
HttpServletRequest req) throws AuthorizationException, InterruptedException {
RMContext rmContext = mockRM.getRMContext();
MutableCSConfigurationProvider provider = new MutableCSConfigurationProvider(rmContext);
try {
Configuration conf = new Configuration();
conf.set(YarnConfiguration.SCHEDULER_CONFIGURATION_STORE_CLASS,
YarnConfiguration.MEMORY_CONFIGURATION_STORE);
provider.init(conf);
provider.logAndApplyMutation(UserGroupInformation.getCurrentUser(), mutationInfo);
} catch (Exception e) {
throw new RuntimeException(e);
}
return Response.status(Status.OK).
entity("Configuration change successfully applied.").build();
}
@Override
public Response getSchedulerConfiguration(HttpServletRequest req) throws AuthorizationException {
return Response.status(Status.OK).entity(new ConfInfo(mockRM.getConfig()))
.build();
}
public ClusterInfo getClusterInfo() { public ClusterInfo getClusterInfo() {
ClusterInfo clusterInfo = new ClusterInfo(mockRM); ClusterInfo clusterInfo = new ClusterInfo(mockRM);
return clusterInfo; return clusterInfo;

View File

@ -126,6 +126,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ReservationDelet
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ActivitiesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ActivitiesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeAllocationInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeAllocationInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.BulkActivitiesInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.BulkActivitiesInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationConfInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo; import org.apache.hadoop.yarn.server.router.webapp.dao.FederationRMQueueAclInfo;
@ -138,6 +139,9 @@ import org.apache.hadoop.yarn.util.MonotonicClock;
import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.util.YarnVersionInfo; import org.apache.hadoop.yarn.util.YarnVersionInfo;
import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.BadRequestException;
import org.apache.hadoop.yarn.webapp.dao.ConfInfo;
import org.apache.hadoop.yarn.webapp.dao.QueueConfigInfo;
import org.apache.hadoop.yarn.webapp.dao.SchedConfUpdateInfo;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
@ -171,6 +175,7 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
private final static int NUM_SUBCLUSTER = 4; private final static int NUM_SUBCLUSTER = 4;
private static final int BAD_REQUEST = 400; private static final int BAD_REQUEST = 400;
private static final int ACCEPTED = 202; private static final int ACCEPTED = 202;
private static final int OK = 200;
private static String user = "test-user"; private static String user = "test-user";
private TestableFederationInterceptorREST interceptor; private TestableFederationInterceptorREST interceptor;
private MemoryFederationStateStore stateStore; private MemoryFederationStateStore stateStore;
@ -2134,6 +2139,35 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
() -> interceptor.removeFromClusterNodeLabels(oldNodeLabels1, null)); () -> interceptor.removeFromClusterNodeLabels(oldNodeLabels1, null));
} }
@Test
public void testGetSchedulerConfiguration() throws Exception {
Response response = interceptor.getSchedulerConfiguration(null);
Assert.assertNotNull(response);
Assert.assertEquals(OK, response.getStatus());
Object entity = response.getEntity();
Assert.assertNotNull(entity);
Assert.assertTrue(entity instanceof FederationConfInfo);
FederationConfInfo federationConfInfo = FederationConfInfo.class.cast(entity);
List<ConfInfo> confInfos = federationConfInfo.getList();
Assert.assertNotNull(confInfos);
Assert.assertEquals(4, confInfos.size());
List<String> errors = federationConfInfo.getErrorMsgs();
Assert.assertEquals(0, errors.size());
Set<String> subClusterSet = subClusters.stream()
.map(subClusterId -> subClusterId.getId()).collect(Collectors.toSet());
for (ConfInfo confInfo : confInfos) {
List<ConfInfo.ConfItem> confItems = confInfo.getItems();
Assert.assertNotNull(confItems);
Assert.assertTrue(confItems.size() > 0);
Assert.assertTrue(subClusterSet.contains(confInfo.getSubClusterId()));
}
}
@Test @Test
public void testGetClusterUserInfo() { public void testGetClusterUserInfo() {
String requestUserName = "test-user"; String requestUserName = "test-user";
@ -2173,6 +2207,41 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
} }
} }
@Test
public void testUpdateSchedulerConfigurationErrorMsg() throws Exception {
SchedConfUpdateInfo mutationInfo = new SchedConfUpdateInfo();
LambdaTestUtils.intercept(IllegalArgumentException.class,
"Parameter error, the subClusterId is empty or null.",
() -> interceptor.updateSchedulerConfiguration(mutationInfo, null));
LambdaTestUtils.intercept(IllegalArgumentException.class,
"Parameter error, the schedConfUpdateInfo is empty or null.",
() -> interceptor.updateSchedulerConfiguration(null, null));
}
@Test
public void testUpdateSchedulerConfiguration()
throws AuthorizationException, InterruptedException {
SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo();
updateInfo.setSubClusterId("1");
Map<String, String> goodUpdateMap = new HashMap<>();
goodUpdateMap.put("goodKey", "goodVal");
QueueConfigInfo goodUpdateInfo = new
QueueConfigInfo("root.default", goodUpdateMap);
updateInfo.getUpdateQueueInfo().add(goodUpdateInfo);
Response response = interceptor.updateSchedulerConfiguration(updateInfo, null);
Assert.assertNotNull(response);
Assert.assertEquals(OK, response.getStatus());
String expectMsg = "Configuration change successfully applied.";
Object entity = response.getEntity();
Assert.assertNotNull(entity);
String entityMsg = String.valueOf(entity);
Assert.assertEquals(expectMsg, entityMsg);
}
@Test @Test
public void testGetClusterInfo() { public void testGetClusterInfo() {
ClusterInfo clusterInfos = interceptor.getClusterInfo(); ClusterInfo clusterInfos = interceptor.getClusterInfo();