diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 21bb7d20d4f..641b5665c4d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -62,6 +62,9 @@ Release 2.5.0 - UNRELEASED YARN-1366. Changed AMRMClient to re-register with RM and send outstanding requests back to RM on work-preserving RM restart. (Rohith via jianhe) + YARN-2181. Added preemption info to logs and RM web UI. (Wangda Tan via + jianhe) + IMPROVEMENTS YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index 2b590a0e803..bf374b434d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -223,4 +223,11 @@ ApplicationReport createAndGetApplicationReport(String clientUserName, * @return the external user-facing state of ApplicationMaster. */ YarnApplicationState createApplicationState(); + + /** + * Get RMAppMetrics of the {@link RMApp}. + * + * @return metrics + */ + RMAppMetrics getRMAppMetrics(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index bff41cfc219..523e6beb512 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; @@ -65,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppStartAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; @@ -78,6 +80,7 @@ import org.apache.hadoop.yarn.state.SingleArcTransition; import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; +import org.apache.hadoop.yarn.util.resource.Resources; @SuppressWarnings({ "rawtypes", "unchecked" }) public class RMAppImpl implements RMApp, Recoverable { @@ -1202,4 +1205,25 @@ private RMAppState getRecoveredFinalState() { public Set getRanNodes() { return ranNodes; } + + @Override + public RMAppMetrics getRMAppMetrics() { + Resource resourcePreempted = Resource.newInstance(0, 0); + int numAMContainerPreempted = 0; + int numNonAMContainerPreempted = 0; + for (RMAppAttempt attempt : attempts.values()) { + if (null != attempt) { + RMAppAttemptMetrics attemptMetrics = + attempt.getRMAppAttemptMetrics(); + Resources.addTo(resourcePreempted, + attemptMetrics.getResourcePreempted()); + numAMContainerPreempted += attemptMetrics.getIsPreempted() ? 1 : 0; + numNonAMContainerPreempted += + attemptMetrics.getNumNonAMContainersPreempted(); + } + } + + return new RMAppMetrics(resourcePreempted, + numNonAMContainerPreempted, numAMContainerPreempted); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java new file mode 100644 index 00000000000..645db1631ec --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmapp; + +import org.apache.hadoop.yarn.api.records.Resource; + +public class RMAppMetrics { + final Resource resourcePreempted; + final int numNonAMContainersPreempted; + final int numAMContainersPreempted; + + public RMAppMetrics(Resource resourcePreempted, + int numNonAMContainersPreempted, int numAMContainersPreempted) { + this.resourcePreempted = resourcePreempted; + this.numNonAMContainersPreempted = numNonAMContainersPreempted; + this.numAMContainersPreempted = numAMContainersPreempted; + } + + public Resource getResourcePreempted() { + return resourcePreempted; + } + + public int getNumNonAMContainersPreempted() { + return numNonAMContainersPreempted; + } + + public int getNumAMContainersPreempted() { + return numAMContainersPreempted; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java index 68a068b9811..b5ed92c8603 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java @@ -207,4 +207,10 @@ public interface RMAppAttempt extends EventHandler { * */ boolean shouldCountTowardsMaxAttemptRetry(); + + /** + * Get metrics from the {@link RMAppAttempt} + * @return metrics + */ + RMAppAttemptMetrics getRMAppAttemptMetrics(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index b57436b9155..01fb1ab90a5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -163,6 +163,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private RMAppAttemptState recoveredFinalState; private RMAppAttemptState stateBeforeFinalSaving; private Object transitionTodo; + + private RMAppAttemptMetrics attemptMetrics = null; private static final StateMachineFactory pdiv = html. + _(InfoBlock.class). + div(_INFO_WRAP); + info("Application Overview").clear(); + info("Application Metrics") + ._("Total Resource Preempted:", + appMerics.getResourcePreempted()) + ._("Total Number of Non-AM Containers Preempted:", + String.valueOf(appMerics.getNumNonAMContainersPreempted())) + ._("Total Number of AM Containers Preempted:", + String.valueOf(appMerics.getNumAMContainersPreempted())) + ._("Resource Preempted from Current Attempt:", + attemptMetrics.getResourcePreempted()) + ._("Number of Non-AM Containers Preempted from Current Attempt:", + String.valueOf(attemptMetrics + .getNumNonAMContainersPreempted())); + pdiv._(); Collection attempts = rmApp.getAppAttempts().values(); String amString = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java index 11f798d18b0..c7354cedaba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.Times; @@ -78,6 +79,12 @@ public class AppInfo { protected int allocatedMB; protected int allocatedVCores; protected int runningContainers; + + // preemption info fields + protected int preemptedResourceMB; + protected int preemptedResourceVCores; + protected int numNonAMContainerPreempted; + protected int numAMContainerPreempted; public AppInfo() { } // JAXB needs this @@ -147,6 +154,17 @@ public AppInfo(RMApp app, Boolean hasAccess, String schemePrefix) { } } } + + // copy preemption info fields + RMAppMetrics appMetrics = app.getRMAppMetrics(); + numAMContainerPreempted = + appMetrics.getNumAMContainersPreempted(); + preemptedResourceMB = + appMetrics.getResourcePreempted().getMemory(); + numNonAMContainerPreempted = + appMetrics.getNumNonAMContainersPreempted(); + preemptedResourceVCores = + appMetrics.getResourcePreempted().getVirtualCores(); } } @@ -254,4 +272,19 @@ public int getAllocatedVCores() { return this.allocatedVCores; } + public int getPreemptedMB() { + return preemptedResourceMB; + } + + public int getPreemptedVCores() { + return preemptedResourceVCores; + } + + public int getNumNonAMContainersPreempted() { + return numNonAMContainerPreempted; + } + + public int getNumAMContainersPreempted() { + return numAMContainerPreempted; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java index cfd05f9d1c7..f1a3bbcae83 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java @@ -24,7 +24,6 @@ import java.util.List; import org.junit.Assert; - import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; @@ -34,6 +33,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; @@ -251,4 +251,22 @@ public Object run() throws Exception { public ApplicationAttemptId getApplicationAttemptId() { return this.attemptId; } + + public List allocateAndWaitForContainers(int nContainer, + int memory, MockNM nm) throws Exception { + // AM request for containers + allocate("ANY", memory, nContainer, null); + // kick the scheduler + nm.nodeHeartbeat(true); + List conts = + allocate(new ArrayList(), null) + .getAllocatedContainers(); + while (conts.size() < nContainer) { + nm.nodeHeartbeat(true); + conts.addAll(allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(500); + } + return conts; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index 4349a236939..ff60fcd7a9a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -32,10 +32,12 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; @@ -165,6 +167,11 @@ public YarnApplicationState createApplicationState() { public Set getRanNodes() { throw new UnsupportedOperationException("Not supported yet."); } + + @Override + public RMAppMetrics getRMAppMetrics() { + return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0); + } } public static RMApp newApplication(int i) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index 8f26d1054ef..b63d2fe0e84 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -238,4 +239,13 @@ public YarnApplicationState createApplicationState() { public Set getRanNodes() { return null; } + + public Resource getResourcePreempted() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public RMAppMetrics getRMAppMetrics() { + throw new UnsupportedOperationException("Not supported yet."); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index c3b1d575a5e..3ca3c480c57 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -50,6 +50,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; @@ -62,6 +63,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.server.resourcemanager.Application; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; import org.apache.hadoop.yarn.server.resourcemanager.MockNM; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; @@ -72,7 +74,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.TestAMAuthorization.MockRMWithAMS; import org.apache.hadoop.yarn.server.resourcemanager.TestAMAuthorization.MyContainerManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; @@ -706,7 +711,47 @@ public void testAsyncScheduling() throws Exception { CapacityScheduler.schedule(cs); } } + + private MockAM launchAM(RMApp app, MockRM rm, MockNM nm) + throws Exception { + RMAppAttempt attempt = app.getCurrentAppAttempt(); + nm.nodeHeartbeat(true); + MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId()); + am.registerAppAttempt(); + rm.waitForState(app.getApplicationId(), RMAppState.RUNNING); + return am; + } + private void waitForAppPreemptionInfo(RMApp app, Resource preempted, + int numAMPreempted, int numTaskPreempted, + Resource currentAttemptPreempted, boolean currentAttemptAMPreempted, + int numLatestAttemptTaskPreempted) throws InterruptedException { + while (true) { + RMAppMetrics appPM = app.getRMAppMetrics(); + RMAppAttemptMetrics attemptPM = + app.getCurrentAppAttempt().getRMAppAttemptMetrics(); + + if (appPM.getResourcePreempted().equals(preempted) + && appPM.getNumAMContainersPreempted() == numAMPreempted + && appPM.getNumNonAMContainersPreempted() == numTaskPreempted + && attemptPM.getResourcePreempted().equals(currentAttemptPreempted) + && app.getCurrentAppAttempt().getRMAppAttemptMetrics() + .getIsPreempted() == currentAttemptAMPreempted + && attemptPM.getNumNonAMContainersPreempted() == + numLatestAttemptTaskPreempted) { + return; + } + Thread.sleep(500); + } + } + + private void waitForNewAttemptCreated(RMApp app, + ApplicationAttemptId previousAttemptId) throws InterruptedException { + while (app.getCurrentAppAttempt().equals(previousAttemptId)) { + Thread.sleep(500); + } + } + @Test(timeout = 30000) public void testAllocateDoesNotBlockOnSchedulerLock() throws Exception { final YarnConfiguration conf = new YarnConfiguration(); @@ -828,4 +873,78 @@ null, new RMContainerTokenSecretManager(conf), cs.stop(); } + + @Test(timeout = 120000) + public void testPreemptionInfo() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 3); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + int CONTAINER_MEMORY = 1024; // start RM + MockRM rm1 = new MockRM(conf); + rm1.start(); + + // get scheduler + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + + // start NM + MockNM nm1 = + new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService()); + nm1.registerNode(); + + // create app and launch the AM + RMApp app0 = rm1.submitApp(CONTAINER_MEMORY); + MockAM am0 = launchAM(app0, rm1, nm1); + + // get scheduler app + FiCaSchedulerApp schedulerAppAttempt = + cs.getSchedulerApplications().get(app0.getApplicationId()) + .getCurrentAppAttempt(); + + // allocate some containers and launch them + List allocatedContainers = + am0.allocateAndWaitForContainers(3, CONTAINER_MEMORY, nm1); + + // kill the 3 containers + for (Container c : allocatedContainers) { + cs.killContainer(schedulerAppAttempt.getRMContainer(c.getId())); + } + + // check values + waitForAppPreemptionInfo(app0, + Resource.newInstance(CONTAINER_MEMORY * 3, 3), 0, 3, + Resource.newInstance(CONTAINER_MEMORY * 3, 3), false, 3); + + // kill app0-attempt0 AM container + cs.killContainer(schedulerAppAttempt.getRMContainer(app0 + .getCurrentAppAttempt().getMasterContainer().getId())); + + // wait for app0 failed + waitForNewAttemptCreated(app0, am0.getApplicationAttemptId()); + + // check values + waitForAppPreemptionInfo(app0, + Resource.newInstance(CONTAINER_MEMORY * 4, 4), 1, 3, + Resource.newInstance(0, 0), false, 0); + + // launch app0-attempt1 + MockAM am1 = launchAM(app0, rm1, nm1); + schedulerAppAttempt = + cs.getSchedulerApplications().get(app0.getApplicationId()) + .getCurrentAppAttempt(); + + // allocate some containers and launch them + allocatedContainers = + am1.allocateAndWaitForContainers(3, CONTAINER_MEMORY, nm1); + for (Container c : allocatedContainers) { + cs.killContainer(schedulerAppAttempt.getRMContainer(c.getId())); + } + + // check values + waitForAppPreemptionInfo(app0, + Resource.newInstance(CONTAINER_MEMORY * 7, 7), 1, 6, + Resource.newInstance(CONTAINER_MEMORY * 3, 3), false, 3); + + rm1.stop(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 690fa7421b7..d5eb93393e1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -41,13 +41,14 @@ import java.util.Map; import org.junit.Assert; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -498,8 +499,11 @@ public void testSingleQueueWithOneUser() throws Exception { // Release each container from app_0 for (RMContainer rmContainer : app_0.getLiveContainers()) { - a.completedContainer(clusterResource, app_0, node_0, rmContainer, - null, RMContainerEventType.KILL, null); + a.completedContainer(clusterResource, app_0, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); } assertEquals(1*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); @@ -509,8 +513,11 @@ public void testSingleQueueWithOneUser() throws Exception { // Release each container from app_1 for (RMContainer rmContainer : app_1.getLiveContainers()) { - a.completedContainer(clusterResource, app_1, node_0, rmContainer, - null, RMContainerEventType.KILL, null); + a.completedContainer(clusterResource, app_1, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); } assertEquals(0*GB, a.getUsedResources().getMemory()); @@ -870,8 +877,11 @@ public void testSingleQueueWithMultipleUsers() throws Exception { // 8. Release each container from app_0 for (RMContainer rmContainer : app_0.getLiveContainers()) { - a.completedContainer(clusterResource, app_0, node_0, rmContainer, - null, RMContainerEventType.KILL, null); + a.completedContainer(clusterResource, app_0, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); } assertEquals(5*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); @@ -881,8 +891,11 @@ public void testSingleQueueWithMultipleUsers() throws Exception { // 9. Release each container from app_2 for (RMContainer rmContainer : app_2.getLiveContainers()) { - a.completedContainer(clusterResource, app_2, node_0, rmContainer, - null, RMContainerEventType.KILL, null); + a.completedContainer(clusterResource, app_2, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); } assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); @@ -892,8 +905,11 @@ public void testSingleQueueWithMultipleUsers() throws Exception { // 10. Release each container from app_3 for (RMContainer rmContainer : app_3.getLiveContainers()) { - a.completedContainer(clusterResource, app_3, node_0, rmContainer, - null, RMContainerEventType.KILL, null); + a.completedContainer(clusterResource, app_3, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); } assertEquals(0*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); @@ -979,9 +995,12 @@ public void testReservation() throws Exception { assertEquals(2*GB, a.getMetrics().getAllocatedMB()); // Now free 1 container from app_0 i.e. 1G - a.completedContainer(clusterResource, app_0, node_0, - app_0.getLiveContainers().iterator().next(), - null, RMContainerEventType.KILL, null); + RMContainer rmContainer = app_0.getLiveContainers().iterator().next(); + a.completedContainer(clusterResource, app_0, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); a.assignContainers(clusterResource, node_0); assertEquals(5*GB, a.getUsedResources().getMemory()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemory()); @@ -992,9 +1011,12 @@ public void testReservation() throws Exception { assertEquals(1*GB, a.getMetrics().getAllocatedMB()); // Now finish another container from app_0 and fulfill the reservation - a.completedContainer(clusterResource, app_0, node_0, - app_0.getLiveContainers().iterator().next(), - null, RMContainerEventType.KILL, null); + rmContainer = app_0.getLiveContainers().iterator().next(); + a.completedContainer(clusterResource, app_0, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); a.assignContainers(clusterResource, node_0); assertEquals(4*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); @@ -1094,9 +1116,12 @@ public void testStolenReservedContainer() throws Exception { assertEquals(6*GB, a.getMetrics().getAllocatedMB()); // Now free 1 container from app_0 and try to assign to node_0 - a.completedContainer(clusterResource, app_0, node_0, - app_0.getLiveContainers().iterator().next(), - null, RMContainerEventType.KILL, null); + RMContainer rmContainer = app_0.getLiveContainers().iterator().next(); + a.completedContainer(clusterResource, app_0, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); a.assignContainers(clusterResource, node_0); assertEquals(8*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); @@ -1186,9 +1211,12 @@ public void testReservationExchange() throws Exception { assertEquals(2*GB, node_0.getUsedResource().getMemory()); // Now free 1 container from app_0 i.e. 1G, and re-reserve it - a.completedContainer(clusterResource, app_0, node_0, - app_0.getLiveContainers().iterator().next(), - null, RMContainerEventType.KILL, null); + RMContainer rmContainer = app_0.getLiveContainers().iterator().next(); + a.completedContainer(clusterResource, app_0, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); a.assignContainers(clusterResource, node_0); assertEquals(5*GB, a.getUsedResources().getMemory()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemory()); @@ -1218,9 +1246,12 @@ public void testReservationExchange() throws Exception { assertEquals(2, app_1.getReReservations(priority)); // Now finish another container from app_0 and see the reservation cancelled - a.completedContainer(clusterResource, app_0, node_0, - app_0.getLiveContainers().iterator().next(), - null, RMContainerEventType.KILL, null); + rmContainer = app_0.getLiveContainers().iterator().next(); + a.completedContainer(clusterResource, app_0, node_0, rmContainer, + ContainerStatus.newInstance(rmContainer.getContainerId(), + ContainerState.COMPLETE, "", + ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), + RMContainerEventType.KILL, null); CSAssignment assignment = a.assignContainers(clusterResource, node_0); assertEquals(8*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index dd1e4bb6cde..ec942f97696 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -17,6 +17,13 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -32,16 +39,14 @@ import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.Clock; -import java.io.File; -import java.util.ArrayList; -import java.util.List; - public class FairSchedulerTestBase { protected static class MockClock implements Clock { private long time = 0; @@ -153,6 +158,13 @@ protected ApplicationAttemptId createSchedulingRequest( priority, numContainers, true); ask.add(request); scheduler.allocate(id, ask, new ArrayList(), null, null); + RMApp rmApp = mock(RMApp.class); + RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class); + when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt); + when(rmAppAttempt.getRMAppAttemptMetrics()).thenReturn( + new RMAppAttemptMetrics(id)); + resourceManager.getRMContext().getRMApps() + .put(id.getApplicationId(), rmApp); return id; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index 45b380326c0..b009bfa2e8e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -1310,33 +1310,44 @@ public void verifyAppsXML(NodeList nodes, RMApp app) throws JSONException, WebServicesTestUtils.getXmlString(element, "amContainerLogs"), WebServicesTestUtils.getXmlInt(element, "allocatedMB"), WebServicesTestUtils.getXmlInt(element, "allocatedVCores"), - WebServicesTestUtils.getXmlInt(element, "runningContainers")); + WebServicesTestUtils.getXmlInt(element, "runningContainers"), + WebServicesTestUtils.getXmlInt(element, "preemptedResourceMB"), + WebServicesTestUtils.getXmlInt(element, "preemptedResourceVCores"), + WebServicesTestUtils.getXmlInt(element, "numNonAMContainerPreempted"), + WebServicesTestUtils.getXmlInt(element, "numAMContainerPreempted")); } } public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException, Exception { - // 20 because trackingUrl not assigned yet - assertEquals("incorrect number of elements", 20, info.length()); + // 28 because trackingUrl not assigned yet + assertEquals("incorrect number of elements", 24, info.length()); verifyAppInfoGeneric(app, info.getString("id"), info.getString("user"), - info.getString("name"), info.getString("applicationType"), info.getString("queue"), - info.getString("state"), info.getString("finalStatus"), - (float) info.getDouble("progress"), info.getString("trackingUI"), - info.getString("diagnostics"), info.getLong("clusterId"), - info.getLong("startedTime"), info.getLong("finishedTime"), - info.getLong("elapsedTime"), info.getString("amHostHttpAddress"), - info.getString("amContainerLogs"), info.getInt("allocatedMB"), - info.getInt("allocatedVCores"), info.getInt("runningContainers")); + info.getString("name"), info.getString("applicationType"), + info.getString("queue"), info.getString("state"), + info.getString("finalStatus"), (float) info.getDouble("progress"), + info.getString("trackingUI"), info.getString("diagnostics"), + info.getLong("clusterId"), info.getLong("startedTime"), + info.getLong("finishedTime"), info.getLong("elapsedTime"), + info.getString("amHostHttpAddress"), info.getString("amContainerLogs"), + info.getInt("allocatedMB"), info.getInt("allocatedVCores"), + info.getInt("runningContainers"), + info.getInt("preemptedResourceMB"), + info.getInt("preemptedResourceVCores"), + info.getInt("numNonAMContainerPreempted"), + info.getInt("numAMContainerPreempted")); } public void verifyAppInfoGeneric(RMApp app, String id, String user, - String name, String applicationType, String queue, String state, String finalStatus, - float progress, String trackingUI, String diagnostics, long clusterId, - long startedTime, long finishedTime, long elapsedTime, - String amHostHttpAddress, String amContainerLogs, int allocatedMB, - int allocatedVCores, int numContainers) throws JSONException, + String name, String applicationType, String queue, String state, + String finalStatus, float progress, String trackingUI, + String diagnostics, long clusterId, long startedTime, long finishedTime, + long elapsedTime, String amHostHttpAddress, String amContainerLogs, + int allocatedMB, int allocatedVCores, int numContainers, + int preemptedResourceMB, int preemptedResourceVCores, + int numNonAMContainerPreempted, int numAMContainerPreempted) throws JSONException, Exception { WebServicesTestUtils.checkStringMatch("id", app.getApplicationId() @@ -1371,6 +1382,18 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user, assertEquals("allocatedMB doesn't match", 1024, allocatedMB); assertEquals("allocatedVCores doesn't match", 1, allocatedVCores); assertEquals("numContainers doesn't match", 1, numContainers); + assertEquals("preemptedResourceMB doesn't match", app + .getRMAppMetrics().getResourcePreempted().getMemory(), + preemptedResourceMB); + assertEquals("preemptedResourceVCores doesn't match", app + .getRMAppMetrics().getResourcePreempted().getVirtualCores(), + preemptedResourceVCores); + assertEquals("numNonAMContainerPreempted doesn't match", app + .getRMAppMetrics().getNumNonAMContainersPreempted(), + numNonAMContainerPreempted); + assertEquals("numAMContainerPreempted doesn't match", app + .getRMAppMetrics().getNumAMContainersPreempted(), + numAMContainerPreempted); } @Test