YARN-4546. ResourceManager crash due to scheduling opportunity overflow. Contributed by Jason Lowe.
(cherry picked from commit c1462a67ff)
(cherry picked from commit 1cc001db4c)
Conflicts:
hadoop-yarn-project/CHANGES.txt
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java
This commit is contained in:
parent
28fd4c70ca
commit
258ef9f9c3
|
@@ -50,6 +50,9 @@ Release 2.7.3 - UNRELEASED
|
||||||
|
|
||||||
YARN-4452. NPE when submit Unmanaged application. (Naganarasimha G R
|
YARN-4452. NPE when submit Unmanaged application. (Naganarasimha G R
|
||||||
via junping_du)
|
via junping_du)
|
||||||
|
|
||||||
|
YARN-4546. ResourceManager crash due to scheduling opportunity overflow.
|
||||||
|
(Jason Lowe via junping_du)
|
||||||
|
|
||||||
Release 2.7.2 - UNRELEASED
|
Release 2.7.2 - UNRELEASED
|
||||||
|
|
||||||
|
|
|
@@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.collect.HashMultiset;
|
import com.google.common.collect.HashMultiset;
|
||||||
import com.google.common.collect.Multiset;
|
import com.google.common.collect.Multiset;
|
||||||
|
@@ -488,8 +489,10 @@ public class SchedulerApplicationAttempt {
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void addSchedulingOpportunity(Priority priority) {
|
public synchronized void addSchedulingOpportunity(Priority priority) {
|
||||||
schedulingOpportunities.setCount(priority,
|
int count = schedulingOpportunities.count(priority);
|
||||||
schedulingOpportunities.count(priority) + 1);
|
if (count < Integer.MAX_VALUE) {
|
||||||
|
schedulingOpportunities.setCount(priority, count + 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void subtractSchedulingOpportunity(Priority priority) {
|
public synchronized void subtractSchedulingOpportunity(Priority priority) {
|
||||||
|
@@ -523,6 +526,11 @@ public class SchedulerApplicationAttempt {
|
||||||
schedulingOpportunities.setCount(priority, 0);
|
schedulingOpportunities.setCount(priority, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void setSchedulingOpportunities(Priority priority, int count) {
|
||||||
|
schedulingOpportunities.setCount(priority, count);
|
||||||
|
}
|
||||||
|
|
||||||
synchronized AggregateAppResourceUsage getRunningAggregateAppResourceUsage() {
|
synchronized AggregateAppResourceUsage getRunningAggregateAppResourceUsage() {
|
||||||
long currentTimeMillis = System.currentTimeMillis();
|
long currentTimeMillis = System.currentTimeMillis();
|
||||||
// Don't walk the whole container list if the resources were computed
|
// Don't walk the whole container list if the resources were computed
|
||||||
|
|
|
@@ -163,4 +163,26 @@ public class TestSchedulerApplicationAttempt {
|
||||||
ApplicationAttemptId.newInstance(appIdImpl, attemptId);
|
ApplicationAttemptId.newInstance(appIdImpl, attemptId);
|
||||||
return attId;
|
return attId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSchedulingOpportunityOverflow() throws Exception {
|
||||||
|
ApplicationAttemptId attemptId = createAppAttemptId(0, 0);
|
||||||
|
Queue queue = createQueue("test", null);
|
||||||
|
RMContext rmContext = mock(RMContext.class);
|
||||||
|
when(rmContext.getEpoch()).thenReturn(3L);
|
||||||
|
SchedulerApplicationAttempt app = new SchedulerApplicationAttempt(
|
||||||
|
attemptId, "user", queue, queue.getActiveUsersManager(), rmContext);
|
||||||
|
Priority priority = Priority.newInstance(1);
|
||||||
|
assertEquals(0, app.getSchedulingOpportunities(priority));
|
||||||
|
app.addSchedulingOpportunity(priority);
|
||||||
|
assertEquals(1, app.getSchedulingOpportunities(priority));
|
||||||
|
// verify the count is capped at MAX_VALUE and does not overflow
|
||||||
|
app.setSchedulingOpportunities(priority, Integer.MAX_VALUE - 1);
|
||||||
|
assertEquals(Integer.MAX_VALUE - 1,
|
||||||
|
app.getSchedulingOpportunities(priority));
|
||||||
|
app.addSchedulingOpportunity(priority);
|
||||||
|
assertEquals(Integer.MAX_VALUE, app.getSchedulingOpportunities(priority));
|
||||||
|
app.addSchedulingOpportunity(priority);
|
||||||
|
assertEquals(Integer.MAX_VALUE, app.getSchedulingOpportunities(priority));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue