YARN-6547. Enhance SLS-based tests leveraging invariant checker.
This commit is contained in:
parent
c31cb879a3
commit
b65100c14b
|
@ -27,6 +27,7 @@ import java.lang.management.GarbageCollectorMXBean;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.log.metrics.EventCounter;
|
import org.apache.hadoop.log.metrics.EventCounter;
|
||||||
import org.apache.hadoop.metrics2.MetricsCollector;
|
import org.apache.hadoop.metrics2.MetricsCollector;
|
||||||
|
@ -59,6 +60,15 @@ public class JvmMetrics implements MetricsSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public synchronized void registerIfNeeded(){
|
||||||
|
// during tests impl might exist, but is not registered
|
||||||
|
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||||
|
if (ms.getSource("JvmMetrics") == null) {
|
||||||
|
ms.register(JvmMetrics.name(), JvmMetrics.description(), this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static final float M = 1024*1024;
|
static final float M = 1024*1024;
|
||||||
static public final float MEMORY_MAX_UNLIMITED_MB = -1;
|
static public final float MEMORY_MAX_UNLIMITED_MB = -1;
|
||||||
|
|
||||||
|
|
|
@ -135,6 +135,8 @@
|
||||||
<exclude>src/test/resources/syn.json</exclude>
|
<exclude>src/test/resources/syn.json</exclude>
|
||||||
<exclude>src/test/resources/inputsls.json</exclude>
|
<exclude>src/test/resources/inputsls.json</exclude>
|
||||||
<exclude>src/test/resources/nodes.json</exclude>
|
<exclude>src/test/resources/nodes.json</exclude>
|
||||||
|
<exclude>src/test/resources/exit-invariants.txt</exclude>
|
||||||
|
<exclude>src/test/resources/ongoing-invariants.txt</exclude>
|
||||||
</excludes>
|
</excludes>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.conf.Configured;
|
import org.apache.hadoop.conf.Configured;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.mapreduce.TaskType;
|
import org.apache.hadoop.mapreduce.TaskType;
|
||||||
|
import org.apache.hadoop.metrics2.source.JvmMetrics;
|
||||||
import org.apache.hadoop.tools.rumen.JobTraceReader;
|
import org.apache.hadoop.tools.rumen.JobTraceReader;
|
||||||
import org.apache.hadoop.tools.rumen.LoggedJob;
|
import org.apache.hadoop.tools.rumen.LoggedJob;
|
||||||
import org.apache.hadoop.tools.rumen.LoggedTask;
|
import org.apache.hadoop.tools.rumen.LoggedTask;
|
||||||
|
@ -243,6 +244,13 @@ public class SLSRunner extends Configured implements Tool {
|
||||||
return new MockAMLauncher(se, this.rmContext, amMap);
|
return new MockAMLauncher(se, this.rmContext, amMap);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Across runs of parametrized tests, the JvmMetrics object is retained,
|
||||||
|
// but is not registered correctly
|
||||||
|
JvmMetrics jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null);
|
||||||
|
jvmMetrics.registerIfNeeded();
|
||||||
|
|
||||||
|
// Init and start the actual ResourceManager
|
||||||
rm.init(rmConf);
|
rm.init(rmConf);
|
||||||
rm.start();
|
rm.start();
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,8 +21,10 @@ import net.jcip.annotations.NotThreadSafe;
|
||||||
import org.apache.commons.lang3.ArrayUtils;
|
import org.apache.commons.lang3.ArrayUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.monitor.invariants.MetricsInvariantChecker;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
import org.junit.runners.Parameterized;
|
import org.junit.runners.Parameterized;
|
||||||
import org.junit.runners.Parameterized.Parameter;
|
import org.junit.runners.Parameterized.Parameter;
|
||||||
|
@ -39,7 +41,7 @@ import java.util.UUID;
|
||||||
@RunWith(value = Parameterized.class)
|
@RunWith(value = Parameterized.class)
|
||||||
@NotThreadSafe
|
@NotThreadSafe
|
||||||
@SuppressWarnings("VisibilityModifier")
|
@SuppressWarnings("VisibilityModifier")
|
||||||
public class BaseSLSRunnerTest {
|
public abstract class BaseSLSRunnerTest {
|
||||||
|
|
||||||
@Parameter(value = 0)
|
@Parameter(value = 0)
|
||||||
public String schedulerType;
|
public String schedulerType;
|
||||||
|
@ -54,6 +56,11 @@ public class BaseSLSRunnerTest {
|
||||||
public String nodeFile;
|
public String nodeFile;
|
||||||
|
|
||||||
protected SLSRunner sls;
|
protected SLSRunner sls;
|
||||||
|
protected String ongoingInvariantFile;
|
||||||
|
protected String exitInvariantFile;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public abstract void setup();
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void tearDown() throws InterruptedException {
|
public void tearDown() throws InterruptedException {
|
||||||
|
@ -82,22 +89,30 @@ public class BaseSLSRunnerTest {
|
||||||
switch (traceType) {
|
switch (traceType) {
|
||||||
case "OLD_SLS":
|
case "OLD_SLS":
|
||||||
args = new String[] {"-inputsls", traceLocation, "-output",
|
args = new String[] {"-inputsls", traceLocation, "-output",
|
||||||
slsOutputDir.getAbsolutePath()};
|
slsOutputDir.getAbsolutePath() };
|
||||||
break;
|
break;
|
||||||
case "OLD_RUMEN":
|
case "OLD_RUMEN":
|
||||||
args = new String[] {"-inputrumen", traceLocation, "-output",
|
args = new String[] {"-inputrumen", traceLocation, "-output",
|
||||||
slsOutputDir.getAbsolutePath()};
|
slsOutputDir.getAbsolutePath() };
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
args = new String[] {"-tracetype", traceType, "-tracelocation",
|
args = new String[] {"-tracetype", traceType, "-tracelocation",
|
||||||
traceLocation, "-output", slsOutputDir.getAbsolutePath()};
|
traceLocation, "-output", slsOutputDir.getAbsolutePath() };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nodeFile != null) {
|
if (nodeFile != null) {
|
||||||
args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile});
|
args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// enable continuous invariant checks
|
||||||
conf.set(YarnConfiguration.RM_SCHEDULER, schedulerType);
|
conf.set(YarnConfiguration.RM_SCHEDULER, schedulerType);
|
||||||
|
if (ongoingInvariantFile != null) {
|
||||||
|
conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
|
||||||
|
MetricsInvariantChecker.class.getCanonicalName());
|
||||||
|
conf.set(MetricsInvariantChecker.INVARIANTS_FILE, ongoingInvariantFile);
|
||||||
|
conf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true);
|
||||||
|
}
|
||||||
|
|
||||||
sls = new SLSRunner(conf);
|
sls = new SLSRunner(conf);
|
||||||
sls.run(args);
|
sls.run(args);
|
||||||
|
|
||||||
|
@ -115,6 +130,22 @@ public class BaseSLSRunnerTest {
|
||||||
}
|
}
|
||||||
timeout--;
|
timeout--;
|
||||||
}
|
}
|
||||||
|
shutdownHookInvariantCheck();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks exit invariants (e.g., number of apps submitted, completed, etc.).
|
||||||
|
*/
|
||||||
|
private void shutdownHookInvariantCheck() {
|
||||||
|
|
||||||
|
if(exitInvariantFile!=null) {
|
||||||
|
MetricsInvariantChecker ic = new MetricsInvariantChecker();
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set(MetricsInvariantChecker.INVARIANTS_FILE, exitInvariantFile);
|
||||||
|
conf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true);
|
||||||
|
ic.init(conf, null, null);
|
||||||
|
ic.editSchedule();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,4 +70,8 @@ public class TestReservationSystemInvariants extends BaseSLSRunnerTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setup() {
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,9 @@ package org.apache.hadoop.yarn.sls;
|
||||||
|
|
||||||
import net.jcip.annotations.NotThreadSafe;
|
import net.jcip.annotations.NotThreadSafe;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
|
||||||
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
import org.junit.runners.Parameterized;
|
import org.junit.runners.Parameterized;
|
||||||
|
@ -38,12 +41,8 @@ public class TestSLSRunner extends BaseSLSRunnerTest {
|
||||||
@Parameters(name = "Testing with: {1}, {0}, (nodeFile {3})")
|
@Parameters(name = "Testing with: {1}, {0}, (nodeFile {3})")
|
||||||
public static Collection<Object[]> data() {
|
public static Collection<Object[]> data() {
|
||||||
|
|
||||||
String capScheduler =
|
String capScheduler = CapacityScheduler.class.getCanonicalName();
|
||||||
"org.apache.hadoop.yarn.server.resourcemanager.scheduler."
|
String fairScheduler = FairScheduler.class.getCanonicalName();
|
||||||
+ "capacity.CapacityScheduler";
|
|
||||||
String fairScheduler =
|
|
||||||
"org.apache.hadoop.yarn.server.resourcemanager.scheduler."
|
|
||||||
+ "fair.FairScheduler";
|
|
||||||
String slsTraceFile = "src/test/resources/inputsls.json";
|
String slsTraceFile = "src/test/resources/inputsls.json";
|
||||||
String rumenTraceFile = "src/main/data/2jobs2min-rumen-jh.json";
|
String rumenTraceFile = "src/main/data/2jobs2min-rumen-jh.json";
|
||||||
String synthTraceFile = "src/test/resources/syn.json";
|
String synthTraceFile = "src/test/resources/syn.json";
|
||||||
|
@ -73,6 +72,12 @@ public class TestSLSRunner extends BaseSLSRunnerTest {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() {
|
||||||
|
ongoingInvariantFile = "src/test/resources/ongoing-invariants.txt";
|
||||||
|
exitInvariantFile = "src/test/resources/exit-invariants.txt";
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout = 60000)
|
@Test(timeout = 60000)
|
||||||
@SuppressWarnings("all")
|
@SuppressWarnings("all")
|
||||||
public void testSimulatorRunning() throws Exception {
|
public void testSimulatorRunning() throws Exception {
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
ActiveApplications >= 0
|
||||||
|
AppsCompleted >= 0
|
||||||
|
AppsFailed >= 0
|
||||||
|
AppsKilled >= 0
|
||||||
|
AppsPending >= 0
|
||||||
|
AppsRunning >= 0
|
||||||
|
AppsSubmitted >= 0
|
||||||
|
PendingContainers >= 0
|
|
@ -0,0 +1,19 @@
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# log4j configuration used during build and unit tests
|
||||||
|
|
||||||
|
log4j.rootLogger=info,stdout
|
||||||
|
log4j.threshold=ALL
|
||||||
|
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||||
|
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||||
|
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n
|
|
@ -0,0 +1,54 @@
|
||||||
|
running_0 >= 0
|
||||||
|
running_60 >= 0
|
||||||
|
running_300 >= 0
|
||||||
|
running_1440 >= 0
|
||||||
|
AppsSubmitted >= 0
|
||||||
|
AppsRunning >= 0
|
||||||
|
AppsPending >= 0
|
||||||
|
AppsCompleted >= 0
|
||||||
|
AppsKilled >= 0
|
||||||
|
AppsFailed >= 0
|
||||||
|
AllocatedMB >= 0
|
||||||
|
AllocatedVCores >= 0
|
||||||
|
AllocatedContainers >= 0
|
||||||
|
AggregateContainersAllocated >= 0
|
||||||
|
AggregateNodeLocalContainersAllocated >= 0
|
||||||
|
AggregateRackLocalContainersAllocated >= 0
|
||||||
|
AggregateOffSwitchContainersAllocated >= 0
|
||||||
|
AggregateContainersReleased >= 0
|
||||||
|
AggregateContainersPreempted >= 0
|
||||||
|
AvailableMB >= 0
|
||||||
|
AvailableVCores >= 0
|
||||||
|
PendingMB >= 0
|
||||||
|
PendingVCores >= 0
|
||||||
|
PendingContainers >= 0
|
||||||
|
ReservedMB >= 0
|
||||||
|
ReservedVCores >= 0
|
||||||
|
ReservedContainers >= 0
|
||||||
|
ActiveUsers >= 0
|
||||||
|
ActiveApplications >= 0
|
||||||
|
AppAttemptFirstContainerAllocationDelayNumOps >= 0
|
||||||
|
AppAttemptFirstContainerAllocationDelayAvgTime >= 0
|
||||||
|
MemNonHeapUsedM >= 0
|
||||||
|
MemNonHeapCommittedM >= 0
|
||||||
|
MemNonHeapMaxM >= 0 || MemNonHeapMaxM == -1
|
||||||
|
MemHeapUsedM >= 0
|
||||||
|
MemHeapCommittedM >= 0
|
||||||
|
MemHeapMaxM >= 0
|
||||||
|
MemMaxM >= 0
|
||||||
|
GcCountPS_Scavenge >= 0
|
||||||
|
GcTimeMillisPS_Scavenge >= 0
|
||||||
|
GcCountPS_MarkSweep >= 0
|
||||||
|
GcTimeMillisPS_MarkSweep >= 0
|
||||||
|
GcCount >= 0
|
||||||
|
GcTimeMillis >= 0
|
||||||
|
ThreadsNew >= 0
|
||||||
|
ThreadsRunnable >= 0
|
||||||
|
ThreadsBlocked >= 0
|
||||||
|
ThreadsWaiting >= 0
|
||||||
|
ThreadsTimedWaiting >= 0
|
||||||
|
ThreadsTerminated >= 0
|
||||||
|
LogFatal >= 0
|
||||||
|
LogError >= 0
|
||||||
|
LogWarn >= 0
|
||||||
|
LogInfo >= 0
|
Loading…
Reference in New Issue