YARN-6547. Enhance SLS-based tests leveraging invariant checker.
This commit is contained in:
parent
c31cb879a3
commit
b65100c14b
|
@ -27,6 +27,7 @@ import java.lang.management.GarbageCollectorMXBean;
|
|||
import java.util.List;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.log.metrics.EventCounter;
|
||||
import org.apache.hadoop.metrics2.MetricsCollector;
|
||||
|
@ -59,6 +60,15 @@ public class JvmMetrics implements MetricsSource {
|
|||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public synchronized void registerIfNeeded(){
|
||||
// during tests impl might exist, but is not registered
|
||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||
if (ms.getSource("JvmMetrics") == null) {
|
||||
ms.register(JvmMetrics.name(), JvmMetrics.description(), this);
|
||||
}
|
||||
}
|
||||
|
||||
static final float M = 1024*1024;
|
||||
static public final float MEMORY_MAX_UNLIMITED_MB = -1;
|
||||
|
||||
|
|
|
@ -135,6 +135,8 @@
|
|||
<exclude>src/test/resources/syn.json</exclude>
|
||||
<exclude>src/test/resources/inputsls.json</exclude>
|
||||
<exclude>src/test/resources/nodes.json</exclude>
|
||||
<exclude>src/test/resources/exit-invariants.txt</exclude>
|
||||
<exclude>src/test/resources/ongoing-invariants.txt</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapreduce.TaskType;
|
||||
import org.apache.hadoop.metrics2.source.JvmMetrics;
|
||||
import org.apache.hadoop.tools.rumen.JobTraceReader;
|
||||
import org.apache.hadoop.tools.rumen.LoggedJob;
|
||||
import org.apache.hadoop.tools.rumen.LoggedTask;
|
||||
|
@ -243,6 +244,13 @@ public class SLSRunner extends Configured implements Tool {
|
|||
return new MockAMLauncher(se, this.rmContext, amMap);
|
||||
}
|
||||
};
|
||||
|
||||
// Across runs of parameterized tests, the JvmMetrics object is retained,
|
||||
// but is not registered correctly
|
||||
JvmMetrics jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null);
|
||||
jvmMetrics.registerIfNeeded();
|
||||
|
||||
// Init and start the actual ResourceManager
|
||||
rm.init(rmConf);
|
||||
rm.start();
|
||||
}
|
||||
|
|
|
@ -21,8 +21,10 @@ import net.jcip.annotations.NotThreadSafe;
|
|||
import org.apache.commons.lang3.ArrayUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.monitor.invariants.MetricsInvariantChecker;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.junit.runners.Parameterized.Parameter;
|
||||
|
@ -39,7 +41,7 @@ import java.util.UUID;
|
|||
@RunWith(value = Parameterized.class)
|
||||
@NotThreadSafe
|
||||
@SuppressWarnings("VisibilityModifier")
|
||||
public class BaseSLSRunnerTest {
|
||||
public abstract class BaseSLSRunnerTest {
|
||||
|
||||
@Parameter(value = 0)
|
||||
public String schedulerType;
|
||||
|
@ -54,6 +56,11 @@ public class BaseSLSRunnerTest {
|
|||
public String nodeFile;
|
||||
|
||||
protected SLSRunner sls;
|
||||
protected String ongoingInvariantFile;
|
||||
protected String exitInvariantFile;
|
||||
|
||||
/**
 * Initialization hook that every concrete test class must implement.
 * Implementations may assign {@code ongoingInvariantFile} and
 * {@code exitInvariantFile}; when either is left null the corresponding
 * invariant check in this class is skipped.
 */
@Before
|
||||
public abstract void setup();
|
||||
|
||||
@After
|
||||
public void tearDown() throws InterruptedException {
|
||||
|
@ -82,22 +89,30 @@ public class BaseSLSRunnerTest {
|
|||
switch (traceType) {
|
||||
case "OLD_SLS":
|
||||
args = new String[] {"-inputsls", traceLocation, "-output",
|
||||
slsOutputDir.getAbsolutePath()};
|
||||
slsOutputDir.getAbsolutePath() };
|
||||
break;
|
||||
case "OLD_RUMEN":
|
||||
args = new String[] {"-inputrumen", traceLocation, "-output",
|
||||
slsOutputDir.getAbsolutePath()};
|
||||
slsOutputDir.getAbsolutePath() };
|
||||
break;
|
||||
default:
|
||||
args = new String[] {"-tracetype", traceType, "-tracelocation",
|
||||
traceLocation, "-output", slsOutputDir.getAbsolutePath()};
|
||||
traceLocation, "-output", slsOutputDir.getAbsolutePath() };
|
||||
}
|
||||
|
||||
if (nodeFile != null) {
|
||||
args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile});
|
||||
args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile });
|
||||
}
|
||||
|
||||
// enable continuous invariant checks
|
||||
conf.set(YarnConfiguration.RM_SCHEDULER, schedulerType);
|
||||
if (ongoingInvariantFile != null) {
|
||||
conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
|
||||
MetricsInvariantChecker.class.getCanonicalName());
|
||||
conf.set(MetricsInvariantChecker.INVARIANTS_FILE, ongoingInvariantFile);
|
||||
conf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true);
|
||||
}
|
||||
|
||||
sls = new SLSRunner(conf);
|
||||
sls.run(args);
|
||||
|
||||
|
@ -115,6 +130,22 @@ public class BaseSLSRunnerTest {
|
|||
}
|
||||
timeout--;
|
||||
}
|
||||
shutdownHookInvariantCheck();
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks exit invariants (e.g., number of apps submitted, completed, etc.).
|
||||
*/
|
||||
private void shutdownHookInvariantCheck() {
|
||||
|
||||
if(exitInvariantFile!=null) {
|
||||
MetricsInvariantChecker ic = new MetricsInvariantChecker();
|
||||
Configuration conf = new Configuration();
|
||||
conf.set(MetricsInvariantChecker.INVARIANTS_FILE, exitInvariantFile);
|
||||
conf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true);
|
||||
ic.init(conf, null, null);
|
||||
ic.editSchedule();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -70,4 +70,8 @@ public class TestReservationSystemInvariants extends BaseSLSRunnerTest {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Empty implementation of the base-class hook: assigns neither
 * {@code ongoingInvariantFile} nor {@code exitInvariantFile}.
 */
@Override
|
||||
public void setup() {
|
||||

||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,9 @@ package org.apache.hadoop.yarn.sls;
|
|||
|
||||
import net.jcip.annotations.NotThreadSafe;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
@ -38,12 +41,8 @@ public class TestSLSRunner extends BaseSLSRunnerTest {
|
|||
@Parameters(name = "Testing with: {1}, {0}, (nodeFile {3})")
|
||||
public static Collection<Object[]> data() {
|
||||
|
||||
String capScheduler =
|
||||
"org.apache.hadoop.yarn.server.resourcemanager.scheduler."
|
||||
+ "capacity.CapacityScheduler";
|
||||
String fairScheduler =
|
||||
"org.apache.hadoop.yarn.server.resourcemanager.scheduler."
|
||||
+ "fair.FairScheduler";
|
||||
String capScheduler = CapacityScheduler.class.getCanonicalName();
|
||||
String fairScheduler = FairScheduler.class.getCanonicalName();
|
||||
String slsTraceFile = "src/test/resources/inputsls.json";
|
||||
String rumenTraceFile = "src/main/data/2jobs2min-rumen-jh.json";
|
||||
String synthTraceFile = "src/test/resources/syn.json";
|
||||
|
@ -73,6 +72,12 @@ public class TestSLSRunner extends BaseSLSRunnerTest {
|
|||
});
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
ongoingInvariantFile = "src/test/resources/ongoing-invariants.txt";
|
||||
exitInvariantFile = "src/test/resources/exit-invariants.txt";
|
||||
}
|
||||
|
||||
@Test(timeout = 60000)
|
||||
@SuppressWarnings("all")
|
||||
public void testSimulatorRunning() throws Exception {
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
ActiveApplications >= 0
|
||||
AppsCompleted >= 0
|
||||
AppsFailed >= 0
|
||||
AppsKilled >= 0
|
||||
AppsPending >= 0
|
||||
AppsRunning >= 0
|
||||
AppsSubmitted >= 0
|
||||
PendingContainers >= 0
|
|
@ -0,0 +1,19 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# log4j configuration used during build and unit tests
|
||||
|
||||
log4j.rootLogger=info,stdout
|
||||
log4j.threshold=ALL
|
||||
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n
|
|
@ -0,0 +1,54 @@
|
|||
running_0 >= 0
|
||||
running_60 >= 0
|
||||
running_300 >= 0
|
||||
running_1440 >= 0
|
||||
AppsSubmitted >= 0
|
||||
AppsRunning >= 0
|
||||
AppsPending >= 0
|
||||
AppsCompleted >= 0
|
||||
AppsKilled >= 0
|
||||
AppsFailed >= 0
|
||||
AllocatedMB >= 0
|
||||
AllocatedVCores >= 0
|
||||
AllocatedContainers >= 0
|
||||
AggregateContainersAllocated >= 0
|
||||
AggregateNodeLocalContainersAllocated >= 0
|
||||
AggregateRackLocalContainersAllocated >= 0
|
||||
AggregateOffSwitchContainersAllocated >= 0
|
||||
AggregateContainersReleased >= 0
|
||||
AggregateContainersPreempted >= 0
|
||||
AvailableMB >= 0
|
||||
AvailableVCores >= 0
|
||||
PendingMB >= 0
|
||||
PendingVCores >= 0
|
||||
PendingContainers >= 0
|
||||
ReservedMB >= 0
|
||||
ReservedVCores >= 0
|
||||
ReservedContainers >= 0
|
||||
ActiveUsers >= 0
|
||||
ActiveApplications >= 0
|
||||
AppAttemptFirstContainerAllocationDelayNumOps >= 0
|
||||
AppAttemptFirstContainerAllocationDelayAvgTime >= 0
|
||||
MemNonHeapUsedM >= 0
|
||||
MemNonHeapCommittedM >= 0
|
||||
MemNonHeapMaxM >= 0 || MemNonHeapMaxM == -1
|
||||
MemHeapUsedM >= 0
|
||||
MemHeapCommittedM >= 0
|
||||
MemHeapMaxM >= 0
|
||||
MemMaxM >= 0
|
||||
GcCountPS_Scavenge >= 0
|
||||
GcTimeMillisPS_Scavenge >= 0
|
||||
GcCountPS_MarkSweep >= 0
|
||||
GcTimeMillisPS_MarkSweep >= 0
|
||||
GcCount >= 0
|
||||
GcTimeMillis >= 0
|
||||
ThreadsNew >= 0
|
||||
ThreadsRunnable >= 0
|
||||
ThreadsBlocked >= 0
|
||||
ThreadsWaiting >= 0
|
||||
ThreadsTimedWaiting >= 0
|
||||
ThreadsTerminated >= 0
|
||||
LogFatal >= 0
|
||||
LogError >= 0
|
||||
LogWarn >= 0
|
||||
LogInfo >= 0
|
Loading…
Reference in New Issue