YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1527065 13f79535-47bb-0310-9956-ffa450edef68
Alejandro Abdelnur 2013-09-27 20:32:06 +00:00
parent c03040a498
commit 75f9db1c4d
71 changed files with 36836 additions and 0 deletions

@@ -0,0 +1,45 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly>
<id>hadoop-sls</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/bin</directory>
<outputDirectory>sls/bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/html</directory>
<outputDirectory>sls/html</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/sample-conf</directory>
<outputDirectory>sls/sample-conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/data</directory>
<outputDirectory>sls/sample-data</outputDirectory>
</fileSet>
</fileSets>
</assembly>

@@ -93,6 +93,17 @@
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-sls/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
<includes>
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>

@@ -729,6 +729,16 @@
<artifactId>hsqldb</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-sls</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</dependencyManagement>

@@ -95,6 +95,7 @@
<item name="Fair Scheduler" href="hadoop-yarn/hadoop-yarn-site/FairScheduler.html"/>
<item name="Web Application Proxy" href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html"/>
<item name="YARN Commands" href="hadoop-yarn/hadoop-yarn-site/YarnCommands.html"/>
<item name="Scheduler Load Simulator" href="hadoop-sls/SchedulerLoadSimulator.html"/>
</menu>
<menu name="YARN REST APIs" inherit="top">

@@ -0,0 +1,12 @@
Yarn Scheduler Load Simulator (SLS)
SLS is a stress and performance harness for the Yarn ResourceManager scheduler
that exercises the scheduler implementation by simulating the cluster size and
the application load, without requiring a real cluster or real applications.
SLS runs a regular RM without RPC endpoints and uses NodeManager and
ApplicationMaster simulators to send and receive events that mimic cluster and
application load behavior.
The size of the cluster and the application load are scripted in configuration
files.
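For illustration, one job record in the SLS jobs trace uses the field names
read by SLSRunner and written by RumenToSLSConverter later in this commit
(the concrete values and names below are invented):

    {
      "am.type" : "mapreduce",
      "job.start.ms" : 0,
      "job.end.ms" : 95375,
      "job.queue.name" : "sls_queue_1",
      "job.id" : "job_1",
      "job.user" : "default",
      "job.tasks" : [ {
        "container.host" : "/default-rack/node1",
        "container.start.ms" : 6664,
        "container.end.ms" : 23707,
        "container.priority" : 20,
        "container.type" : "map"
      } ]
    }

container.priority is 20 for map tasks and 10 for reduce tasks (see
RumenToSLSConverter.createSLSTasks below), and container.host uses Rumen's
/rack/host form.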

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter>
<!-- Ignore comparedTo, equals warnings -->
<Match>
<Class name="org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator" />
<Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS" />
</Match>
</FindBugsFilter>

@@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>2.3.0-SNAPSHOT</version>
<relativePath>../../hadoop-project</relativePath>
</parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-sls</artifactId>
<version>2.3.0-SNAPSHOT</version>
<description>Apache Hadoop Scheduler Load Simulator</description>
<name>Apache Hadoop Scheduler Load Simulator</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-rumen</artifactId>
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-core</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-util</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<configuration>
<findbugsXmlOutput>true</findbugsXmlOutput>
<xmlOutput>true</xmlOutput>
<excludeFilterFile>${basedir}/dev-support/findbugs-exclude.xml</excludeFilterFile>
<effort>Max</effort>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>src/main/data/2jobs2min-rumen-jh.json</exclude>
<exclude>src/main/html/js/thirdparty/jquery.js</exclude>
<exclude>src/main/html/js/thirdparty/d3-LICENSE</exclude>
<exclude>src/main/html/js/thirdparty/d3.v3.js</exclude>
<exclude>src/main/html/simulate.html.template</exclude>
<exclude>src/main/html/simulate.info.html.template</exclude>
<exclude>src/main/html/track.html.template</exclude>
<exclude>src/test/resources/simulate.html.template</exclude>
<exclude>src/test/resources/simulate.info.html.template</exclude>
<exclude>src/test/resources/track.html.template</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>docs</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>site</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>dist</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>dist</id>
<phase>prepare-package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-sls</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

@@ -0,0 +1,55 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly>
<id>sls</id>
<formats>
<format>dir</format>
<format>tar.gz</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/bin</directory>
<outputDirectory>bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/data</directory>
<outputDirectory>sample-data</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/html</directory>
<outputDirectory>html</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/sample-conf</directory>
<outputDirectory>sample-conf</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<outputDirectory>/lib</outputDirectory>
<unpack>false</unpack>
<scope>compile</scope>
<useProjectArtifact>true</useProjectArtifact>
</dependencySet>
</dependencySets>
</assembly>

@@ -0,0 +1,106 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
###############################################################################
printUsage() {
echo "Usage: rumen2sls.sh <OPTIONS>"
echo " --rumen-file=<RUMEN_FILE>"
echo " --output-dir=<SLS_OUTPUT_DIR>"
echo " [--output-prefix=<PREFIX>] (default is sls)"
echo
}
###############################################################################
parseArgs() {
for i in "$@"
do
case $i in
--rumen-file=*)
rumenfile=${i#*=}
;;
--output-dir=*)
outputdir=${i#*=}
;;
--output-prefix=*)
outputprefix=${i#*=}
;;
*)
echo "Invalid option"
echo
printUsage
exit 1
;;
esac
done
if [[ "${rumenfile}" == "" || "${outputdir}" == "" ]] ; then
echo "Both --rumen-file ${rumenfile} and --output-dir \
${outputfdir} must be specified"
echo
printUsage
exit 1
fi
}
###############################################################################
calculateBasedir() {
# resolve links - $0 may be a softlink
PRG="${1}"
while [ -h "${PRG}" ]; do
ls=`ls -ld "${PRG}"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "${PRG}"`/"$link"
fi
done
BASEDIR=`dirname ${PRG}`
BASEDIR=`cd ${BASEDIR}/..;pwd`
}
###############################################################################
calculateClasspath() {
HADOOP_BASE=`which hadoop`
HADOOP_BASE=`dirname $HADOOP_BASE`
DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}"
}
###############################################################################
runSLSGenerator() {
if [[ "${outputprefix}" == "" ]] ; then
outputprefix="sls"
fi
slsJobs=${outputdir}/${outputprefix}-jobs.json
slsNodes=${outputdir}/${outputprefix}-nodes.json
args="-input ${rumenfile} -outputJobs ${slsJobs}";
args="${args} -outputNodes ${slsNodes}";
hadoop org.apache.hadoop.yarn.sls.RumenToSLSConverter ${args}
}
###############################################################################
calculateBasedir $0
calculateClasspath
parseArgs "$@"
runSLSGenerator
echo
echo "SLS simulation files available at: ${outputdir}"
echo
exit 0
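An example invocation, with illustrative paths:

    bash rumen2sls.sh --rumen-file=/path/to/rumen-trace.json \
        --output-dir=/tmp/sls

With the default prefix this writes /tmp/sls/sls-jobs.json and
/tmp/sls/sls-nodes.json via RumenToSLSConverter.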

@@ -0,0 +1,112 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
###############################################################################
printUsage() {
echo "Usage: slsrun.sh <OPTIONS>"
echo " --input-rumen|--input-sls=<FILE1,FILE2,...>"
echo " --output-dir=<SLS_SIMULATION_OUTPUT_DIRECTORY>"
echo " [--nodes=<SLS_NODES_FILE>]"
echo " [--track-jobs=<JOBID1,JOBID2,...>]"
echo " [--print-simulation]"
echo
}
###############################################################################
parseArgs() {
for i in "$@"
do
case $i in
--input-rumen=*)
inputrumen=${i#*=}
;;
--input-sls=*)
inputsls=${i#*=}
;;
--output-dir=*)
outputdir=${i#*=}
;;
--nodes=*)
nodes=${i#*=}
;;
--track-jobs=*)
trackjobs=${i#*=}
;;
--print-simulation)
printsimulation="true"
;;
*)
echo "Invalid option"
echo
printUsage
exit 1
;;
esac
done
if [[ "${inputrumen}" == "" && "${inputsls}" == "" ]] ; then
echo "Either --input-rumen or --input-sls must be specified"
echo
printUsage
exit 1
fi
if [[ "${outputdir}" == "" ]] ; then
echo "The output directory --output-dir must be specified"
echo
printUsage
exit 1
fi
}
###############################################################################
calculateClasspath() {
HADOOP_BASE=`which hadoop`
HADOOP_BASE=`dirname $HADOOP_BASE`
DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}:html"
}
###############################################################################
runSimulation() {
if [[ "${inputsls}" == "" ]] ; then
args="-inputrumen ${inputrumen}"
else
args="-inputsls ${inputsls}"
fi
args="${args} -output ${outputdir}"
if [[ "${nodes}" != "" ]] ; then
args="${args} -nodes ${nodes}"
fi
if [[ "${trackjobs}" != "" ]] ; then
args="${args} -trackjobs ${trackjobs}"
fi
if [[ "${printsimulation}" == "true" ]] ; then
args="${args} -printsimulation"
fi
hadoop org.apache.hadoop.yarn.sls.SLSRunner ${args}
}
###############################################################################
calculateClasspath
parseArgs "$@"
runSimulation
exit 0
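An example run over the converter output, again with illustrative paths:

    bash slsrun.sh --input-sls=/tmp/sls/sls-jobs.json \
        --nodes=/tmp/sls/sls-nodes.json --output-dir=/tmp/sls-run \
        --print-simulation

--input-rumen=<FILE> can be passed instead of --input-sls to have SLSRunner
consume Rumen traces directly.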

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -0,0 +1,26 @@
Copyright (c) 2013, Michael Bostock
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* The name Michael Bostock may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,334 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body {
font: 20px sans-serif;
}
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.axis text {
font-family: sans-serif;
font-size: 20px;
}
.line {
fill: none;
stroke: steelblue;
stroke-width: 3px;
}
.legend {
padding: 1px;
font: 18px sans-serif;
background: yellow;
box-shadow: 2px 2px 1px #888;
}
.title {
font: 24px sans-serif;
}
.divborder {
border-width: 1px;
border-style: solid;
border-color: black;
margin-top:10px
}
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="offset5" style="margin-top:20px; margin-bottom:20px">
Select the generated metrics log file (realtimetrack.json): <input type='file' id='jsonfile' /> <input type='button' value='Generate !' onClick='draw()' /><br>
</div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area1"></div>
<div class="divborder span8" id="area2"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area3"></div>
<div class="divborder span8" id="area4"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area5"></div>
<div class="divborder span8" id="area6"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area7"></div>
<div class="span7" id="area8"></div>
</div>
<p>&nbsp;</p>
<script>
// select file and draw
function draw() {
var filepath = document.getElementById('jsonfile').value;
if (filepath) {
for (var i = 1; i < 9; i ++) {
$('#area' + i).empty();
}
filepath = filepath.replace("C:\\fakepath\\", "");
drawCharts(filepath);
} else {
alert('Please choose a file first.');
}
}
function drawCharts(filepath) {
$.getJSON(filepath, function(data) {
var numQueues = 0;
var queueNames = new Array();
for (var j in data[0]) {
if (j.substring(0, 'queue'.length) === 'queue') {
queueNames[numQueues] = j;
numQueues ++;
}
}
numQueues /= 2;
// create graph
$.getJSON(filepath, function(data) {
var basetime = data[0].time;
data.forEach(function(d) {
d.time = (d.time - basetime) / 1000;
});
var legends = ["running.applications", "running.containers"];
drawEachChart("#area1", data, legends, "Cluster running applications & containers", "Number", 0, 0);
legends = ["jvm.free.memory", "jvm.max.memory", "jvm.total.memory"];
drawEachChart("#area2", data, legends, "JVM memory", "Memory (GB)", 0, 0);
legends = ["cluster.allocated.memory", "cluster.available.memory"];
drawEachChart("#area3", data, legends, "Cluster allocated & available memory", "Memory (GB)", 0, 0);
legends = ["cluster.allocated.vcores", "cluster.available.vcores"];
drawEachChart("#area4", data, legends, "Cluster allocated & available vcores", "Number", 0, 0);
for (var i = 0; i < numQueues; i ++) {
legends[i] = queueNames[i * 2];
}
drawEachChart("#area5", data, legends, "Queue allocated memory", "Memory (GB)", 1, 100);
for (var i = 0; i < numQueues; i ++) {
legends[i] = queueNames[i * 2 + 1];
}
drawEachChart("#area6", data, legends, "Queue allocated vcores", "VCores", 1, 90);
legends = [
"scheduler.allocate.timecost",
"scheduler.handle-NODE_ADDED.timecost", "scheduler.handle-NODE_REMOVED.timecost",
"scheduler.handle-NODE_UPDATE.timecost", "scheduler.handle-APP_ADDED.timecost",
"scheduler.handle-APP_REMOVED.timecost", "scheduler.handle-CONTAINER_EXPIRED.timecost"
];
drawEachChart("#area7", data, legends, "Scheduler allocate & handle operations timecost", "Timecost (ms)", 0, 210);
});
});
}
// draw different chart
function drawEachChart(chartArea, data, legends, title, yLabelTitle, isArea, pl) {
// drawchart
var margin = {top: 50, right: 250, bottom: 50, left: 70};
var width = 800 - margin.left - margin.right;
var height = 420 - margin.top - margin.bottom;
var x = d3.scale.linear().range([0, width]);
var y = d3.scale.linear().range([height, 0]);
var xAxis = d3.svg.axis().scale(x).orient("bottom");
var yAxis = d3.svg.axis().scale(y).orient("left");
var color = d3.scale.category10();
if (isArea == 1){
var area = d3.svg.area()
.x(function(d) { return x(d.time); })
.y0(function(d) { return y(d.y0); })
.y1(function(d) { return y(d.y0 + d.y); });
var stack = d3.layout.stack()
.values(function(d) { return d.values; });
// create chart
var svg = d3.select(chartArea).append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
color.domain(d3.keys(data[0])
.filter(function(key) {return $.inArray(key, legends) !== -1; }));
var points = stack(color.domain().map(function(name) {
return {
name: name,
values: data.map(function(d) {
return {time: d.time, y: d[name]};
})
};
}));
// x & y
x.domain(d3.extent(data, function(d) { return d.time; }));
y.domain([
d3.min(points, function(c) {
return 0.9 * d3.min(c.values, function(v) { return v.y; }); }),
d3.max(points, function(c) {
return 1.1 * d3.max(c.values, function(v) { return v.y + v.y0; }); })
]);
svg.append("g").attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.append("text")
.attr("transform", "translate(" + (width / 2) + ", 45)")
.style("text-anchor", "middle")
.text("Time (s)");
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 0 - margin.left)
.attr("x",0 - (height / 2))
.attr("dy", "1em")
.style("text-anchor", "middle")
.text(yLabelTitle);
var point = svg.selectAll(".point")
.data(points)
.enter().append("g");
point.append("path")
.attr("class", "area")
.attr("d", function(d) { return area(d.values); })
.style("fill", function(d) { return color(d.name); });
} else {
// lines
var line = d3.svg.line()
.interpolate("basis")
.x(function(d) { return x(d.time); })
.y(function(d) { return y(d.value); });
// create chart
var svg = d3.select(chartArea).append("svg")
.attr("id", title)
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
color.domain(d3.keys(data[0])
.filter(function(key) {return $.inArray(key, legends) !== -1; }));
var values = color.domain().map(function(name) {
return {
name: name,
values: data.map(function(d) {
return {time: d.time, value: +d[name]};
})
};
});
// x & y
x.domain(d3.extent(data, function(d) { return d.time; }));
y.domain([
d3.min(values, function(c) { return 0.9 * d3.min(c.values, function(v) { return v.value; }); }),
d3.max(values, function(c) { return 1.1 * d3.max(c.values, function(v) { return v.value; }); })
]);
svg.append("g").attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.append("text")
.attr("transform", "translate(" + (width / 2) + ", 45)")
.style("text-anchor", "middle")
.text("Time (s)");
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 0 - margin.left)
.attr("x",0 - (height / 2))
.attr("dy", "1em")
.style("text-anchor", "middle")
.text(yLabelTitle);
var value = svg.selectAll(".city")
.data(values)
.enter().append("g")
.attr("class", "city");
value.append("path")
.attr("class", "line")
.attr("d", function(d) { return line(d.values); })
.style("stroke", function(d) { return color(d.name); });
}
// title
svg.append("text")
.attr("x", (width / 2))
.attr("y", 10 - (margin.top / 2))
.attr("text-anchor", "middle")
.text(title);
// legend
var legend = svg.append("g")
.attr("class", "legend")
.attr("x", width - 50)
.attr("y", 25)
.attr("height", 120)
.attr("width", 140);
legend.selectAll('g').data(legends)
.enter()
.append('g')
.each(function(d, i) {
var g = d3.select(this);
g.append("rect")
.attr("x", width - 5 - pl)
.attr("y", i*20 + 0)
.attr("width", 10)
.attr("height", 10)
.style("fill", color(d));
g.append("text")
.attr("x", width + 15 - pl)
.attr("y", i * 20 + 8)
.attr("height",30)
.attr("width",250)
.style("fill", color(d))
.text(d);
});
}
</script>
</body>
</html>

@@ -0,0 +1,278 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body '{' font: 20px sans-serif; '}'
.axis path,
.axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
.axis text '{' font-family: sans-serif; font-size: 20px; '}'
.line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
.legend '{'
padding: 5px;
font: 18px sans-serif;
background: yellow;
box-shadow: 2px 2px 1px #888;
'}'
.title '{' font: 24px sans-serif; '}'
.divborder '{'
border-width: 1px;
border-style: solid;
border-color: black;
margin-top:10px
'}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="span10 offset2"><br>
<input type="button" style="float: right;" value="Stop"
onClick="stop()" />
</div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area1"></div>
<div class="divborder span8" id="area2"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area3"></div>
<div class="divborder span8" id="area4"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area5"></div>
<div class="divborder span8" id="area6"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area7"></div>
<div class="span8" id="area8"></div>
</div><br/><br/>
<script>
var basetime = 0;
var running = 1;
var data = [];
var width, height;
var legends = [];
var titles = [];
var yLabels = [];
var isAreas = [];
var svgs = [];
var xs = [];
var ys = [];
var xAxiss = [];
var yAxiss = [];
var lineAreas = [];
var stacks = [];
// legends
legends[0] = [''running.applications'', ''running.containers''];
legends[1] = [''jvm.free.memory'', ''jvm.max.memory'', ''jvm.total.memory''];
legends[2] = [''cluster.allocated.memory'', ''cluster.available.memory''];
legends[3] = [''cluster.allocated.vcores'', ''cluster.available.vcores''];
legends[4] = [];
legends[5] = [];
{0}
legends[6] = [''scheduler.allocate.timecost'',
''scheduler.handle-NODE_ADDED.timecost'',
''scheduler.handle-NODE_REMOVED.timecost'',
''scheduler.handle-NODE_UPDATE.timecost'',
''scheduler.handle-APP_ADDED.timecost'',
''scheduler.handle-APP_REMOVED.timecost'',
''scheduler.handle-CONTAINER_EXPIRED.timecost''];
// title
titles[0] = ''Cluster running applications & containers'';
titles[1] = ''JVM memory'';
titles[2] = ''Cluster allocated & available memory'';
titles[3] = ''Cluster allocated & available vcores'';
titles[4] = ''Queue allocated memory'';
titles[5] = ''Queue allocated vcores'';
titles[6] = ''Scheduler allocate & handle operation timecost'';
// ylabels
yLabels[0] = ''Number'';
yLabels[1] = ''Memory (GB)'';
yLabels[2] = ''Memory (GB)'';
yLabels[3] = ''Number'';
yLabels[4] = ''Memory (GB)'';
yLabels[5] = ''Number'';
yLabels[6] = ''Timecost (ms)'';
// is area?
isAreas = [0, 0, 0, 0, 1, 1, 0];
// draw all charts
for (var i = 0; i < 7; i ++) '{'
drawEachChart(i);
'}'
// draw each chart
function drawEachChart(index) '{'
var margin = '{'top: 50, right: 250, bottom: 50, left: 70'}';
width = 750 - margin.left - margin.right;
height = 420 - margin.top - margin.bottom;
xs[index] = d3.scale.linear().range([0, width]);
ys[index] = d3.scale.linear().range([height, 0]);
xAxiss[index] = d3.svg.axis().scale(xs[index]).orient(''bottom'');
yAxiss[index] = d3.svg.axis().scale(ys[index]).orient(''left'');
if (isAreas[index] == 1)'{'
lineAreas[index] = d3.svg.area()
.x(function(d) '{' return xs[index](d.time); '}')
.y0(function(d) '{' return ys[index](d.y0); '}')
.y1(function(d) '{' return ys[index](d.y0 + d.y); '}');
stacks[index] = d3.layout.stack()
.values(function(d) '{' return d.values; '}');
'}' else '{'
lineAreas[index] = d3.svg.line()
.interpolate(''basis'')
.x(function(d) '{' return xs[index](d.time); '}')
.y(function(d) '{' return ys[index](d.value); '}');
'}'
svgs[index] = d3.select(''#area'' + (index + 1)).append(''svg'')
.attr(''width'', width + margin.left + margin.right)
.attr(''height'', height + margin.top + margin.bottom)
.append(''g'')
.attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
// x, y and title
svgs[index].append(''text'')
.attr(''transform'', ''translate('' + (width / 2) + '' ,'' +
(height + margin.bottom - 10 ) + '')'')
.style(''text-anchor'', ''middle'')
.text(''Time ({1})'');
svgs[index].append(''text'')
.attr(''transform'', ''rotate(-90)'')
.attr(''y'', 0 - margin.left)
.attr(''x'',0 - (height / 2))
.attr(''dy'', ''1em'')
.style(''text-anchor'', ''middle'')
.text(yLabels[index]);
svgs[index].append(''text'')
.attr(''x'', (width / 2))
.attr(''y'', 10 - (margin.top / 2))
.attr(''text-anchor'', ''middle'')
.text(titles[index]);
'}'
// request data
function requestData() '{'
$.ajax('{'url: ''simulateMetrics'',
success: function(point) '{'
// update data
if (basetime == 0) basetime = point.time;
point.time = (point.time - basetime) / {2};
data.push(point);
// clear old
for (var i = 0; i < 7; i ++) '{'
svgs[i].selectAll(''g.tick'').remove();
svgs[i].selectAll(''g'').remove();
var color = d3.scale.category10();
color.domain(d3.keys(data[0]).filter(function(key) '{'
return $.inArray(key, legends[i]) !== -1;
'}'));
var values;
if (isAreas[i] == 1) '{'
values = stacks[i](color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{'time: d.time, y: d[name]'}';
'}')
'}'
'}'));
xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
ys[i].domain([
d3.min(values, function(c) '{' return 0; '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
function(v) '{' return v.y + v.y0; '}'); '}')
]);
'}' else '{'
values = color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{'time: d.time, value: d[name]'}';
'}')
'}'
'}');
xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
ys[i].domain([
d3.min(values, function(c) '{' return 0; '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
function(v) '{' return v.value; '}'); '}')
]);
'}'
svgs[i].append(''g'').attr(''class'', ''x axis'')
.attr(''transform'', ''translate(0,'' + height + '')'').call(xAxiss[i]);
svgs[i].append(''g'').attr(''class'', ''y axis'').call(yAxiss[i]);
var value = svgs[i].selectAll(''.path'')
.data(values).enter().append(''g'').attr(''class'', ''line'');
if(isAreas[i] == 1) '{'
value.append(''path'').attr(''class'', ''area'')
.attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
.style(''fill'', function(d) '{'return color(d.name); '}');
'}' else '{'
value.append(''path'').attr(''class'', ''line'')
.attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
.style(''stroke'', function(d) '{'return color(d.name); '}');
'}'
// legend
var legend = svgs[i].append(''g'')
.attr(''class'', ''legend'')
.attr(''x'', width + 5)
.attr(''y'', 25)
.attr(''height'', 120)
.attr(''width'', 140);
legend.selectAll(''g'').data(legends[i])
.enter()
.append(''g'')
.each(function(d, i) '{'
var g = d3.select(this);
g.append(''rect'')
.attr(''x'', width + 5)
.attr(''y'', i*20)
.attr(''width'', 10)
.attr(''height'', 10)
.style(''fill'', color(d));
g.append(''text'')
.attr(''x'', width + 25)
.attr(''y'', i * 20 + 8)
.attr(''height'',30)
.attr(''width'',250)
.style(''fill'', color(d))
.text(d);
'}');
'}'
if(running == 1)
setTimeout(requestData, {3});
'}',
cache: false
'}');
'}'
// stop
function stop() '{'
running = 0;
'}'
requestData();
</script>
</body>
</html>
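Note: the doubled single quotes and the quoted braces ('{' and '}') in this
template and in the two .template files below are java.text.MessageFormat
escapes, not corruption. The SLS web server renders each template through
MessageFormat, which substitutes the {0}, {1}, ... placeholders (from context
here: queue legend setup, the time unit, the time divisor, and the refresh
interval). A minimal sketch of that rendering step, with assumed variable
names:

    import java.text.MessageFormat;

    // MessageFormat unescapes '' -> ' and '{' -> { while substituting
    // {0}..{3} with the arguments, yielding plain HTML/JavaScript.
    String page = MessageFormat.format(templateSource,
        queueLegendJs,       // {0}: per-queue legend setup (assumed)
        timeUnit,            // {1}: time axis unit label (assumed)
        timeDivisor,         // {2}: divisor applied to timestamps (assumed)
        refreshIntervalMs);  // {3}: setTimeout refresh period (assumed)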

@@ -0,0 +1,50 @@
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
.td1 '{'
border-width: 1px;
padding: 8px;
border-style: solid;
border-color: #666666;
background-color: #dedede;
width: 50%;
'}'
table.gridtable '{'
font-family: verdana,arial,sans-serif;
font-size:11px;
color:#333333;
border-width: 1px;
border-color: #666666;
border-collapse: collapse;
margin-top: 80px;
'}'
.td2 '{'
border-width: 1px;
padding: 8px;
border-style: solid;
border-color: #666666;
background-color: #ffffff;
width: 50%;
'}'
</style>
</head>
<body>
<table class="gridtable" align="center" width="400px">
<tr>
<td colspan="2" class="td2" align="center">
<b>SLS Simulate Information</b>
</td>
</tr>
{0}
<tr>
<td align="center" height="80px">
<a href="simulate">Simulation Charts</a>
</td>
<td align="center">
<a href="track">Tracked Jobs & Queues</a>
</td>
</tr>
</table>
</body>
</html>

@@ -0,0 +1,193 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body '{' font: 20px sans-serif;'}'
.axis path,
.axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges;'}'
.axis text '{' font-family: sans-serif; font-size: 20px; '}'
.line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
.legend '{' padding: 5px; font: 18px sans-serif; background: yellow;
box-shadow: 2px 2px 1px #888;'}'
.title '{' font: 24px sans-serif; '}'
.divborder '{' border-width: 1px; border-style: solid; border-color: black;
margin-top:10px '}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="offset4 span8"><br/><br/><br/>
Select Tracked Job/Queue:
<select id="trackedSelect" onchange="redrawChart()">
<option>----Queue----</option>
{0}
<option>----Job----</option>
{1}
</select>
<input type="button" style="float: right;" value="Stop"
onClick="stop()" />
</div>
</div>
<div class="row">
<div class="divborder span9 offset4" id="area1"></div>
</div>
<script>
// global variables
var basetime = 0;
var running = 1;
var para = '''';
var data = [];
var path, line, svg;
var x, y;
var width, height;
var xAxis, yAxis;
var legends = [''usage.memory'', ''demand.memory'', ''maxshare.memory'',
''minshare.memory'', ''fairshare.memory''];
// stop function
function stop() '{'
running = 0;
'}'
// select changed event
function redrawChart() '{'
var value = $(''#trackedSelect'').val();
if (value.substring(0, ''Job ''.length) === ''Job ''
|| value.substring(0, ''Queue ''.length) === ''Queue '') '{'
para = value;
running = 0;
basetime = 0;
data = [];
$(''#area1'').empty();
drawChart(''Tracking '' + value);
running = 1;
requestData();
}
}
// draw chart
function drawChart(title) '{'
// location
var margin = '{'top: 50, right: 150, bottom: 50, left: 80'}';
width = 800 - margin.left - margin.right;
height = 420 - margin.top - margin.bottom;
x = d3.scale.linear().range([0, width]);
y = d3.scale.linear().range([height, 0]);
xAxis = d3.svg.axis().scale(x).orient(''bottom'');
yAxis = d3.svg.axis().scale(y).orient(''left'');
// lines
line = d3.svg.line().interpolate(''basis'')
.x(function(d) '{' return x(d.time); })
.y(function(d) '{' return y(d.value); });
// create chart
svg = d3.select(''#area1'').append(''svg'')
.attr(''width'', width + margin.left + margin.right)
.attr(''height'', height + margin.top + margin.bottom)
.append(''g'')
.attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
// axis labels
svg.append(''text'')
.attr(''transform'', ''translate('' + (width / 2) + '','' + (height + margin.bottom - 5 ) + '')'')
.style(''text-anchor'', ''middle'')
.text(''Time ({2})'');
svg.append(''text'')
.attr(''transform'', ''rotate(-90)'')
.attr(''y'', 0 - margin.left)
.attr(''x'',0 - (height / 2))
.attr(''dy'', ''1em'')
.style(''text-anchor'', ''middle'')
.text(''Memory (GB)'');
// title
svg.append(''text'')
.attr(''x'', (width / 2))
.attr(''y'', 10 - (margin.top / 2))
.attr(''text-anchor'', ''middle'')
.text(title);
'}'
// request data
function requestData() '{'
$.ajax('{'url: ''trackMetrics?t='' + para,
success: function(point) '{'
// clear old
svg.selectAll(''g.tick'').remove();
svg.selectAll(''g'').remove();
if(basetime == 0) basetime = point.time;
point.time = (point.time - basetime)/{3};
data.push(point);
var color = d3.scale.category10();
color.domain(d3.keys(data[0]).filter(function(key) '{'
return $.inArray(key, legends) !== -1;
'}'));
var values = color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{' time: d.time, value: d[name]'}';
'}')
'}';
'}');
// set x/y range
x.domain(d3.extent(data, function(d) '{' return d.time; '}'));
y.domain([
d3.min(values, function(c) '{' return 0 '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values, function(v) '{' return v.value; '}'); '}')
]);
svg.append(''g'').attr(''class'', ''x axis'')
.attr(''transform'', ''translate(0,'' + height + '')'').call(xAxis);
svg.append(''g'').attr(''class'', ''y axis'').call(yAxis);
var value = svg.selectAll(''.path'')
.data(values).enter().append(''g'').attr(''class'', ''line'');
value.append(''path'').attr(''class'', ''line'')
.attr(''d'', function(d) '{'return line(d.values); '}')
.style(''stroke'', function(d) '{'return color(d.name); '}');
// legend
var legend = svg.append(''g'')
.attr(''class'', ''legend'')
.attr(''x'', width + 5)
.attr(''y'', 25)
.attr(''height'', 120)
.attr(''width'', 180);
legend.selectAll(''g'').data(legends)
.enter()
.append(''g'')
.each(function(d, i) '{'
var g = d3.select(this);
g.append(''rect'')
.attr(''x'', width + 5)
.attr(''y'', i * 20)
.attr(''width'', 10)
.attr(''height'', 10)
.style(''fill'', color(d));
g.append(''text'')
.attr(''x'', width + 25)
.attr(''y'', i * 20 + 8)
.attr(''height'',30)
.attr(''width'',250)
.style(''fill'', color(d))
.text(d);
'}');
if(running == 1)
setTimeout(requestData, {4});
'}',
cache: false
'}');
'}'
</script>
</body>
</html>

@@ -0,0 +1,234 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.ObjectWriter;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
public class RumenToSLSConverter {
private static final String EOL = System.getProperty("line.separator");
private static long baseline = 0;
private static Map<String, Set<String>> rackNodeMap =
new TreeMap<String, Set<String>>();
public static void main(String args[]) throws Exception {
Options options = new Options();
options.addOption("input", true, "input rumen json file");
options.addOption("outputJobs", true, "output jobs file");
options.addOption("outputNodes", true, "output nodes file");
CommandLineParser parser = new GnuParser();
CommandLine cmd = parser.parse(options, args);
if (! cmd.hasOption("input") ||
! cmd.hasOption("outputJobs") ||
! cmd.hasOption("outputNodes")) {
System.err.println();
System.err.println("ERROR: Missing input or output file");
System.err.println();
System.err.println("LoadGenerator creates a SLS script " +
"from a Hadoop Rumen output");
System.err.println();
System.err.println("Options: -input FILE -outputJobs FILE " +
"-outputNodes FILE");
System.err.println();
System.exit(1);
}
String inputFile = cmd.getOptionValue("input");
String outputJsonFile = cmd.getOptionValue("outputJobs");
String outputNodeFile = cmd.getOptionValue("outputNodes");
// check existing
if (! new File(inputFile).exists()) {
System.err.println();
System.err.println("ERROR: input does not exist");
System.exit(1);
}
if (new File(outputJsonFile).exists()) {
System.err.println();
System.err.println("ERROR: output job file is existing");
System.exit(1);
}
if (new File(outputNodeFile).exists()) {
System.err.println();
System.err.println("ERROR: output node file is existing");
System.exit(1);
}
File jsonFile = new File(outputJsonFile);
if (! jsonFile.getParentFile().exists()
&& ! jsonFile.getParentFile().mkdirs()) {
System.err.println("ERROR: Cannot create output directory in path: "
+ jsonFile.getParentFile().getAbsoluteFile());
System.exit(1);
}
File nodeFile = new File(outputNodeFile);
if (! nodeFile.getParentFile().exists()
&& ! nodeFile.getParentFile().mkdirs()) {
System.err.println("ERROR: Cannot create output directory in path: "
+ nodeFile.getParentFile().getAbsoluteFile());
System.exit(1);
}
generateSLSLoadFile(inputFile, outputJsonFile);
generateSLSNodeFile(outputNodeFile);
}
private static void generateSLSLoadFile(String inputFile, String outputFile)
throws IOException {
Reader input = new FileReader(inputFile);
try {
Writer output = new FileWriter(outputFile);
try {
ObjectMapper mapper = new ObjectMapper();
ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
Iterator<Map> i = mapper.readValues(
new JsonFactory().createJsonParser(input), Map.class);
while (i.hasNext()) {
Map m = i.next();
output.write(writer.writeValueAsString(createSLSJob(m)) + EOL);
}
} finally {
output.close();
}
} finally {
input.close();
}
}
@SuppressWarnings("unchecked")
private static void generateSLSNodeFile(String outputFile)
throws IOException {
Writer output = new FileWriter(outputFile);
try {
ObjectMapper mapper = new ObjectMapper();
ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
for (Map.Entry<String, Set<String>> entry : rackNodeMap.entrySet()) {
Map rack = new LinkedHashMap();
rack.put("rack", entry.getKey());
List nodes = new ArrayList();
for (String name : entry.getValue()) {
Map node = new LinkedHashMap();
node.put("node", name);
nodes.add(node);
}
rack.put("nodes", nodes);
output.write(writer.writeValueAsString(rack) + EOL);
}
} finally {
output.close();
}
}
@SuppressWarnings("unchecked")
private static Map createSLSJob(Map rumenJob) {
Map json = new LinkedHashMap();
long jobStart = (Long) rumenJob.get("submitTime");
long jobFinish = (Long) rumenJob.get("finishTime");
String jobId = rumenJob.get("jobID").toString();
String queue = rumenJob.get("queue").toString();
String user = rumenJob.get("user").toString();
if (baseline == 0) {
baseline = jobStart;
}
jobStart -= baseline;
jobFinish -= baseline;
long offset = 0;
if (jobStart < 0) {
System.out.println("Warning: reset job " + jobId + " start time to 0.");
offset = -jobStart;
jobFinish = jobFinish - jobStart;
jobStart = 0;
}
json.put("am.type", "mapreduce");
json.put("job.start.ms", jobStart);
json.put("job.end.ms", jobFinish);
json.put("job.queue.name", queue);
json.put("job.id", jobId);
json.put("job.user", user);
List maps = createSLSTasks("map",
(List) rumenJob.get("mapTasks"), offset);
List reduces = createSLSTasks("reduce",
(List) rumenJob.get("reduceTasks"), offset);
List tasks = new ArrayList();
tasks.addAll(maps);
tasks.addAll(reduces);
json.put("job.tasks", tasks);
return json;
}
@SuppressWarnings("unchecked")
private static List createSLSTasks(String taskType,
List rumenTasks, long offset) {
int priority = taskType.equals("reduce") ? 10 : 20;
List array = new ArrayList();
for (Object e : rumenTasks) {
Map rumenTask = (Map) e;
for (Object ee : (List) rumenTask.get("attempts")) {
Map rumenAttempt = (Map) ee;
long taskStart = (Long) rumenAttempt.get("startTime");
long taskFinish = (Long) rumenAttempt.get("finishTime");
String hostname = (String) rumenAttempt.get("hostName");
taskStart = taskStart - baseline + offset;
taskFinish = taskFinish - baseline + offset;
Map task = new LinkedHashMap();
task.put("container.host", hostname);
task.put("container.start.ms", taskStart);
task.put("container.end.ms", taskFinish);
task.put("container.priority", priority);
task.put("container.type", taskType);
array.add(task);
String rackHost[] = SLSUtils.getRackHostName(hostname);
if (rackNodeMap.containsKey(rackHost[0])) {
rackNodeMap.get(rackHost[0]).add(rackHost[1]);
} else {
Set<String> hosts = new TreeSet<String>();
hosts.add(rackHost[1]);
rackNodeMap.put(rackHost[0], hosts);
}
}
}
return array;
}
}
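For reference, each record generateSLSNodeFile writes to the nodes file
groups one rack with its hosts; with invented names it looks like:

    {
      "rack" : "default-rack",
      "nodes" : [ {
        "node" : "node1"
      }, {
        "node" : "node2"
      } ]
    }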

@@ -0,0 +1,526 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.text.MessageFormat;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Random;
import java.util.Arrays;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
import org.apache.hadoop.tools.rumen.LoggedTaskAttempt;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.sls.appmaster.AMSimulator;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;
import org.apache.hadoop.yarn.sls.nodemanager.NMSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
import org.apache.log4j.Logger;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;
public class SLSRunner {
// RM, Runner
private ResourceManager rm;
private static TaskRunner runner = new TaskRunner();
private String[] inputTraces;
private Configuration conf;
private Map<String, Integer> queueAppNumMap;
// NM simulator
private HashMap<NodeId, NMSimulator> nmMap;
private int nmMemoryMB, nmVCores;
private String nodeFile;
// AM simulator
private int AM_ID;
private Map<String, AMSimulator> amMap;
private Set<String> trackedApps;
private Map<String, Class> amClassMap;
private static int remainingApps = 0;
// metrics
private String metricsOutputDir;
private boolean printSimulation;
// other simulation information
private int numNMs, numRacks, numAMs, numTasks;
private long maxRuntime;
public final static Map<String, Object> simulateInfoMap =
new HashMap<String, Object>();
// logger
public final static Logger LOG = Logger.getLogger(SLSRunner.class);
// input traces, input-rumen or input-sls
private boolean isSLS;
public SLSRunner(boolean isSLS, String inputTraces[], String nodeFile,
String outputDir, Set<String> trackedApps,
boolean printsimulation)
throws IOException, ClassNotFoundException {
this.isSLS = isSLS;
this.inputTraces = inputTraces.clone();
this.nodeFile = nodeFile;
this.trackedApps = trackedApps;
this.printSimulation = printsimulation;
metricsOutputDir = outputDir;
nmMap = new HashMap<NodeId, NMSimulator>();
queueAppNumMap = new HashMap<String, Integer>();
amMap = new HashMap<String, AMSimulator>();
amClassMap = new HashMap<String, Class>();
// runner configuration
conf = new Configuration(false);
conf.addResource("sls-runner.xml");
// runner
int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
SLSRunner.runner.setQueueSize(poolSize);
// <AMType, Class> map
for (Map.Entry e : conf) {
String key = e.getKey().toString();
if (key.startsWith(SLSConfiguration.AM_TYPE)) {
String amType = key.substring(SLSConfiguration.AM_TYPE.length());
amClassMap.put(amType, Class.forName(conf.get(key)));
}
}
}
public void start() throws Exception {
// start resource manager
startRM();
// start node managers
startNM();
// start application masters
startAM();
// set queue & tracked apps information
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.setQueueSet(this.queueAppNumMap.keySet());
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.setTrackedAppSet(this.trackedApps);
// print out simulation info
printSimulationInfo();
// block until all nodes are RUNNING
waitForNodesRunning();
// start the runner once everything is ready to go
runner.start();
}
private void startRM() throws IOException, ClassNotFoundException {
Configuration rmConf = new YarnConfiguration();
String schedulerClass = rmConf.get(YarnConfiguration.RM_SCHEDULER);
rmConf.set(SLSConfiguration.RM_SCHEDULER, schedulerClass);
rmConf.set(YarnConfiguration.RM_SCHEDULER,
ResourceSchedulerWrapper.class.getName());
rmConf.set(SLSConfiguration.METRICS_OUTPUT_DIR, metricsOutputDir);
rm = new ResourceManager();
rm.init(rmConf);
rm.start();
}
private void startNM() throws YarnException, IOException {
// nm configuration
nmMemoryMB = conf.getInt(SLSConfiguration.NM_MEMORY_MB,
SLSConfiguration.NM_MEMORY_MB_DEFAULT);
nmVCores = conf.getInt(SLSConfiguration.NM_VCORES,
SLSConfiguration.NM_VCORES_DEFAULT);
int heartbeatInterval = conf.getInt(
SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS,
SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS_DEFAULT);
// nm information (fetch from topology file, or from sls/rumen json file)
Set<String> nodeSet = new HashSet<String>();
if (nodeFile.isEmpty()) {
if (isSLS) {
for (String inputTrace : inputTraces) {
nodeSet.addAll(SLSUtils.parseNodesFromSLSTrace(inputTrace));
}
} else {
for (String inputTrace : inputTraces) {
nodeSet.addAll(SLSUtils.parseNodesFromRumenTrace(inputTrace));
}
}
} else {
nodeSet.addAll(SLSUtils.parseNodesFromNodeFile(nodeFile));
}
// create NM simulators
Random random = new Random();
Set<String> rackSet = new HashSet<String>();
for (String hostName : nodeSet) {
// we randomize the heartbeat start time from zero to 1 interval
NMSimulator nm = new NMSimulator();
nm.init(hostName, nmMemoryMB, nmVCores,
random.nextInt(heartbeatInterval), heartbeatInterval, rm);
nmMap.put(nm.getNode().getNodeID(), nm);
runner.schedule(nm);
rackSet.add(nm.getNode().getRackName());
}
numRacks = rackSet.size();
numNMs = nmMap.size();
}
private void waitForNodesRunning() throws InterruptedException {
long startTimeMS = System.currentTimeMillis();
while (true) {
int numRunningNodes = 0;
for (RMNode node : rm.getRMContext().getRMNodes().values()) {
if (node.getState() == NodeState.RUNNING) {
numRunningNodes ++;
}
}
if (numRunningNodes == numNMs) {
break;
}
LOG.info(MessageFormat.format("SLSRunner is waiting for all " +
"nodes RUNNING. {0} of {1} NMs initialized.",
numRunningNodes, numNMs));
Thread.sleep(1000);
}
LOG.info(MessageFormat.format("SLSRunner takes {0} ms to launch all nodes.",
(System.currentTimeMillis() - startTimeMS)));
}
@SuppressWarnings("unchecked")
private void startAM() throws YarnException, IOException {
// application/container configuration
int heartbeatInterval = conf.getInt(
SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS,
SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
int containerMemoryMB = conf.getInt(SLSConfiguration.CONTAINER_MEMORY_MB,
SLSConfiguration.CONTAINER_MEMORY_MB_DEFAULT);
int containerVCores = conf.getInt(SLSConfiguration.CONTAINER_VCORES,
SLSConfiguration.CONTAINER_VCORES_DEFAULT);
Resource containerResource =
BuilderUtils.newResource(containerMemoryMB, containerVCores);
// application workload
if (isSLS) {
startAMFromSLSTraces(containerResource, heartbeatInterval);
} else {
startAMFromRumenTraces(containerResource, heartbeatInterval);
}
numAMs = amMap.size();
remainingApps = numAMs;
}
/**
* parse workload information from sls trace files
*/
@SuppressWarnings("unchecked")
private void startAMFromSLSTraces(Resource containerResource,
int heartbeatInterval) throws IOException {
// parse from sls traces
JsonFactory jsonF = new JsonFactory();
ObjectMapper mapper = new ObjectMapper();
for (String inputTrace : inputTraces) {
Reader input = new FileReader(inputTrace);
try {
Iterator<Map> i = mapper.readValues(jsonF.createJsonParser(input),
Map.class);
while (i.hasNext()) {
Map jsonJob = i.next();
// load job information
long jobStartTime = Long.parseLong(
jsonJob.get("job.start.ms").toString());
long jobFinishTime = Long.parseLong(
jsonJob.get("job.end.ms").toString());
String user = (String) jsonJob.get("job.user");
if (user == null) user = "default";
String queue = jsonJob.get("job.queue.name").toString();
String oldAppId = jsonJob.get("job.id").toString();
boolean isTracked = trackedApps.contains(oldAppId);
int queueSize = queueAppNumMap.containsKey(queue) ?
queueAppNumMap.get(queue) : 0;
queueSize ++;
queueAppNumMap.put(queue, queueSize);
// tasks
List tasks = (List) jsonJob.get("job.tasks");
if (tasks == null || tasks.size() == 0) {
continue;
}
List<ContainerSimulator> containerList =
new ArrayList<ContainerSimulator>();
for (Object o : tasks) {
Map jsonTask = (Map) o;
String hostname = jsonTask.get("container.host").toString();
long taskStart = Long.parseLong(
jsonTask.get("container.start.ms").toString());
long taskFinish = Long.parseLong(
jsonTask.get("container.end.ms").toString());
long lifeTime = taskFinish - taskStart;
int priority = Integer.parseInt(
jsonTask.get("container.priority").toString());
String type = jsonTask.get("container.type").toString();
containerList.add(new ContainerSimulator(containerResource,
lifeTime, hostname, priority, type));
}
// create a new AM
String amType = jsonJob.get("am.type").toString();
AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
amClassMap.get(amType), new Configuration());
if (amSim != null) {
amSim.init(AM_ID++, heartbeatInterval, containerList, rm,
this, jobStartTime, jobFinishTime, user, queue,
isTracked, oldAppId);
runner.schedule(amSim);
maxRuntime = Math.max(maxRuntime, jobFinishTime);
numTasks += containerList.size();
amMap.put(oldAppId, amSim);
}
}
} finally {
input.close();
}
}
}
  /**
   * parse workload information from Rumen trace files
   */
@SuppressWarnings("unchecked")
private void startAMFromRumenTraces(Resource containerResource,
int heartbeatInterval)
throws IOException {
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "file:///");
long baselineTimeMS = 0;
for (String inputTrace : inputTraces) {
File fin = new File(inputTrace);
JobTraceReader reader = new JobTraceReader(
new Path(fin.getAbsolutePath()), conf);
try {
LoggedJob job = null;
while ((job = reader.getNext()) != null) {
// only support MapReduce currently
String jobType = "mapreduce";
String user = job.getUser() == null ?
"default" : job.getUser().getValue();
String jobQueue = job.getQueue().getValue();
String oldJobId = job.getJobID().toString();
long jobStartTimeMS = job.getSubmitTime();
long jobFinishTimeMS = job.getFinishTime();
if (baselineTimeMS == 0) {
baselineTimeMS = jobStartTimeMS;
}
jobStartTimeMS -= baselineTimeMS;
jobFinishTimeMS -= baselineTimeMS;
if (jobStartTimeMS < 0) {
LOG.warn("Warning: reset job " + oldJobId + " start time to 0.");
jobFinishTimeMS = jobFinishTimeMS - jobStartTimeMS;
jobStartTimeMS = 0;
}
boolean isTracked = trackedApps.contains(oldJobId);
int queueSize = queueAppNumMap.containsKey(jobQueue) ?
queueAppNumMap.get(jobQueue) : 0;
queueSize ++;
queueAppNumMap.put(jobQueue, queueSize);
List<ContainerSimulator> containerList =
new ArrayList<ContainerSimulator>();
// map tasks
for(LoggedTask mapTask : job.getMapTasks()) {
LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
.get(mapTask.getAttempts().size() - 1);
String hostname = taskAttempt.getHostName().getValue();
long containerLifeTime = taskAttempt.getFinishTime()
- taskAttempt.getStartTime();
containerList.add(new ContainerSimulator(containerResource,
containerLifeTime, hostname, 10, "map"));
}
// reduce tasks
for(LoggedTask reduceTask : job.getReduceTasks()) {
LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
.get(reduceTask.getAttempts().size() - 1);
String hostname = taskAttempt.getHostName().getValue();
long containerLifeTime = taskAttempt.getFinishTime()
- taskAttempt.getStartTime();
containerList.add(new ContainerSimulator(containerResource,
containerLifeTime, hostname, 20, "reduce"));
}
// create a new AM
AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
amClassMap.get(jobType), conf);
if (amSim != null) {
amSim.init(AM_ID ++, heartbeatInterval, containerList,
rm, this, jobStartTimeMS, jobFinishTimeMS, user, jobQueue,
isTracked, oldJobId);
runner.schedule(amSim);
maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
numTasks += containerList.size();
amMap.put(oldJobId, amSim);
}
}
} finally {
reader.close();
}
}
}
private void printSimulationInfo() {
if (printSimulation) {
// node
LOG.info("------------------------------------");
LOG.info(MessageFormat.format("# nodes = {0}, # racks = {1}, capacity " +
"of each node {2} MB memory and {3} vcores.",
numNMs, numRacks, nmMemoryMB, nmVCores));
LOG.info("------------------------------------");
// job
LOG.info(MessageFormat.format("# applications = {0}, # total " +
"tasks = {1}, average # tasks per application = {2}",
numAMs, numTasks, (int)(Math.ceil((numTasks + 0.0) / numAMs))));
LOG.info("JobId\tQueue\tAMType\tDuration\t#Tasks");
for (Map.Entry<String, AMSimulator> entry : amMap.entrySet()) {
AMSimulator am = entry.getValue();
LOG.info(entry.getKey() + "\t" + am.getQueue() + "\t" + am.getAMType()
+ "\t" + am.getDuration() + "\t" + am.getNumTasks());
}
LOG.info("------------------------------------");
// queue
LOG.info(MessageFormat.format("number of queues = {0} average " +
"number of apps = {1}", queueAppNumMap.size(),
(int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size()))));
LOG.info("------------------------------------");
// runtime
LOG.info(MessageFormat.format("estimated simulation time is {0}" +
" seconds", (long)(Math.ceil(maxRuntime / 1000.0))));
LOG.info("------------------------------------");
}
    // package this information into the simulateInfoMap used elsewhere
simulateInfoMap.put("Number of racks", numRacks);
simulateInfoMap.put("Number of nodes", numNMs);
simulateInfoMap.put("Node memory (MB)", nmMemoryMB);
simulateInfoMap.put("Node VCores", nmVCores);
simulateInfoMap.put("Number of applications", numAMs);
simulateInfoMap.put("Number of tasks", numTasks);
simulateInfoMap.put("Average tasks per applicaion",
(int)(Math.ceil((numTasks + 0.0) / numAMs)));
simulateInfoMap.put("Number of queues", queueAppNumMap.size());
simulateInfoMap.put("Average applications per queue",
(int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size())));
simulateInfoMap.put("Estimated simulate time (s)",
(long)(Math.ceil(maxRuntime / 1000.0)));
}
public HashMap<NodeId, NMSimulator> getNmMap() {
return nmMap;
}
public static TaskRunner getRunner() {
return runner;
}
public static void decreaseRemainingApps() {
remainingApps --;
if (remainingApps == 0) {
LOG.info("SLSRunner tears down.");
System.exit(0);
}
}
public static void main(String args[]) throws Exception {
Options options = new Options();
options.addOption("inputrumen", true, "input rumen files");
options.addOption("inputsls", true, "input sls files");
options.addOption("nodes", true, "input topology");
options.addOption("output", true, "output directory");
options.addOption("trackjobs", true,
"jobs to be tracked during simulating");
options.addOption("printsimulation", false,
"print out simulation information");
CommandLineParser parser = new GnuParser();
CommandLine cmd = parser.parse(options, args);
String inputRumen = cmd.getOptionValue("inputrumen");
String inputSLS = cmd.getOptionValue("inputsls");
String output = cmd.getOptionValue("output");
if ((inputRumen == null && inputSLS == null) || output == null) {
System.err.println();
System.err.println("ERROR: Missing input or output file");
System.err.println();
System.err.println("Options: -inputrumen|-inputsls FILE,FILE... " +
"-output FILE [-nodes FILE] [-trackjobs JobId,JobId...] " +
"[-printsimulation]");
System.err.println();
System.exit(1);
}
File outputFile = new File(output);
if (! outputFile.exists()
&& ! outputFile.mkdirs()) {
System.err.println("ERROR: Cannot create output directory "
+ outputFile.getAbsolutePath());
System.exit(1);
}
Set<String> trackedJobSet = new HashSet<String>();
if (cmd.hasOption("trackjobs")) {
String trackjobs = cmd.getOptionValue("trackjobs");
String jobIds[] = trackjobs.split(",");
trackedJobSet.addAll(Arrays.asList(jobIds));
}
String nodeFile = cmd.hasOption("nodes") ? cmd.getOptionValue("nodes") : "";
boolean isSLS = inputSLS != null;
String inputFiles[] = isSLS ? inputSLS.split(",") : inputRumen.split(",");
SLSRunner sls = new SLSRunner(isSLS, inputFiles, nodeFile, output,
trackedJobSet, cmd.hasOption("printsimulation"));
sls.start();
}
}


@ -0,0 +1,385 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.appmaster;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords
.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords
.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords
.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
public abstract class AMSimulator extends TaskRunner.Task {
// resource manager
protected ResourceManager rm;
// main
protected SLSRunner se;
// application
protected ApplicationId appId;
protected ApplicationAttemptId appAttemptId;
protected String oldAppId; // jobId from the jobhistory file
// record factory
protected final static RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
// response queue
protected final BlockingQueue<AllocateResponse> responseQueue;
protected int RESPONSE_ID = 1;
// user name
protected String user;
// queue name
protected String queue;
// am type
protected String amtype;
// job start/end time
protected long traceStartTimeMS;
protected long traceFinishTimeMS;
protected long simulateStartTimeMS;
protected long simulateFinishTimeMS;
// whether tracked in Metrics
protected boolean isTracked;
// progress
protected int totalContainers;
protected int finishedContainers;
protected final Logger LOG = Logger.getLogger(AMSimulator.class);
public AMSimulator() {
this.responseQueue = new LinkedBlockingQueue<AllocateResponse>();
}
public void init(int id, int heartbeatInterval,
List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
long traceStartTime, long traceFinishTime, String user, String queue,
boolean isTracked, String oldAppId) {
super.init(traceStartTime, traceStartTime + 1000000L * heartbeatInterval,
heartbeatInterval);
this.user = user;
this.rm = rm;
this.se = se;
this.queue = queue;
this.oldAppId = oldAppId;
this.isTracked = isTracked;
this.traceStartTimeMS = traceStartTime;
this.traceFinishTimeMS = traceFinishTime;
}
/**
* register with RM
*/
@Override
public void firstStep()
throws YarnException, IOException, InterruptedException {
simulateStartTimeMS = System.currentTimeMillis() -
SLSRunner.getRunner().getStartTimeMS();
// submit application, waiting until ACCEPTED
submitApp();
// register application master
registerAM();
// track app metrics
trackApp();
}
@Override
public void middleStep()
throws InterruptedException, YarnException, IOException {
// process responses in the queue
processResponseQueue();
// send out request
sendContainerRequest();
// check whether finish
checkStop();
}
@Override
public void lastStep() {
LOG.info(MessageFormat.format("Application {0} is shutting down.", appId));
// unregister tracking
if (isTracked) {
untrackApp();
}
// unregister application master
final FinishApplicationMasterRequest finishAMRequest = recordFactory
.newRecordInstance(FinishApplicationMasterRequest.class);
finishAMRequest.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);
try {
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token =
rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
rm.getApplicationMasterService()
.finishApplicationMaster(finishAMRequest);
return null;
}
});
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
simulateFinishTimeMS = System.currentTimeMillis() -
SLSRunner.getRunner().getStartTimeMS();
// record job running information
((ResourceSchedulerWrapper)rm.getResourceScheduler())
.addAMRuntime(appId,
traceStartTimeMS, traceFinishTimeMS,
simulateStartTimeMS, simulateFinishTimeMS);
}
protected ResourceRequest createResourceRequest(
Resource resource, String host, int priority, int numContainers) {
ResourceRequest request = recordFactory
.newRecordInstance(ResourceRequest.class);
request.setCapability(resource);
request.setResourceName(host);
request.setNumContainers(numContainers);
Priority prio = recordFactory.newRecordInstance(Priority.class);
prio.setPriority(priority);
request.setPriority(prio);
return request;
}
protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask,
List<ContainerId> toRelease) {
AllocateRequest allocateRequest =
recordFactory.newRecordInstance(AllocateRequest.class);
allocateRequest.setResponseId(RESPONSE_ID ++);
allocateRequest.setAskList(ask);
allocateRequest.setReleaseList(toRelease);
return allocateRequest;
}
protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask) {
return createAllocateRequest(ask, new ArrayList<ContainerId>());
}
protected abstract void processResponseQueue()
throws InterruptedException, YarnException, IOException;
protected abstract void sendContainerRequest()
throws YarnException, IOException, InterruptedException;
protected abstract void checkStop();
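  /**
   * submit the application to the RM as an unmanaged AM and wait until it
   * reaches the ACCEPTED state
   */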
private void submitApp()
throws YarnException, InterruptedException, IOException {
// ask for new application
GetNewApplicationRequest newAppRequest =
Records.newRecord(GetNewApplicationRequest.class);
GetNewApplicationResponse newAppResponse =
rm.getClientRMService().getNewApplication(newAppRequest);
appId = newAppResponse.getApplicationId();
// submit the application
final SubmitApplicationRequest subAppRequest =
Records.newRecord(SubmitApplicationRequest.class);
ApplicationSubmissionContext appSubContext =
Records.newRecord(ApplicationSubmissionContext.class);
appSubContext.setApplicationId(appId);
appSubContext.setMaxAppAttempts(1);
appSubContext.setQueue(queue);
appSubContext.setPriority(Priority.newInstance(0));
ContainerLaunchContext conLauContext =
Records.newRecord(ContainerLaunchContext.class);
conLauContext.setApplicationACLs(
new HashMap<ApplicationAccessType, String>());
conLauContext.setCommands(new ArrayList<String>());
conLauContext.setEnvironment(new HashMap<String, String>());
conLauContext.setLocalResources(new HashMap<String, LocalResource>());
conLauContext.setServiceData(new HashMap<String, ByteBuffer>());
appSubContext.setAMContainerSpec(conLauContext);
appSubContext.setUnmanagedAM(true);
subAppRequest.setApplicationSubmissionContext(appSubContext);
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws YarnException {
rm.getClientRMService().submitApplication(subAppRequest);
return null;
}
});
LOG.info(MessageFormat.format("Submit a new application {0}", appId));
// waiting until application ACCEPTED
RMApp app = rm.getRMContext().getRMApps().get(appId);
while(app.getState() != RMAppState.ACCEPTED) {
Thread.sleep(50);
}
appAttemptId = rm.getRMContext().getRMApps().get(appId)
.getCurrentAppAttempt().getAppAttemptId();
}
private void registerAM()
throws YarnException, IOException, InterruptedException {
// register application master
final RegisterApplicationMasterRequest amRegisterRequest =
Records.newRecord(RegisterApplicationMasterRequest.class);
amRegisterRequest.setHost("localhost");
amRegisterRequest.setRpcPort(1000);
amRegisterRequest.setTrackingUrl("localhost:1000");
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token =
rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
ugi.doAs(
new PrivilegedExceptionAction<RegisterApplicationMasterResponse>() {
@Override
public RegisterApplicationMasterResponse run() throws Exception {
return rm.getApplicationMasterService()
.registerApplicationMaster(amRegisterRequest);
}
});
LOG.info(MessageFormat.format(
"Register the application master for application {0}", appId));
}
private void trackApp() {
if (isTracked) {
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.addTrackedApp(appAttemptId, oldAppId);
}
}
public void untrackApp() {
if (isTracked) {
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.removeTrackedApp(appAttemptId, oldAppId);
}
}
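  /**
   * aggregate the container requests into one ResourceRequest per node,
   * one per rack, and a single ResourceRequest.ANY request
   */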
protected List<ResourceRequest> packageRequests(
List<ContainerSimulator> csList, int priority) {
// create requests
Map<String, ResourceRequest> rackLocalRequestMap = new HashMap<String, ResourceRequest>();
Map<String, ResourceRequest> nodeLocalRequestMap = new HashMap<String, ResourceRequest>();
ResourceRequest anyRequest = null;
for (ContainerSimulator cs : csList) {
String rackHostNames[] = SLSUtils.getRackHostName(cs.getHostname());
// check rack local
String rackname = rackHostNames[0];
if (rackLocalRequestMap.containsKey(rackname)) {
rackLocalRequestMap.get(rackname).setNumContainers(
rackLocalRequestMap.get(rackname).getNumContainers() + 1);
} else {
ResourceRequest request = createResourceRequest(
cs.getResource(), rackname, priority, 1);
rackLocalRequestMap.put(rackname, request);
}
// check node local
String hostname = rackHostNames[1];
if (nodeLocalRequestMap.containsKey(hostname)) {
nodeLocalRequestMap.get(hostname).setNumContainers(
nodeLocalRequestMap.get(hostname).getNumContainers() + 1);
} else {
ResourceRequest request = createResourceRequest(
cs.getResource(), hostname, priority, 1);
nodeLocalRequestMap.put(hostname, request);
}
// any
if (anyRequest == null) {
anyRequest = createResourceRequest(
cs.getResource(), ResourceRequest.ANY, priority, 1);
} else {
anyRequest.setNumContainers(anyRequest.getNumContainers() + 1);
}
}
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
ask.addAll(nodeLocalRequestMap.values());
ask.addAll(rackLocalRequestMap.values());
if (anyRequest != null) {
ask.add(anyRequest);
}
return ask;
}
public String getQueue() {
return queue;
}
public String getAMType() {
return amtype;
}
public long getDuration() {
return simulateFinishTimeMS - simulateStartTimeMS;
}
public int getNumTasks() {
return totalContainers;
}
}


@ -0,0 +1,405 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.appmaster;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.log4j.Logger;
public class MRAMSimulator extends AMSimulator {
/*
Vocabulary Used:
pending -> requests which are NOT yet sent to RM
scheduled -> requests which are sent to RM but not yet assigned
assigned -> requests which are assigned to a container
    completed -> requests whose corresponding containers have completed

    Maps are scheduled as soon as their requests are received. Reduces are
    scheduled when all maps have finished (slow-start is not currently
    supported).
*/
private static final int PRIORITY_REDUCE = 10;
private static final int PRIORITY_MAP = 20;
// pending maps
private LinkedList<ContainerSimulator> pendingMaps =
new LinkedList<ContainerSimulator>();
// pending failed maps
private LinkedList<ContainerSimulator> pendingFailedMaps =
new LinkedList<ContainerSimulator>();
// scheduled maps
private LinkedList<ContainerSimulator> scheduledMaps =
new LinkedList<ContainerSimulator>();
// assigned maps
private Map<ContainerId, ContainerSimulator> assignedMaps =
new HashMap<ContainerId, ContainerSimulator>();
// reduces which are not yet scheduled
private LinkedList<ContainerSimulator> pendingReduces =
new LinkedList<ContainerSimulator>();
// pending failed reduces
private LinkedList<ContainerSimulator> pendingFailedReduces =
new LinkedList<ContainerSimulator>();
// scheduled reduces
private LinkedList<ContainerSimulator> scheduledReduces =
new LinkedList<ContainerSimulator>();
// assigned reduces
private Map<ContainerId, ContainerSimulator> assignedReduces =
new HashMap<ContainerId, ContainerSimulator>();
// all maps & reduces
private LinkedList<ContainerSimulator> allMaps =
new LinkedList<ContainerSimulator>();
private LinkedList<ContainerSimulator> allReduces =
new LinkedList<ContainerSimulator>();
// counters
private int mapFinished = 0;
private int mapTotal = 0;
private int reduceFinished = 0;
private int reduceTotal = 0;
// waiting for AM container
private boolean isAMContainerRunning = false;
private Container amContainer;
// finished
private boolean isFinished = false;
// resource for AM container
private final static int MR_AM_CONTAINER_RESOURCE_MEMORY_MB = 1024;
private final static int MR_AM_CONTAINER_RESOURCE_VCORES = 1;
public final Logger LOG = Logger.getLogger(MRAMSimulator.class);
public void init(int id, int heartbeatInterval,
List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
long traceStartTime, long traceFinishTime, String user, String queue,
boolean isTracked, String oldAppId) {
super.init(id, heartbeatInterval, containerList, rm, se,
traceStartTime, traceFinishTime, user, queue,
isTracked, oldAppId);
amtype = "mapreduce";
// get map/reduce tasks
for (ContainerSimulator cs : containerList) {
if (cs.getType().equals("map")) {
cs.setPriority(PRIORITY_MAP);
pendingMaps.add(cs);
} else if (cs.getType().equals("reduce")) {
cs.setPriority(PRIORITY_REDUCE);
pendingReduces.add(cs);
}
}
allMaps.addAll(pendingMaps);
allReduces.addAll(pendingReduces);
mapTotal = pendingMaps.size();
reduceTotal = pendingReduces.size();
totalContainers = mapTotal + reduceTotal;
}
@Override
public void firstStep()
throws YarnException, IOException, InterruptedException {
super.firstStep();
requestAMContainer();
}
/**
* send out request for AM container
*/
protected void requestAMContainer()
throws YarnException, IOException, InterruptedException {
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
ResourceRequest amRequest = createResourceRequest(
BuilderUtils.newResource(MR_AM_CONTAINER_RESOURCE_MEMORY_MB,
MR_AM_CONTAINER_RESOURCE_VCORES),
ResourceRequest.ANY, 1, 1);
ask.add(amRequest);
LOG.debug(MessageFormat.format("Application {0} sends out allocate " +
"request for its AM", appId));
final AllocateRequest request = this.createAllocateRequest(ask);
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
.get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
AllocateResponse response = ugi.doAs(
new PrivilegedExceptionAction<AllocateResponse>() {
@Override
public AllocateResponse run() throws Exception {
return rm.getApplicationMasterService().allocate(request);
}
});
// waiting until the AM container is allocated
while (true) {
if (response != null && ! response.getAllocatedContainers().isEmpty()) {
// get AM container
Container container = response.getAllocatedContainers().get(0);
se.getNmMap().get(container.getNodeId())
.addNewContainer(container, -1L);
// start AM container
amContainer = container;
LOG.debug(MessageFormat.format("Application {0} starts its " +
"AM container ({1}).", appId, amContainer.getId()));
isAMContainerRunning = true;
break;
}
      // this sleep time is different from the heartbeat interval
Thread.sleep(1000);
// send out empty request
sendContainerRequest();
response = responseQueue.take();
}
}
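  /**
   * drain the response queue: account for completed and killed containers,
   * release the AM container once all tasks have finished, and launch newly
   * allocated containers on their NM simulators
   */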
@Override
@SuppressWarnings("unchecked")
protected void processResponseQueue()
throws InterruptedException, YarnException, IOException {
while (! responseQueue.isEmpty()) {
AllocateResponse response = responseQueue.take();
// check completed containers
if (! response.getCompletedContainersStatuses().isEmpty()) {
for (ContainerStatus cs : response.getCompletedContainersStatuses()) {
ContainerId containerId = cs.getContainerId();
if (cs.getExitStatus() == ContainerExitStatus.SUCCESS) {
if (assignedMaps.containsKey(containerId)) {
LOG.debug(MessageFormat.format("Application {0} has one" +
"mapper finished ({1}).", appId, containerId));
assignedMaps.remove(containerId);
mapFinished ++;
finishedContainers ++;
} else if (assignedReduces.containsKey(containerId)) {
LOG.debug(MessageFormat.format("Application {0} has one" +
"reducer finished ({1}).", appId, containerId));
assignedReduces.remove(containerId);
reduceFinished ++;
finishedContainers ++;
} else {
// am container released event
isFinished = true;
LOG.info(MessageFormat.format("Application {0} goes to " +
"finish.", appId));
}
} else {
// container to be killed
if (assignedMaps.containsKey(containerId)) {
LOG.debug(MessageFormat.format("Application {0} has one " +
"mapper killed ({1}).", appId, containerId));
pendingFailedMaps.add(assignedMaps.remove(containerId));
} else if (assignedReduces.containsKey(containerId)) {
LOG.debug(MessageFormat.format("Application {0} has one " +
"reducer killed ({1}).", appId, containerId));
pendingFailedReduces.add(assignedReduces.remove(containerId));
} else {
LOG.info(MessageFormat.format("Application {0}'s AM is " +
"going to be killed. Restarting...", appId));
restart();
}
}
}
}
// check finished
if (isAMContainerRunning &&
(mapFinished == mapTotal) &&
(reduceFinished == reduceTotal)) {
// to release the AM container
se.getNmMap().get(amContainer.getNodeId())
.cleanupContainer(amContainer.getId());
isAMContainerRunning = false;
LOG.debug(MessageFormat.format("Application {0} sends out event " +
"to clean up its AM container.", appId));
isFinished = true;
}
// check allocated containers
for (Container container : response.getAllocatedContainers()) {
if (! scheduledMaps.isEmpty()) {
ContainerSimulator cs = scheduledMaps.remove();
LOG.debug(MessageFormat.format("Application {0} starts a " +
"launch a mapper ({1}).", appId, container.getId()));
assignedMaps.put(container.getId(), cs);
se.getNmMap().get(container.getNodeId())
.addNewContainer(container, cs.getLifeTime());
} else if (! this.scheduledReduces.isEmpty()) {
ContainerSimulator cs = scheduledReduces.remove();
LOG.debug(MessageFormat.format("Application {0} starts a " +
"launch a reducer ({1}).", appId, container.getId()));
assignedReduces.put(container.getId(), cs);
se.getNmMap().get(container.getNodeId())
.addNewContainer(container, cs.getLifeTime());
}
}
}
}
  /**
   * restart the application because its AM container was killed
   */
private void restart()
throws YarnException, IOException, InterruptedException {
// clear
finishedContainers = 0;
isFinished = false;
mapFinished = 0;
reduceFinished = 0;
pendingFailedMaps.clear();
pendingMaps.clear();
pendingReduces.clear();
pendingFailedReduces.clear();
pendingMaps.addAll(allMaps);
    pendingReduces.addAll(allReduces);
isAMContainerRunning = false;
amContainer = null;
    // resend AM container request
requestAMContainer();
}
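  /**
   * package the pending (or failed) requests of the current phase and send
   * an allocate heartbeat carrying the job progress
   */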
@Override
protected void sendContainerRequest()
throws YarnException, IOException, InterruptedException {
if (isFinished) {
return;
}
// send out request
List<ResourceRequest> ask = null;
if (isAMContainerRunning) {
if (mapFinished != mapTotal) {
// map phase
if (! pendingMaps.isEmpty()) {
ask = packageRequests(pendingMaps, PRIORITY_MAP);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"request for {1} mappers.", appId, pendingMaps.size()));
scheduledMaps.addAll(pendingMaps);
pendingMaps.clear();
} else if (! pendingFailedMaps.isEmpty() && scheduledMaps.isEmpty()) {
ask = packageRequests(pendingFailedMaps, PRIORITY_MAP);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"requests for {1} failed mappers.", appId,
pendingFailedMaps.size()));
scheduledMaps.addAll(pendingFailedMaps);
pendingFailedMaps.clear();
}
} else if (reduceFinished != reduceTotal) {
// reduce phase
if (! pendingReduces.isEmpty()) {
ask = packageRequests(pendingReduces, PRIORITY_REDUCE);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"requests for {1} reducers.", appId, pendingReduces.size()));
scheduledReduces.addAll(pendingReduces);
pendingReduces.clear();
} else if (! pendingFailedReduces.isEmpty()
&& scheduledReduces.isEmpty()) {
ask = packageRequests(pendingFailedReduces, PRIORITY_REDUCE);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"request for {1} failed reducers.", appId,
pendingFailedReduces.size()));
scheduledReduces.addAll(pendingFailedReduces);
pendingFailedReduces.clear();
}
}
}
if (ask == null) {
ask = new ArrayList<ResourceRequest>();
}
final AllocateRequest request = createAllocateRequest(ask);
if (totalContainers == 0) {
request.setProgress(1.0f);
} else {
request.setProgress((float) finishedContainers / totalContainers);
}
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
.get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
AllocateResponse response = ugi.doAs(
new PrivilegedExceptionAction<AllocateResponse>() {
@Override
public AllocateResponse run() throws Exception {
return rm.getApplicationMasterService().allocate(request);
}
});
if (response != null) {
responseQueue.put(response);
}
}
@Override
protected void checkStop() {
if (isFinished) {
super.setEndTime(System.currentTimeMillis());
}
}
@Override
public void lastStep() {
super.lastStep();
// clear data structures
allMaps.clear();
allReduces.clear();
assignedMaps.clear();
assignedReduces.clear();
pendingFailedMaps.clear();
pendingFailedReduces.clear();
pendingMaps.clear();
pendingReduces.clear();
scheduledMaps.clear();
scheduledReduces.clear();
responseQueue.clear();
}
}


@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.conf;
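/**
 * property names and default values used by the simulator; all keys share
 * the "yarn.sls." prefix
 */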
public class SLSConfiguration {
// sls
public static final String PREFIX = "yarn.sls.";
// runner
public static final String RUNNER_PREFIX = PREFIX + "runner.";
public static final String RUNNER_POOL_SIZE = RUNNER_PREFIX + "pool.size";
public static final int RUNNER_POOL_SIZE_DEFAULT = 10;
// scheduler
public static final String SCHEDULER_PREFIX = PREFIX + "scheduler.";
public static final String RM_SCHEDULER = SCHEDULER_PREFIX + "class";
// metrics
public static final String METRICS_PREFIX = PREFIX + "metrics.";
public static final String METRICS_SWITCH = METRICS_PREFIX + "switch";
public static final String METRICS_WEB_ADDRESS_PORT = METRICS_PREFIX
+ "web.address.port";
public static final String METRICS_OUTPUT_DIR = METRICS_PREFIX + "output";
public static final int METRICS_WEB_ADDRESS_PORT_DEFAULT = 10001;
public static final String METRICS_TIMER_WINDOW_SIZE = METRICS_PREFIX
+ "timer.window.size";
public static final int METRICS_TIMER_WINDOW_SIZE_DEFAULT = 100;
public static final String METRICS_RECORD_INTERVAL_MS = METRICS_PREFIX
+ "record.interval.ms";
public static final int METRICS_RECORD_INTERVAL_MS_DEFAULT = 1000;
// nm
public static final String NM_PREFIX = PREFIX + "nm.";
public static final String NM_MEMORY_MB = NM_PREFIX + "memory.mb";
public static final int NM_MEMORY_MB_DEFAULT = 10240;
public static final String NM_VCORES = NM_PREFIX + "vcores";
public static final int NM_VCORES_DEFAULT = 10;
public static final String NM_HEARTBEAT_INTERVAL_MS = NM_PREFIX
+ "heartbeat.interval.ms";
public static final int NM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
// am
public static final String AM_PREFIX = PREFIX + "am.";
public static final String AM_HEARTBEAT_INTERVAL_MS = AM_PREFIX
+ "heartbeat.interval.ms";
public static final int AM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
public static final String AM_TYPE = AM_PREFIX + "type.";
// container
public static final String CONTAINER_PREFIX = PREFIX + "container.";
public static final String CONTAINER_MEMORY_MB = CONTAINER_PREFIX
+ "memory.mb";
public static final int CONTAINER_MEMORY_MB_DEFAULT = 1024;
public static final String CONTAINER_VCORES = CONTAINER_PREFIX + "vcores";
public static final int CONTAINER_VCORES_DEFAULT = 1;
}


@ -0,0 +1,261 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.nodemanager;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords
.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords
.RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
public class NMSimulator extends TaskRunner.Task {
// node resource
private RMNode node;
// master key
private MasterKey masterKey;
// containers with various STATE
private List<ContainerId> completedContainerList;
private List<ContainerId> releasedContainerList;
private DelayQueue<ContainerSimulator> containerQueue;
private Map<ContainerId, ContainerSimulator> runningContainers;
private List<ContainerId> amContainerList;
// resource manager
private ResourceManager rm;
// heart beat response id
private int RESPONSE_ID = 1;
private final static Logger LOG = Logger.getLogger(NMSimulator.class);
public void init(String nodeIdStr, int memory, int cores,
int dispatchTime, int heartBeatInterval, ResourceManager rm)
throws IOException, YarnException {
super.init(dispatchTime, dispatchTime + 1000000L * heartBeatInterval,
heartBeatInterval);
// create resource
String rackHostName[] = SLSUtils.getRackHostName(nodeIdStr);
this.node = NodeInfo.newNodeInfo(rackHostName[0], rackHostName[1],
BuilderUtils.newResource(memory, cores));
this.rm = rm;
// init data structures
completedContainerList =
Collections.synchronizedList(new ArrayList<ContainerId>());
releasedContainerList =
Collections.synchronizedList(new ArrayList<ContainerId>());
containerQueue = new DelayQueue<ContainerSimulator>();
amContainerList =
Collections.synchronizedList(new ArrayList<ContainerId>());
runningContainers =
new ConcurrentHashMap<ContainerId, ContainerSimulator>();
// register NM with RM
RegisterNodeManagerRequest req =
Records.newRecord(RegisterNodeManagerRequest.class);
req.setNodeId(node.getNodeID());
req.setResource(node.getTotalCapability());
req.setHttpPort(80);
RegisterNodeManagerResponse response = rm.getResourceTrackerService()
.registerNodeManager(req);
masterKey = response.getNMTokenMasterKey();
}
@Override
public void firstStep() throws YarnException, IOException {
// do nothing
}
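  /**
   * move expired containers to the completed list, then send a heartbeat to
   * the RM and process the cleanup commands in its response
   */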
@Override
public void middleStep() {
    // check the lifetime of each running container
ContainerSimulator cs = null;
synchronized(completedContainerList) {
while ((cs = containerQueue.poll()) != null) {
runningContainers.remove(cs.getId());
completedContainerList.add(cs.getId());
LOG.debug(MessageFormat.format("Container {0} has completed",
cs.getId()));
}
}
// send heart beat
NodeHeartbeatRequest beatRequest =
Records.newRecord(NodeHeartbeatRequest.class);
beatRequest.setLastKnownNMTokenMasterKey(masterKey);
NodeStatus ns = Records.newRecord(NodeStatus.class);
ns.setContainersStatuses(generateContainerStatusList());
ns.setNodeId(node.getNodeID());
ns.setKeepAliveApplications(new ArrayList<ApplicationId>());
ns.setResponseId(RESPONSE_ID ++);
ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
beatRequest.setNodeStatus(ns);
try {
NodeHeartbeatResponse beatResponse =
rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
if (! beatResponse.getContainersToCleanup().isEmpty()) {
// remove from queue
synchronized(releasedContainerList) {
for (ContainerId containerId : beatResponse.getContainersToCleanup()){
if (amContainerList.contains(containerId)) {
            // AM container (not killed, only released)
synchronized(amContainerList) {
amContainerList.remove(containerId);
}
LOG.debug(MessageFormat.format("NodeManager {0} releases " +
"an AM ({1}).", node.getNodeID(), containerId));
} else {
cs = runningContainers.remove(containerId);
containerQueue.remove(cs);
releasedContainerList.add(containerId);
LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
"container ({1}).", node.getNodeID(), containerId));
}
}
}
}
if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
lastStep();
}
} catch (YarnException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void lastStep() {
// do nothing
}
  /**
   * collect the status of all containers located on the current node
   */
private ArrayList<ContainerStatus> generateContainerStatusList() {
ArrayList<ContainerStatus> csList = new ArrayList<ContainerStatus>();
// add running containers
for (ContainerSimulator container : runningContainers.values()) {
csList.add(newContainerStatus(container.getId(),
ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
}
synchronized(amContainerList) {
for (ContainerId cId : amContainerList) {
csList.add(newContainerStatus(cId,
ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
}
}
// add complete containers
synchronized(completedContainerList) {
for (ContainerId cId : completedContainerList) {
LOG.debug(MessageFormat.format("NodeManager {0} completed" +
" container ({1}).", node.getNodeID(), cId));
csList.add(newContainerStatus(
cId, ContainerState.COMPLETE, ContainerExitStatus.SUCCESS));
}
completedContainerList.clear();
}
// released containers
synchronized(releasedContainerList) {
for (ContainerId cId : releasedContainerList) {
LOG.debug(MessageFormat.format("NodeManager {0} released container" +
" ({1}).", node.getNodeID(), cId));
csList.add(newContainerStatus(
cId, ContainerState.COMPLETE, ContainerExitStatus.ABORTED));
}
releasedContainerList.clear();
}
return csList;
}
private ContainerStatus newContainerStatus(ContainerId cId,
ContainerState state,
int exitState) {
ContainerStatus cs = Records.newRecord(ContainerStatus.class);
cs.setContainerId(cId);
cs.setState(state);
cs.setExitStatus(exitState);
return cs;
}
public RMNode getNode() {
return node;
}
  /**
   * launch a new container with the given lifetime
   */
public void addNewContainer(Container container, long lifeTimeMS) {
LOG.debug(MessageFormat.format("NodeManager {0} launches a new " +
"container ({1}).", node.getNodeID(), container.getId()));
if (lifeTimeMS != -1) {
// normal container
ContainerSimulator cs = new ContainerSimulator(container.getId(),
container.getResource(), lifeTimeMS + System.currentTimeMillis(),
lifeTimeMS);
containerQueue.add(cs);
runningContainers.put(cs.getId(), cs);
} else {
// AM container
// -1 means AMContainer
synchronized(amContainerList) {
amContainerList.add(container.getId());
}
}
}
/**
* clean up an AM container and add to completed list
* @param containerId id of the container to be cleaned
*/
public void cleanupContainer(ContainerId containerId) {
synchronized(amContainerList) {
amContainerList.remove(containerId);
}
synchronized(completedContainerList) {
completedContainerList.add(containerId);
}
}
}


@ -0,0 +1,167 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.nodemanager;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
.UpdatedContainerInfo;
public class NodeInfo {
private static int NODE_ID = 0;
public static NodeId newNodeID(String host, int port) {
return NodeId.newInstance(host, port);
}
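  // a minimal RMNode implementation that carries static node information
  // for the simulated cluster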
private static class FakeRMNodeImpl implements RMNode {
private NodeId nodeId;
private String hostName;
private String nodeAddr;
private String httpAddress;
private int cmdPort;
private Resource perNode;
private String rackName;
private String healthReport;
private NodeState state;
private List<ContainerId> toCleanUpContainers;
private List<ApplicationId> toCleanUpApplications;
public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
Resource perNode, String rackName, String healthReport,
int cmdPort, String hostName, NodeState state) {
this.nodeId = nodeId;
this.nodeAddr = nodeAddr;
this.httpAddress = httpAddress;
this.perNode = perNode;
this.rackName = rackName;
this.healthReport = healthReport;
this.cmdPort = cmdPort;
this.hostName = hostName;
this.state = state;
toCleanUpApplications = new ArrayList<ApplicationId>();
toCleanUpContainers = new ArrayList<ContainerId>();
}
public NodeId getNodeID() {
return nodeId;
}
public String getHostName() {
return hostName;
}
public int getCommandPort() {
return cmdPort;
}
public int getHttpPort() {
return 0;
}
public String getNodeAddress() {
return nodeAddr;
}
public String getHttpAddress() {
return httpAddress;
}
public String getHealthReport() {
return healthReport;
}
public long getLastHealthReportTime() {
return 0;
}
public Resource getTotalCapability() {
return perNode;
}
public String getRackName() {
return rackName;
}
public Node getNode() {
throw new UnsupportedOperationException("Not supported yet.");
}
public NodeState getState() {
return state;
}
public List<ContainerId> getContainersToCleanUp() {
return toCleanUpContainers;
}
public List<ApplicationId> getAppsToCleanup() {
return toCleanUpApplications;
}
public void updateNodeHeartbeatResponseForCleanup(
NodeHeartbeatResponse response) {
}
public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
return null;
}
public List<UpdatedContainerInfo> pullContainerUpdates() {
ArrayList<UpdatedContainerInfo> list = new ArrayList<UpdatedContainerInfo>();
ArrayList<ContainerStatus> list2 = new ArrayList<ContainerStatus>();
for(ContainerId cId : this.toCleanUpContainers) {
list2.add(ContainerStatus.newInstance(cId, ContainerState.RUNNING, "",
ContainerExitStatus.SUCCESS));
}
list.add(new UpdatedContainerInfo(new ArrayList<ContainerStatus>(),
list2));
return list;
}
}
public static RMNode newNodeInfo(String rackName, String hostName,
final Resource resource, int port) {
final NodeId nodeId = newNodeID(hostName, port);
final String nodeAddr = hostName + ":" + port;
final String httpAddress = hostName;
return new FakeRMNodeImpl(nodeId, nodeAddr, httpAddress,
resource, rackName, "Me good",
port, hostName, null);
}
public static RMNode newNodeInfo(String rackName, String hostName,
final Resource resource) {
return newNodeInfo(rackName, hostName, resource, NODE_ID++);
}
}


@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
public class CapacitySchedulerMetrics extends SchedulerMetrics {
public CapacitySchedulerMetrics() {
super();
}
@Override
public void trackQueue(String queueName) {
trackedQueues.add(queueName);
}
}


@ -0,0 +1,113 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
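/**
 * a Delayed element: NMSimulator keeps running containers in a DelayQueue
 * and polls them out once their end time has passed
 */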
public class ContainerSimulator implements Delayed {
// id
private ContainerId id;
// resource allocated
private Resource resource;
// end time
private long endTime;
// life time (ms)
private long lifeTime;
// host name
private String hostname;
// priority
private int priority;
// type
private String type;
/**
* invoked when AM schedules containers to allocate
*/
public ContainerSimulator(Resource resource, long lifeTime,
String hostname, int priority, String type) {
this.resource = resource;
this.lifeTime = lifeTime;
this.hostname = hostname;
this.priority = priority;
this.type = type;
}
/**
* invoke when NM schedules containers to run
*/
public ContainerSimulator(ContainerId id, Resource resource, long endTime,
long lifeTime) {
this.id = id;
this.resource = resource;
this.endTime = endTime;
this.lifeTime = lifeTime;
}
public Resource getResource() {
return resource;
}
public ContainerId getId() {
return id;
}
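  /**
   * order by end time so that the DelayQueue releases the
   * earliest-finishing container first
   */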
@Override
public int compareTo(Delayed o) {
if (!(o instanceof ContainerSimulator)) {
throw new IllegalArgumentException(
"Parameter must be a ContainerSimulator instance");
}
ContainerSimulator other = (ContainerSimulator) o;
return (int) Math.signum(endTime - other.endTime);
}
@Override
public long getDelay(TimeUnit unit) {
return unit.convert(endTime - System.currentTimeMillis(),
TimeUnit.MILLISECONDS);
}
public long getLifeTime() {
return lifeTime;
}
public String getHostname() {
return hostname;
}
public long getEndTime() {
return endTime;
}
public int getPriority() {
return priority;
}
public String getType() {
return type;
}
public void setPriority(int p) {
priority = p;
}
}


@ -0,0 +1,266 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
.AppSchedulable;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
.FairScheduler;
import com.codahale.metrics.Gauge;
import org.apache.hadoop.yarn.sls.SLSRunner;
public class FairSchedulerMetrics extends SchedulerMetrics {
private int totalMemoryMB = Integer.MAX_VALUE;
private int totalVCores = Integer.MAX_VALUE;
private boolean maxReset = false;
public FairSchedulerMetrics() {
super();
appTrackedMetrics.add("demand.memory");
appTrackedMetrics.add("demand.vcores");
appTrackedMetrics.add("usage.memory");
appTrackedMetrics.add("usage.vcores");
appTrackedMetrics.add("minshare.memory");
appTrackedMetrics.add("minshare.vcores");
appTrackedMetrics.add("maxshare.memory");
appTrackedMetrics.add("maxshare.vcores");
appTrackedMetrics.add("fairshare.memory");
appTrackedMetrics.add("fairshare.vcores");
queueTrackedMetrics.add("demand.memory");
queueTrackedMetrics.add("demand.vcores");
queueTrackedMetrics.add("usage.memory");
queueTrackedMetrics.add("usage.vcores");
queueTrackedMetrics.add("minshare.memory");
queueTrackedMetrics.add("minshare.vcores");
queueTrackedMetrics.add("maxshare.memory");
queueTrackedMetrics.add("maxshare.vcores");
queueTrackedMetrics.add("fairshare.memory");
queueTrackedMetrics.add("fairshare.vcores");
}
@Override
public void trackApp(ApplicationAttemptId appAttemptId, String oldAppId) {
super.trackApp(appAttemptId, oldAppId);
FairScheduler fair = (FairScheduler) scheduler;
final AppSchedulable app = fair.getSchedulerApp(appAttemptId)
.getAppSchedulable();
metrics.register("variable.app." + oldAppId + ".demand.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getDemand().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".demand.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getDemand().getVirtualCores();
}
}
);
metrics.register("variable.app." + oldAppId + ".usage.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getResourceUsage().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".usage.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getResourceUsage().getVirtualCores();
}
}
);
metrics.register("variable.app." + oldAppId + ".minshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getMinShare().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".minshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getMinShare().getVirtualCores();
}
}
);
metrics.register("variable.app." + oldAppId + ".maxshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return Math.min(app.getMaxShare().getMemory(), totalMemoryMB);
}
}
);
metrics.register("variable.app." + oldAppId + ".maxshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return Math.min(app.getMaxShare().getVirtualCores(), totalVCores);
}
}
);
metrics.register("variable.app." + oldAppId + ".fairshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getFairShare().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".fairshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getFairShare().getVirtualCores();
}
}
);
}
@Override
public void trackQueue(String queueName) {
trackedQueues.add(queueName);
FairScheduler fair = (FairScheduler) scheduler;
final FSQueue queue = fair.getQueueManager().getQueue(queueName);
metrics.register("variable.queue." + queueName + ".demand.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getDemand().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".demand.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getDemand().getVirtualCores();
}
}
);
metrics.register("variable.queue." + queueName + ".usage.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getResourceUsage().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".usage.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getResourceUsage().getVirtualCores();
}
}
);
metrics.register("variable.queue." + queueName + ".minshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getMinShare().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".minshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getMinShare().getVirtualCores();
}
}
);
metrics.register("variable.queue." + queueName + ".maxshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if (! maxReset &&
SLSRunner.simulateInfoMap.containsKey("Number of nodes") &&
SLSRunner.simulateInfoMap.containsKey("Node memory (MB)") &&
SLSRunner.simulateInfoMap.containsKey("Node VCores")) {
int numNMs = Integer.parseInt(
SLSRunner.simulateInfoMap.get("Number of nodes").toString());
int numMemoryMB = Integer.parseInt(
SLSRunner.simulateInfoMap.get("Node memory (MB)").toString());
int numVCores = Integer.parseInt(
SLSRunner.simulateInfoMap.get("Node VCores").toString());
totalMemoryMB = numNMs * numMemoryMB;
totalVCores = numNMs * numVCores;
// mark as done so the cluster totals are computed only once
maxReset = true;
}
return Math.min(queue.getMaxShare().getMemory(), totalMemoryMB);
}
}
);
metrics.register("variable.queue." + queueName + ".maxshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return Math.min(queue.getMaxShare().getVirtualCores(), totalVCores);
}
}
);
metrics.register("variable.queue." + queueName + ".fairshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getFairShare().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".fairshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getFairShare().getVirtualCores();
}
}
);
}
@Override
public void untrackQueue(String queueName) {
trackedQueues.remove(queueName);
metrics.remove("variable.queue." + queueName + ".demand.memory");
metrics.remove("variable.queue." + queueName + ".demand.vcores");
metrics.remove("variable.queue." + queueName + ".usage.memory");
metrics.remove("variable.queue." + queueName + ".usage.vcores");
metrics.remove("variable.queue." + queueName + ".minshare.memory");
metrics.remove("variable.queue." + queueName + ".minshare.vcores");
metrics.remove("variable.queue." + queueName + ".maxshare.memory");
metrics.remove("variable.queue." + queueName + ".maxshare.vcores");
metrics.remove("variable.queue." + queueName + ".fairshare.memory");
metrics.remove("variable.queue." + queueName + ".fairshare.vcores");
}
}

View File

@ -0,0 +1,58 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo
.FifoScheduler;
import com.codahale.metrics.Gauge;
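/**
* SchedulerMetrics implementation for the FifoScheduler: tracks the capacity
* gauges of its single default queue.
*/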
public class FifoSchedulerMetrics extends SchedulerMetrics {
public FifoSchedulerMetrics() {
super();
}
@Override
public void trackQueue(String queueName) {
trackedQueues.add(queueName);
FifoScheduler fifo = (FifoScheduler) scheduler;
// the FifoScheduler has only the DEFAULT_QUEUE;
// the boolean arguments do not affect the result here
final QueueInfo queue = fifo.getQueueInfo(queueName, false, false);
// track currentCapacity and maximumCapacity (always 1.0f)
metrics.register("variable.queue." + queueName + ".currentcapacity",
new Gauge<Float>() {
@Override
public Float getValue() {
return queue.getCurrentCapacity();
}
}
);
metrics.register("variable.queue." + queueName + ".",
new Gauge<Float>() {
@Override
public Float getValue() {
return queue.getCurrentCapacity();
}
}
);
}
}

View File

@ -0,0 +1,30 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
.NodeUpdateSchedulerEvent;
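/**
* Wraps a NodeUpdateSchedulerEvent so that the RMNode it carries is replaced
* by an RMNodeWrapper, letting the simulator observe container updates.
*/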
public class NodeUpdateSchedulerEventWrapper extends NodeUpdateSchedulerEvent {
public NodeUpdateSchedulerEventWrapper(NodeUpdateSchedulerEvent event) {
super(new RMNodeWrapper(event.getRMNode()));
}
}

View File

@ -0,0 +1,141 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
.UpdatedContainerInfo;
import java.util.Collections;
import java.util.List;
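/**
* Decorates an RMNode and caches the result of pullContainerUpdates(), which
* drains the node's update queue: the scheduler still sees the updates
* exactly once via pullContainerUpdates(), while the simulator can read them
* again through getContainerUpdates().
*/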
public class RMNodeWrapper implements RMNode {
private RMNode node;
private List<UpdatedContainerInfo> updates;
private boolean pulled = false;
public RMNodeWrapper(RMNode node) {
this.node = node;
updates = node.pullContainerUpdates();
}
@Override
public NodeId getNodeID() {
return node.getNodeID();
}
@Override
public String getHostName() {
return node.getHostName();
}
@Override
public int getCommandPort() {
return node.getCommandPort();
}
@Override
public int getHttpPort() {
return node.getHttpPort();
}
@Override
public String getNodeAddress() {
return node.getNodeAddress();
}
@Override
public String getHttpAddress() {
return node.getHttpAddress();
}
@Override
public String getHealthReport() {
return node.getHealthReport();
}
@Override
public long getLastHealthReportTime() {
return node.getLastHealthReportTime();
}
@Override
public Resource getTotalCapability() {
return node.getTotalCapability();
}
@Override
public String getRackName() {
return node.getRackName();
}
@Override
public Node getNode() {
return node.getNode();
}
@Override
public NodeState getState() {
return node.getState();
}
@Override
public List<ContainerId> getContainersToCleanUp() {
return node.getContainersToCleanUp();
}
@Override
public List<ApplicationId> getAppsToCleanup() {
return node.getAppsToCleanup();
}
@Override
public void updateNodeHeartbeatResponseForCleanup(
NodeHeartbeatResponse nodeHeartbeatResponse) {
node.updateNodeHeartbeatResponseForCleanup(nodeHeartbeatResponse);
}
@Override
public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
return node.getLastNodeHeartBeatResponse();
}
@Override
@SuppressWarnings("unchecked")
public List<UpdatedContainerInfo> pullContainerUpdates() {
List<UpdatedContainerInfo> list = Collections.EMPTY_LIST;
if (! pulled) {
list = updates;
pulled = true;
}
return list;
}
List<UpdatedContainerInfo> getContainerUpdates() {
return updates;
}
}

View File

@ -0,0 +1,855 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;
import org.apache.hadoop.yarn.sls.web.SLSWebApp;
import com.codahale.metrics.Counter;
import com.codahale.metrics.CsvReporter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SlidingWindowReservoir;
import com.codahale.metrics.Timer;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
.UpdatedContainerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
.AppAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
.AppRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
.SchedulerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
.FairScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo
.FifoScheduler;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.log4j.Logger;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
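/**
* Decorator around the real ResourceScheduler (configured via the sls
* configuration) that records metrics for every allocate()/handle() call:
* counters, sliding-window timers and histograms, per-queue resource
* counters, a csv reporter and a real-time tracking web app.
*/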
public class ResourceSchedulerWrapper implements ResourceScheduler,
Configurable {
private static final String EOL = System.getProperty("line.separator");
private static final int SAMPLING_SIZE = 60;
private ScheduledExecutorService pool;
// counters for scheduler allocate/handle operations
private Counter schedulerAllocateCounter;
private Counter schedulerHandleCounter;
private Map<SchedulerEventType, Counter> schedulerHandleCounterMap;
// Timers for scheduler allocate/handle operations
private Timer schedulerAllocateTimer;
private Timer schedulerHandleTimer;
private Map<SchedulerEventType, Timer> schedulerHandleTimerMap;
private List<Histogram> schedulerHistogramList;
private Map<Histogram, Timer> histogramTimerMap;
private Lock samplerLock;
private Lock queueLock;
private Configuration conf;
private ResourceScheduler scheduler;
private Map<ApplicationAttemptId, String> appQueueMap =
new ConcurrentHashMap<ApplicationAttemptId, String>();
private BufferedWriter jobRuntimeLogBW;
// Priority of the ResourceSchedulerWrapper shutdown hook.
public static final int SHUTDOWN_HOOK_PRIORITY = 30;
// web app
private SLSWebApp web;
private Map<ContainerId, Resource> preemptionContainerMap =
new ConcurrentHashMap<ContainerId, Resource>();
// metrics
private MetricRegistry metrics;
private SchedulerMetrics schedulerMetrics;
private boolean metricsON;
private String metricsOutputDir;
private BufferedWriter metricsLogBW;
private boolean running = false;
private static Map<Class, Class> defaultSchedulerMetricsMap =
new HashMap<Class, Class>();
static {
defaultSchedulerMetricsMap.put(FairScheduler.class,
FairSchedulerMetrics.class);
defaultSchedulerMetricsMap.put(FifoScheduler.class,
FifoSchedulerMetrics.class);
defaultSchedulerMetricsMap.put(CapacityScheduler.class,
CapacitySchedulerMetrics.class);
}
// must be set from outside
private Set<String> queueSet;
private Set<String> trackedAppSet;
public final Logger LOG = Logger.getLogger(ResourceSchedulerWrapper.class);
public ResourceSchedulerWrapper() {
samplerLock = new ReentrantLock();
queueLock = new ReentrantLock();
}
@Override
public void setConf(Configuration conf) {
this.conf = conf;
// set scheduler
Class<? extends ResourceScheduler> klass =
conf.getClass(SLSConfiguration.RM_SCHEDULER, null,
ResourceScheduler.class);
scheduler = ReflectionUtils.newInstance(klass, conf);
// start metrics
metricsON = conf.getBoolean(SLSConfiguration.METRICS_SWITCH, true);
if (metricsON) {
try {
initMetrics();
} catch (Exception e) {
e.printStackTrace();
}
}
ShutdownHookManager.get().addShutdownHook(new Runnable() {
@Override
public void run() {
try {
if (metricsLogBW != null) {
metricsLogBW.write("]");
metricsLogBW.close();
}
if (web != null) {
web.stop();
}
tearDown();
} catch (Exception e) {
e.printStackTrace();
}
}
}, SHUTDOWN_HOOK_PRIORITY);
}
@Override
public Allocation allocate(ApplicationAttemptId attemptId,
List<ResourceRequest> resourceRequests,
List<ContainerId> containerIds,
List<String> strings, List<String> strings2) {
if (metricsON) {
final Timer.Context context = schedulerAllocateTimer.time();
Allocation allocation = null;
try {
allocation = scheduler.allocate(attemptId, resourceRequests,
containerIds, strings, strings2);
return allocation;
} finally {
context.stop();
schedulerAllocateCounter.inc();
try {
// allocation stays null if the wrapped allocate() threw
if (allocation != null) {
updateQueueWithAllocateRequest(allocation, attemptId,
resourceRequests, containerIds);
}
} catch (IOException e) {
e.printStackTrace();
}
}
} else {
return scheduler.allocate(attemptId,
resourceRequests, containerIds, strings, strings2);
}
}
@Override
public void handle(SchedulerEvent schedulerEvent) {
// metrics off
if (! metricsON) {
scheduler.handle(schedulerEvent);
return;
}
running = true;
// metrics on
Timer.Context handlerTimer = null;
Timer.Context operationTimer = null;
NodeUpdateSchedulerEventWrapper eventWrapper;
try {
if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE
&& schedulerEvent instanceof NodeUpdateSchedulerEvent) {
eventWrapper = new NodeUpdateSchedulerEventWrapper(
(NodeUpdateSchedulerEvent)schedulerEvent);
schedulerEvent = eventWrapper;
updateQueueWithNodeUpdate(eventWrapper);
} else if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
&& schedulerEvent instanceof AppRemovedSchedulerEvent) {
// if the app still holds its AM container, update queue resource usage
AppRemovedSchedulerEvent appRemoveEvent =
(AppRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId =
appRemoveEvent.getApplicationAttemptID();
String queue = appQueueMap.get(appAttemptId);
SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
if (! app.getLiveContainers().isEmpty()) { // either 0 or 1 left
// the remaining live container is the AM container
RMContainer rmc = app.getLiveContainers().iterator().next();
updateQueueMetrics(queue,
rmc.getContainer().getResource().getMemory(),
rmc.getContainer().getResource().getVirtualCores());
}
}
handlerTimer = schedulerHandleTimer.time();
operationTimer = schedulerHandleTimerMap
.get(schedulerEvent.getType()).time();
scheduler.handle(schedulerEvent);
} finally {
if (handlerTimer != null) handlerTimer.stop();
if (operationTimer != null) operationTimer.stop();
schedulerHandleCounter.inc();
schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
&& schedulerEvent instanceof AppRemovedSchedulerEvent) {
SLSRunner.decreaseRemainingApps();
AppRemovedSchedulerEvent appRemoveEvent =
(AppRemovedSchedulerEvent) schedulerEvent;
appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
&& schedulerEvent instanceof AppAddedSchedulerEvent) {
AppAddedSchedulerEvent appAddEvent =
(AppAddedSchedulerEvent) schedulerEvent;
String queueName = appAddEvent.getQueue();
appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
}
}
}
private void updateQueueWithNodeUpdate(
NodeUpdateSchedulerEventWrapper eventWrapper) {
RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode();
List<UpdatedContainerInfo> containerList = node.getContainerUpdates();
for (UpdatedContainerInfo info : containerList) {
for (ContainerStatus status : info.getCompletedContainers()) {
ContainerId containerId = status.getContainerId();
SchedulerAppReport app = scheduler.getSchedulerAppInfo(
containerId.getApplicationAttemptId());
if (app == null) {
// this happens for the AM container:
// the app has already been removed when the NM sends the release
// information
continue;
}
String queue = appQueueMap.get(containerId.getApplicationAttemptId());
int releasedMemory = 0, releasedVCores = 0;
if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
for (RMContainer rmc : app.getLiveContainers()) {
if (rmc.getContainerId().equals(containerId)) {
releasedMemory += rmc.getContainer().getResource().getMemory();
releasedVCores += rmc.getContainer()
.getResource().getVirtualCores();
break;
}
}
} else if (status.getExitStatus() == ContainerExitStatus.ABORTED) {
if (preemptionContainerMap.containsKey(containerId)) {
Resource preResource = preemptionContainerMap.get(containerId);
releasedMemory += preResource.getMemory();
releasedVCores += preResource.getVirtualCores();
preemptionContainerMap.remove(containerId);
}
}
// update queue counters
updateQueueMetrics(queue, releasedMemory, releasedVCores);
}
}
}
private void updateQueueWithAllocateRequest(Allocation allocation,
ApplicationAttemptId attemptId,
List<ResourceRequest> resourceRequests,
List<ContainerId> containerIds) throws IOException {
// update queue information
Resource pendingResource = Resources.createResource(0, 0);
Resource allocatedResource = Resources.createResource(0, 0);
String queueName = appQueueMap.get(attemptId);
// container requested
for (ResourceRequest request : resourceRequests) {
if (request.getResourceName().equals(ResourceRequest.ANY)) {
Resources.addTo(pendingResource,
Resources.multiply(request.getCapability(),
request.getNumContainers()));
}
}
// container allocated
for (Container container : allocation.getContainers()) {
Resources.addTo(allocatedResource, container.getResource());
Resources.subtractFrom(pendingResource, container.getResource());
}
// container released from AM
SchedulerAppReport report = scheduler.getSchedulerAppInfo(attemptId);
for (ContainerId containerId : containerIds) {
Container container = null;
for (RMContainer c : report.getLiveContainers()) {
if (c.getContainerId().equals(containerId)) {
container = c.getContainer();
break;
}
}
if (container != null) {
// released allocated containers
Resources.subtractFrom(allocatedResource, container.getResource());
} else {
for (RMContainer c : report.getReservedContainers()) {
if (c.getContainerId().equals(containerId)) {
container = c.getContainer();
break;
}
}
if (container != null) {
// released reserved containers
Resources.subtractFrom(pendingResource, container.getResource());
}
}
}
// containers released/preemption from scheduler
Set<ContainerId> preemptionContainers = new HashSet<ContainerId>();
if (allocation.getContainerPreemptions() != null) {
preemptionContainers.addAll(allocation.getContainerPreemptions());
}
if (allocation.getStrictContainerPreemptions() != null) {
preemptionContainers.addAll(allocation.getStrictContainerPreemptions());
}
if (! preemptionContainers.isEmpty()) {
for (ContainerId containerId : preemptionContainers) {
if (! preemptionContainerMap.containsKey(containerId)) {
Container container = null;
for (RMContainer c : report.getLiveContainers()) {
if (c.getContainerId().equals(containerId)) {
container = c.getContainer();
break;
}
}
if (container != null) {
preemptionContainerMap.put(containerId, container.getResource());
}
}
}
}
// update metrics
SortedMap<String, Counter> counterMap = metrics.getCounters();
String[] names = new String[]{
"counter.queue." + queueName + ".pending.memory",
"counter.queue." + queueName + ".pending.cores",
"counter.queue." + queueName + ".allocated.memory",
"counter.queue." + queueName + ".allocated.cores"};
int[] values = new int[]{pendingResource.getMemory(),
pendingResource.getVirtualCores(),
allocatedResource.getMemory(), allocatedResource.getVirtualCores()};
for (int i = names.length - 1; i >= 0; i --) {
if (! counterMap.containsKey(names[i])) {
metrics.counter(names[i]);
counterMap = metrics.getCounters();
}
counterMap.get(names[i]).inc(values[i]);
}
queueLock.lock();
try {
if (! schedulerMetrics.isTracked(queueName)) {
schedulerMetrics.trackQueue(queueName);
}
} finally {
queueLock.unlock();
}
}
private void tearDown() throws IOException {
// close job runtime writer
if (jobRuntimeLogBW != null) {
jobRuntimeLogBW.close();
}
// shut pool
if (pool != null) pool.shutdown();
}
@SuppressWarnings("unchecked")
private void initMetrics() throws Exception {
metrics = new MetricRegistry();
// configuration
metricsOutputDir = conf.get(SLSConfiguration.METRICS_OUTPUT_DIR);
int metricsWebAddressPort = conf.getInt(
SLSConfiguration.METRICS_WEB_ADDRESS_PORT,
SLSConfiguration.METRICS_WEB_ADDRESS_PORT_DEFAULT);
// create SchedulerMetrics for current scheduler
String schedulerMetricsType = conf.get(scheduler.getClass().getName());
Class schedulerMetricsClass = schedulerMetricsType == null?
defaultSchedulerMetricsMap.get(scheduler.getClass()) :
Class.forName(schedulerMetricsType);
schedulerMetrics = (SchedulerMetrics)ReflectionUtils
.newInstance(schedulerMetricsClass, new Configuration());
schedulerMetrics.init(scheduler, metrics);
// register various metrics
registerJvmMetrics();
registerClusterResourceMetrics();
registerContainerAppNumMetrics();
registerSchedulerMetrics();
// .csv output
initMetricsCSVOutput();
// start web app to provide real-time tracking
web = new SLSWebApp(this, metricsWebAddressPort);
web.start();
// a thread to update histogram timer
pool = new ScheduledThreadPoolExecutor(2);
pool.scheduleAtFixedRate(new HistogramsRunnable(), 0, 1000,
TimeUnit.MILLISECONDS);
// a thread to output metrics for real-time tracking
pool.scheduleAtFixedRate(new MetricsLogRunnable(), 0, 1000,
TimeUnit.MILLISECONDS);
// application running information
jobRuntimeLogBW = new BufferedWriter(
new FileWriter(metricsOutputDir + "/jobruntime.csv"));
jobRuntimeLogBW.write("JobID,real_start_time,real_end_time," +
"simulate_start_time,simulate_end_time" + EOL);
jobRuntimeLogBW.flush();
}
private void registerJvmMetrics() {
// add JVM gauges
metrics.register("variable.jvm.free.memory",
new Gauge<Long>() {
@Override
public Long getValue() {
return Runtime.getRuntime().freeMemory();
}
}
);
metrics.register("variable.jvm.max.memory",
new Gauge<Long>() {
@Override
public Long getValue() {
return Runtime.getRuntime().maxMemory();
}
}
);
metrics.register("variable.jvm.total.memory",
new Gauge<Long>() {
@Override
public Long getValue() {
return Runtime.getRuntime().totalMemory();
}
}
);
}
private void registerClusterResourceMetrics() {
metrics.register("variable.cluster.allocated.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
return 0;
} else {
return scheduler.getRootQueueMetrics().getAllocatedMB();
}
}
}
);
metrics.register("variable.cluster.allocated.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
return 0;
} else {
return scheduler.getRootQueueMetrics().getAllocatedVirtualCores();
}
}
}
);
metrics.register("variable.cluster.available.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
return 0;
} else {
return scheduler.getRootQueueMetrics().getAvailableMB();
}
}
}
);
metrics.register("variable.cluster.available.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
return 0;
} else {
return scheduler.getRootQueueMetrics().getAvailableVirtualCores();
}
}
}
);
}
private void registerContainerAppNumMetrics() {
metrics.register("variable.running.application",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
return 0;
} else {
return scheduler.getRootQueueMetrics().getAppsRunning();
}
}
}
);
metrics.register("variable.running.container",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
return 0;
} else {
return scheduler.getRootQueueMetrics().getAllocatedContainers();
}
}
}
);
}
private void registerSchedulerMetrics() {
samplerLock.lock();
try {
// counters for scheduler operations
schedulerAllocateCounter = metrics.counter(
"counter.scheduler.operation.allocate");
schedulerHandleCounter = metrics.counter(
"counter.scheduler.operation.handle");
schedulerHandleCounterMap = new HashMap<SchedulerEventType, Counter>();
for (SchedulerEventType e : SchedulerEventType.values()) {
Counter counter = metrics.counter(
"counter.scheduler.operation.handle." + e);
schedulerHandleCounterMap.put(e, counter);
}
// timers for scheduler operations
int timeWindowSize = conf.getInt(
SLSConfiguration.METRICS_TIMER_WINDOW_SIZE,
SLSConfiguration.METRICS_TIMER_WINDOW_SIZE_DEFAULT);
schedulerAllocateTimer = new Timer(
new SlidingWindowReservoir(timeWindowSize));
schedulerHandleTimer = new Timer(
new SlidingWindowReservoir(timeWindowSize));
schedulerHandleTimerMap = new HashMap<SchedulerEventType, Timer>();
for (SchedulerEventType e : SchedulerEventType.values()) {
Timer timer = new Timer(new SlidingWindowReservoir(timeWindowSize));
schedulerHandleTimerMap.put(e, timer);
}
// histogram for scheduler operations (Samplers)
schedulerHistogramList = new ArrayList<Histogram>();
histogramTimerMap = new HashMap<Histogram, Timer>();
Histogram schedulerAllocateHistogram = new Histogram(
new SlidingWindowReservoir(SAMPLING_SIZE));
metrics.register("sampler.scheduler.operation.allocate.timecost",
schedulerAllocateHistogram);
schedulerHistogramList.add(schedulerAllocateHistogram);
histogramTimerMap.put(schedulerAllocateHistogram, schedulerAllocateTimer);
Histogram schedulerHandleHistogram = new Histogram(
new SlidingWindowReservoir(SAMPLING_SIZE));
metrics.register("sampler.scheduler.operation.handle.timecost",
schedulerHandleHistogram);
schedulerHistogramList.add(schedulerHandleHistogram);
histogramTimerMap.put(schedulerHandleHistogram, schedulerHandleTimer);
for (SchedulerEventType e : SchedulerEventType.values()) {
Histogram histogram = new Histogram(
new SlidingWindowReservoir(SAMPLING_SIZE));
metrics.register(
"sampler.scheduler.operation.handle." + e + ".timecost",
histogram);
schedulerHistogramList.add(histogram);
histogramTimerMap.put(histogram, schedulerHandleTimerMap.get(e));
}
} finally {
samplerLock.unlock();
}
}
private void initMetricsCSVOutput() {
int timeIntervalMS = conf.getInt(
SLSConfiguration.METRICS_RECORD_INTERVAL_MS,
SLSConfiguration.METRICS_RECORD_INTERVAL_MS_DEFAULT);
File dir = new File(metricsOutputDir + "/metrics");
if(! dir.exists()
&& ! dir.mkdirs()) {
LOG.error("Cannot create directory " + dir.getAbsoluteFile());
}
final CsvReporter reporter = CsvReporter.forRegistry(metrics)
.formatFor(Locale.US)
.convertRatesTo(TimeUnit.SECONDS)
.convertDurationsTo(TimeUnit.MILLISECONDS)
.build(new File(metricsOutputDir + "/metrics"));
reporter.start(timeIntervalMS, TimeUnit.MILLISECONDS);
}
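// periodically folds each operation timer's mean latency into its paired
// histogram so the samplers expose a rolling view of time cost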
class HistogramsRunnable implements Runnable {
@Override
public void run() {
samplerLock.lock();
try {
for (Histogram histogram : schedulerHistogramList) {
Timer timer = histogramTimerMap.get(histogram);
histogram.update((int) timer.getSnapshot().getMean());
}
} finally {
samplerLock.unlock();
}
}
}
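// appends one json snapshot of the real-time metrics per interval to
// realtimetrack.json, building a json array that the shutdown hook closes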
class MetricsLogRunnable implements Runnable {
private boolean firstLine = true;
public MetricsLogRunnable() {
try {
metricsLogBW = new BufferedWriter(
new FileWriter(metricsOutputDir + "/realtimetrack.json"));
metricsLogBW.write("[");
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void run() {
if(running) {
// ask the web app for the real-time tracking json
String metrics = web.generateRealTimeTrackingMetrics();
// output
try {
if(firstLine) {
metricsLogBW.write(metrics + EOL);
firstLine = false;
} else {
metricsLogBW.write("," + metrics + EOL);
}
metricsLogBW.flush();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
// the following functions are used by AMSimulator
public void addAMRuntime(ApplicationId appId,
long traceStartTimeMS, long traceEndTimeMS,
long simulateStartTimeMS, long simulateEndTimeMS) {
try {
// write job runtime information
StringBuilder sb = new StringBuilder();
sb.append(appId).append(",").append(traceStartTimeMS).append(",")
.append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
.append(",").append(simulateEndTimeMS);
jobRuntimeLogBW.write(sb.toString() + EOL);
jobRuntimeLogBW.flush();
} catch (IOException e) {
e.printStackTrace();
}
}
private void updateQueueMetrics(String queue,
int releasedMemory, int releasedVCores) {
// update queue counters
SortedMap<String, Counter> counterMap = metrics.getCounters();
if (releasedMemory != 0) {
String name = "counter.queue." + queue + ".allocated.memory";
if (! counterMap.containsKey(name)) {
metrics.counter(name);
counterMap = metrics.getCounters();
}
counterMap.get(name).inc(-releasedMemory);
}
if (releasedVCores != 0) {
String name = "counter.queue." + queue + ".allocated.cores";
if (! counterMap.containsKey(name)) {
metrics.counter(name);
counterMap = metrics.getCounters();
}
counterMap.get(name).inc(-releasedVCores);
}
}
public void setQueueSet(Set<String> queues) {
this.queueSet = queues;
}
public Set<String> getQueueSet() {
return this.queueSet;
}
public void setTrackedAppSet(Set<String> apps) {
this.trackedAppSet = apps;
}
public Set<String> getTrackedAppSet() {
return this.trackedAppSet;
}
public MetricRegistry getMetrics() {
return metrics;
}
public SchedulerMetrics getSchedulerMetrics() {
return schedulerMetrics;
}
// API open to other classes
public void addTrackedApp(ApplicationAttemptId appAttemptId,
String oldAppId) {
if (metricsON) {
schedulerMetrics.trackApp(appAttemptId, oldAppId);
}
}
public void removeTrackedApp(ApplicationAttemptId appAttemptId,
String oldAppId) {
if (metricsON) {
schedulerMetrics.untrackApp(appAttemptId, oldAppId);
}
}
@Override
public Configuration getConf() {
return conf;
}
@Override
public void reinitialize(Configuration entries, RMContext rmContext)
throws IOException {
scheduler.reinitialize(entries, rmContext);
}
@Override
public void recover(RMStateStore.RMState rmState) throws Exception {
scheduler.recover(rmState);
}
@Override
public QueueInfo getQueueInfo(String s, boolean b, boolean b2)
throws IOException {
return scheduler.getQueueInfo(s, b, b2);
}
@Override
public List<QueueUserACLInfo> getQueueUserAclInfo() {
return scheduler.getQueueUserAclInfo();
}
@Override
public Resource getMinimumResourceCapability() {
return scheduler.getMinimumResourceCapability();
}
@Override
public Resource getMaximumResourceCapability() {
return scheduler.getMaximumResourceCapability();
}
@Override
public int getNumClusterNodes() {
return scheduler.getNumClusterNodes();
}
@Override
public SchedulerNodeReport getNodeReport(NodeId nodeId) {
return scheduler.getNodeReport(nodeId);
}
@Override
public SchedulerAppReport getSchedulerAppInfo(
ApplicationAttemptId attemptId) {
return scheduler.getSchedulerAppInfo(attemptId);
}
@Override
public QueueMetrics getRootQueueMetrics() {
return scheduler.getRootQueueMetrics();
}
}

View File

@ -0,0 +1,100 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
.SchedulerAppReport;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
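/**
* Base class for scheduler-specific metrics. Tracks the live and reserved
* container counts common to every scheduler; subclasses register their own
* gauges and must implement queue tracking.
*/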
public abstract class SchedulerMetrics {
protected ResourceScheduler scheduler;
protected Set<String> trackedQueues;
protected MetricRegistry metrics;
protected Set<String> appTrackedMetrics;
protected Set<String> queueTrackedMetrics;
public SchedulerMetrics() {
appTrackedMetrics = new HashSet<String>();
appTrackedMetrics.add("live.containers");
appTrackedMetrics.add("reserved.containers");
queueTrackedMetrics = new HashSet<String>();
}
public void init(ResourceScheduler scheduler, MetricRegistry metrics) {
this.scheduler = scheduler;
this.trackedQueues = new HashSet<String>();
this.metrics = metrics;
}
public void trackApp(final ApplicationAttemptId appAttemptId,
String oldAppId) {
metrics.register("variable.app." + oldAppId + ".live.containers",
new Gauge<Integer>() {
@Override
public Integer getValue() {
SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
return app.getLiveContainers().size();
}
}
);
metrics.register("variable.app." + oldAppId + ".reserved.containers",
new Gauge<Integer>() {
@Override
public Integer getValue() {
SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
return app.getReservedContainers().size();
}
}
);
}
public void untrackApp(ApplicationAttemptId appAttemptId,
String oldAppId) {
for (String m : appTrackedMetrics) {
metrics.remove("variable.app." + oldAppId + "." + m);
}
}
public abstract void trackQueue(String queueName);
public void untrackQueue(String queueName) {
for (String m : queueTrackedMetrics) {
metrics.remove("variable.queue." + queueName + "." + m);
}
}
public boolean isTracked(String queueName) {
return trackedQueues.contains(queueName);
}
public Set<String> getAppTrackedMetrics() {
return appTrackedMetrics;
}
public Set<String> getQueueTrackedMetrics() {
return queueTrackedMetrics;
}
}

View File

@ -0,0 +1,183 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.Queue;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.yarn.exceptions.YarnException;
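/**
* Runs Task instances on a thread pool fed by a DelayQueue: a task executes
* firstStep() at its start time, middleStep() every repeatInterval
* milliseconds, and lastStep() once its end time is reached.
*/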
public class TaskRunner {
public abstract static class Task implements Runnable, Delayed {
private long start;
private long end;
private long nextRun;
private long startTime;
private long endTime;
private long repeatInterval;
private Queue<Task> queue;
public Task(){}
//values in milliseconds, start/end are milliseconds from now
public void init(long startTime, long endTime, long repeatInterval) {
if (endTime - startTime < 0) {
throw new IllegalArgumentException(MessageFormat.format(
"endTime[{0}] cannot be smaller than startTime[{1}]", endTime,
startTime));
}
if (repeatInterval < 1) {
throw new IllegalArgumentException(MessageFormat.format(
"repeatInterval[{0}] cannot be less than 1", repeatInterval));
}
if ((endTime - startTime) % repeatInterval != 0) {
throw new IllegalArgumentException(MessageFormat.format(
"Invalid parameters: (endTime[{0}] - startTime[{1}]) " +
"% repeatInterval[{2}] != 0",
endTime, startTime, repeatInterval));
}
start = startTime;
end = endTime;
this.repeatInterval = repeatInterval;
}
private void timeRebase(long now) {
startTime = now + start;
endTime = now + end;
this.nextRun = startTime;
}
//values in milliseconds, start is milliseconds from now
//it only executes firstStep()
public void init(long startTime) {
init(startTime, startTime, 1);
}
private void setQueue(Queue<Task> queue) {
this.queue = queue;
}
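// one step per invocation: the task re-queues itself with nextRun advanced
// by repeatInterval until nextRun reaches endTime, then runs lastStep()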
@Override
public final void run() {
try {
if (nextRun == startTime) {
firstStep();
nextRun += repeatInterval;
if (nextRun <= endTime) {
queue.add(this);
}
} else if (nextRun < endTime) {
middleStep();
nextRun += repeatInterval;
queue.add(this);
} else {
lastStep();
}
} catch (YarnException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
@Override
public long getDelay(TimeUnit unit) {
return unit.convert(nextRun - System.currentTimeMillis(),
TimeUnit.MILLISECONDS);
}
@Override
public int compareTo(Delayed o) {
if (!(o instanceof Task)) {
throw new IllegalArgumentException("Parameter must be a Task instance");
}
Task other = (Task) o;
return (int) Math.signum(nextRun - other.nextRun);
}
public abstract void firstStep()
throws YarnException, IOException, InterruptedException;
public abstract void middleStep()
throws YarnException, InterruptedException, IOException;
public abstract void lastStep() throws YarnException;
public void setEndTime(long et) {
endTime = et;
}
}
private DelayQueue queue;
private int threadPoolSize;
private ThreadPoolExecutor executor;
private long startTimeMS = 0;
public TaskRunner() {
queue = new DelayQueue();
}
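// note: despite the name, this sets the executor's thread pool size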
public void setQueueSize(int threadPoolSize) {
this.threadPoolSize = threadPoolSize;
}
@SuppressWarnings("unchecked")
public void start() {
if (executor != null) {
throw new IllegalStateException("Already started");
}
DelayQueue preStartQueue = queue;
queue = new DelayQueue();
executor = new ThreadPoolExecutor(threadPoolSize, threadPoolSize, 0,
TimeUnit.MILLISECONDS, queue);
executor.prestartAllCoreThreads();
startTimeMS = System.currentTimeMillis();
for (Object d : preStartQueue) {
schedule((Task) d, startTimeMS);
}
}
public void stop() {
executor.shutdownNow();
}
@SuppressWarnings("unchecked")
private void schedule(Task task, long timeNow) {
task.timeRebase(timeNow);
task.setQueue(queue);
queue.add(task);
}
public void schedule(Task task) {
schedule(task, System.currentTimeMillis());
}
public long getStartTimeMS() {
return this.startTimeMS;
}
}

View File

@ -0,0 +1,133 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.utils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
import org.apache.hadoop.tools.rumen.LoggedTaskAttempt;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.List;
import java.util.Iterator;
public class SLSUtils {
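/**
* Split a rumen host string of the form "/rack/host" into
* {rack, host}.
*/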
public static String[] getRackHostName(String hostname) {
hostname = hostname.substring(1);
return hostname.split("/");
}
/**
* Parse a rumen trace file and return the set of host names it references.
*/
public static Set<String> parseNodesFromRumenTrace(String jobTrace)
throws IOException {
Set<String> nodeSet = new HashSet<String>();
File fin = new File(jobTrace);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "file:///");
JobTraceReader reader = new JobTraceReader(
new Path(fin.getAbsolutePath()), conf);
try {
LoggedJob job = null;
while ((job = reader.getNext()) != null) {
for(LoggedTask mapTask : job.getMapTasks()) {
// select the last attempt
LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
.get(mapTask.getAttempts().size() - 1);
nodeSet.add(taskAttempt.getHostName().getValue());
}
for(LoggedTask reduceTask : job.getReduceTasks()) {
LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
.get(reduceTask.getAttempts().size() - 1);
nodeSet.add(taskAttempt.getHostName().getValue());
}
}
} finally {
reader.close();
}
return nodeSet;
}
/**
* Parse an sls trace file and return the set of host names it references.
*/
public static Set<String> parseNodesFromSLSTrace(String jobTrace)
throws IOException {
Set<String> nodeSet = new HashSet<String>();
JsonFactory jsonF = new JsonFactory();
ObjectMapper mapper = new ObjectMapper();
Reader input = new FileReader(jobTrace);
try {
Iterator<Map> i = mapper.readValues(
jsonF.createJsonParser(input), Map.class);
while (i.hasNext()) {
Map jsonE = i.next();
List tasks = (List) jsonE.get("job.tasks");
for (Object o : tasks) {
Map jsonTask = (Map) o;
String hostname = jsonTask.get("container.host").toString();
nodeSet.add(hostname);
}
}
} finally {
input.close();
}
return nodeSet;
}
/**
* Parse the input node file and return the set of node names (rack/node).
*/
public static Set<String> parseNodesFromNodeFile(String nodeFile)
throws IOException {
Set<String> nodeSet = new HashSet<String>();
JsonFactory jsonF = new JsonFactory();
ObjectMapper mapper = new ObjectMapper();
Reader input = new FileReader(nodeFile);
try {
Iterator<Map> i = mapper.readValues(
jsonF.createJsonParser(input), Map.class);
while (i.hasNext()) {
Map jsonE = i.next();
String rack = "/" + jsonE.get("rack");
List tasks = (List) jsonE.get("nodes");
for (Object o : tasks) {
Map jsonNode = (Map) o;
nodeSet.add(rack + "/" + jsonNode.get("node"));
}
}
} finally {
input.close();
}
return nodeSet;
}
}

View File

@ -0,0 +1,527 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.web;
import java.io.File;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
.SchedulerEventType;
import org.mortbay.jetty.Handler;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.handler.AbstractHandler;
import org.mortbay.jetty.Request;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.scheduler.SchedulerMetrics;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import org.mortbay.jetty.handler.ResourceHandler;
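/**
* Embedded Jetty server for real-time tracking: serves the simulation info
* index page, the d3.js charts at /simulate and /track, and the json
* endpoints /simulateMetrics and /trackMetrics that feed them.
*/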
public class SLSWebApp extends HttpServlet {
private static final long serialVersionUID = 1905162041950251407L;
private transient Server server;
private transient ResourceSchedulerWrapper wrapper;
private transient MetricRegistry metrics;
private transient SchedulerMetrics schedulerMetrics;
// metrics objects
private transient Gauge jvmFreeMemoryGauge;
private transient Gauge jvmMaxMemoryGauge;
private transient Gauge jvmTotalMemoryGauge;
private transient Gauge numRunningAppsGauge;
private transient Gauge numRunningContainersGauge;
private transient Gauge allocatedMemoryGauge;
private transient Gauge allocatedVCoresGauge;
private transient Gauge availableMemoryGauge;
private transient Gauge availableVCoresGauge;
private transient Histogram allocateTimecostHistogram;
private transient Histogram handleTimecostHistogram;
private Map<SchedulerEventType, Histogram> handleOperTimecostHistogramMap;
private Map<String, Counter> queueAllocatedMemoryCounterMap;
private Map<String, Counter> queueAllocatedVCoresCounterMap;
private int port;
private int ajaxUpdateTimeMS = 1000;
// html page templates
private String simulateInfoTemplate;
private String simulateTemplate;
private String trackTemplate;
{
// load templates
ClassLoader cl = Thread.currentThread().getContextClassLoader();
try {
simulateInfoTemplate = FileUtils.readFileToString(new File(
cl.getResource("simulate.info.html.template").getFile()));
simulateTemplate = FileUtils.readFileToString(new File(
cl.getResource("simulate.html.template").getFile()));
trackTemplate = FileUtils.readFileToString(new File(
cl.getResource("track.html.template").getFile()));
} catch (IOException e) {
e.printStackTrace();
}
}
public SLSWebApp(ResourceSchedulerWrapper wrapper, int metricsAddressPort) {
this.wrapper = wrapper;
metrics = wrapper.getMetrics();
handleOperTimecostHistogramMap =
new HashMap<SchedulerEventType, Histogram>();
queueAllocatedMemoryCounterMap = new HashMap<String, Counter>();
queueAllocatedVCoresCounterMap = new HashMap<String, Counter>();
schedulerMetrics = wrapper.getSchedulerMetrics();
port = metricsAddressPort;
}
public void start() throws Exception {
// static files
final ResourceHandler staticHandler = new ResourceHandler();
staticHandler.setResourceBase("html");
Handler handler = new AbstractHandler() {
@Override
public void handle(String target, HttpServletRequest request,
HttpServletResponse response, int dispatch) {
try{
// time unit: default is seconds, i.e. divide milliseconds by 1000
int timeunit = 1000;
String timeunitLabel = "second";
if (request.getParameter("u")!= null &&
request.getParameter("u").equalsIgnoreCase("m")) {
timeunit = 1000 * 60;
timeunitLabel = "minute";
}
// http request
if (target.equals("/")) {
printPageIndex(request, response);
} else if (target.equals("/simulate")) {
printPageSimulate(request, response, timeunit, timeunitLabel);
} else if (target.equals("/track")) {
printPageTrack(request, response, timeunit, timeunitLabel);
} else
// js/css request
if (target.startsWith("/js") || target.startsWith("/css")) {
response.setCharacterEncoding("utf-8");
staticHandler.handle(target, request, response, dispatch);
} else
// json request
if (target.equals("/simulateMetrics")) {
printJsonMetrics(request, response);
} else if (target.equals("/trackMetrics")) {
printJsonTrack(request, response);
}
} catch (Exception e) {
e.printStackTrace();
}
}
};
server = new Server(port);
server.setHandler(handler);
server.start();
}
public void stop() throws Exception {
if (server != null) {
server.stop();
}
}
/**
* Index html page showing the simulation information.
* Served at path "/".
* @param request http request
* @param response http response
* @throws java.io.IOException
*/
private void printPageIndex(HttpServletRequest request,
HttpServletResponse response) throws IOException {
response.setContentType("text/html");
response.setStatus(HttpServletResponse.SC_OK);
String simulateInfo;
if (SLSRunner.simulateInfoMap.isEmpty()) {
String empty = "<tr><td colspan='2' align='center'>" +
"No information available</td></tr>";
simulateInfo = MessageFormat.format(simulateInfoTemplate, empty);
} else {
StringBuilder info = new StringBuilder();
for (Map.Entry<String, Object> entry :
SLSRunner.simulateInfoMap.entrySet()) {
info.append("<tr>");
info.append("<td class='td1'>").append(entry.getKey()).append("</td>");
info.append("<td class='td2'>").append(entry.getValue())
.append("</td>");
info.append("</tr>");
}
simulateInfo =
MessageFormat.format(simulateInfoTemplate, info.toString());
}
response.getWriter().println(simulateInfo);
((Request) request).setHandled(true);
}
/**
* Simulate html page showing several real-time charts rendered with d3.js.
* Served at path "/simulate".
* @param request http request
* @param response http response
* @throws java.io.IOException
*/
private void printPageSimulate(HttpServletRequest request,
HttpServletResponse response, int timeunit,
String timeunitLabel)
throws IOException {
response.setContentType("text/html");
response.setStatus(HttpServletResponse.SC_OK);
// queues {0}
Set<String> queues = wrapper.getQueueSet();
StringBuilder queueInfo = new StringBuilder();
int i = 0;
for (String queue : queues) {
queueInfo.append("legends[4][").append(i).append("] = 'queue.")
.append(queue).append(".allocated.memory';");
queueInfo.append("legends[5][").append(i).append("] = 'queue.")
.append(queue).append(".allocated.vcores';");
i ++;
}
// time unit label {1}
// time unit {2}
// ajax update time interval {3}
String simulateInfo = MessageFormat.format(simulateTemplate,
queueInfo.toString(), timeunitLabel, "" + timeunit,
"" + ajaxUpdateTimeMS);
response.getWriter().println(simulateInfo);
((Request) request).setHandled(true);
}
/**
* html page for tracking one queue or job
* use d3.js
* @param request http request
* @param response http response
* @throws java.io.IOException
*/
private void printPageTrack(HttpServletRequest request,
HttpServletResponse response, int timeunit,
String timeunitLabel)
throws IOException {
response.setContentType("text/html");
response.setStatus(HttpServletResponse.SC_OK);
// tracked queues {0}
StringBuilder trackedQueueInfo = new StringBuilder();
Set<String> trackedQueues = wrapper.getQueueSet();
for(String queue : trackedQueues) {
trackedQueueInfo.append("<option value='Queue ").append(queue)
.append("'>").append(queue).append("</option>");
}
// tracked apps {1}
StringBuilder trackedAppInfo = new StringBuilder();
Set<String> trackedApps = wrapper.getTrackedAppSet();
for(String job : trackedApps) {
trackedAppInfo.append("<option value='Job ").append(job)
.append("'>").append(job).append("</option>");
}
// timeunit label {2}
// time unit {3}
// ajax update time {4}
// final html
String trackInfo = MessageFormat.format(trackTemplate,
trackedQueueInfo.toString(), trackedAppInfo.toString(),
timeunitLabel, "" + timeunit, "" + ajaxUpdateTimeMS);
response.getWriter().println(trackInfo);
((Request) request).setHandled(true);
}
/**
* package metrics information in a JSON object and return it
* @param request http request
* @param response http response
* @throws java.io.IOException
*/
private void printJsonMetrics(HttpServletRequest request,
HttpServletResponse response)
throws IOException {
response.setContentType("text/json");
response.setStatus(HttpServletResponse.SC_OK);
response.getWriter().println(generateRealTimeTrackingMetrics());
((Request) request).setHandled(true);
}
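/**
 * Builds one JSON sample with the current metric values; the key set matches
 * the code below, while the sample values here are illustrative only:
 * {"time":1380000000000,"jvm.free.memory":0.5,"jvm.max.memory":0.9,
 * "jvm.total.memory":0.7,"running.applications":2,"running.containers":5,
 * "cluster.allocated.memory":4,"cluster.allocated.vcores":4,
 * "cluster.available.memory":6,"cluster.available.vcores":6,
 * "queue.<name>.allocated.memory":2,"queue.<name>.allocated.vcores":2,
 * "scheduler.allocate.timecost":0.2,"scheduler.handle.timecost":0.1,
 * "scheduler.handle-<EVENT>.timecost":0.1, ...}
 */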
public String generateRealTimeTrackingMetrics() {
// JVM
double jvmFreeMemoryGB, jvmMaxMemoryGB, jvmTotalMemoryGB;
if (jvmFreeMemoryGauge == null &&
metrics.getGauges().containsKey("variable.jvm.free.memory")) {
jvmFreeMemoryGauge = metrics.getGauges().get("variable.jvm.free.memory");
}
if (jvmMaxMemoryGauge == null &&
metrics.getGauges().containsKey("variable.jvm.max.memory")) {
jvmMaxMemoryGauge = metrics.getGauges().get("variable.jvm.max.memory");
}
if (jvmTotalMemoryGauge == null &&
metrics.getGauges().containsKey("variable.jvm.total.memory")) {
jvmTotalMemoryGauge = metrics.getGauges()
.get("variable.jvm.total.memory");
}
jvmFreeMemoryGB = jvmFreeMemoryGauge == null ? 0 :
Double.parseDouble(jvmFreeMemoryGauge.getValue().toString())
/1024/1024/1024;
jvmMaxMemoryGB = jvmMaxMemoryGauge == null ? 0 :
Double.parseDouble(jvmMaxMemoryGauge.getValue().toString())
/1024/1024/1024;
jvmTotalMemoryGB = jvmTotalMemoryGauge == null ? 0 :
Double.parseDouble(jvmTotalMemoryGauge.getValue().toString())
/1024/1024/1024;
// number of running applications/containers
String numRunningApps, numRunningContainers;
if (numRunningAppsGauge == null &&
metrics.getGauges().containsKey("variable.running.application")) {
numRunningAppsGauge =
metrics.getGauges().get("variable.running.application");
}
if (numRunningContainersGauge == null &&
metrics.getGauges().containsKey("variable.running.container")) {
numRunningContainersGauge =
metrics.getGauges().get("variable.running.container");
}
numRunningApps = numRunningAppsGauge == null ? "0" :
numRunningAppsGauge.getValue().toString();
numRunningContainers = numRunningContainersGauge == null ? "0" :
numRunningContainersGauge.getValue().toString();
// cluster available/allocate resource
double allocatedMemoryGB, allocatedVCores,
availableMemoryGB, availableVCores;
if (allocatedMemoryGauge == null &&
metrics.getGauges()
.containsKey("variable.cluster.allocated.memory")) {
allocatedMemoryGauge = metrics.getGauges()
.get("variable.cluster.allocated.memory");
}
if (allocatedVCoresGauge == null &&
metrics.getGauges()
.containsKey("variable.cluster.allocated.vcores")) {
allocatedVCoresGauge = metrics.getGauges()
.get("variable.cluster.allocated.vcores");
}
if (availableMemoryGauge == null &&
metrics.getGauges()
.containsKey("variable.cluster.available.memory")) {
availableMemoryGauge = metrics.getGauges()
.get("variable.cluster.available.memory");
}
if (availableVCoresGauge == null &&
metrics.getGauges()
.containsKey("variable.cluster.available.vcores")) {
availableVCoresGauge = metrics.getGauges()
.get("variable.cluster.available.vcores");
}
allocatedMemoryGB = allocatedMemoryGauge == null ? 0 :
Double.parseDouble(allocatedMemoryGauge.getValue().toString())/1024;
allocatedVCores = allocatedVCoresGauge == null ? 0 :
Double.parseDouble(allocatedVCoresGauge.getValue().toString());
availableMemoryGB = availableMemoryGauge == null ? 0 :
Double.parseDouble(availableMemoryGauge.getValue().toString())/1024;
availableVCores = availableVCoresGauge == null ? 0 :
Double.parseDouble(availableVCoresGauge.getValue().toString());
// scheduler operation
double allocateTimecost, handleTimecost;
if (allocateTimecostHistogram == null &&
metrics.getHistograms().containsKey(
"sampler.scheduler.operation.allocate.timecost")) {
allocateTimecostHistogram = metrics.getHistograms()
.get("sampler.scheduler.operation.allocate.timecost");
}
if (handleTimecostHistogram == null &&
metrics.getHistograms().containsKey(
"sampler.scheduler.operation.handle.timecost")) {
handleTimecostHistogram = metrics.getHistograms()
.get("sampler.scheduler.operation.handle.timecost");
}
allocateTimecost = allocateTimecostHistogram == null ? 0.0 :
allocateTimecostHistogram.getSnapshot().getMean()/1000000;
handleTimecost = handleTimecostHistogram == null ? 0.0 :
handleTimecostHistogram.getSnapshot().getMean()/1000000;
// various handle operation
Map<SchedulerEventType, Double> handleOperTimecostMap =
new HashMap<SchedulerEventType, Double>();
for (SchedulerEventType e : SchedulerEventType.values()) {
String key = "sampler.scheduler.operation.handle." + e + ".timecost";
if (! handleOperTimecostHistogramMap.containsKey(e) &&
metrics.getHistograms().containsKey(key)) {
handleOperTimecostHistogramMap.put(e, metrics.getHistograms().get(key));
}
double timecost = handleOperTimecostHistogramMap.containsKey(e) ?
handleOperTimecostHistogramMap.get(e).getSnapshot().getMean()/1000000
: 0;
handleOperTimecostMap.put(e, timecost);
}
// allocated resource for each queue
Map<String, Double> queueAllocatedMemoryMap = new HashMap<String, Double>();
Map<String, Long> queueAllocatedVCoresMap = new HashMap<String, Long>();
for (String queue : wrapper.getQueueSet()) {
// memory
String key = "counter.queue." + queue + ".allocated.memory";
if (! queueAllocatedMemoryCounterMap.containsKey(queue) &&
metrics.getCounters().containsKey(key)) {
queueAllocatedMemoryCounterMap.put(queue,
metrics.getCounters().get(key));
}
double queueAllocatedMemoryGB =
queueAllocatedMemoryCounterMap.containsKey(queue) ?
queueAllocatedMemoryCounterMap.get(queue).getCount()/1024.0
: 0;
queueAllocatedMemoryMap.put(queue, queueAllocatedMemoryGB);
// vCores
key = "counter.queue." + queue + ".allocated.cores";
if (! queueAllocatedVCoresCounterMap.containsKey(queue) &&
metrics.getCounters().containsKey(key)) {
queueAllocatedVCoresCounterMap.put(
queue, metrics.getCounters().get(key));
}
long queueAllocatedVCores =
queueAllocatedVCoresCounterMap.containsKey(queue) ?
queueAllocatedVCoresCounterMap.get(queue).getCount(): 0;
queueAllocatedVCoresMap.put(queue, queueAllocatedVCores);
}
// package results
StringBuilder sb = new StringBuilder();
sb.append("{");
sb.append("\"time\":" ).append(System.currentTimeMillis())
.append(",\"jvm.free.memory\":").append(jvmFreeMemoryGB)
.append(",\"jvm.max.memory\":").append(jvmMaxMemoryGB)
.append(",\"jvm.total.memory\":").append(jvmTotalMemoryGB)
.append(",\"running.applications\":").append(numRunningApps)
.append(",\"running.containers\":").append(numRunningContainers)
.append(",\"cluster.allocated.memory\":").append(allocatedMemoryGB)
.append(",\"cluster.allocated.vcores\":").append(allocatedVCoresGB)
.append(",\"cluster.available.memory\":").append(availableMemoryGB)
.append(",\"cluster.available.vcores\":").append(availableVCoresGB);
for (String queue : wrapper.getQueueSet()) {
sb.append(",\"queue.").append(queue).append(".allocated.memory\":")
.append(queueAllocatedMemoryMap.get(queue));
sb.append(",\"queue.").append(queue).append(".allocated.vcores\":")
.append(queueAllocatedVCoresMap.get(queue));
}
// scheduler allocate & handle
sb.append(",\"scheduler.allocate.timecost\":").append(allocateTimecost);
sb.append(",\"scheduler.handle.timecost\":").append(handleTimecost);
for (SchedulerEventType e : SchedulerEventType.values()) {
sb.append(",\"scheduler.handle-").append(e).append(".timecost\":")
.append(handleOperTimecostMap.get(e));
}
sb.append("}");
return sb.toString();
}
/**
* package metrics information for one tracked queue/app
* only supports the FairScheduler currently
* @throws java.io.IOException
*/
private void printJsonTrack(HttpServletRequest request,
HttpServletResponse response) throws IOException {
response.setContentType("text/json");
response.setStatus(HttpServletResponse.SC_OK);
StringBuilder sb = new StringBuilder();
if(schedulerMetrics instanceof FairSchedulerMetrics) {
String para = request.getParameter("t");
if (para.startsWith("Job ")) {
String appId = para.substring("Job ".length());
sb.append("{");
sb.append("\"time\": ").append(System.currentTimeMillis()).append(",");
sb.append("\"appId\": \"").append(appId).append("\"");
for(String metric : this.schedulerMetrics.getAppTrackedMetrics()) {
String key = "variable.app." + appId + "." + metric;
sb.append(",\"").append(metric).append("\": ");
if (metrics.getGauges().containsKey(key)) {
double memoryGB =
Double.parseDouble(
metrics.getGauges().get(key).getValue().toString())
/ 1024;
sb.append(memoryGB);
} else {
sb.append(-1);
}
}
sb.append("}");
} else if(para.startsWith("Queue ")) {
String queueName = para.substring("Queue ".length());
sb.append("{");
sb.append("\"time\": ").append(System.currentTimeMillis()).append(",");
sb.append("\"queueName\": \"").append(queueName).append("\"");
for(String metric : this.schedulerMetrics.getQueueTrackedMetrics()) {
String key = "variable.queue." + queueName + "." + metric;
sb.append(",\"").append(metric).append("\": ");
if (metrics.getGauges().containsKey(key)) {
double memoryGB =
Double.parseDouble(
metrics.getGauges().get(key).getValue().toString())
/ 1024;
sb.append(memoryGB);
} else {
sb.append(-1);
}
}
sb.append("}");
}
}
String output = sb.toString();
if (output.isEmpty()) {
output = "[]";
}
response.getWriter().println(output);
// package result
((Request) request).setHandled(true);
}
}

View File

@ -0,0 +1,67 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!--
This file contains queue allocations for the Capacity Scheduler.
Its format is explained in the Capacity Scheduler documentation at
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html.
The documentation also includes a sample config file.
-->
<configuration>
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>sls_queue_1,sls_queue_2,sls_queue_3</value>
<description>The queues at this level (root is the root queue).
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.sls_queue_1.capacity</name>
<value>25</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.sls_queue_1.maximum-capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.sls_queue_2.capacity</name>
<value>25</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.sls_queue_2.maximum-capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.sls_queue_3.capacity</name>
<value>50</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.sls_queue_3.maximum-capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.maximum-applications</name>
<value>1000</value>
<description>Maximum number of applications in the system which
can be concurrently active, both running and pending.</description>
</property>
</configuration>

View File

@ -0,0 +1,50 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!--
This file contains pool and user allocations for the Fair Scheduler.
Its format is explained in the Fair Scheduler documentation at
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html
The documentation also includes a sample config file.
-->
<allocations>
<user name="jenkins">
<!-- Limit on running jobs for the user across all pools. If more
jobs than this are submitted, only the first <maxRunningJobs> will
be scheduled at any given time. Defaults to infinity or the
userMaxJobsDefault value set below. -->
<maxRunningJobs>1000</maxRunningJobs>
</user>
<userMaxAppsDefault>1000</userMaxAppsDefault>
<queue name="sls_queue_1">
<minResources>1024 mb, 1 vcores</minResources>
<schedulingMode>fair</schedulingMode>
<weight>0.25</weight>
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
</queue>
<queue name="sls_queue_2">
<minResources>1024 mb, 1 vcores</minResources>
<schedulingMode>fair</schedulingMode>
<weight>0.25</weight>
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
</queue>
<queue name="sls_queue_3">
<minResources>1024 mb, 1 vcores</minResources>
<weight>0.5</weight>
<schedulingMode>fair</schedulingMode>
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
</queue>
</allocations>

View File

@ -0,0 +1,47 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!--
This file contains configuration properties for the Fair Scheduler.
Its format is explained in the Fair Scheduler documentation at
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
The documentation also includes a sample config file.
-->
<configuration>
<property>
<description>Absolute path to allocation file. An allocation file is an XML
manifest describing queues and their properties, in addition to certain
policy defaults. This file must be in XML format as described in
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
</description>
<name>yarn.scheduler.fair.allocation.file</name>
<value>fair-scheduler-allocation.xml</value>
</property>
<property>
<description>Whether to use preemption. Note that preemption is experimental
in the current version. Defaults to false.</description>
<name>yarn.scheduler.fair.preemption</name>
<value>true</value>
</property>
<property>
<description>Whether to allow multiple container assignments in one
heartbeat. Defaults to false.</description>
<name>yarn.scheduler.fair.assignmultiple</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,19 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
log4j.appender.test=org.apache.log4j.ConsoleAppender
log4j.appender.test.Target=System.out
log4j.appender.test.layout=org.apache.log4j.PatternLayout
log4j.appender.test.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
log4j.logger=NONE, test

View File

@ -0,0 +1,81 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- SLSRunner configuration -->
<property>
<name>yarn.sls.runner.pool.size</name>
<value>100</value>
</property>
<!-- Nodes configuration -->
<property>
<name>yarn.sls.nm.memory.mb</name>
<value>10240</value>
</property>
<property>
<name>yarn.sls.nm.vcores</name>
<value>10</value>
</property>
<property>
<name>yarn.sls.nm.heartbeat.interval.ms</name>
<value>1000</value>
</property>
<!-- Apps configuration -->
<property>
<name>yarn.sls.am.heartbeat.interval.ms</name>
<value>1000</value>
</property>
<property>
<name>yarn.sls.am.type.mapreduce</name>
<value>org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator</value>
</property>
<!-- Containers configuration -->
<property>
<name>yarn.sls.container.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.sls.container.vcores</name>
<value>1</value>
</property>
<!-- metrics -->
<property>
<name>yarn.sls.metrics.switch</name>
<value>ON</value>
</property>
<property>
<name>yarn.sls.metrics.web.address.port</name>
<value>10001</value>
</property>
<property>
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</name>
<value>org.apache.hadoop.yarn.sls.scheduler.FifoSchedulerMetrics</value>
</property>
<property>
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</name>
<value>org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics</value>
</property>
<property>
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</name>
<value>org.apache.hadoop.yarn.sls.scheduler.CapacitySchedulerMetrics</value>
</property>
</configuration>

View File

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
<!-- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</value> -->
<!-- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value> -->
</property>
<property>
<description>The address of the RM web application.</description>
<name>yarn.resourcemanager.webapp.address</name>
<value>localhost:18088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:18031</value>
</property>
<property>
<description>The address of the scheduler interface.</description>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:18030</value>
</property>
<property>
<description>The address of the applications manager interface in the RM.</description>
<name>yarn.resourcemanager.address</name>
<value>localhost:18032</value>
</property>
<property>
<description>The address of the RM admin interface.</description>
<name>yarn.resourcemanager.admin.address</name>
<value>localhost:18033</value>
</property>
<property>
<description>Set to false, to avoid ip check</description>
<name>hadoop.security.token.service.use_ip</name>
<value>false</value>
</property>
</configuration>

View File

@ -0,0 +1,440 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.
---
Yarn Scheduler Load Simulator (SLS)
---
---
${maven.build.timestamp}
Yarn Scheduler Load Simulator (SLS)
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0}
* Overview
** Overview
The Yarn scheduler is a fertile area of interest with different
implementations, e.g., Fifo, Capacity and Fair schedulers. Meanwhile, several
optimizations are also made to improve scheduler performance for different
scenarios and workloads. Each scheduler algorithm has its own set of features,
and drives scheduling decisions by many factors, such as fairness, capacity
guarantee, resource availability, etc. It is very important to evaluate a
scheduler algorithm well before we deploy it in a production cluster.
Unfortunately, it is currently non-trivial to evaluate a scheduler algorithm.
Evaluating in a real cluster is always time- and cost-consuming, and it is
also very hard to find a large-enough cluster. Hence, a simulator which can
predict how well a scheduler algorithm works for some specific workload
would be quite useful.
The Yarn Scheduler Load Simulator (SLS) is such a tool, which can simulate
large-scale Yarn clusters and application loads on a single machine. This
simulator would be invaluable in furthering Yarn by providing a tool for
researchers and developers to prototype new scheduler features and predict
their behavior and performance with a reasonable amount of confidence,
thereby aiding rapid innovation.
The simulator will exercise the real Yarn <<<ResourceManager>>>, removing the
network factor by simulating <<<NodeManagers>>> and <<<ApplicationMasters>>>
via handling and dispatching <<<NM>>>/<<<AMs>>> heartbeat events from within
the same JVM. To keep track of scheduler behavior and performance, a
scheduler wrapper will wrap the real scheduler.
The size of the cluster and the application load can be loaded from
configuration files, which are generated from job history files directly by
adopting {{{https://hadoop.apache.org/docs/stable/rumen.html}Apache Rumen}}.
The simulator will produce real time metrics while executing, including:
* Resource usages for the whole cluster and each queue, which can be utilized
to configure the cluster and each queue's capacity.
* The detailed application execution trace (recorded in relation to simulated
time), which can be analyzed to understand/validate the scheduler behavior
(individual jobs' turnaround time, throughput, fairness, capacity guarantee,
etc.).
* Several key metrics of scheduler algorithm, such as time cost of each
scheduler operation (allocate, handle, etc.), which can be utilized by Hadoop
developers to find code hotspots and scalability limits.
** Goals
* Exercise the scheduler at scale without a real cluster using real job
traces.
* Being able to simulate real workloads.
** Architecture
The following figure illustrates the implementation architecture of the
simulator.
[images/sls_arch.png] The architecture of the simulator
The simulator takes workload traces as input, and fetches the cluster and
applications information. For each NM and AM, the simulator builds a dedicated
simulator to simulate its running. All NM/AM simulators run in a thread pool.
The simulator reuses the Yarn Resource Manager, and builds a wrapper around
the scheduler. The Scheduler Wrapper can track the scheduler behaviors and
generate several logs, which are the outputs of the simulator and can be
further analyzed.
** Usecases
* Engineering
* Verify correctness of scheduler algorithm under load
* Cheap/practical way for finding code hotspots/critical-path.
* Validate the impact of changes and new features.
* Determine what drives the scheduler scalability limits.
[]
* QA
* Validate scheduler behavior for "large" clusters and several workload
profiles.
* Solutions/Sales.
* Sizing model for predefined/typical workloads.
* Cluster sizing tool using real customer data (job traces).
* Determine minimum SLAs under a particular workload.
* Usage
This section will show how to use the simulator. Here let <<<$HADOOP_ROOT>>>
represent the Hadoop install directory. If you build Hadoop yourself,
<<<$HADOOP_ROOT>>> is <<<hadoop-dist/target/hadoop-$VERSION>>>. The simulator
is located at <<<$HADOOP_ROOT/share/hadoop/tools/sls>>>. The folder <<<sls>>>
contains four directories: <<<bin>>>, <<<html>>>, <<<sample-conf>>>, and
<<<sample-data>>>.
* <<<bin>>>: contains running scripts for the simulator.
* <<<html>>>: contains several html/css/js files needed for real-time
tracking.
* <<<sample-conf>>>: specifies the simulator configurations.
* <<<sample-data>>>: provides an example rumen trace, which can be used to
generate inputs of the simulator.
[]
The following sections will describe how to use the simulator step by step.
Before starting, make sure that the command <<<hadoop>>> is included in your
<<<$PATH>>> environment variable.
** Step 1: Configure Hadoop and the simulator
Before we start, make sure Hadoop and the simulator are configured well.
All configuration files for Hadoop and the simulator should be placed in
directory <<<$HADOOP_ROOT/etc/hadoop>>>, where the <<<ResourceManager>>>
and Yarn scheduler load their configurations. Directory
<<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/>>> provides several
example configurations that can be used to start a demo.
For configuration of Hadoop and the Yarn scheduler, users can refer to Yarn's
website ({{{http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/}
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/}}).
For the simulator, it loads configuration information from file
<<<$HADOOP_ROOT/etc/hadoop/sls-runner.xml>>>.
Here we illustrate each configuration parameter in <<<sls-runner.xml>>>.
Note that <<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/sls-runner.xml>>>
contains all the default values for these configuration parameters.
* <<<yarn.sls.runner.pool.size>>>
The simulator uses a thread pool to simulate the <<<NM>>> and <<<AM>>> running,
and this parameter specifies the number of threads in the pool.
* <<<yarn.sls.nm.memory.mb>>>
The total memory for each <<<NMSimulator>>>.
* <<<yarn.sls.nm.vcores>>>
The total vCores for each <<<NMSimulator>>>.
* <<<yarn.sls.nm.heartbeat.interval.ms>>>
The heartbeat interval for each <<<NMSimulator>>>.
* <<<yarn.sls.am.heartbeat.interval.ms>>>
The heartbeat interval for each <<<AMSimulator>>>.
* <<<yarn.sls.am.type.mapreduce>>>
The <<<AMSimulator>>> implementation for MapReduce-like applications.
Users can specify implementations for other types of applications.
* <<<yarn.sls.container.memory.mb>>>
The memory required for each container simulator.
* <<<yarn.sls.container.vcores>>>
The vCores required for each container simulator.
* <<<yarn.sls.metrics.switch>>>
The simulator introduces {{{http://metrics.codahale.com/}Metrics}} to measure
the behaviors of critical components and operations. This field specifies
whether the Metrics are turned on (<<<ON>>>) or off (<<<OFF>>>).
* <<<yarn.sls.metrics.web.address.port>>>
The port used by the simulator to provide real-time tracking. The default value is
10001.
* <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler>>>
The implementation of scheduler metrics of Fifo Scheduler.
* <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler>>>
The implementation of scheduler metrics of Fair Scheduler.
* <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler>>>
The implementation of scheduler metrics of Capacity Scheduler.
** Step 2: Run the simulator
The simulator supports two types of input files: the rumen traces and its own
input traces. The script to start the simulator is <<<slsrun.sh>>>.
+----+
$ $HADOOP_ROOT/share/hadoop/tools/sls/bin/slsrun.sh
--input-rumen|--input-sls=<TRACE_FILE1,TRACE_FILE2,...>
--output-dir=<SLS_SIMULATION_OUTPUT_DIRECTORY> [--nodes=<SLS_NODES_FILE>]
[--track-jobs=<JOBID1,JOBID2,...>] [--print-simulation]
+----+
* <<<--input-rumen>>>: The input rumen trace files. Users can input multiple
files, separated by comma. One example trace is provided in
<<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-data/2jobs2min-rumen-jh.json>>>.
* <<<--input-sls>>>: The simulator's own input file format. The simulator also
provides a tool to convert rumen traces to sls traces (<<<rumen2sls.sh>>>).
Refer to appendix for an example of sls input json file.
* <<<--output-dir>>>: The output directory for generated running logs and
metrics.
* <<<--nodes>>>: The cluster topology. By default, the simulator will use the
topology fetched from the input json files. Users can specify a new topology
by setting this parameter. Refer to the appendix for the topology file format.
* <<<--track-jobs>>>: The particular jobs that will be tracked during
the simulator run, separated by comma.
* <<<--print-simulation>>>: Whether to print out simulation information
before the simulator runs, including number of nodes, applications, tasks,
and information for each application.
In comparison to the rumen format, the sls format is much simpler and users
can easily generate various workloads. The simulator also provides a tool to
convert rumen traces to sls traces.
+----+
$ $HADOOP_ROOT/share/hadoop/tools/sls/bin/rumen2sls.sh
--rumen-file=<RUMEN_FILE>
--output-dir=<SLS_OUTPUT_DIRECTORY>
[--output-prefix=<SLS_FILE_PREFIX>]
+----+
* <<<--rumen-file>>>: The rumen format file. One example trace is provided
in directory <<<sample-data>>>.
* <<<--output-dir>>>: The output directory of generated simulation traces.
Two files will be generated in this output directory: one trace
file containing all job and task information, and another file showing the
topology information.
* <<<--output-prefix>>>: The prefix of the generated files. The default value
is "sls", and the two generated files are <<<sls-jobs.json>>> and
<<<sls-nodes.json>>>.
* Metrics
The Yarn Scheduler Load Simulator has integrated
{{{http://metrics.codahale.com/}Metrics}} to measure the behaviors of critical
components and operations, including running applications and containers,
cluster available resources, scheduler operation timecost, etc. If the
switch <<<yarn.sls.metrics.switch>>> is set to <<<ON>>>, <<<Metrics>>>
will run and output its logs in the <<<--output-dir>>> directory specified
by users. Users can track this information during the simulator run, and can also
analyze these logs after running to evaluate the scheduler performance.
** Real-time Tracking
The simulator provides an interface for tracking its running in real-time.
Users can go to <<<http://host:port/simulate>>> to track the whole run,
and <<<http://host:port/track>>> to track a particular job or queue. Here
the <<<host>>> is the machine where we run the simulator, and <<<port>>> is
the value configured by <<<yarn.sls.metrics.web.address.port>>> (default value
is 10001).
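Besides viewing in a browser, the same metrics can be fetched programmatically
from the <<<simulateMetrics>>> endpoint. Below is a minimal Java sketch (not
part of the simulator; the class name and the localhost address are
illustrative assumptions) that fetches one metrics sample:
+----+
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
public class PollSimulateMetrics {
  public static void main(String[] args) throws Exception {
    // assumes the simulator runs locally with the default metrics port
    URL url = new URL("http://localhost:10001/simulateMetrics");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    BufferedReader in = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), "UTF-8"));
    String line;
    while ((line = in.readLine()) != null) {
      System.out.println(line); // one JSON object with all tracked metrics
    }
    in.close();
  }
}
+----+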
Here we'll illustrate each chart shown in the webpage.
The first figure describes the number of running applications and containers.
[images/sls_running_apps_containers.png] Number of running applications/containers
The second figure describes the allocated and available resources (memory)
in the cluster.
[images/sls_cluster_memory.png] Cluster Resource (Memory)
The third figure describes the allocated resource for each queue. Here we have
three queues: sls_queue_1, sls_queue_2, and sls_queue_3. The first two queues
are configured with 25% share, while the last one has 50% share.
[images/sls_queue_allocated_memory.png] Queue Allocated Resource (Memory)
The fourth figure describes the timecost for each scheduler operation.
[images/sls_scheduler_operation_timecost.png] Scheduler Operation Timecost
Finally, we measure the memory used by the simulator.
[images/sls_JVM.png] JVM Memory
The simulator also provides an interface for tracking some particular
jobs and queues. Go to <<<http://<Host>:<Port>/track>>> to get this
information.
Here the first figure illustrates the resource usage information for queue
<<<sls_queue_3>>>.
[images/sls_track_queue.png] Tracking Queue <<<sls_queue_3>>>
The second figure illustrates the resource usage information for job
<<<job_1369942127770_0653>>>.
[images/sls_track_job.png] Tracking Job <<<job_1369942127770_0653>>>
** Offline Analysis
After the simulator finishes, all logs are saved in the output directory
specified by <<<--output-dir>>> in
<<<$HADOOP_ROOT/share/hadoop/tools/sls/bin/slsrun.sh>>>.
* File <<<realtimetrack.json>>>: records all real-time tracking logs every 1
second.
* File <<<jobruntime.csv>>>: records all jobs' start and end times in the
simulator.
* Folder <<<metrics>>>: logs generated by the Metrics.
[]
Users can also reproduce those real-time tracking charts in offline mode.
Just upload the <<<realtimetrack.json>>> to
<<<$HADOOP_ROOT/share/hadoop/tools/sls/html/showSimulationTrace.html>>>.
Due to browser security restrictions, the files <<<realtimetrack.json>>> and
<<<showSimulationTrace.html>>> need to be put in the same directory.
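As a sketch of such offline processing (assuming, per the real-time tracker
described above, that <<<realtimetrack.json>>> holds one JSON sample per
line), the following standalone Java snippet counts the recorded samples:
+----+
import java.io.BufferedReader;
import java.io.FileReader;
public class CountTrackSamples {
  public static void main(String[] args) throws Exception {
    BufferedReader in = new BufferedReader(new FileReader("realtimetrack.json"));
    int samples = 0;
    String line;
    while ((line = in.readLine()) != null) {
      // every sample emitted by the simulator carries a "time" field
      if (line.contains("\"time\"")) {
        samples++;
      }
    }
    in.close();
    System.out.println("tracked samples: " + samples);
  }
}
+----+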
* Appendix
** Resources
{{{https://issues.apache.org/jira/browse/YARN-1021}YARN-1021}} is the main
JIRA that introduces Yarn Scheduler Load Simulator to Hadoop Yarn project.
** SLS JSON input file format
Here we provide an example format of the sls json file, which contains 2 jobs.
The first job has 3 map tasks and the second one has 2 map tasks.
+----+
{
"am.type" : "mapreduce",
"job.start.ms" : 0,
"job.end.ms" : 95375,
"job.queue.name" : "sls_queue_1",
"job.id" : "job_1",
"job.user" : "default",
"job.tasks" : [ {
"container.host" : "/default-rack/node1",
"container.start.ms" : 6664,
"container.end.ms" : 23707,
"container.priority" : 20,
"container.type" : "map"
}, {
"container.host" : "/default-rack/node3",
"container.start.ms" : 6665,
"container.end.ms" : 21593,
"container.priority" : 20,
"container.type" : "map"
}, {
"container.host" : "/default-rack/node2",
"container.start.ms" : 68770,
"container.end.ms" : 86613,
"container.priority" : 20,
"container.type" : "map"
} ]
}
{
"am.type" : "mapreduce",
"job.start.ms" : 105204,
"job.end.ms" : 197256,
"job.queue.name" : "sls_queue_2",
"job.id" : "job_2",
"job.user" : "default",
"job.tasks" : [ {
"container.host" : "/default-rack/node1",
"container.start.ms" : 111822,
"container.end.ms" : 133985,
"container.priority" : 20,
"container.type" : "map"
}, {
"container.host" : "/default-rack/node2",
"container.start.ms" : 111788,
"container.end.ms" : 131377,
"container.priority" : 20,
"container.type" : "map"
} ]
}
+----+
** Simulator input topology file format
Here is an example input topology file which has 3 nodes organized in 1 rack.
+----+
{
"rack" : "default-rack",
"nodes" : [ {
"node" : "node1"
}, {
"node" : "node2"
}, {
"node" : "node3"
}]
}
+----+
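For larger simulated clusters, such a topology file can be generated
programmatically. Below is a minimal Java sketch (the class name and node
naming scheme are illustrative assumptions) that writes the format above for
N nodes in one rack:
+----+
import java.io.PrintWriter;
public class GenTopology {
  public static void main(String[] args) throws Exception {
    int n = Integer.parseInt(args[0]); // number of nodes to generate
    PrintWriter out = new PrintWriter("sls-nodes.json", "UTF-8");
    out.println("{");
    out.println("  \"rack\" : \"default-rack\",");
    out.println("  \"nodes\" : [");
    for (int i = 1; i <= n; i++) {
      // comma after every entry except the last
      out.println("    { \"node\" : \"node" + i + "\" }" + (i < n ? "," : ""));
    }
    out.println("  ]");
    out.println("}");
    out.close();
  }
}
+----+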

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#banner {
height: 93px;
background: none;
}
#bannerLeft img {
margin-left: 30px;
margin-top: 10px;
}
#bannerRight img {
margin: 17px;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

View File

@ -0,0 +1,46 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls;
import org.apache.commons.io.FileUtils;
import org.junit.Test;
import java.io.File;
import java.util.UUID;
public class TestSLSRunner {
@Test
@SuppressWarnings("all")
public void testSimulatorRunning() throws Exception {
File tempDir = new File("target", UUID.randomUUID().toString());
// start the simulator
File slsOutputDir = new File(tempDir.getAbsolutePath() + "/slsoutput/");
String[] args = new String[]{
"-inputrumen", "src/main/data/2jobs2min-rumen-jh.json",
"-output", slsOutputDir.getAbsolutePath()};
SLSRunner.main(args);
// wait for 45 seconds before stop
Thread.sleep(45 * 1000);
SLSRunner.getRunner().stop();
}
}

View File

@ -0,0 +1,247 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
public class TestTaskRunner {
private TaskRunner runner;
@Before
public void setUp() {
runner = new TaskRunner();
runner.setQueueSize(5);
}
@After
public void cleanUp() {
runner.stop();
}
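// A task initialized with only a start time fires firstStep() exactly once;
// middleStep() and lastStep() must never be reached.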
public static class SingleTask extends TaskRunner.Task {
public static CountDownLatch latch = new CountDownLatch(1);
public static boolean first;
public SingleTask(long startTime) {
super.init(startTime);
}
@Override
public void firstStep() {
if (first) {
Assert.fail();
}
first = true;
latch.countDown();
}
@Override
public void middleStep() {
Assert.fail();
}
@Override
public void lastStep() {
Assert.fail();
}
}
@Test
public void testSingleTask() throws Exception {
runner.start();
runner.schedule(new SingleTask(0));
SingleTask.latch.await(5000, TimeUnit.MILLISECONDS);
Assert.assertTrue(SingleTask.first);
}
public static class DualTask extends TaskRunner.Task {
public static CountDownLatch latch = new CountDownLatch(1);
public static boolean first;
public static boolean last;
public DualTask(long startTime, long endTime, long interval) {
super.init(startTime, endTime, interval);
}
@Override
public void firstStep() {
if (first) {
Assert.fail();
}
first = true;
}
@Override
public void middleStep() {
Assert.fail();
}
@Override
public void lastStep() {
if (last) {
Assert.fail();
}
last = true;
latch.countDown();
}
}
@Test
public void testDualTask() throws Exception {
runner.start();
runner.schedule(new DualTask(0, 10, 10));
DualTask.latch.await(5000, TimeUnit.MILLISECONDS);
Assert.assertTrue(DualTask.first);
Assert.assertTrue(DualTask.last);
}
public static class TriTask extends TaskRunner.Task {
public static CountDownLatch latch = new CountDownLatch(1);
public static boolean first;
public static boolean middle;
public static boolean last;
public TriTask(long startTime, long endTime, long interval) {
super.init(startTime, endTime, interval);
}
@Override
public void firstStep() {
if (first) {
Assert.fail();
}
first = true;
}
@Override
public void middleStep() {
if (middle) {
Assert.fail();
}
middle = true;
}
@Override
public void lastStep() {
if (last) {
Assert.fail();
}
last = true;
latch.countDown();
}
}
@Test
public void testTriTask() throws Exception {
runner.start();
runner.schedule(new TriTask(0, 10, 5));
TriTask.latch.await(5000, TimeUnit.MILLISECONDS);
Assert.assertTrue(TriTask.first);
Assert.assertTrue(TriTask.middle);
Assert.assertTrue(TriTask.last);
}
public static class MultiTask extends TaskRunner.Task {
public static CountDownLatch latch = new CountDownLatch(1);
public static boolean first;
public static int middle;
public static boolean last;
public MultiTask(long startTime, long endTime, long interval) {
super.init(startTime, endTime, interval);
}
@Override
public void firstStep() {
if (first) {
Assert.fail();
}
first = true;
}
@Override
public void middleStep() {
middle++;
}
@Override
public void lastStep() {
if (last) {
Assert.fail();
}
last = true;
latch.countDown();
}
}
@Test
public void testMultiTask() throws Exception {
runner.start();
runner.schedule(new MultiTask(0, 20, 5));
MultiTask.latch.await(5000, TimeUnit.MILLISECONDS);
Assert.assertTrue(MultiTask.first);
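// with start=0, end=20, interval=5 the task fires at t=0,5,10,15,20:
// first at 0, last at 20, leaving (20 - 0) / 5 - 2 + 1 = 3 middle steps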
Assert.assertEquals((20 - 0) / 5 - 2 + 1, MultiTask.middle);
Assert.assertTrue(MultiTask.last);
}
public static class PreStartTask extends TaskRunner.Task {
public static CountDownLatch latch = new CountDownLatch(1);
public static boolean first;
public PreStartTask(long startTime) {
super.init(startTime);
}
@Override
public void firstStep() {
if (first) {
Assert.fail();
}
first = true;
latch.countDown();
}
@Override
public void middleStep() {
}
@Override
public void lastStep() {
}
}
@Test
public void testPreStartQueueing() throws Exception {
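// tasks scheduled before start() are queued; their start offset is
// measured from the moment the runner starts, not from schedule time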
runner.schedule(new PreStartTask(210));
Thread.sleep(210);
runner.start();
long startedAt = System.currentTimeMillis();
PreStartTask.latch.await(5000, TimeUnit.MILLISECONDS);
long runAt = System.currentTimeMillis();
Assert.assertTrue(PreStartTask.first);
Assert.assertTrue(runAt - startedAt >= 200);
}
}

View File

@ -0,0 +1,34 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.utils;
import junit.framework.Assert;
import org.junit.Test;
public class TestSLSUtils {
@Test
public void testGetRackHostname() {
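// SLSUtils.getRackHostName splits a topology path "/rack/host"
// into its rack and host components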
String str = "/rack1/node1";
String[] rackHostname = SLSUtils.getRackHostName(str);
Assert.assertEquals("rack1", rackHostname[0]);
Assert.assertEquals("node1", rackHostname[1]);
}
}

View File

@ -0,0 +1,121 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.web;
import junit.framework.Assert;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.junit.Test;
import java.io.File;
import java.text.MessageFormat;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
public class TestSLSWebApp {
@Test
public void testSimulateInfoPageHtmlTemplate() throws Exception {
String simulateInfoTemplate = FileUtils.readFileToString(
new File("src/main/html/simulate.info.html.template"));
SLSRunner.simulateInfoMap.put("Number of racks", 10);
SLSRunner.simulateInfoMap.put("Number of nodes", 100);
SLSRunner.simulateInfoMap.put("Node memory (MB)", 1024);
SLSRunner.simulateInfoMap.put("Node VCores", 1);
SLSRunner.simulateInfoMap.put("Number of applications", 100);
SLSRunner.simulateInfoMap.put("Number of tasks", 1000);
SLSRunner.simulateInfoMap.put("Average tasks per applicaion", 10);
SLSRunner.simulateInfoMap.put("Number of queues", 4);
SLSRunner.simulateInfoMap.put("Average applications per queue", 25);
SLSRunner.simulateInfoMap.put("Estimated simulate time (s)", 10000);
StringBuilder info = new StringBuilder();
for (Map.Entry<String, Object> entry :
SLSRunner.simulateInfoMap.entrySet()) {
info.append("<tr>");
info.append("<td class='td1'>" + entry.getKey() + "</td>");
info.append("<td class='td2'>" + entry.getValue() + "</td>");
info.append("</tr>");
}
String simulateInfo =
MessageFormat.format(simulateInfoTemplate, info.toString());
Assert.assertTrue("The simulate info html page should not be empty",
simulateInfo.length() > 0);
for (Map.Entry<String, Object> entry :
SLSRunner.simulateInfoMap.entrySet()) {
Assert.assertTrue("The simulate info html page should have information "
+ "of " + entry.getKey(), simulateInfo.contains("<td class='td1'>"
+ entry.getKey() + "</td><td class='td2'>"
+ entry.getValue() + "</td>"));
}
}
@Test
public void testSimulatePageHtmlTemplate() throws Exception {
String simulateTemplate = FileUtils.readFileToString(
new File("src/main/html/simulate.html.template"));
Set<String> queues = new HashSet<String>();
queues.add("sls_queue_1");
queues.add("sls_queue_2");
queues.add("sls_queue_3");
String queueInfo = "";
int i = 0;
for (String queue : queues) {
queueInfo += "legends[4][" + i + "] = 'queue" + queue
+ ".allocated.memory'";
queueInfo += "legends[5][" + i + "] = 'queue" + queue
+ ".allocated.vcores'";
i ++;
}
String simulateInfo = MessageFormat.format(simulateTemplate,
queueInfo, "s", 1000, 1000);
Assert.assertTrue("The simulate page html page should not be empty",
simulateInfo.length() > 0);
}
@Test
public void testTrackPageHtmlTemplate() throws Exception {
String trackTemplate = FileUtils.readFileToString(
new File("src/main/html/track.html.template"));
String trackedQueueInfo = "";
Set<String> trackedQueues = new HashSet<String>();
trackedQueues.add("sls_queue_1");
trackedQueues.add("sls_queue_2");
trackedQueues.add("sls_queue_3");
for(String queue : trackedQueues) {
trackedQueueInfo += "<option value='Queue " + queue + "'>"
+ queue + "</option>";
}
String trackedAppInfo = "";
Set<String> trackedApps = new HashSet<String>();
trackedApps.add("app_1");
trackedApps.add("app_2");
for(String job : trackedApps) {
trackedAppInfo += "<option value='Job " + job + "'>" + job + "</option>";
}
String trackInfo = MessageFormat.format(trackTemplate, trackedQueueInfo,
trackedAppInfo, "s", 1000, 1000);
Assert.assertTrue("The queue/app tracking html page should not be empty",
trackInfo.length() > 0);
}
}

View File

@ -0,0 +1,50 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!--
This file contains pool and user allocations for the Fair Scheduler.
Its format is explained in the Fair Scheduler documentation at
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html
The documentation also includes a sample config file.
-->
<allocations>
<user name="jenkins">
<!-- Limit on running jobs for the user across all pools. If more
jobs than this are submitted, only the first <maxRunningJobs> will
be scheduled at any given time. Defaults to infinity or the
userMaxJobsDefault value set below. -->
<maxRunningJobs>1000</maxRunningJobs>
</user>
<userMaxAppsDefault>1000</userMaxAppsDefault>
<queue name="sls_queue_1">
<minResources>1024 mb, 1 vcores</minResources>
<schedulingMode>fair</schedulingMode>
<weight>0.25</weight>
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
</queue>
<queue name="sls_queue_2">
<minResources>1024 mb, 1 vcores</minResources>
<schedulingMode>fair</schedulingMode>
<weight>0.25</weight>
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
</queue>
<queue name="sls_queue_3">
<minResources>1024 mb, 1 vcores</minResources>
<weight>0.5</weight>
<schedulingMode>fair</schedulingMode>
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
</queue>
</allocations>

View File

@ -0,0 +1,47 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!--
This file contains configuration properties for the Fair Scheduler.
Its format is explained in the Fair Scheduler documentation at
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
The documentation also includes a sample config file.
-->
<configuration>
<property>
<description>Absolute path to allocation file. An allocation file is an XML
manifest describing queues and their properties, in addition to certain
policy defaults. This file must be in XML format as described in
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
</description>
<name>yarn.scheduler.fair.allocation.file</name>
<value>src/test/resources/fair-scheduler-allocation.xml</value>
</property>
<property>
<description>Whether to use preemption. Note that preemption is experimental
in the current version. Defaults to false.</description>
<name>yarn.scheduler.fair.preemption</name>
<value>true</value>
</property>
<property>
<description>Whether to allow multiple container assignments in one
heartbeat. Defaults to false.</description>
<name>yarn.scheduler.fair.assignmultiple</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,278 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body '{' font: 20px sans-serif; '}'
.axis path,
.axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
.axis text '{' font-family: sans-serif; font-size: 20px; '}'
.line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
.legend '{'
padding: 5px;
font: 18px sans-serif;
background: yellow;
box-shadow: 2px 2px 1px #888;
'}'
.title '{' font: 24px sans-serif; '}'
.divborder '{'
border-width: 1px;
border-style: solid;
border-color: black;
margin-top:10px
'}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="span10 offset2"><br>
<input type="button" style="float: right;" value="Stop"
onClick="stop()" />
</div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area1"></div>
<div class="divborder span8" id="area2"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area3"></div>
<div class="divborder span8" id="area4"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area5"></div>
<div class="divborder span8" id="area6"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area7"></div>
<div class="span8" id="area8"></div>
</div><br/><br/>
<script>
var basetime = 0;
var running = 1;
var data = [];
var width, height;
var legends = [];
var titles = [];
var yLabels = [];
var isAreas = [];
var svgs = [];
var xs = [];
var ys = [];
var xAxiss = [];
var yAxiss = [];
var lineAreas = [];
var stacks = [];
// legends
legends[0] = [''running.applications'', ''running.containers''];
legends[1] = [''jvm.free.memory'', ''jvm.max.memory'', ''jvm.total.memory''];
legends[2] = [''cluster.allocated.memory'', ''cluster.available.memory''];
legends[3] = [''cluster.allocated.vcores'', ''cluster.available.vcores''];
legends[4] = [];
legends[5] = [];
{0}
legends[6] = [''scheduler.allocate.timecost'',
''scheduler.handle-NODE_ADDED.timecost'',
''scheduler.handle-NODE_REMOVED.timecost'',
''scheduler.handle-NODE_UPDATE.timecost'',
''scheduler.handle-APP_ADDED.timecost'',
''scheduler.handle-APP_REMOVED.timecost'',
''scheduler.handle-CONTAINER_EXPIRED.timecost''];
// title
titles[0] = ''Cluster running applications & containers'';
titles[1] = ''JVM memory'';
titles[2] = ''Cluster allocated & available memory'';
titles[3] = ''Cluster allocated & available vcores'';
titles[4] = ''Queue allocated memory'';
titles[5] = ''Queue allocated vcores'';
titles[6] = ''Scheduler allocate & handle operation timecost'';
// ylabels
yLabels[0] = ''Number'';
yLabels[1] = ''Memory (GB)'';
yLabels[2] = ''Memory (GB)'';
yLabels[3] = ''Number'';
yLabels[4] = ''Memory (GB)'';
yLabels[5] = ''Number'';
yLabels[6] = ''Timecost (ms)'';
// is area?
isAreas = [0, 0, 0, 0, 1, 1, 0];
// draw all charts
for (var i = 0; i < 7; i ++) '{'
drawEachChart(i);
'}'
// draw each chart
function drawEachChart(index) '{'
var margin = '{'top: 50, right: 250, bottom: 50, left: 70'}';
width = 750 - margin.left - margin.right;
height = 420 - margin.top - margin.bottom;
xs[index] = d3.scale.linear().range([0, width]);
ys[index] = d3.scale.linear().range([height, 0]);
xAxiss[index] = d3.svg.axis().scale(xs[index]).orient(''bottom'');
yAxiss[index] = d3.svg.axis().scale(ys[index]).orient(''left'');
if (isAreas[index] == 1)'{'
lineAreas[index] = d3.svg.area()
.x(function(d) '{' return xs[index](d.time); '}')
.y0(function(d) '{' return ys[index](d.y0); '}')
.y1(function(d) '{' return ys[index](d.y0 + d.y); '}');
stacks[index] = d3.layout.stack()
.values(function(d) '{' return d.values; '}');
'}' else '{'
lineAreas[index] = d3.svg.line()
.interpolate(''basis'')
.x(function(d) '{' return xs[index](d.time); '}')
.y(function(d) '{' return ys[index](d.value); '}');
'}'
svgs[index] = d3.select(''#area'' + (index + 1)).append(''svg'')
.attr(''width'', width + margin.left + margin.right)
.attr(''height'', height + margin.top + margin.bottom)
.append(''g'')
.attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
// x, y and title
svgs[index].append(''text'')
.attr(''transform'', ''translate('' + (width / 2) + '' ,'' +
(height + margin.bottom - 10 ) + '')'')
.style(''text-anchor'', ''middle'')
.text(''Time ({1})'');
svgs[index].append(''text'')
.attr(''transform'', ''rotate(-90)'')
.attr(''y'', 0 - margin.left)
.attr(''x'',0 - (height / 2))
.attr(''dy'', ''1em'')
.style(''text-anchor'', ''middle'')
.text(yLabels[index]);
svgs[index].append(''text'')
.attr(''x'', (width / 2))
.attr(''y'', 10 - (margin.top / 2))
.attr(''text-anchor'', ''middle'')
.text(titles[index]);
'}'
// request data
function requestData() '{'
$.ajax('{'url: ''simulateMetrics'',
success: function(point) '{'
// update data
if (basetime == 0) basetime = point.time;
point.time = (point.time - basetime) / {2};
data.push(point);
// clear old
for (var i = 0; i < 7; i ++) '{'
svgs[i].selectAll(''g.tick'').remove();
svgs[i].selectAll(''g'').remove();
var color = d3.scale.category10();
color.domain(d3.keys(data[0]).filter(function(key) '{'
return $.inArray(key, legends[i]) !== -1;
'}'));
var values;
if (isAreas[i] == 1) '{'
values = stacks[i](color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{'time: d.time, y: d[name]'}';
'}')
'}'
'}'));
xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
ys[i].domain([
d3.min(values, function(c) '{' return 0; '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
function(v) '{' return v.y + v.y0; '}'); '}')
]);
'}' else '{'
values = color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{'time: d.time, value: d[name]'}';
'}')
'}'
'}');
xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
ys[i].domain([
d3.min(values, function(c) '{' return 0; '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
function(v) '{' return v.value; '}'); '}')
]);
'}'
svgs[i].append(''g'').attr(''class'', ''x axis'')
.attr(''transform'', ''translate(0,'' + height + '')'').call(xAxiss[i]);
svgs[i].append(''g'').attr(''class'', ''y axis'').call(yAxiss[i]);
var value = svgs[i].selectAll(''.path'')
.data(values).enter().append(''g'').attr(''class'', ''line'');
if(isAreas[i] == 1) '{'
value.append(''path'').attr(''class'', ''area'')
.attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
.style(''fill'', function(d) '{'return color(d.name); '}');
'}' else '{'
value.append(''path'').attr(''class'', ''line'')
.attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
.style(''stroke'', function(d) '{'return color(d.name); '}');
'}'
// legend
var legend = svgs[i].append(''g'')
.attr(''class'', ''legend'')
.attr(''x'', width + 5)
.attr(''y'', 25)
.attr(''height'', 120)
.attr(''width'', 140);
legend.selectAll(''g'').data(legends[i])
.enter()
.append(''g'')
.each(function(d, i) '{'
var g = d3.select(this);
g.append(''rect'')
.attr(''x'', width + 5)
.attr(''y'', i*20)
.attr(''width'', 10)
.attr(''height'', 10)
.style(''fill'', color(d));
g.append(''text'')
.attr(''x'', width + 25)
.attr(''y'', i * 20 + 8)
.attr(''height'',30)
.attr(''width'',250)
.style(''fill'', color(d))
.text(d);
'}');
'}'
if(running == 1)
setTimeout(requestData, {3});
'}',
cache: false
'}');
'}'
// stop
function stop() '{'
running = 0;
'}'
requestData();
</script>
</body>
</html>
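The doubled single quotes and quoted braces in this file appear to be java.text.MessageFormat escapes, with {0} through {3} as its placeholders. A hedged sketch of expanding the template on the server side follows; the file path and the meaning of each argument are inferred from the placeholder positions, not taken from SLS source:

// Hedged sketch: filling the template with java.text.MessageFormat.
// Assumed argument meanings: {0} extra per-queue legend lines, {1} time-axis
// unit label, {2} time divisor, {3} poll interval in ms.
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.MessageFormat;

public class SimulatePageSketch {
  public static void main(String[] args) throws IOException {
    String template = new String(
        Files.readAllBytes(Paths.get("simulate.html.template")),
        StandardCharsets.UTF_8);
    String queueLegends =                       // illustrative queue name
        "legends[4] = ['queue.sls_queue_1.allocated.memory'];\n"
        + "legends[5] = ['queue.sls_queue_1.allocated.vcores'];";
    // Pass numbers as strings so MessageFormat does not insert grouping
    // separators (e.g. "1,000") into the generated JavaScript.
    String page = MessageFormat.format(template, queueLegends, "s",
        "1000", "1000");
    Files.write(Paths.get("simulate.html"),
        page.getBytes(StandardCharsets.UTF_8));
  }
}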

@@ -0,0 +1,50 @@
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
.td1 '{'
border-width: 1px;
padding: 8px;
border-style: solid;
border-color: #666666;
background-color: #dedede;
width: 50%;
'}'
table.gridtable '{'
font-family: verdana,arial,sans-serif;
font-size:11px;
color:#333333;
border-width: 1px;
border-color: #666666;
border-collapse: collapse;
margin-top: 80px;
'}'
.td2 '{'
border-width: 1px;
padding: 8px;
border-style: solid;
border-color: #666666;
background-color: #ffffff;
width: 50%;
'}'
</style>
</head>
<body>
<table class="gridtable" align="center" width="400px">
<tr>
<td colspan="2" class="td2" align="center">
<b>SLS Simulation Information</b>
</td>
</tr>
{0}
<tr>
<td align="center" height="80px">
<a href="simulate">Simulation Charts</a>
</td>
<td align="center">
<a href="track">Tracked Jobs & Queues</a>
</td>
</tr>
</table>
</body>
</html>
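The {0} slot in this page presumably receives pre-rendered table rows summarizing the simulation. An illustrative sketch of producing such rows (labels and values are fabricated):

// Hedged sketch: one way to build the '{0}' rows for the info table above.
public class InfoRowsSketch {
  public static void main(String[] args) {
    String[][] info = { {"Number of nodes", "40"}, {"Number of racks", "4"} };
    StringBuilder rows = new StringBuilder();
    for (String[] kv : info) {
      rows.append("<tr><td class=\"td1\">").append(kv[0])
          .append("</td><td class=\"td2\">").append(kv[1])
          .append("</td></tr>\n");
    }
    System.out.print(rows); // paste where '{0}' sits in the template
  }
}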

@@ -0,0 +1,81 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- SLSRunner configuration -->
<property>
<name>yarn.sls.runner.pool.size</name>
<value>100</value>
</property>
<!-- Nodes configuration -->
<property>
<name>yarn.sls.nm.memory.mb</name>
<value>10240</value>
</property>
<property>
<name>yarn.sls.nm.vcores</name>
<value>10</value>
</property>
<property>
<name>yarn.sls.nm.heartbeat.interval.ms</name>
<value>1000</value>
</property>
<!-- Apps configuration -->
<property>
<name>yarn.sls.am.heartbeat.interval.ms</name>
<value>1000</value>
</property>
<property>
<name>yarn.sls.am.type.mapreduce</name>
<value>org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator</value>
</property>
<!-- Containers configuration -->
<property>
<name>yarn.sls.container.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.sls.container.vcores</name>
<value>1</value>
</property>
<!-- metrics -->
<property>
<name>yarn.sls.metrics.switch</name>
<value>ON</value>
</property>
<property>
<name>yarn.sls.metrics.web.address.port</name>
<value>10001</value>
</property>
<property>
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</name>
<value>org.apache.hadoop.yarn.sls.scheduler.FifoSchedulerMetrics</value>
</property>
<property>
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</name>
<value>org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics</value>
</property>
<property>
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</name>
<value>org.apache.hadoop.yarn.sls.scheduler.CapacitySchedulerMetrics</value>
</property>
</configuration>
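The first group of keys sizes the simulated cluster (runner thread pool, per-NM resources, heartbeat intervals, container shape), while the last three map each scheduler class to a metrics wrapper. A sketch of reading the sizing keys through the standard Configuration machinery (assumes hadoop-common on the classpath; the fallback values here are illustrative, not necessarily SLS's real defaults):

// Hedged sketch: loading sls-runner.xml and reading the cluster-sizing keys.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class SlsRunnerConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.addResource(new Path("sls-runner.xml"));
    int pool    = conf.getInt("yarn.sls.runner.pool.size", 10);
    int memMB   = conf.getInt("yarn.sls.nm.memory.mb", 10240);
    int vcores  = conf.getInt("yarn.sls.nm.vcores", 10);
    long nmBeat = conf.getLong("yarn.sls.nm.heartbeat.interval.ms", 1000);
    System.out.println(pool + " runner threads; each simulated NM: "
        + memMB + " MB, " + vcores + " vcores, heartbeat every "
        + nmBeat + " ms");
  }
}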

@@ -0,0 +1,193 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body '{' font: 20px sans-serif;'}'
.axis path,
.axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges;'}'
.axis text '{' font-family: sans-serif; font-size: 20px; '}'
.line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
.legend '{' padding: 5px; font: 18px sans-serif; background: yellow;
box-shadow: 2px 2px 1px #888;'}'
.title '{' font: 24px sans-serif; '}'
.divborder '{' border-width: 1px; border-style: solid; border-color: black;
margin-top:10px '}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="offset4 span8"><br/><br/><br/>
Select Tracked Job/Queue:
<select id="trackedSelect" onchange="redrawChart()">
<option>----Queue----</option>
{0}
<option>----Job----</option>
{1}
</select>
<input type="button" style="float: right;" value="Stop"
onClick="stop()" />
</div>
</div>
<div class="row">
<div class="divborder span9 offset4" id="area1"></div>
</div>
<script>
// global variables
var basetime = 0;
var running = 1;
var para = '''';
var data = [];
var path, line, svg;
var x, y;
var width, height;
var xAxis, yAxis;
var legends = [''usage.memory'', ''demand.memory'', ''maxshare.memory'',
''minshare.memory'', ''fairshare.memory''];
// stop function
function stop() '{'
running = 0;
'}'
// select changed event
function redrawChart() '{'
var value = $(''#trackedSelect'').val();
if (value.substring(0, ''Job ''.length) === ''Job ''
|| value.substring(0, ''Queue ''.length) === ''Queue '') '{'
para = value;
running = 0;
basetime = 0;
data = [];
$(''#area1'').empty();
drawChart(''Tracking '' + value);
running = 1;
requestData();
}
}
// draw chart
function drawChart(title) '{'
// location
var margin = '{'top: 50, right: 150, bottom: 50, left: 80'}';
width = 800 - margin.left - margin.right;
height = 420 - margin.top - margin.bottom;
x = d3.scale.linear().range([0, width]);
y = d3.scale.linear().range([height, 0]);
xAxis = d3.svg.axis().scale(x).orient(''bottom'');
yAxis = d3.svg.axis().scale(y).orient(''left'');
// lines
line = d3.svg.line().interpolate(''basis'')
.x(function(d) '{' return x(d.time); })
.y(function(d) '{' return y(d.value); });
// create chart
svg = d3.select(''#area1'').append(''svg'')
.attr(''width'', width + margin.left + margin.right)
.attr(''height'', height + margin.top + margin.bottom)
.append(''g'')
.attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
// axis labels
svg.append(''text'')
.attr(''transform'', ''translate('' + (width / 2) + '','' + (height + margin.bottom - 5 ) + '')'')
.style(''text-anchor'', ''middle'')
.text(''Time ({2})'');
svg.append(''text'')
.attr(''transform'', ''rotate(-90)'')
.attr(''y'', 0 - margin.left)
.attr(''x'',0 - (height / 2))
.attr(''dy'', ''1em'')
.style(''text-anchor'', ''middle'')
.text(''Memory (GB)'');
// title
svg.append(''text'')
.attr(''x'', (width / 2))
.attr(''y'', 10 - (margin.top / 2))
.attr(''text-anchor'', ''middle'')
.text(title);
'}'
// request data
function requestData() '{'
$.ajax('{'url: ''trackMetrics?t='' + para,
success: function(point) '{'
// clear old
svg.selectAll(''g.tick'').remove();
svg.selectAll(''g'').remove();
if(basetime == 0) basetime = point.time;
point.time = (point.time - basetime)/{3};
data.push(point);
var color = d3.scale.category10();
color.domain(d3.keys(data[0]).filter(function(key) '{'
return $.inArray(key, legends) !== -1;
'}'));
var values = color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{' time: d.time, value: d[name]'}';
'}')
'}';
'}');
// set x/y range
x.domain(d3.extent(data, function(d) '{' return d.time; '}'));
y.domain([
d3.min(values, function(c) '{' return 0 '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values, function(v) '{' return v.value; '}'); '}')
]);
svg.append(''g'').attr(''class'', ''x axis'')
.attr(''transform'', ''translate(0,'' + height + '')'').call(xAxis);
svg.append(''g'').attr(''class'', ''y axis'').call(yAxis);
var value = svg.selectAll(''.path'')
.data(values).enter().append(''g'').attr(''class'', ''line'');
value.append(''path'').attr(''class'', ''line'')
.attr(''d'', function(d) '{'return line(d.values); '}')
.style(''stroke'', function(d) '{'return color(d.name); '}');
// legend
var legend = svg.append(''g'')
.attr(''class'', ''legend'')
.attr(''x'', width + 5)
.attr(''y'', 25)
.attr(''height'', 120)
.attr(''width'', 180);
legend.selectAll(''g'').data(legends)
.enter()
.append(''g'')
.each(function(d, i) '{'
var g = d3.select(this);
g.append(''rect'')
.attr(''x'', width + 5)
.attr(''y'', i * 20)
.attr(''width'', 10)
.attr(''height'', 10)
.style(''fill'', color(d));
g.append(''text'')
.attr(''x'', width + 25)
.attr(''y'', i * 20 + 8)
.attr(''height'',30)
.attr(''width'',250)
.style(''fill'', color(d))
.text(d);
'}');
if(running == 1)
setTimeout(requestData, {4});
'}',
cache: false
'}');
'}'
</script>
</body>
</html>
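This page polls trackMetrics?t=<tracked name> and appears to expect a flat JSON point: a time field plus one numeric field per entry in its legends array. A sketch of such a point with fabricated values:

// Hedged sketch of the JSON shape the page above consumes; the field names
// mirror the legends array, and every value here is made up.
public class TrackPointSketch {
  public static void main(String[] args) {
    String point = "{"
        + "\"time\":" + System.currentTimeMillis() + ","
        + "\"usage.memory\":12.5,"
        + "\"demand.memory\":16.0,"
        + "\"maxshare.memory\":32.0,"
        + "\"minshare.memory\":8.0,"
        + "\"fairshare.memory\":16.0"
        + "}";
    System.out.println(point);
  }
}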

@@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<description>The address of the RM web application.</description>
<name>yarn.resourcemanager.webapp.address</name>
<value>localhost:18088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:18031</value>
</property>
<property>
<description>The address of the scheduler interface.</description>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:18030</value>
</property>
<property>
<description>The address of the applications manager interface in the RM.</description>
<name>yarn.resourcemanager.address</name>
<value>localhost:18032</value>
</property>
<property>
<description>The address of the RM admin interface.</description>
<name>yarn.resourcemanager.admin.address</name>
<value>localhost:18033</value>
</property>
<property>
<description>Set to false to avoid the IP check.</description>
<name>hadoop.security.token.service.use_ip</name>
<value>false</value>
</property>
</configuration>
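This sample pins the FairScheduler and moves every RM endpoint onto localhost ports in the 180xx range, plausibly so the simulated RM does not collide with a real cluster's default ports. A sketch that loads the file and confirms the selected scheduler (assumes hadoop-common and the YARN server jars on the classpath):

// Hedged sketch: reading back the scheduler class chosen by yarn-site.xml.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class SchedulerClassSketch {
  public static void main(String[] args) throws ClassNotFoundException {
    Configuration conf = new Configuration(false);
    conf.addResource(new Path("yarn-site.xml"));
    String cls = conf.get("yarn.resourcemanager.scheduler.class");
    // Class.forName only resolves if the YARN scheduler jars are present.
    System.out.println("Scheduler: " + Class.forName(cls).getName());
  }
}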

@@ -83,6 +83,11 @@
<scope>compile</scope>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-sls</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
<build>

@@ -41,6 +41,7 @@
<module>hadoop-extras</module>
<module>hadoop-pipes</module>
<module>hadoop-openstack</module>
<module>hadoop-sls</module>
</modules>
<build>

@@ -9,6 +9,8 @@ Release 2.3.0 - UNRELEASED
YARN-649. Added a new NM web-service to serve container logs in plain text
over HTTP. (Sandy Ryza via vinodkv)

YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu)

IMPROVEMENTS

YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza)