YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1527065 13f79535-47bb-0310-9956-ffa450edef68
@@ -0,0 +1,45 @@
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<assembly>
  <id>hadoop-sls</id>
  <formats>
    <format>dir</format>
  </formats>
  <includeBaseDirectory>false</includeBaseDirectory>

  <fileSets>
    <fileSet>
      <directory>${basedir}/src/main/bin</directory>
      <outputDirectory>sls/bin</outputDirectory>
      <fileMode>0755</fileMode>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/html</directory>
      <outputDirectory>sls/html</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/sample-conf</directory>
      <outputDirectory>sls/sample-conf</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/data</directory>
      <outputDirectory>sls/sample-data</outputDirectory>
    </fileSet>
  </fileSets>

</assembly>
@@ -93,6 +93,17 @@
      <include>*-sources.jar</include>
    </includes>
  </fileSet>
  <fileSet>
    <directory>../hadoop-sls/target</directory>
    <outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
    <includes>
      <include>*-sources.jar</include>
    </includes>
  </fileSet>
  <fileSet>
    <directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
    <outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
  </fileSet>
</fileSets>
<dependencySets>
  <dependencySet>
@@ -729,6 +729,16 @@
      <artifactId>hsqldb</artifactId>
      <version>2.0.0</version>
    </dependency>
    <dependency>
      <groupId>com.codahale.metrics</groupId>
      <artifactId>metrics-core</artifactId>
      <version>3.0.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-sls</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</dependencyManagement>

@@ -95,6 +95,7 @@
      <item name="Fair Scheduler" href="hadoop-yarn/hadoop-yarn-site/FairScheduler.html"/>
      <item name="Web Application Proxy" href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html"/>
      <item name="YARN Commands" href="hadoop-yarn/hadoop-yarn-site/YarnCommands.html"/>
      <item name="Scheduler Load Simulator" href="hadoop-sls/SchedulerLoadSimulator.html"/>
    </menu>

    <menu name="YARN REST APIs" inherit="top">

@@ -0,0 +1,12 @@
Yarn Scheduler Load Simulator (SLS)

SLS is a stress and performance harness for the Yarn Resource Manager Scheduler
that exercises the scheduler implementation by simulating the cluster size and
the application load, without requiring an actual cluster or real applications.

SLS runs a regular RM without RPC endpoints and uses NodeManager and
ApplicationMaster simulators to send and receive events, simulating cluster
and application load behavior.

The size of the cluster and the application load are scripted in a
configuration file.
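
A rough sketch of the intended workflow (all paths here are hypothetical; the
two helper scripts are the rumen2sls.sh and slsrun.sh added later in this
patch):

    # Convert a Rumen trace into SLS input files; with the default prefix this
    # writes sls-jobs.json and sls-nodes.json into the output directory.
    bin/rumen2sls.sh --rumen-file=/tmp/job-trace.json --output-dir=/tmp/sls

    # Run the simulation against the generated trace; --print-simulation
    # optionally dumps the simulated cluster and application setup first.
    bin/slsrun.sh --input-sls=/tmp/sls/sls-jobs.json --output-dir=/tmp/sls-out \
        --print-simulation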

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<FindBugsFilter>

  <!-- Ignore compareTo/equals warnings -->
  <Match>
    <Class name="org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator" />
    <Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS" />
  </Match>

</FindBugsFilter>
@@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-project</artifactId>
    <version>2.3.0-SNAPSHOT</version>
    <relativePath>../../hadoop-project</relativePath>
  </parent>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-sls</artifactId>
  <version>2.3.0-SNAPSHOT</version>
  <description>Apache Hadoop Scheduler Load Simulator</description>
  <name>Apache Hadoop Scheduler Load Simulator</name>
  <packaging>jar</packaging>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-minicluster</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-rumen</artifactId>
    </dependency>
    <dependency>
      <groupId>com.codahale.metrics</groupId>
      <artifactId>metrics-core</artifactId>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.mortbay.jetty</groupId>
      <artifactId>jetty</artifactId>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
          <groupId>org.mortbay.jetty</groupId>
          <artifactId>servlet-api</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.mortbay.jetty</groupId>
      <artifactId>jetty-util</artifactId>
      <scope>provided</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <configuration>
          <attach>true</attach>
        </configuration>
        <executions>
          <execution>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>findbugs-maven-plugin</artifactId>
        <configuration>
          <findbugsXmlOutput>true</findbugsXmlOutput>
          <xmlOutput>true</xmlOutput>
          <excludeFilterFile>${basedir}/dev-support/findbugs-exclude.xml</excludeFilterFile>
          <effort>Max</effort>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
        <configuration>
          <excludes>
            <exclude>src/main/data/2jobs2min-rumen-jh.json</exclude>
            <exclude>src/main/html/js/thirdparty/jquery.js</exclude>
            <exclude>src/main/html/js/thirdparty/d3-LICENSE</exclude>
            <exclude>src/main/html/js/thirdparty/d3.v3.js</exclude>
            <exclude>src/main/html/simulate.html.template</exclude>
            <exclude>src/main/html/simulate.info.html.template</exclude>
            <exclude>src/main/html/track.html.template</exclude>
            <exclude>src/test/resources/simulate.html.template</exclude>
            <exclude>src/test/resources/simulate.info.html.template</exclude>
            <exclude>src/test/resources/track.html.template</exclude>
          </excludes>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <profiles>
    <profile>
      <id>docs</id>
      <activation>
        <activeByDefault>false</activeByDefault>
      </activation>
      <build>
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-site-plugin</artifactId>
            <executions>
              <execution>
                <phase>package</phase>
                <goals>
                  <goal>site</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
    <profile>
      <id>dist</id>
      <activation>
        <activeByDefault>false</activeByDefault>
      </activation>
      <build>
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <dependencies>
              <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-assemblies</artifactId>
                <version>${project.version}</version>
              </dependency>
            </dependencies>
            <executions>
              <execution>
                <id>dist</id>
                <phase>prepare-package</phase>
                <goals>
                  <goal>single</goal>
                </goals>
                <configuration>
                  <appendAssemblyId>false</appendAssemblyId>
                  <attach>false</attach>
                  <finalName>${project.artifactId}-${project.version}</finalName>
                  <descriptorRefs>
                    <descriptorRef>hadoop-sls</descriptorRef>
                  </descriptorRefs>
                </configuration>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>
</project>
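
The dist profile above binds the hadoop-sls assembly descriptor to the
prepare-package phase, so one way to produce the sls bundle from this module
might be (the profile is off by default; flags are illustrative):

    # Activating -Pdist makes maven-assembly-plugin run the hadoop-sls
    # descriptor during prepare-package; -DskipTests is optional.
    mvn clean package -Pdist -DskipTests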

@@ -0,0 +1,55 @@
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<assembly>
  <id>sls</id>
  <formats>
    <format>dir</format>
    <format>tar.gz</format>
  </formats>
  <includeBaseDirectory>false</includeBaseDirectory>

  <fileSets>
    <fileSet>
      <directory>${basedir}/src/main/bin</directory>
      <outputDirectory>bin</outputDirectory>
      <fileMode>0755</fileMode>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/data</directory>
      <outputDirectory>sample-data</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/html</directory>
      <outputDirectory>html</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/sample-conf</directory>
      <outputDirectory>sample-conf</outputDirectory>
    </fileSet>
  </fileSets>

  <dependencySets>
    <dependencySet>
      <outputDirectory>/lib</outputDirectory>
      <unpack>false</unpack>
      <scope>compile</scope>
      <useProjectArtifact>true</useProjectArtifact>
    </dependencySet>
  </dependencySets>

</assembly>
@@ -0,0 +1,106 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#

###############################################################################
printUsage() {
  echo "Usage: rumen2sls.sh <OPTIONS>"
  echo "            --rumen-file=<RUMEN_FILE>"
  echo "            --output-dir=<SLS_OUTPUT_DIR>"
  echo "            [--output-prefix=<PREFIX>] (default is sls)"
  echo
}
###############################################################################
parseArgs() {
  for i in "$@"
  do
    case $i in
      --rumen-file=*)
        rumenfile=${i#*=}
        ;;
      --output-dir=*)
        outputdir=${i#*=}
        ;;
      --output-prefix=*)
        outputprefix=${i#*=}
        ;;
      *)
        echo "Invalid option"
        echo
        printUsage
        exit 1
        ;;
    esac
  done
  if [[ "${rumenfile}" == "" || "${outputdir}" == "" ]] ; then
    echo "Both --rumen-file ${rumenfile} and --output-dir ${outputdir} must be specified"
    echo
    printUsage
    exit 1
  fi
}
###############################################################################
calculateBasedir() {
  # resolve links - $0 may be a softlink
  PRG="${1}"

  while [ -h "${PRG}" ]; do
    ls=`ls -ld "${PRG}"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
      PRG="$link"
    else
      PRG=`dirname "${PRG}"`/"$link"
    fi
  done

  BASEDIR=`dirname ${PRG}`
  BASEDIR=`cd ${BASEDIR}/..;pwd`
}
###############################################################################
calculateClasspath() {
  HADOOP_BASE=`which hadoop`
  HADOOP_BASE=`dirname $HADOOP_BASE`
  DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
  HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
  . $HADOOP_LIBEXEC_DIR/hadoop-config.sh
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}"
}
###############################################################################
runSLSGenerator() {
  if [[ "${outputprefix}" == "" ]] ; then
    outputprefix="sls"
  fi

  slsJobs=${outputdir}/${outputprefix}-jobs.json
  slsNodes=${outputdir}/${outputprefix}-nodes.json

  args="-input ${rumenfile} -outputJobs ${slsJobs}";
  args="${args} -outputNodes ${slsNodes}";

  hadoop org.apache.hadoop.yarn.sls.RumenToSLSConverter ${args}
}
###############################################################################

calculateBasedir $0
calculateClasspath
parseArgs "$@"
runSLSGenerator

echo
echo "SLS simulation files available at: ${outputdir}"
echo

exit 0

@@ -0,0 +1,112 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#

###############################################################################
printUsage() {
  echo "Usage: slsrun.sh <OPTIONS>"
  echo "            --input-rumen|--input-sls=<FILE1,FILE2,...>"
  echo "            --output-dir=<SLS_SIMULATION_OUTPUT_DIRECTORY>"
  echo "            [--nodes=<SLS_NODES_FILE>]"
  echo "            [--track-jobs=<JOBID1,JOBID2,...>]"
  echo "            [--print-simulation]"
  echo
}
###############################################################################
parseArgs() {
  for i in "$@"
  do
    case $i in
      --input-rumen=*)
        inputrumen=${i#*=}
        ;;
      --input-sls=*)
        inputsls=${i#*=}
        ;;
      --output-dir=*)
        outputdir=${i#*=}
        ;;
      --nodes=*)
        nodes=${i#*=}
        ;;
      --track-jobs=*)
        trackjobs=${i#*=}
        ;;
      --print-simulation)
        printsimulation="true"
        ;;
      *)
        echo "Invalid option"
        echo
        printUsage
        exit 1
        ;;
    esac
  done

  if [[ "${inputrumen}" == "" && "${inputsls}" == "" ]] ; then
    echo "Either --input-rumen or --input-sls must be specified"
    echo
    printUsage
    exit 1
  fi

  if [[ "${outputdir}" == "" ]] ; then
    echo "The output directory --output-dir must be specified"
    echo
    printUsage
    exit 1
  fi
}

###############################################################################
calculateClasspath() {
  HADOOP_BASE=`which hadoop`
  HADOOP_BASE=`dirname $HADOOP_BASE`
  DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
  HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
  . $HADOOP_LIBEXEC_DIR/hadoop-config.sh
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}:html"
}
###############################################################################
runSimulation() {
  if [[ "${inputsls}" == "" ]] ; then
    args="-inputrumen ${inputrumen}"
  else
    args="-inputsls ${inputsls}"
  fi

  args="${args} -output ${outputdir}"

  if [[ "${nodes}" != "" ]] ; then
    args="${args} -nodes ${nodes}"
  fi

  if [[ "${trackjobs}" != "" ]] ; then
    args="${args} -trackjobs ${trackjobs}"
  fi

  if [[ "${printsimulation}" == "true" ]] ; then
    args="${args} -printsimulation"
  fi

  hadoop org.apache.hadoop.yarn.sls.SLSRunner ${args}
}
###############################################################################

calculateClasspath
parseArgs "$@"
runSimulation

exit 0
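
Since slsrun.sh ultimately invokes the runner class through the hadoop
launcher, an equivalent manual invocation might look like this (paths and the
tracked job id are hypothetical; hadoop-config.sh must already have put the
SLS classes and the html directory on HADOOP_CLASSPATH):

    hadoop org.apache.hadoop.yarn.sls.SLSRunner -inputsls /tmp/sls/sls-jobs.json \
        -output /tmp/sls-out -trackjobs job_1 -printsimulation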

@@ -0,0 +1,26 @@
Copyright (c) 2013, Michael Bostock
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* The name Michael Bostock may not be used to endorse or promote products
  derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,334 @@
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
  <link rel="stylesheet" href="css/bootstrap-responsive.min.css">
  <style type="text/css">
    body {
      font: 20px sans-serif;
    }

    .axis path,
    .axis line {
      fill: none;
      stroke: #000;
      shape-rendering: crispEdges;
    }
    .axis text {
      font-family: sans-serif;
      font-size: 20px;
    }

    .line {
      fill: none;
      stroke: steelblue;
      stroke-width: 3px;
    }

    .legend {
      padding: 1px;
      font: 18px sans-serif;
      background: yellow;
      box-shadow: 2px 2px 1px #888;
    }

    .title {
      font: 24px sans-serif;
    }
    .divborder {
      border-width: 1px;
      border-style: solid;
      border-color: black;
      margin-top: 10px
    }
  </style>
  <script src="js/thirdparty/d3.v3.js"></script>
  <script src="js/thirdparty/jquery.js"></script>
  <script src="js/thirdparty/bootstrap.min.js"></script>
</head>

<body>
  <div class="row">
    <div class="offset5" style="margin-top:20px; margin-bottom:20px">
      Select the generated metrics log file (realtimetrack.json):
      <input type='file' id='jsonfile' />
      <input type='button' value='Generate!' onClick='draw()' /><br>
    </div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area1"></div>
    <div class="divborder span8" id="area2"></div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area3"></div>
    <div class="divborder span8" id="area4"></div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area5"></div>
    <div class="divborder span8" id="area6"></div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area7"></div>
    <div class="span7" id="area8"></div>
  </div>
  <p> </p>
  <script>
    // select file and draw
    function draw() {
      var filepath = document.getElementById('jsonfile').value;
      if (filepath) {
        for (var i = 1; i < 9; i ++) {
          $('#area' + i).empty();
        }
        filepath = filepath.replace("C:\\fakepath\\", "");
        drawCharts(filepath);
      } else {
        alert('Please choose a file first.');
      }
    }

    function drawCharts(filepath) {
      $.getJSON(filepath, function(data) {
        var numQueues = 0;
        var queueNames = new Array();
        for (var j in data[0]) {
          if (j.substring(0, 'queue'.length) === 'queue') {
            queueNames[numQueues] = j;
            numQueues ++;
          }
        }
        numQueues /= 2;

        // create graph
        $.getJSON(filepath, function(data) {
          var basetime = data[0].time;
          data.forEach(function(d) {
            d.time = (d.time - basetime) / 1000;
          });

          var legends = ["running.applications", "running.containers"];
          drawEachChart("#area1", data, legends, "Cluster running applications & containers", "Number", 0, 0);
          legends = ["jvm.free.memory", "jvm.max.memory", "jvm.total.memory"];
          drawEachChart("#area2", data, legends, "JVM memory", "Memory (GB)", 0, 0);
          legends = ["cluster.allocated.memory", "cluster.available.memory"];
          drawEachChart("#area3", data, legends, "Cluster allocated & available memory", "Memory (GB)", 0, 0);
          legends = ["cluster.allocated.vcores", "cluster.available.vcores"];
          drawEachChart("#area4", data, legends, "Cluster allocated & available vcores", "Number", 0, 0);

          for (var i = 0; i < numQueues; i ++) {
            legends[i] = queueNames[i * 2];
          }
          drawEachChart("#area5", data, legends, "Queue allocated memory", "Memory (GB)", 1, 100);
          for (var i = 0; i < numQueues; i ++) {
            legends[i] = queueNames[i * 2 + 1];
          }
          drawEachChart("#area6", data, legends, "Queue allocated vcores", "VCores", 1, 90);

          legends = [
            "scheduler.allocate.timecost",
            "scheduler.handle-NODE_ADDED.timecost", "scheduler.handle-NODE_REMOVED.timecost",
            "scheduler.handle-NODE_UPDATE.timecost", "scheduler.handle-APP_ADDED.timecost",
            "scheduler.handle-APP_REMOVED.timecost", "scheduler.handle-CONTAINER_EXPIRED.timecost"
          ];
          drawEachChart("#area7", data, legends, "Scheduler allocate & handle operations timecost", "Timecost (ms)", 0, 210);
        });
      });
    }

    // draw each chart (line chart, or stacked area chart when isArea == 1)
    function drawEachChart(chartArea, data, legends, title, yLabelTitle, isArea, pl) {
      var margin = {top: 50, right: 250, bottom: 50, left: 70};
      var width = 800 - margin.left - margin.right;
      var height = 420 - margin.top - margin.bottom;

      var x = d3.scale.linear().range([0, width]);
      var y = d3.scale.linear().range([height, 0]);
      var xAxis = d3.svg.axis().scale(x).orient("bottom");
      var yAxis = d3.svg.axis().scale(y).orient("left");

      var color = d3.scale.category10();

      if (isArea == 1) {
        var area = d3.svg.area()
          .x(function(d) { return x(d.time); })
          .y0(function(d) { return y(d.y0); })
          .y1(function(d) { return y(d.y0 + d.y); });

        var stack = d3.layout.stack()
          .values(function(d) { return d.values; });

        // create chart
        var svg = d3.select(chartArea).append("svg")
          .attr("width", width + margin.left + margin.right)
          .attr("height", height + margin.top + margin.bottom)
          .append("g")
          .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

        color.domain(d3.keys(data[0])
          .filter(function(key) { return $.inArray(key, legends) !== -1; }));

        var points = stack(color.domain().map(function(name) {
          return {
            name: name,
            values: data.map(function(d) {
              return {time: d.time, y: d[name]};
            })
          };
        }));

        // x & y
        x.domain(d3.extent(data, function(d) { return d.time; }));
        y.domain([
          d3.min(points, function(c) {
            return 0.9 * d3.min(c.values, function(v) { return v.y; }); }),
          d3.max(points, function(c) {
            return 1.1 * d3.max(c.values, function(v) { return v.y + v.y0; }); })
        ]);

        svg.append("g").attr("class", "x axis")
          .attr("transform", "translate(0," + height + ")")
          .call(xAxis)
          .append("text")
          .attr("transform", "translate(" + (width / 2) + ", 45)")
          .style("text-anchor", "middle")
          .text("Time (s)");

        svg.append("g")
          .attr("class", "y axis")
          .call(yAxis)
          .append("text")
          .attr("transform", "rotate(-90)")
          .attr("y", 0 - margin.left)
          .attr("x", 0 - (height / 2))
          .attr("dy", "1em")
          .style("text-anchor", "middle")
          .text(yLabelTitle);

        var point = svg.selectAll(".point")
          .data(points)
          .enter().append("g");

        point.append("path")
          .attr("class", "area")
          .attr("d", function(d) { return area(d.values); })
          .style("fill", function(d) { return color(d.name); });
      } else {
        // lines
        var line = d3.svg.line()
          .interpolate("basis")
          .x(function(d) { return x(d.time); })
          .y(function(d) { return y(d.value); });

        // create chart
        var svg = d3.select(chartArea).append("svg")
          .attr("id", title)
          .attr("width", width + margin.left + margin.right)
          .attr("height", height + margin.top + margin.bottom)
          .append("g")
          .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

        color.domain(d3.keys(data[0])
          .filter(function(key) { return $.inArray(key, legends) !== -1; }));

        var values = color.domain().map(function(name) {
          return {
            name: name,
            values: data.map(function(d) {
              return {time: d.time, value: +d[name]};
            })
          };
        });

        // x & y
        x.domain(d3.extent(data, function(d) { return d.time; }));
        y.domain([
          d3.min(values, function(c) { return 0.9 * d3.min(c.values, function(v) { return v.value; }); }),
          d3.max(values, function(c) { return 1.1 * d3.max(c.values, function(v) { return v.value; }); })
        ]);

        svg.append("g").attr("class", "x axis")
          .attr("transform", "translate(0," + height + ")")
          .call(xAxis)
          .append("text")
          .attr("transform", "translate(" + (width / 2) + ", 45)")
          .style("text-anchor", "middle")
          .text("Time (s)");

        svg.append("g")
          .attr("class", "y axis")
          .call(yAxis)
          .append("text")
          .attr("transform", "rotate(-90)")
          .attr("y", 0 - margin.left)
          .attr("x", 0 - (height / 2))
          .attr("dy", "1em")
          .style("text-anchor", "middle")
          .text(yLabelTitle);

        var value = svg.selectAll(".city")
          .data(values)
          .enter().append("g")
          .attr("class", "city");

        value.append("path")
          .attr("class", "line")
          .attr("d", function(d) { return line(d.values); })
          .style("stroke", function(d) { return color(d.name); });
      }
      // title
      svg.append("text")
        .attr("x", (width / 2))
        .attr("y", 10 - (margin.top / 2))
        .attr("text-anchor", "middle")
        .text(title);

      // legend
      var legend = svg.append("g")
        .attr("class", "legend")
        .attr("x", width - 50)
        .attr("y", 25)
        .attr("height", 120)
        .attr("width", 140);

      legend.selectAll('g').data(legends)
        .enter()
        .append('g')
        .each(function(d, i) {
          var g = d3.select(this);
          g.append("rect")
            .attr("x", width - 5 - pl)
            .attr("y", i * 20 + 0)
            .attr("width", 10)
            .attr("height", 10)
            .style("fill", color(d));

          g.append("text")
            .attr("x", width + 15 - pl)
            .attr("y", i * 20 + 8)
            .attr("height", 30)
            .attr("width", 250)
            .style("fill", color(d))
            .text(d);
        });
    }
  </script>
</body>
</html>
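
Note that this page loads the metrics log with $.getJSON, which many browsers
refuse to do over file:// URLs, so one assumption is that the html directory
is served over HTTP first, e.g. with any static file server:

    # Serve the html directory so the AJAX request for realtimetrack.json
    # succeeds, then open the page from http://localhost:8000/
    cd html && python -m SimpleHTTPServer 8000   # or: python3 -m http.server 8000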

@@ -0,0 +1,278 @@
<html>
<head>
  <meta charset="utf-8">
  <link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
  <link rel="stylesheet" href="css/bootstrap-responsive.min.css">
  <style type="text/css">
    body '{' font: 20px sans-serif; '}'
    .axis path,
    .axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
    .axis text '{' font-family: sans-serif; font-size: 20px; '}'
    .line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
    .legend '{'
      padding: 5px;
      font: 18px sans-serif;
      background: yellow;
      box-shadow: 2px 2px 1px #888;
    '}'
    .title '{' font: 24px sans-serif; '}'
    .divborder '{'
      border-width: 1px;
      border-style: solid;
      border-color: black;
      margin-top: 10px
    '}'
  </style>
  <script src="js/thirdparty/d3.v3.js"></script>
  <script src="js/thirdparty/jquery.js"></script>
  <script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
  <div class="row">
    <div class="span10 offset2"><br>
      <input type="button" style="float: right;" value="Stop"
             onClick="stop()" />
    </div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area1"></div>
    <div class="divborder span8" id="area2"></div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area3"></div>
    <div class="divborder span8" id="area4"></div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area5"></div>
    <div class="divborder span8" id="area6"></div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area7"></div>
    <div class="span8" id="area8"></div>
  </div><br/><br/>

  <script>
    var basetime = 0;
    var running = 1;
    var data = [];
    var width, height;
    var legends = [];
    var titles = [];
    var yLabels = [];
    var isAreas = [];
    var svgs = [];
    var xs = [];
    var ys = [];
    var xAxiss = [];
    var yAxiss = [];
    var lineAreas = [];
    var stacks = [];

    // legends
    legends[0] = [''running.applications'', ''running.containers''];
    legends[1] = [''jvm.free.memory'', ''jvm.max.memory'', ''jvm.total.memory''];
    legends[2] = [''cluster.allocated.memory'', ''cluster.available.memory''];
    legends[3] = [''cluster.allocated.vcores'', ''cluster.available.vcores''];
    legends[4] = [];
    legends[5] = [];
    {0}
    legends[6] = [''scheduler.allocate.timecost'',
                  ''scheduler.handle-NODE_ADDED.timecost'',
                  ''scheduler.handle-NODE_REMOVED.timecost'',
                  ''scheduler.handle-NODE_UPDATE.timecost'',
                  ''scheduler.handle-APP_ADDED.timecost'',
                  ''scheduler.handle-APP_REMOVED.timecost'',
                  ''scheduler.handle-CONTAINER_EXPIRED.timecost''];

    // title
    titles[0] = ''Cluster running applications & containers'';
    titles[1] = ''JVM memory'';
    titles[2] = ''Cluster allocated & available memory'';
    titles[3] = ''Cluster allocated & available vcores'';
    titles[4] = ''Queue allocated memory'';
    titles[5] = ''Queue allocated vcores'';
    titles[6] = ''Scheduler allocate & handle operation timecost'';

    // ylabels
    yLabels[0] = ''Number'';
    yLabels[1] = ''Memory (GB)'';
    yLabels[2] = ''Memory (GB)'';
    yLabels[3] = ''Number'';
    yLabels[4] = ''Memory (GB)'';
    yLabels[5] = ''Number'';
    yLabels[6] = ''Timecost (ms)'';

    // is area?
    isAreas = [0, 0, 0, 0, 1, 1, 0];

    // draw all charts
    for (var i = 0; i < 7; i ++) '{'
      drawEachChart(i);
    '}'

    // draw each chart
    function drawEachChart(index) '{'
      var margin = '{'top: 50, right: 250, bottom: 50, left: 70'}';
      width = 750 - margin.left - margin.right;
      height = 420 - margin.top - margin.bottom;

      xs[index] = d3.scale.linear().range([0, width]);
      ys[index] = d3.scale.linear().range([height, 0]);
      xAxiss[index] = d3.svg.axis().scale(xs[index]).orient(''bottom'');
      yAxiss[index] = d3.svg.axis().scale(ys[index]).orient(''left'');

      if (isAreas[index] == 1) '{'
        lineAreas[index] = d3.svg.area()
          .x(function(d) '{' return xs[index](d.time); '}')
          .y0(function(d) '{' return ys[index](d.y0); '}')
          .y1(function(d) '{' return ys[index](d.y0 + d.y); '}');

        stacks[index] = d3.layout.stack()
          .values(function(d) '{' return d.values; '}');
      '}' else '{'
        lineAreas[index] = d3.svg.line()
          .interpolate(''basis'')
          .x(function(d) '{' return xs[index](d.time); '}')
          .y(function(d) '{' return ys[index](d.value); '}');
      '}'

      svgs[index] = d3.select(''#area'' + (index + 1)).append(''svg'')
        .attr(''width'', width + margin.left + margin.right)
        .attr(''height'', height + margin.top + margin.bottom)
        .append(''g'')
        .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');

      // x, y and title
      svgs[index].append(''text'')
        .attr(''transform'', ''translate('' + (width / 2) + '' ,'' +
              (height + margin.bottom - 10 ) + '')'')
        .style(''text-anchor'', ''middle'')
        .text(''Time ({1})'');

      svgs[index].append(''text'')
        .attr(''transform'', ''rotate(-90)'')
        .attr(''y'', 0 - margin.left)
        .attr(''x'', 0 - (height / 2))
        .attr(''dy'', ''1em'')
        .style(''text-anchor'', ''middle'')
        .text(yLabels[index]);

      svgs[index].append(''text'')
        .attr(''x'', (width / 2))
        .attr(''y'', 10 - (margin.top / 2))
        .attr(''text-anchor'', ''middle'')
        .text(titles[index]);
    '}'

    // request data
    function requestData() '{'
      $.ajax('{'url: ''simulateMetrics'',
        success: function(point) '{'
          // update data
          if (basetime == 0) basetime = point.time;
          point.time = (point.time - basetime) / {2};
          data.push(point);

          // clear old
          for (var i = 0; i < 7; i ++) '{'
            svgs[i].selectAll(''g.tick'').remove();
            svgs[i].selectAll(''g'').remove();
            var color = d3.scale.category10();
            color.domain(d3.keys(data[0]).filter(function(key) '{'
              return $.inArray(key, legends[i]) !== -1;
            '}'));

            var values;
            if (isAreas[i] == 1) '{'
              values = stacks[i](color.domain().map(function(name) '{'
                return '{'
                  name: name,
                  values: data.map(function(d) '{'
                    return '{'time: d.time, y: d[name]'}';
                  '}')
                '}'
              '}'));
              xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
              ys[i].domain([
                d3.min(values, function(c) '{' return 0; '}'),
                d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
                  function(v) '{' return v.y + v.y0; '}'); '}')
              ]);
            '}' else '{'
              values = color.domain().map(function(name) '{'
                return '{'
                  name: name,
                  values: data.map(function(d) '{'
                    return '{'time: d.time, value: d[name]'}';
                  '}')
                '}'
              '}');
              xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
              ys[i].domain([
                d3.min(values, function(c) '{' return 0; '}'),
                d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
                  function(v) '{' return v.value; '}'); '}')
              ]);
            '}'

            svgs[i].append(''g'').attr(''class'', ''x axis'')
              .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxiss[i]);

            svgs[i].append(''g'').attr(''class'', ''y axis'').call(yAxiss[i]);

            var value = svgs[i].selectAll(''.path'')
              .data(values).enter().append(''g'').attr(''class'', ''line'');

            if (isAreas[i] == 1) '{'
              value.append(''path'').attr(''class'', ''area'')
                .attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
                .style(''fill'', function(d) '{'return color(d.name); '}');
            '}' else '{'
              value.append(''path'').attr(''class'', ''line'')
                .attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
                .style(''stroke'', function(d) '{'return color(d.name); '}');
            '}'

            // legend
            var legend = svgs[i].append(''g'')
              .attr(''class'', ''legend'')
              .attr(''x'', width + 5)
              .attr(''y'', 25)
              .attr(''height'', 120)
              .attr(''width'', 140);
            legend.selectAll(''g'').data(legends[i])
              .enter()
              .append(''g'')
              .each(function(d, i) '{'
                var g = d3.select(this);
                g.append(''rect'')
                  .attr(''x'', width + 5)
                  .attr(''y'', i * 20)
                  .attr(''width'', 10)
                  .attr(''height'', 10)
                  .style(''fill'', color(d));
                g.append(''text'')
                  .attr(''x'', width + 25)
                  .attr(''y'', i * 20 + 8)
                  .attr(''height'', 30)
                  .attr(''width'', 250)
                  .style(''fill'', color(d))
                  .text(d);
              '}');
          '}'

          if (running == 1)
            setTimeout(requestData, {3});
        '}',
        cache: false
      '}');
    '}'

    // stop
    function stop() '{'
      running = 0;
    '}'
    requestData();
  </script>
</body>
</html>
@@ -0,0 +1,50 @@
<html>
<head>
  <meta charset="utf-8">
  <style type="text/css">
    .td1 '{'
      border-width: 1px;
      padding: 8px;
      border-style: solid;
      border-color: #666666;
      background-color: #dedede;
      width: 50%;
    '}'
    table.gridtable '{'
      font-family: verdana,arial,sans-serif;
      font-size: 11px;
      color: #333333;
      border-width: 1px;
      border-color: #666666;
      border-collapse: collapse;
      margin-top: 80px;
    '}'
    .td2 '{'
      border-width: 1px;
      padding: 8px;
      border-style: solid;
      border-color: #666666;
      background-color: #ffffff;
      width: 50%;
    '}'
  </style>
</head>
<body>
  <table class="gridtable" align="center" width="400px">
    <tr>
      <td colspan="2" class="td2" align="center">
        <b>SLS Simulate Information</b>
      </td>
    </tr>
    {0}
    <tr>
      <td align="center" height="80px">
        <a href="simulate">Simulation Charts</a>
      </td>
      <td align="center">
        <a href="track">Tracked Jobs & Queues</a>
      </td>
    </tr>
  </table>
</body>
</html>
@@ -0,0 +1,193 @@
<html>
<head>
  <meta charset="utf-8">
  <link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
  <link rel="stylesheet" href="css/bootstrap-responsive.min.css">
  <style type="text/css">
    body '{' font: 20px sans-serif;'}'
    .axis path,
    .axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges;'}'
    .axis text '{' font-family: sans-serif; font-size: 20px; '}'
    .line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
    .legend '{' padding: 5px; font: 18px sans-serif; background: yellow;
      box-shadow: 2px 2px 1px #888;'}'
    .title '{' font: 24px sans-serif; '}'
    .divborder '{' border-width: 1px; border-style: solid; border-color: black;
      margin-top:10px '}'
  </style>
  <script src="js/thirdparty/d3.v3.js"></script>
  <script src="js/thirdparty/jquery.js"></script>
  <script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
  <div class="row">
    <div class="offset4 span8"><br/><br/><br/>
      Select Tracked Job/Queue:
      <select id="trackedSelect" onchange="redrawChart()">
        <option>----Queue----</option>
        {0}
        <option>----Job----</option>
        {1}
      </select>
      <input type="button" style="float: right;" value="Stop"
             onClick="stop()" />
    </div>
  </div>
  <div class="row">
    <div class="divborder span9 offset4" id="area1"></div>
  </div>
  <script>
    // global variables
    var basetime = 0;
    var running = 1;
    var para = '''';
    var data = [];
    var path, line, svg;
    var x, y;
    var width, height;
    var xAxis, yAxis;
    var legends = [''usage.memory'', ''demand.memory'', ''maxshare.memory'',
                   ''minshare.memory'', ''fairshare.memory''];

    // stop function
    function stop() '{'
      running = 0;
    '}'

    // select changed event
    function redrawChart() '{'
      var value = $(''#trackedSelect'').val();
      if (value.substring(0, ''Job ''.length) === ''Job ''
          || value.substring(0, ''Queue ''.length) === ''Queue '') '{'
        para = value;
        running = 0;
        basetime = 0;
        data = [];
        $(''#area1'').empty();
        drawChart(''Tracking '' + value);
        running = 1;
        requestData();
      }
    }

    // draw chart
    function drawChart(title) '{'
      // location
      var margin = '{'top: 50, right: 150, bottom: 50, left: 80'}';
      width = 800 - margin.left - margin.right;
      height = 420 - margin.top - margin.bottom;
      x = d3.scale.linear().range([0, width]);
      y = d3.scale.linear().range([height, 0]);
      xAxis = d3.svg.axis().scale(x).orient(''bottom'');
      yAxis = d3.svg.axis().scale(y).orient(''left'');
      // lines
      line = d3.svg.line().interpolate(''basis'')
        .x(function(d) '{' return x(d.time); })
        .y(function(d) '{' return y(d.value); });
      // create chart
      svg = d3.select(''#area1'').append(''svg'')
        .attr(''width'', width + margin.left + margin.right)
        .attr(''height'', height + margin.top + margin.bottom)
        .append(''g'')
        .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
      // axis labels
      svg.append(''text'')
        .attr(''transform'', ''translate('' + (width / 2) + '','' + (height + margin.bottom - 5 ) + '')'')
        .style(''text-anchor'', ''middle'')
        .text(''Time ({2})'');
      svg.append(''text'')
        .attr(''transform'', ''rotate(-90)'')
        .attr(''y'', 0 - margin.left)
        .attr(''x'', 0 - (height / 2))
        .attr(''dy'', ''1em'')
        .style(''text-anchor'', ''middle'')
        .text(''Memory (GB)'');
      // title
      svg.append(''text'')
        .attr(''x'', (width / 2))
        .attr(''y'', 10 - (margin.top / 2))
        .attr(''text-anchor'', ''middle'')
        .text(title);
    '}'

    // request data
    function requestData() '{'
      $.ajax('{'url: ''trackMetrics?t='' + para,
        success: function(point) '{'
          // clear old
          svg.selectAll(''g.tick'').remove();
          svg.selectAll(''g'').remove();

          if (basetime == 0) basetime = point.time;
          point.time = (point.time - basetime) / {3};
          data.push(point);

          var color = d3.scale.category10();
          color.domain(d3.keys(data[0]).filter(function(key) '{'
            return $.inArray(key, legends) !== -1;
          '}'));

          var values = color.domain().map(function(name) '{'
            return '{'
              name: name,
              values: data.map(function(d) '{'
                return '{' time: d.time, value: d[name]'}';
              '}')
            '}';
          '}');

          // set x/y range
          x.domain(d3.extent(data, function(d) '{' return d.time; '}'));
          y.domain([
            d3.min(values, function(c) '{' return 0 '}'),
            d3.max(values, function(c) '{' return 1.1 * d3.max(c.values, function(v) '{' return v.value; '}'); '}')
          ]);

          svg.append(''g'').attr(''class'', ''x axis'')
            .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxis);
          svg.append(''g'').attr(''class'', ''y axis'').call(yAxis);
          var value = svg.selectAll(''.path'')
            .data(values).enter().append(''g'').attr(''class'', ''line'');

          value.append(''path'').attr(''class'', ''line'')
            .attr(''d'', function(d) '{'return line(d.values); '}')
            .style(''stroke'', function(d) '{'return color(d.name); '}');

          // legend
          var legend = svg.append(''g'')
            .attr(''class'', ''legend'')
            .attr(''x'', width + 5)
            .attr(''y'', 25)
            .attr(''height'', 120)
            .attr(''width'', 180);

          legend.selectAll(''g'').data(legends)
            .enter()
            .append(''g'')
            .each(function(d, i) '{'
              var g = d3.select(this);
              g.append(''rect'')
                .attr(''x'', width + 5)
                .attr(''y'', i * 20)
                .attr(''width'', 10)
                .attr(''height'', 10)
                .style(''fill'', color(d));

              g.append(''text'')
                .attr(''x'', width + 25)
                .attr(''y'', i * 20 + 8)
                .attr(''height'', 30)
                .attr(''width'', 250)
                .style(''fill'', color(d))
                .text(d);
            '}');

          if (running == 1)
            setTimeout(requestData, {4});
        '}',
        cache: false
      '}');
    '}'
  </script>
</body>
</html>
@@ -0,0 +1,234 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.ObjectWriter;

import org.apache.hadoop.yarn.sls.utils.SLSUtils;

import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class RumenToSLSConverter {
  private static final String EOL = System.getProperty("line.separator");

  private static long baseline = 0;
  private static Map<String, Set<String>> rackNodeMap =
      new TreeMap<String, Set<String>>();

  public static void main(String args[]) throws Exception {
    Options options = new Options();
    options.addOption("input", true, "input rumen json file");
    options.addOption("outputJobs", true, "output jobs file");
    options.addOption("outputNodes", true, "output nodes file");

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);

    if (! cmd.hasOption("input") ||
        ! cmd.hasOption("outputJobs") ||
        ! cmd.hasOption("outputNodes")) {
      System.err.println();
      System.err.println("ERROR: Missing input or output file");
      System.err.println();
      System.err.println("RumenToSLSConverter creates an SLS script " +
          "from a Hadoop Rumen output");
      System.err.println();
      System.err.println("Options: -input FILE -outputJobs FILE " +
          "-outputNodes FILE");
      System.err.println();
      System.exit(1);
    }

    String inputFile = cmd.getOptionValue("input");
    String outputJsonFile = cmd.getOptionValue("outputJobs");
    String outputNodeFile = cmd.getOptionValue("outputNodes");

    // check that the input exists and the outputs do not
    if (! new File(inputFile).exists()) {
      System.err.println();
      System.err.println("ERROR: input does not exist");
      System.exit(1);
    }
    if (new File(outputJsonFile).exists()) {
      System.err.println();
      System.err.println("ERROR: output job file already exists");
      System.exit(1);
    }
    if (new File(outputNodeFile).exists()) {
      System.err.println();
      System.err.println("ERROR: output node file already exists");
      System.exit(1);
    }

    File jsonFile = new File(outputJsonFile);
    if (! jsonFile.getParentFile().exists()
        && ! jsonFile.getParentFile().mkdirs()) {
      System.err.println("ERROR: Cannot create output directory in path: "
          + jsonFile.getParentFile().getAbsoluteFile());
      System.exit(1);
    }
    File nodeFile = new File(outputNodeFile);
    if (! nodeFile.getParentFile().exists()
        && ! nodeFile.getParentFile().mkdirs()) {
      System.err.println("ERROR: Cannot create output directory in path: "
          + nodeFile.getParentFile().getAbsoluteFile());
      System.exit(1);
    }

    generateSLSLoadFile(inputFile, outputJsonFile);
    generateSLSNodeFile(outputNodeFile);
  }
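
  // Example invocation (file names here are hypothetical, chosen only to
  // illustrate the options checked above):
  //   hadoop org.apache.hadoop.yarn.sls.RumenToSLSConverter \
  //     -input rumen-trace.json -outputJobs sls-jobs.json \
  //     -outputNodes sls-nodes.json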

  private static void generateSLSLoadFile(String inputFile, String outputFile)
      throws IOException {
    Reader input = new FileReader(inputFile);
    try {
      Writer output = new FileWriter(outputFile);
      try {
        ObjectMapper mapper = new ObjectMapper();
        ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
        Iterator<Map> i = mapper.readValues(
            new JsonFactory().createJsonParser(input), Map.class);
        while (i.hasNext()) {
          Map m = i.next();
          output.write(writer.writeValueAsString(createSLSJob(m)) + EOL);
        }
      } finally {
        output.close();
      }
    } finally {
      input.close();
    }
  }

  @SuppressWarnings("unchecked")
  private static void generateSLSNodeFile(String outputFile)
      throws IOException {
    Writer output = new FileWriter(outputFile);
    try {
      ObjectMapper mapper = new ObjectMapper();
      ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
      for (Map.Entry<String, Set<String>> entry : rackNodeMap.entrySet()) {
        Map rack = new LinkedHashMap();
        rack.put("rack", entry.getKey());
        List nodes = new ArrayList();
        for (String name : entry.getValue()) {
          Map node = new LinkedHashMap();
          node.put("node", name);
          nodes.add(node);
        }
        rack.put("nodes", nodes);
        output.write(writer.writeValueAsString(rack) + EOL);
      }
    } finally {
      output.close();
    }
  }
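
  // For reference, each record emitted by generateSLSNodeFile() above is a
  // pretty-printed JSON object of roughly this shape (names invented for
  // illustration):
  //   {
  //     "rack" : "default-rack",
  //     "nodes" : [ { "node" : "node1" }, { "node" : "node2" } ]
  //   }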

  @SuppressWarnings("unchecked")
  private static Map createSLSJob(Map rumenJob) {
    Map json = new LinkedHashMap();
    long jobStart = (Long) rumenJob.get("submitTime");
    long jobFinish = (Long) rumenJob.get("finishTime");
    String jobId = rumenJob.get("jobID").toString();
    String queue = rumenJob.get("queue").toString();
    String user = rumenJob.get("user").toString();
    if (baseline == 0) {
      baseline = jobStart;
    }
    jobStart -= baseline;
    jobFinish -= baseline;
    long offset = 0;
    if (jobStart < 0) {
      System.out.println("Warning: reset job " + jobId + " start time to 0.");
      offset = -jobStart;
      jobFinish = jobFinish - jobStart;
      jobStart = 0;
    }

    json.put("am.type", "mapreduce");
    json.put("job.start.ms", jobStart);
    json.put("job.end.ms", jobFinish);
    json.put("job.queue.name", queue);
    json.put("job.id", jobId);
    json.put("job.user", user);

    List maps = createSLSTasks("map",
        (List) rumenJob.get("mapTasks"), offset);
    List reduces = createSLSTasks("reduce",
        (List) rumenJob.get("reduceTasks"), offset);
    List tasks = new ArrayList();
    tasks.addAll(maps);
    tasks.addAll(reduces);
    json.put("job.tasks", tasks);
    return json;
  }
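
  // For reference, a job record produced by createSLSJob() looks roughly
  // like the following (values invented for illustration; the field names
  // come straight from the puts above):
  //   {
  //     "am.type" : "mapreduce",
  //     "job.start.ms" : 0,
  //     "job.end.ms" : 95375,
  //     "job.queue.name" : "default",
  //     "job.id" : "job_1",
  //     "job.user" : "default",
  //     "job.tasks" : [ {
  //       "container.host" : "/default-rack/node1",
  //       "container.start.ms" : 6664,
  //       "container.end.ms" : 23707,
  //       "container.priority" : 20,
  //       "container.type" : "map"
  //     } ]
  //   }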

  @SuppressWarnings("unchecked")
  private static List createSLSTasks(String taskType,
      List rumenTasks, long offset) {
    int priority = taskType.equals("reduce") ? 10 : 20;
    List array = new ArrayList();
    for (Object e : rumenTasks) {
      Map rumenTask = (Map) e;
      for (Object ee : (List) rumenTask.get("attempts")) {
        Map rumenAttempt = (Map) ee;
        long taskStart = (Long) rumenAttempt.get("startTime");
        long taskFinish = (Long) rumenAttempt.get("finishTime");
        String hostname = (String) rumenAttempt.get("hostName");
        taskStart = taskStart - baseline + offset;
        taskFinish = taskFinish - baseline + offset;
        Map task = new LinkedHashMap();
        task.put("container.host", hostname);
        task.put("container.start.ms", taskStart);
        task.put("container.end.ms", taskFinish);
        task.put("container.priority", priority);
        task.put("container.type", taskType);
        array.add(task);
        String rackHost[] = SLSUtils.getRackHostName(hostname);
        if (rackNodeMap.containsKey(rackHost[0])) {
          rackNodeMap.get(rackHost[0]).add(rackHost[1]);
        } else {
          Set<String> hosts = new TreeSet<String>();
          hosts.add(rackHost[1]);
          rackNodeMap.put(rackHost[0], hosts);
        }
      }
    }
    return array;
  }
}
@@ -0,0 +1,526 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.text.MessageFormat;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Random;
import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
import org.apache.hadoop.tools.rumen.LoggedTaskAttempt;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.sls.appmaster.AMSimulator;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;
import org.apache.hadoop.yarn.sls.nodemanager.NMSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
import org.apache.log4j.Logger;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;

public class SLSRunner {
  // RM, Runner
  private ResourceManager rm;
  private static TaskRunner runner = new TaskRunner();
  private String[] inputTraces;
  private Configuration conf;
  private Map<String, Integer> queueAppNumMap;

  // NM simulator
  private HashMap<NodeId, NMSimulator> nmMap;
  private int nmMemoryMB, nmVCores;
  private String nodeFile;

  // AM simulator
  private int AM_ID;
  private Map<String, AMSimulator> amMap;
  private Set<String> trackedApps;
  private Map<String, Class> amClassMap;
  private static int remainingApps = 0;

  // metrics
  private String metricsOutputDir;
  private boolean printSimulation;

  // other simulation information
  private int numNMs, numRacks, numAMs, numTasks;
  private long maxRuntime;
  public final static Map<String, Object> simulateInfoMap =
      new HashMap<String, Object>();

  // logger
  public final static Logger LOG = Logger.getLogger(SLSRunner.class);

  // input traces, input-rumen or input-sls
  private boolean isSLS;

  public SLSRunner(boolean isSLS, String inputTraces[], String nodeFile,
      String outputDir, Set<String> trackedApps,
      boolean printsimulation)
      throws IOException, ClassNotFoundException {
    this.isSLS = isSLS;
    this.inputTraces = inputTraces.clone();
    this.nodeFile = nodeFile;
    this.trackedApps = trackedApps;
    this.printSimulation = printsimulation;
    metricsOutputDir = outputDir;

    nmMap = new HashMap<NodeId, NMSimulator>();
    queueAppNumMap = new HashMap<String, Integer>();
    amMap = new HashMap<String, AMSimulator>();
    amClassMap = new HashMap<String, Class>();

    // runner configuration
    conf = new Configuration(false);
    conf.addResource("sls-runner.xml");
    // runner
    int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
        SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
    SLSRunner.runner.setQueueSize(poolSize);
    // <AMType, Class> map
    for (Map.Entry e : conf) {
      String key = e.getKey().toString();
      if (key.startsWith(SLSConfiguration.AM_TYPE)) {
        String amType = key.substring(SLSConfiguration.AM_TYPE.length());
        amClassMap.put(amType, Class.forName(conf.get(key)));
      }
    }
  }
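
  // A minimal sls-runner.xml sketch. The property names below are an
  // editorial assumption about how SLSConfiguration spells its constants
  // (a "yarn.sls." prefix); consult the shipped sample-conf for the
  // authoritative file:
  //   <configuration>
  //     <property>
  //       <name>yarn.sls.runner.pool.size</name>
  //       <value>10</value>
  //     </property>
  //     <property>
  //       <name>yarn.sls.am.type.mapreduce</name>
  //       <value>org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator</value>
  //     </property>
  //   </configuration>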

  public void start() throws Exception {
    // start resource manager
    startRM();
    // start node managers
    startNM();
    // start application masters
    startAM();
    // set queue & tracked apps information
    ((ResourceSchedulerWrapper) rm.getResourceScheduler())
        .setQueueSet(this.queueAppNumMap.keySet());
    ((ResourceSchedulerWrapper) rm.getResourceScheduler())
        .setTrackedAppSet(this.trackedApps);
    // print out simulation info
    printSimulationInfo();
    // block until all nodes are RUNNING
    waitForNodesRunning();
    // start the runner once everything is ready
    runner.start();
  }

  private void startRM() throws IOException, ClassNotFoundException {
    Configuration rmConf = new YarnConfiguration();
    String schedulerClass = rmConf.get(YarnConfiguration.RM_SCHEDULER);
    rmConf.set(SLSConfiguration.RM_SCHEDULER, schedulerClass);
    rmConf.set(YarnConfiguration.RM_SCHEDULER,
        ResourceSchedulerWrapper.class.getName());
    rmConf.set(SLSConfiguration.METRICS_OUTPUT_DIR, metricsOutputDir);
    rm = new ResourceManager();
    rm.init(rmConf);
    rm.start();
  }

  private void startNM() throws YarnException, IOException {
    // nm configuration
    nmMemoryMB = conf.getInt(SLSConfiguration.NM_MEMORY_MB,
        SLSConfiguration.NM_MEMORY_MB_DEFAULT);
    nmVCores = conf.getInt(SLSConfiguration.NM_VCORES,
        SLSConfiguration.NM_VCORES_DEFAULT);
    int heartbeatInterval = conf.getInt(
        SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS,
        SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    // nm information (fetch from topology file, or from sls/rumen json file)
    Set<String> nodeSet = new HashSet<String>();
    if (nodeFile.isEmpty()) {
      if (isSLS) {
        for (String inputTrace : inputTraces) {
          nodeSet.addAll(SLSUtils.parseNodesFromSLSTrace(inputTrace));
        }
      } else {
        for (String inputTrace : inputTraces) {
          nodeSet.addAll(SLSUtils.parseNodesFromRumenTrace(inputTrace));
        }
      }
    } else {
      nodeSet.addAll(SLSUtils.parseNodesFromNodeFile(nodeFile));
    }
    // create NM simulators
    Random random = new Random();
    Set<String> rackSet = new HashSet<String>();
    for (String hostName : nodeSet) {
      // we randomize the heartbeat start time from zero to 1 interval
      NMSimulator nm = new NMSimulator();
      nm.init(hostName, nmMemoryMB, nmVCores,
          random.nextInt(heartbeatInterval), heartbeatInterval, rm);
      nmMap.put(nm.getNode().getNodeID(), nm);
      runner.schedule(nm);
      rackSet.add(nm.getNode().getRackName());
    }
    numRacks = rackSet.size();
    numNMs = nmMap.size();
  }

  private void waitForNodesRunning() throws InterruptedException {
    long startTimeMS = System.currentTimeMillis();
    while (true) {
      int numRunningNodes = 0;
      for (RMNode node : rm.getRMContext().getRMNodes().values()) {
        if (node.getState() == NodeState.RUNNING) {
          numRunningNodes ++;
        }
      }
      if (numRunningNodes == numNMs) {
        break;
      }
      LOG.info(MessageFormat.format("SLSRunner is waiting for all " +
          "nodes RUNNING. {0} of {1} NMs initialized.",
          numRunningNodes, numNMs));
      Thread.sleep(1000);
    }
    LOG.info(MessageFormat.format("SLSRunner takes {0} ms to launch all nodes.",
        (System.currentTimeMillis() - startTimeMS)));
  }

  @SuppressWarnings("unchecked")
  private void startAM() throws YarnException, IOException {
    // application/container configuration
    int heartbeatInterval = conf.getInt(
        SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS,
        SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    int containerMemoryMB = conf.getInt(SLSConfiguration.CONTAINER_MEMORY_MB,
        SLSConfiguration.CONTAINER_MEMORY_MB_DEFAULT);
    int containerVCores = conf.getInt(SLSConfiguration.CONTAINER_VCORES,
        SLSConfiguration.CONTAINER_VCORES_DEFAULT);
    Resource containerResource =
        BuilderUtils.newResource(containerMemoryMB, containerVCores);

    // application workload
    if (isSLS) {
      startAMFromSLSTraces(containerResource, heartbeatInterval);
    } else {
      startAMFromRumenTraces(containerResource, heartbeatInterval);
    }
    numAMs = amMap.size();
    remainingApps = numAMs;
  }

  /**
   * parse workload information from sls trace files
   */
  @SuppressWarnings("unchecked")
  private void startAMFromSLSTraces(Resource containerResource,
      int heartbeatInterval) throws IOException {
    // parse from sls traces
    JsonFactory jsonF = new JsonFactory();
    ObjectMapper mapper = new ObjectMapper();
    for (String inputTrace : inputTraces) {
      Reader input = new FileReader(inputTrace);
      try {
        Iterator<Map> i = mapper.readValues(jsonF.createJsonParser(input),
            Map.class);
        while (i.hasNext()) {
          Map jsonJob = i.next();

          // load job information
          long jobStartTime = Long.parseLong(
              jsonJob.get("job.start.ms").toString());
          long jobFinishTime = Long.parseLong(
              jsonJob.get("job.end.ms").toString());

          String user = (String) jsonJob.get("job.user");
          if (user == null) user = "default";
          String queue = jsonJob.get("job.queue.name").toString();

          String oldAppId = jsonJob.get("job.id").toString();
          boolean isTracked = trackedApps.contains(oldAppId);
          int queueSize = queueAppNumMap.containsKey(queue) ?
              queueAppNumMap.get(queue) : 0;
          queueSize ++;
          queueAppNumMap.put(queue, queueSize);
          // tasks
          List tasks = (List) jsonJob.get("job.tasks");
          if (tasks == null || tasks.size() == 0) {
            continue;
          }
          List<ContainerSimulator> containerList =
              new ArrayList<ContainerSimulator>();
          for (Object o : tasks) {
            Map jsonTask = (Map) o;
            String hostname = jsonTask.get("container.host").toString();
            long taskStart = Long.parseLong(
                jsonTask.get("container.start.ms").toString());
            long taskFinish = Long.parseLong(
                jsonTask.get("container.end.ms").toString());
            long lifeTime = taskFinish - taskStart;
            int priority = Integer.parseInt(
                jsonTask.get("container.priority").toString());
            String type = jsonTask.get("container.type").toString();
            containerList.add(new ContainerSimulator(containerResource,
                lifeTime, hostname, priority, type));
          }

          // create a new AM
          String amType = jsonJob.get("am.type").toString();
          AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
              amClassMap.get(amType), new Configuration());
          if (amSim != null) {
            amSim.init(AM_ID++, heartbeatInterval, containerList, rm,
                this, jobStartTime, jobFinishTime, user, queue,
                isTracked, oldAppId);
            runner.schedule(amSim);
            maxRuntime = Math.max(maxRuntime, jobFinishTime);
            numTasks += containerList.size();
            amMap.put(oldAppId, amSim);
          }
        }
      } finally {
        input.close();
      }
    }
  }

  /**
   * parse workload information from rumen trace files
   */
  @SuppressWarnings("unchecked")
  private void startAMFromRumenTraces(Resource containerResource,
      int heartbeatInterval)
      throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    long baselineTimeMS = 0;
    for (String inputTrace : inputTraces) {
      File fin = new File(inputTrace);
      JobTraceReader reader = new JobTraceReader(
          new Path(fin.getAbsolutePath()), conf);
      try {
        LoggedJob job = null;
        while ((job = reader.getNext()) != null) {
          // only support MapReduce currently
          String jobType = "mapreduce";
          String user = job.getUser() == null ?
              "default" : job.getUser().getValue();
          String jobQueue = job.getQueue().getValue();
          String oldJobId = job.getJobID().toString();
          long jobStartTimeMS = job.getSubmitTime();
          long jobFinishTimeMS = job.getFinishTime();
          if (baselineTimeMS == 0) {
            baselineTimeMS = jobStartTimeMS;
          }
          jobStartTimeMS -= baselineTimeMS;
          jobFinishTimeMS -= baselineTimeMS;
          if (jobStartTimeMS < 0) {
            LOG.warn("Warning: reset job " + oldJobId + " start time to 0.");
            jobFinishTimeMS = jobFinishTimeMS - jobStartTimeMS;
            jobStartTimeMS = 0;
          }

          boolean isTracked = trackedApps.contains(oldJobId);
          int queueSize = queueAppNumMap.containsKey(jobQueue) ?
              queueAppNumMap.get(jobQueue) : 0;
          queueSize ++;
          queueAppNumMap.put(jobQueue, queueSize);

          List<ContainerSimulator> containerList =
              new ArrayList<ContainerSimulator>();
          // map tasks
          for (LoggedTask mapTask : job.getMapTasks()) {
            LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
                .get(mapTask.getAttempts().size() - 1);
            String hostname = taskAttempt.getHostName().getValue();
            long containerLifeTime = taskAttempt.getFinishTime()
                - taskAttempt.getStartTime();
            containerList.add(new ContainerSimulator(containerResource,
                containerLifeTime, hostname, 10, "map"));
          }

          // reduce tasks
          for (LoggedTask reduceTask : job.getReduceTasks()) {
            LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
                .get(reduceTask.getAttempts().size() - 1);
            String hostname = taskAttempt.getHostName().getValue();
            long containerLifeTime = taskAttempt.getFinishTime()
                - taskAttempt.getStartTime();
            containerList.add(new ContainerSimulator(containerResource,
                containerLifeTime, hostname, 20, "reduce"));
          }

          // create a new AM
          AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
              amClassMap.get(jobType), conf);
          if (amSim != null) {
            amSim.init(AM_ID ++, heartbeatInterval, containerList,
                rm, this, jobStartTimeMS, jobFinishTimeMS, user, jobQueue,
                isTracked, oldJobId);
            runner.schedule(amSim);
            maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
            numTasks += containerList.size();
            amMap.put(oldJobId, amSim);
          }
        }
      } finally {
        reader.close();
      }
    }
  }

  private void printSimulationInfo() {
    if (printSimulation) {
      // node
      LOG.info("------------------------------------");
      LOG.info(MessageFormat.format("# nodes = {0}, # racks = {1}, capacity " +
          "of each node {2} MB memory and {3} vcores.",
          numNMs, numRacks, nmMemoryMB, nmVCores));
      LOG.info("------------------------------------");
      // job
      LOG.info(MessageFormat.format("# applications = {0}, # total " +
          "tasks = {1}, average # tasks per application = {2}",
          numAMs, numTasks, (int)(Math.ceil((numTasks + 0.0) / numAMs))));
      LOG.info("JobId\tQueue\tAMType\tDuration\t#Tasks");
      for (Map.Entry<String, AMSimulator> entry : amMap.entrySet()) {
        AMSimulator am = entry.getValue();
        LOG.info(entry.getKey() + "\t" + am.getQueue() + "\t" + am.getAMType()
            + "\t" + am.getDuration() + "\t" + am.getNumTasks());
      }
      LOG.info("------------------------------------");
      // queue
      LOG.info(MessageFormat.format("number of queues = {0}, average " +
          "number of apps = {1}", queueAppNumMap.size(),
          (int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size()))));
      LOG.info("------------------------------------");
      // runtime
      LOG.info(MessageFormat.format("estimated simulation time is {0}" +
          " seconds", (long)(Math.ceil(maxRuntime / 1000.0))));
      LOG.info("------------------------------------");
    }
    // package this information into the simulateInfoMap used elsewhere
    simulateInfoMap.put("Number of racks", numRacks);
    simulateInfoMap.put("Number of nodes", numNMs);
    simulateInfoMap.put("Node memory (MB)", nmMemoryMB);
    simulateInfoMap.put("Node VCores", nmVCores);
    simulateInfoMap.put("Number of applications", numAMs);
    simulateInfoMap.put("Number of tasks", numTasks);
    simulateInfoMap.put("Average tasks per application",
        (int)(Math.ceil((numTasks + 0.0) / numAMs)));
    simulateInfoMap.put("Number of queues", queueAppNumMap.size());
    simulateInfoMap.put("Average applications per queue",
        (int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size())));
    simulateInfoMap.put("Estimated simulate time (s)",
        (long)(Math.ceil(maxRuntime / 1000.0)));
  }

  public HashMap<NodeId, NMSimulator> getNmMap() {
    return nmMap;
  }

  public static TaskRunner getRunner() {
    return runner;
  }

  public static void decreaseRemainingApps() {
    remainingApps --;

    if (remainingApps == 0) {
      LOG.info("SLSRunner is shutting down.");
      System.exit(0);
    }
  }

  public static void main(String args[]) throws Exception {
    Options options = new Options();
    options.addOption("inputrumen", true, "input rumen files");
    options.addOption("inputsls", true, "input sls files");
    options.addOption("nodes", true, "input topology");
    options.addOption("output", true, "output directory");
    options.addOption("trackjobs", true,
        "jobs to be tracked during simulation");
    options.addOption("printsimulation", false,
        "print out simulation information");

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);

    String inputRumen = cmd.getOptionValue("inputrumen");
    String inputSLS = cmd.getOptionValue("inputsls");
    String output = cmd.getOptionValue("output");

    if ((inputRumen == null && inputSLS == null) || output == null) {
      System.err.println();
      System.err.println("ERROR: Missing input or output file");
      System.err.println();
      System.err.println("Options: -inputrumen|-inputsls FILE,FILE... " +
          "-output FILE [-nodes FILE] [-trackjobs JobId,JobId...] " +
          "[-printsimulation]");
      System.err.println();
      System.exit(1);
    }

    File outputFile = new File(output);
    if (! outputFile.exists()
        && ! outputFile.mkdirs()) {
      System.err.println("ERROR: Cannot create output directory "
          + outputFile.getAbsolutePath());
      System.exit(1);
    }

    Set<String> trackedJobSet = new HashSet<String>();
    if (cmd.hasOption("trackjobs")) {
      String trackjobs = cmd.getOptionValue("trackjobs");
      String jobIds[] = trackjobs.split(",");
      trackedJobSet.addAll(Arrays.asList(jobIds));
    }

    String nodeFile = cmd.hasOption("nodes") ? cmd.getOptionValue("nodes") : "";

    boolean isSLS = inputSLS != null;
    String inputFiles[] = isSLS ? inputSLS.split(",") : inputRumen.split(",");
    SLSRunner sls = new SLSRunner(isSLS, inputFiles, nodeFile, output,
        trackedJobSet, cmd.hasOption("printsimulation"));
    sls.start();
  }
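
  // Example invocation (file and directory names are hypothetical; the
  // flags mirror the options registered above, and the classpath setup is
  // elided):
  //   java ... org.apache.hadoop.yarn.sls.SLSRunner \
  //     -inputsls sls-jobs.json -nodes sls-nodes.json \
  //     -output /tmp/sls-out -trackjobs job_1,job_2 -printsimulation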
}
@@ -0,0 +1,385 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.appmaster;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords
    .FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;

import org.apache.hadoop.yarn.api.protocolrecords
    .RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords
    .RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;

import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;

public abstract class AMSimulator extends TaskRunner.Task {
  // resource manager
  protected ResourceManager rm;
  // main
  protected SLSRunner se;
  // application
  protected ApplicationId appId;
  protected ApplicationAttemptId appAttemptId;
  protected String oldAppId; // jobId from the jobhistory file
  // record factory
  protected final static RecordFactory recordFactory =
      RecordFactoryProvider.getRecordFactory(null);
  // response queue
  protected final BlockingQueue<AllocateResponse> responseQueue;
  protected int RESPONSE_ID = 1;
  // user name
  protected String user;
  // queue name
  protected String queue;
  // am type
  protected String amtype;
  // job start/end time
  protected long traceStartTimeMS;
  protected long traceFinishTimeMS;
  protected long simulateStartTimeMS;
  protected long simulateFinishTimeMS;
  // whether tracked in Metrics
  protected boolean isTracked;
  // progress
  protected int totalContainers;
  protected int finishedContainers;

  protected final Logger LOG = Logger.getLogger(AMSimulator.class);

  public AMSimulator() {
    this.responseQueue = new LinkedBlockingQueue<AllocateResponse>();
  }

  public void init(int id, int heartbeatInterval,
      List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
      long traceStartTime, long traceFinishTime, String user, String queue,
      boolean isTracked, String oldAppId) {
    super.init(traceStartTime, traceStartTime + 1000000L * heartbeatInterval,
        heartbeatInterval);
    this.rm = rm;
    this.se = se;
    this.user = user;
    this.queue = queue;
    this.oldAppId = oldAppId;
    this.isTracked = isTracked;
    this.traceStartTimeMS = traceStartTime;
    this.traceFinishTimeMS = traceFinishTime;
  }

  /**
   * register with RM
   */
  @Override
  public void firstStep()
      throws YarnException, IOException, InterruptedException {
    simulateStartTimeMS = System.currentTimeMillis() -
        SLSRunner.getRunner().getStartTimeMS();

    // submit application, waiting until ACCEPTED
    submitApp();

    // register application master
    registerAM();

    // track app metrics
    trackApp();
  }

  @Override
  public void middleStep()
      throws InterruptedException, YarnException, IOException {
    // process responses in the queue
    processResponseQueue();

    // send out request
    sendContainerRequest();

    // check whether the simulated job has finished
    checkStop();
  }

  @Override
  public void lastStep() {
    LOG.info(MessageFormat.format("Application {0} is shutting down.", appId));
    // unregister tracking
    if (isTracked) {
      untrackApp();
    }
    // unregister application master
    final FinishApplicationMasterRequest finishAMRequest = recordFactory
        .newRecordInstance(FinishApplicationMasterRequest.class);
    finishAMRequest.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);

    try {
      UserGroupInformation ugi =
          UserGroupInformation.createRemoteUser(appAttemptId.toString());
      Token<AMRMTokenIdentifier> token =
          rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
              .getRMAppAttempt(appAttemptId).getAMRMToken();
      ugi.addTokenIdentifier(token.decodeIdentifier());
      ugi.doAs(new PrivilegedExceptionAction<Object>() {
        @Override
        public Object run() throws Exception {
          rm.getApplicationMasterService()
              .finishApplicationMaster(finishAMRequest);
          return null;
        }
      });
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }

    simulateFinishTimeMS = System.currentTimeMillis() -
        SLSRunner.getRunner().getStartTimeMS();
    // record job running information
    ((ResourceSchedulerWrapper) rm.getResourceScheduler())
        .addAMRuntime(appId,
            traceStartTimeMS, traceFinishTimeMS,
            simulateStartTimeMS, simulateFinishTimeMS);
  }

  protected ResourceRequest createResourceRequest(
      Resource resource, String host, int priority, int numContainers) {
    ResourceRequest request = recordFactory
        .newRecordInstance(ResourceRequest.class);
    request.setCapability(resource);
    request.setResourceName(host);
    request.setNumContainers(numContainers);
    Priority prio = recordFactory.newRecordInstance(Priority.class);
    prio.setPriority(priority);
    request.setPriority(prio);
    return request;
  }

  protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask,
      List<ContainerId> toRelease) {
    AllocateRequest allocateRequest =
        recordFactory.newRecordInstance(AllocateRequest.class);
    allocateRequest.setResponseId(RESPONSE_ID ++);
    allocateRequest.setAskList(ask);
    allocateRequest.setReleaseList(toRelease);
    return allocateRequest;
  }

  protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask) {
    return createAllocateRequest(ask, new ArrayList<ContainerId>());
  }

  protected abstract void processResponseQueue()
      throws InterruptedException, YarnException, IOException;

  protected abstract void sendContainerRequest()
      throws YarnException, IOException, InterruptedException;

  protected abstract void checkStop();

  private void submitApp()
      throws YarnException, InterruptedException, IOException {
    // ask for a new application
    GetNewApplicationRequest newAppRequest =
        Records.newRecord(GetNewApplicationRequest.class);
    GetNewApplicationResponse newAppResponse =
        rm.getClientRMService().getNewApplication(newAppRequest);
    appId = newAppResponse.getApplicationId();

    // submit the application
    final SubmitApplicationRequest subAppRequest =
        Records.newRecord(SubmitApplicationRequest.class);
    ApplicationSubmissionContext appSubContext =
        Records.newRecord(ApplicationSubmissionContext.class);
    appSubContext.setApplicationId(appId);
    appSubContext.setMaxAppAttempts(1);
    appSubContext.setQueue(queue);
    appSubContext.setPriority(Priority.newInstance(0));
    ContainerLaunchContext conLauContext =
        Records.newRecord(ContainerLaunchContext.class);
    conLauContext.setApplicationACLs(
        new HashMap<ApplicationAccessType, String>());
    conLauContext.setCommands(new ArrayList<String>());
    conLauContext.setEnvironment(new HashMap<String, String>());
    conLauContext.setLocalResources(new HashMap<String, LocalResource>());
    conLauContext.setServiceData(new HashMap<String, ByteBuffer>());
    appSubContext.setAMContainerSpec(conLauContext);
    appSubContext.setUnmanagedAM(true);
    subAppRequest.setApplicationSubmissionContext(appSubContext);
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
    ugi.doAs(new PrivilegedExceptionAction<Object>() {
      @Override
      public Object run() throws YarnException {
        rm.getClientRMService().submitApplication(subAppRequest);
        return null;
      }
    });
    LOG.info(MessageFormat.format("Submit a new application {0}", appId));

    // wait until the application is ACCEPTED
    RMApp app = rm.getRMContext().getRMApps().get(appId);
    while (app.getState() != RMAppState.ACCEPTED) {
      Thread.sleep(50);
    }

    appAttemptId = rm.getRMContext().getRMApps().get(appId)
        .getCurrentAppAttempt().getAppAttemptId();
  }

  private void registerAM()
      throws YarnException, IOException, InterruptedException {
    // register application master
    final RegisterApplicationMasterRequest amRegisterRequest =
        Records.newRecord(RegisterApplicationMasterRequest.class);
    amRegisterRequest.setHost("localhost");
    amRegisterRequest.setRpcPort(1000);
    amRegisterRequest.setTrackingUrl("localhost:1000");

    UserGroupInformation ugi =
        UserGroupInformation.createRemoteUser(appAttemptId.toString());
    Token<AMRMTokenIdentifier> token =
        rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
            .getRMAppAttempt(appAttemptId).getAMRMToken();
    ugi.addTokenIdentifier(token.decodeIdentifier());

    ugi.doAs(
        new PrivilegedExceptionAction<RegisterApplicationMasterResponse>() {
          @Override
          public RegisterApplicationMasterResponse run() throws Exception {
            return rm.getApplicationMasterService()
                .registerApplicationMaster(amRegisterRequest);
          }
        });

    LOG.info(MessageFormat.format(
        "Register the application master for application {0}", appId));
  }

  private void trackApp() {
    if (isTracked) {
      ((ResourceSchedulerWrapper) rm.getResourceScheduler())
          .addTrackedApp(appAttemptId, oldAppId);
    }
  }

  public void untrackApp() {
    if (isTracked) {
      ((ResourceSchedulerWrapper) rm.getResourceScheduler())
          .removeTrackedApp(appAttemptId, oldAppId);
    }
  }

  protected List<ResourceRequest> packageRequests(
      List<ContainerSimulator> csList, int priority) {
    // create requests
    Map<String, ResourceRequest> rackLocalRequestMap =
        new HashMap<String, ResourceRequest>();
    Map<String, ResourceRequest> nodeLocalRequestMap =
        new HashMap<String, ResourceRequest>();
    ResourceRequest anyRequest = null;
    for (ContainerSimulator cs : csList) {
      String rackHostNames[] = SLSUtils.getRackHostName(cs.getHostname());
      // check rack local
      String rackname = rackHostNames[0];
      if (rackLocalRequestMap.containsKey(rackname)) {
        rackLocalRequestMap.get(rackname).setNumContainers(
            rackLocalRequestMap.get(rackname).getNumContainers() + 1);
      } else {
        ResourceRequest request = createResourceRequest(
            cs.getResource(), rackname, priority, 1);
        rackLocalRequestMap.put(rackname, request);
      }
      // check node local
      String hostname = rackHostNames[1];
      if (nodeLocalRequestMap.containsKey(hostname)) {
        nodeLocalRequestMap.get(hostname).setNumContainers(
            nodeLocalRequestMap.get(hostname).getNumContainers() + 1);
      } else {
        ResourceRequest request = createResourceRequest(
            cs.getResource(), hostname, priority, 1);
        nodeLocalRequestMap.put(hostname, request);
      }
      // any
      if (anyRequest == null) {
        anyRequest = createResourceRequest(
            cs.getResource(), ResourceRequest.ANY, priority, 1);
      } else {
        anyRequest.setNumContainers(anyRequest.getNumContainers() + 1);
      }
    }
    List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
    ask.addAll(nodeLocalRequestMap.values());
    ask.addAll(rackLocalRequestMap.values());
    if (anyRequest != null) {
      ask.add(anyRequest);
    }
    return ask;
  }
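
  // Worked example (illustrative, derived from the logic above): for three
  // tasks on node1 and one on node2, both in rack1, packageRequests()
  // returns four ResourceRequests:
  //   node1 -> 3 containers, node2 -> 1, rack1 -> 4, and ANY (*) -> 4,
  // mirroring how a real MapReduce AM expresses relaxed locality.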

  public String getQueue() {
    return queue;
  }

  public String getAMType() {
    return amtype;
  }

  public long getDuration() {
    return simulateFinishTimeMS - simulateStartTimeMS;
  }

  public int getNumTasks() {
    return totalContainers;
  }
}
@@ -0,0 +1,405 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.appmaster;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;

import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.log4j.Logger;

public class MRAMSimulator extends AMSimulator {
  /*
  Vocabulary Used:
  pending -> requests which are NOT yet sent to RM
  scheduled -> requests which are sent to RM but not yet assigned
  assigned -> requests which are assigned to a container
  completed -> request whose corresponding container has completed

  Maps are scheduled as soon as their requests are received. Reduces are
  scheduled when all maps have finished (slow-start is not currently
  supported).
  */
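
  // Illustrative lifecycle of a single map request, inferred from the
  // collections below (an editorial sketch, not part of the original code):
  //   pendingMaps -> scheduledMaps (sent to the RM in an allocate() ask)
  //     -> assignedMaps (container allocated) -> finished on SUCCESS, or
  //   back to pendingFailedMaps if its container is killed or aborted.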
|
||||||
|
|
||||||
|
private static final int PRIORITY_REDUCE = 10;
|
||||||
|
private static final int PRIORITY_MAP = 20;
|
||||||
|
|
||||||
|
// pending maps
|
||||||
|
private LinkedList<ContainerSimulator> pendingMaps =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
|
||||||
|
// pending failed maps
|
||||||
|
private LinkedList<ContainerSimulator> pendingFailedMaps =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
|
||||||
|
// scheduled maps
|
||||||
|
private LinkedList<ContainerSimulator> scheduledMaps =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
|
||||||
|
// assigned maps
|
||||||
|
private Map<ContainerId, ContainerSimulator> assignedMaps =
|
||||||
|
new HashMap<ContainerId, ContainerSimulator>();
|
||||||
|
|
||||||
|
// reduces which are not yet scheduled
|
||||||
|
private LinkedList<ContainerSimulator> pendingReduces =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
|
||||||
|
// pending failed reduces
|
||||||
|
private LinkedList<ContainerSimulator> pendingFailedReduces =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
|
||||||
|
// scheduled reduces
|
||||||
|
private LinkedList<ContainerSimulator> scheduledReduces =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
|
||||||
|
// assigned reduces
|
||||||
|
private Map<ContainerId, ContainerSimulator> assignedReduces =
|
||||||
|
new HashMap<ContainerId, ContainerSimulator>();
|
||||||
|
|
||||||
|
// all maps & reduces
|
||||||
|
private LinkedList<ContainerSimulator> allMaps =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
private LinkedList<ContainerSimulator> allReduces =
|
||||||
|
new LinkedList<ContainerSimulator>();
|
||||||
|
|
||||||
|
// counters
|
||||||
|
private int mapFinished = 0;
|
||||||
|
private int mapTotal = 0;
|
||||||
|
private int reduceFinished = 0;
|
||||||
|
private int reduceTotal = 0;
|
||||||
|
// waiting for AM container
|
||||||
|
private boolean isAMContainerRunning = false;
|
||||||
|
private Container amContainer;
|
||||||
|
// finished
|
||||||
|
private boolean isFinished = false;
|
||||||
|
// resource for AM container
|
||||||
|
private final static int MR_AM_CONTAINER_RESOURCE_MEMORY_MB = 1024;
|
||||||
|
private final static int MR_AM_CONTAINER_RESOURCE_VCORES = 1;
|
||||||
|
|
||||||
|
public final Logger LOG = Logger.getLogger(MRAMSimulator.class);
|
||||||
|
|
||||||
|
  public void init(int id, int heartbeatInterval,
      List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
      long traceStartTime, long traceFinishTime, String user, String queue,
      boolean isTracked, String oldAppId) {
    super.init(id, heartbeatInterval, containerList, rm, se,
            traceStartTime, traceFinishTime, user, queue,
            isTracked, oldAppId);
    amtype = "mapreduce";

    // get map/reduce tasks
    for (ContainerSimulator cs : containerList) {
      if (cs.getType().equals("map")) {
        cs.setPriority(PRIORITY_MAP);
        pendingMaps.add(cs);
      } else if (cs.getType().equals("reduce")) {
        cs.setPriority(PRIORITY_REDUCE);
        pendingReduces.add(cs);
      }
    }
    allMaps.addAll(pendingMaps);
    allReduces.addAll(pendingReduces);
    mapTotal = pendingMaps.size();
    reduceTotal = pendingReduces.size();
    totalContainers = mapTotal + reduceTotal;
  }

  @Override
  public void firstStep()
          throws YarnException, IOException, InterruptedException {
    super.firstStep();

    requestAMContainer();
  }

  /**
   * send out request for AM container
   */
  protected void requestAMContainer()
          throws YarnException, IOException, InterruptedException {
    List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
    ResourceRequest amRequest = createResourceRequest(
            BuilderUtils.newResource(MR_AM_CONTAINER_RESOURCE_MEMORY_MB,
                    MR_AM_CONTAINER_RESOURCE_VCORES),
            ResourceRequest.ANY, 1, 1);
    ask.add(amRequest);
    LOG.debug(MessageFormat.format("Application {0} sends out allocate " +
            "request for its AM", appId));
    final AllocateRequest request = this.createAllocateRequest(ask);

    UserGroupInformation ugi =
            UserGroupInformation.createRemoteUser(appAttemptId.toString());
    Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
            .get(appAttemptId.getApplicationId())
            .getRMAppAttempt(appAttemptId).getAMRMToken();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AllocateResponse response = ugi.doAs(
            new PrivilegedExceptionAction<AllocateResponse>() {
      @Override
      public AllocateResponse run() throws Exception {
        return rm.getApplicationMasterService().allocate(request);
      }
    });

    // waiting until the AM container is allocated
    while (true) {
      if (response != null && ! response.getAllocatedContainers().isEmpty()) {
        // get AM container
        Container container = response.getAllocatedContainers().get(0);
        se.getNmMap().get(container.getNodeId())
                .addNewContainer(container, -1L);
        // start AM container
        amContainer = container;
        LOG.debug(MessageFormat.format("Application {0} starts its " +
                "AM container ({1}).", appId, amContainer.getId()));
        isAMContainerRunning = true;
        break;
      }
      // this sleep time is different from HeartBeat
      Thread.sleep(1000);
      // send out empty request
      sendContainerRequest();
      response = responseQueue.take();
    }
  }

  @Override
  @SuppressWarnings("unchecked")
  protected void processResponseQueue()
          throws InterruptedException, YarnException, IOException {
    while (! responseQueue.isEmpty()) {
      AllocateResponse response = responseQueue.take();

      // check completed containers
      if (! response.getCompletedContainersStatuses().isEmpty()) {
        for (ContainerStatus cs : response.getCompletedContainersStatuses()) {
          ContainerId containerId = cs.getContainerId();
          if (cs.getExitStatus() == ContainerExitStatus.SUCCESS) {
            if (assignedMaps.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                      "mapper finished ({1}).", appId, containerId));
              assignedMaps.remove(containerId);
              mapFinished++;
              finishedContainers++;
            } else if (assignedReduces.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                      "reducer finished ({1}).", appId, containerId));
              assignedReduces.remove(containerId);
              reduceFinished++;
              finishedContainers++;
            } else {
              // am container released event
              isFinished = true;
              LOG.info(MessageFormat.format("Application {0} goes to " +
                      "finish.", appId));
            }
          } else {
            // container to be killed
            if (assignedMaps.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                      "mapper killed ({1}).", appId, containerId));
              pendingFailedMaps.add(assignedMaps.remove(containerId));
            } else if (assignedReduces.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                      "reducer killed ({1}).", appId, containerId));
              pendingFailedReduces.add(assignedReduces.remove(containerId));
            } else {
              LOG.info(MessageFormat.format("Application {0}'s AM is " +
                      "going to be killed. Restarting...", appId));
              restart();
            }
          }
        }
      }

      // check finished
      if (isAMContainerRunning &&
              (mapFinished == mapTotal) &&
              (reduceFinished == reduceTotal)) {
        // to release the AM container
        se.getNmMap().get(amContainer.getNodeId())
                .cleanupContainer(amContainer.getId());
        isAMContainerRunning = false;
        LOG.debug(MessageFormat.format("Application {0} sends out event " +
                "to clean up its AM container.", appId));
        isFinished = true;
      }

      // check allocated containers
      for (Container container : response.getAllocatedContainers()) {
        if (! scheduledMaps.isEmpty()) {
          ContainerSimulator cs = scheduledMaps.remove();
          LOG.debug(MessageFormat.format("Application {0} starts to " +
                  "launch a mapper ({1}).", appId, container.getId()));
          assignedMaps.put(container.getId(), cs);
          se.getNmMap().get(container.getNodeId())
                  .addNewContainer(container, cs.getLifeTime());
        } else if (! this.scheduledReduces.isEmpty()) {
          ContainerSimulator cs = scheduledReduces.remove();
          LOG.debug(MessageFormat.format("Application {0} starts to " +
                  "launch a reducer ({1}).", appId, container.getId()));
          assignedReduces.put(container.getId(), cs);
          se.getNmMap().get(container.getNodeId())
                  .addNewContainer(container, cs.getLifeTime());
        }
      }
    }
  }

  /**
   * restart the application because its AM container was killed
   */
  private void restart()
          throws YarnException, IOException, InterruptedException {
    // clear
    finishedContainers = 0;
    isFinished = false;
    mapFinished = 0;
    reduceFinished = 0;
    pendingFailedMaps.clear();
    pendingMaps.clear();
    pendingReduces.clear();
    pendingFailedReduces.clear();
    pendingMaps.addAll(allMaps);
    pendingReduces.addAll(allReduces);
    isAMContainerRunning = false;
    amContainer = null;
    // resend the AM container request
    requestAMContainer();
  }

  @Override
  protected void sendContainerRequest()
          throws YarnException, IOException, InterruptedException {
    if (isFinished) {
      return;
    }

    // send out request
    List<ResourceRequest> ask = null;
    if (isAMContainerRunning) {
      if (mapFinished != mapTotal) {
        // map phase
        if (! pendingMaps.isEmpty()) {
          ask = packageRequests(pendingMaps, PRIORITY_MAP);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
                  "request for {1} mappers.", appId, pendingMaps.size()));
          scheduledMaps.addAll(pendingMaps);
          pendingMaps.clear();
        } else if (! pendingFailedMaps.isEmpty() && scheduledMaps.isEmpty()) {
          ask = packageRequests(pendingFailedMaps, PRIORITY_MAP);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
                  "requests for {1} failed mappers.", appId,
                  pendingFailedMaps.size()));
          scheduledMaps.addAll(pendingFailedMaps);
          pendingFailedMaps.clear();
        }
      } else if (reduceFinished != reduceTotal) {
        // reduce phase
        if (! pendingReduces.isEmpty()) {
          ask = packageRequests(pendingReduces, PRIORITY_REDUCE);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
                  "requests for {1} reducers.", appId, pendingReduces.size()));
          scheduledReduces.addAll(pendingReduces);
          pendingReduces.clear();
        } else if (! pendingFailedReduces.isEmpty()
                && scheduledReduces.isEmpty()) {
          ask = packageRequests(pendingFailedReduces, PRIORITY_REDUCE);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
                  "request for {1} failed reducers.", appId,
                  pendingFailedReduces.size()));
          scheduledReduces.addAll(pendingFailedReduces);
          pendingFailedReduces.clear();
        }
      }
    }
    if (ask == null) {
      ask = new ArrayList<ResourceRequest>();
    }

    final AllocateRequest request = createAllocateRequest(ask);
    if (totalContainers == 0) {
      request.setProgress(1.0f);
    } else {
      request.setProgress((float) finishedContainers / totalContainers);
    }

    UserGroupInformation ugi =
            UserGroupInformation.createRemoteUser(appAttemptId.toString());
    Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
            .get(appAttemptId.getApplicationId())
            .getRMAppAttempt(appAttemptId).getAMRMToken();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AllocateResponse response = ugi.doAs(
            new PrivilegedExceptionAction<AllocateResponse>() {
      @Override
      public AllocateResponse run() throws Exception {
        return rm.getApplicationMasterService().allocate(request);
      }
    });
    if (response != null) {
      responseQueue.put(response);
    }
  }

  @Override
  protected void checkStop() {
    if (isFinished) {
      super.setEndTime(System.currentTimeMillis());
    }
  }

  @Override
  public void lastStep() {
    super.lastStep();

    // clear data structures
    allMaps.clear();
    allReduces.clear();
    assignedMaps.clear();
    assignedReduces.clear();
    pendingFailedMaps.clear();
    pendingFailedReduces.clear();
    pendingMaps.clear();
    pendingReduces.clear();
    scheduledMaps.clear();
    scheduledReduces.clear();
    responseQueue.clear();
  }
}
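
To make the vocabulary in the class comment above concrete, here is a hedged
trace of one map task through MRAMSimulator's lists on the normal,
failure-free path (all names come from the code above):

    // 1. "pending":   init() files the task request
    pendingMaps.add(cs);
    // 2. "scheduled": sendContainerRequest() asks the RM and moves it over
    scheduledMaps.addAll(pendingMaps); pendingMaps.clear();
    // 3. "assigned":  processResponseQueue() binds it to a real container
    assignedMaps.put(container.getId(), cs);
    // 4. "completed": a SUCCESS ContainerStatus retires it
    assignedMaps.remove(containerId); mapFinished++;
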
@@ -0,0 +1,68 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.conf;

public class SLSConfiguration {
  // sls
  public static final String PREFIX = "yarn.sls.";
  // runner
  public static final String RUNNER_PREFIX = PREFIX + "runner.";
  public static final String RUNNER_POOL_SIZE = RUNNER_PREFIX + "pool.size";
  public static final int RUNNER_POOL_SIZE_DEFAULT = 10;
  // scheduler
  public static final String SCHEDULER_PREFIX = PREFIX + "scheduler.";
  public static final String RM_SCHEDULER = SCHEDULER_PREFIX + "class";
  // metrics
  public static final String METRICS_PREFIX = PREFIX + "metrics.";
  public static final String METRICS_SWITCH = METRICS_PREFIX + "switch";
  public static final String METRICS_WEB_ADDRESS_PORT = METRICS_PREFIX
          + "web.address.port";
  public static final String METRICS_OUTPUT_DIR = METRICS_PREFIX + "output";
  public static final int METRICS_WEB_ADDRESS_PORT_DEFAULT = 10001;
  public static final String METRICS_TIMER_WINDOW_SIZE = METRICS_PREFIX
          + "timer.window.size";
  public static final int METRICS_TIMER_WINDOW_SIZE_DEFAULT = 100;
  public static final String METRICS_RECORD_INTERVAL_MS = METRICS_PREFIX
          + "record.interval.ms";
  public static final int METRICS_RECORD_INTERVAL_MS_DEFAULT = 1000;
  // nm
  public static final String NM_PREFIX = PREFIX + "nm.";
  public static final String NM_MEMORY_MB = NM_PREFIX + "memory.mb";
  public static final int NM_MEMORY_MB_DEFAULT = 10240;
  public static final String NM_VCORES = NM_PREFIX + "vcores";
  public static final int NM_VCORES_DEFAULT = 10;
  public static final String NM_HEARTBEAT_INTERVAL_MS = NM_PREFIX
          + "heartbeat.interval.ms";
  public static final int NM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
  // am
  public static final String AM_PREFIX = PREFIX + "am.";
  public static final String AM_HEARTBEAT_INTERVAL_MS = AM_PREFIX
          + "heartbeat.interval.ms";
  public static final int AM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
  public static final String AM_TYPE = AM_PREFIX + "type.";

  // container
  public static final String CONTAINER_PREFIX = PREFIX + "container.";
  public static final String CONTAINER_MEMORY_MB = CONTAINER_PREFIX
          + "memory.mb";
  public static final int CONTAINER_MEMORY_MB_DEFAULT = 1024;
  public static final String CONTAINER_VCORES = CONTAINER_PREFIX + "vcores";
  public static final int CONTAINER_VCORES_DEFAULT = 1;

}
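
The keys above correspond to entries in the simulator's runner configuration
file (a sample ships under sls/sample-conf per the assembly earlier in this
patch). A minimal sketch of reading them back with a plain Hadoop
Configuration; the resource name "sls-runner.xml" is an assumption based on
that sample directory:

    Configuration conf = new Configuration(false);
    conf.addResource("sls-runner.xml");
    int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
            SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);   // 10 if unset
    int nmMemoryMB = conf.getInt(SLSConfiguration.NM_MEMORY_MB,
            SLSConfiguration.NM_MEMORY_MB_DEFAULT);       // 10240 if unset
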
@@ -0,0 +1,261 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.nodemanager;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords
        .RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords
        .RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;

import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;

public class NMSimulator extends TaskRunner.Task {
  // node resource
  private RMNode node;
  // master key
  private MasterKey masterKey;
  // containers with various STATE
  private List<ContainerId> completedContainerList;
  private List<ContainerId> releasedContainerList;
  private DelayQueue<ContainerSimulator> containerQueue;
  private Map<ContainerId, ContainerSimulator> runningContainers;
  private List<ContainerId> amContainerList;
  // resource manager
  private ResourceManager rm;
  // heart beat response id
  private int RESPONSE_ID = 1;
  private final static Logger LOG = Logger.getLogger(NMSimulator.class);

  public void init(String nodeIdStr, int memory, int cores,
          int dispatchTime, int heartBeatInterval, ResourceManager rm)
          throws IOException, YarnException {
    super.init(dispatchTime, dispatchTime + 1000000L * heartBeatInterval,
            heartBeatInterval);
    // create resource
    String[] rackHostName = SLSUtils.getRackHostName(nodeIdStr);
    this.node = NodeInfo.newNodeInfo(rackHostName[0], rackHostName[1],
            BuilderUtils.newResource(memory, cores));
    this.rm = rm;
    // init data structures
    completedContainerList =
            Collections.synchronizedList(new ArrayList<ContainerId>());
    releasedContainerList =
            Collections.synchronizedList(new ArrayList<ContainerId>());
    containerQueue = new DelayQueue<ContainerSimulator>();
    amContainerList =
            Collections.synchronizedList(new ArrayList<ContainerId>());
    runningContainers =
            new ConcurrentHashMap<ContainerId, ContainerSimulator>();
    // register NM with RM
    RegisterNodeManagerRequest req =
            Records.newRecord(RegisterNodeManagerRequest.class);
    req.setNodeId(node.getNodeID());
    req.setResource(node.getTotalCapability());
    req.setHttpPort(80);
    RegisterNodeManagerResponse response = rm.getResourceTrackerService()
            .registerNodeManager(req);
    masterKey = response.getNMTokenMasterKey();
  }

  @Override
  public void firstStep() throws YarnException, IOException {
    // do nothing
  }

  @Override
  public void middleStep() {
    // check the lifetime of each running container
    ContainerSimulator cs = null;
    synchronized(completedContainerList) {
      while ((cs = containerQueue.poll()) != null) {
        runningContainers.remove(cs.getId());
        completedContainerList.add(cs.getId());
        LOG.debug(MessageFormat.format("Container {0} has completed",
                cs.getId()));
      }
    }

    // send heart beat
    NodeHeartbeatRequest beatRequest =
            Records.newRecord(NodeHeartbeatRequest.class);
    beatRequest.setLastKnownNMTokenMasterKey(masterKey);
    NodeStatus ns = Records.newRecord(NodeStatus.class);

    ns.setContainersStatuses(generateContainerStatusList());
    ns.setNodeId(node.getNodeID());
    ns.setKeepAliveApplications(new ArrayList<ApplicationId>());
    ns.setResponseId(RESPONSE_ID++);
    ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
    beatRequest.setNodeStatus(ns);
    try {
      NodeHeartbeatResponse beatResponse =
              rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
      if (! beatResponse.getContainersToCleanup().isEmpty()) {
        // remove from queue
        synchronized(releasedContainerList) {
          for (ContainerId containerId : beatResponse.getContainersToCleanup()) {
            if (amContainerList.contains(containerId)) {
              // AM container (not killed, just released)
              synchronized(amContainerList) {
                amContainerList.remove(containerId);
              }
              LOG.debug(MessageFormat.format("NodeManager {0} releases " +
                      "an AM ({1}).", node.getNodeID(), containerId));
            } else {
              cs = runningContainers.remove(containerId);
              containerQueue.remove(cs);
              releasedContainerList.add(containerId);
              LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
                      "container ({1}).", node.getNodeID(), containerId));
            }
          }
        }
      }
      if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
        lastStep();
      }
    } catch (YarnException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  @Override
  public void lastStep() {
    // do nothing
  }

  /**
   * collect the status of all containers located on the current node
   */
  private ArrayList<ContainerStatus> generateContainerStatusList() {
    ArrayList<ContainerStatus> csList = new ArrayList<ContainerStatus>();
    // add running containers
    for (ContainerSimulator container : runningContainers.values()) {
      csList.add(newContainerStatus(container.getId(),
              ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
    }
    synchronized(amContainerList) {
      for (ContainerId cId : amContainerList) {
        csList.add(newContainerStatus(cId,
                ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
      }
    }
    // add complete containers
    synchronized(completedContainerList) {
      for (ContainerId cId : completedContainerList) {
        LOG.debug(MessageFormat.format("NodeManager {0} completed" +
                " container ({1}).", node.getNodeID(), cId));
        csList.add(newContainerStatus(
                cId, ContainerState.COMPLETE, ContainerExitStatus.SUCCESS));
      }
      completedContainerList.clear();
    }
    // released containers
    synchronized(releasedContainerList) {
      for (ContainerId cId : releasedContainerList) {
        LOG.debug(MessageFormat.format("NodeManager {0} released container" +
                " ({1}).", node.getNodeID(), cId));
        csList.add(newContainerStatus(
                cId, ContainerState.COMPLETE, ContainerExitStatus.ABORTED));
      }
      releasedContainerList.clear();
    }
    return csList;
  }

  private ContainerStatus newContainerStatus(ContainerId cId,
          ContainerState state, int exitState) {
    ContainerStatus cs = Records.newRecord(ContainerStatus.class);
    cs.setContainerId(cId);
    cs.setState(state);
    cs.setExitStatus(exitState);
    return cs;
  }

  public RMNode getNode() {
    return node;
  }

  /**
   * launch a new container with the given life time
   */
  public void addNewContainer(Container container, long lifeTimeMS) {
    LOG.debug(MessageFormat.format("NodeManager {0} launches a new " +
            "container ({1}).", node.getNodeID(), container.getId()));
    if (lifeTimeMS != -1) {
      // normal container
      ContainerSimulator cs = new ContainerSimulator(container.getId(),
              container.getResource(), lifeTimeMS + System.currentTimeMillis(),
              lifeTimeMS);
      containerQueue.add(cs);
      runningContainers.put(cs.getId(), cs);
    } else {
      // a lifetime of -1 marks an AM container
      synchronized(amContainerList) {
        amContainerList.add(container.getId());
      }
    }
  }

  /**
   * clean up an AM container and add to completed list
   * @param containerId id of the container to be cleaned
   */
  public void cleanupContainer(ContainerId containerId) {
    synchronized(amContainerList) {
      amContainerList.remove(containerId);
    }
    synchronized(completedContainerList) {
      completedContainerList.add(containerId);
    }
  }
}
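
A hedged usage sketch for the class above: an AM simulator hands each
allocated container to the NMSimulator that owns the node, which is exactly
what MRAMSimulator does earlier in this patch via se.getNmMap():

    NMSimulator nm = se.getNmMap().get(container.getNodeId());
    nm.addNewContainer(container, 5000L);  // task container, "runs" for 5 seconds
    nm.addNewContainer(amContainer, -1L);  // -1 marks the AM container; it stays
                                           // up until cleanupContainer() is called
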
@@ -0,0 +1,167 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.nodemanager;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
        .UpdatedContainerInfo;

public class NodeInfo {
  private static int NODE_ID = 0;

  public static NodeId newNodeID(String host, int port) {
    return NodeId.newInstance(host, port);
  }

  private static class FakeRMNodeImpl implements RMNode {
    private NodeId nodeId;
    private String hostName;
    private String nodeAddr;
    private String httpAddress;
    private int cmdPort;
    private Resource perNode;
    private String rackName;
    private String healthReport;
    private NodeState state;
    private List<ContainerId> toCleanUpContainers;
    private List<ApplicationId> toCleanUpApplications;

    public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
        Resource perNode, String rackName, String healthReport,
        int cmdPort, String hostName, NodeState state) {
      this.nodeId = nodeId;
      this.nodeAddr = nodeAddr;
      this.httpAddress = httpAddress;
      this.perNode = perNode;
      this.rackName = rackName;
      this.healthReport = healthReport;
      this.cmdPort = cmdPort;
      this.hostName = hostName;
      this.state = state;
      toCleanUpApplications = new ArrayList<ApplicationId>();
      toCleanUpContainers = new ArrayList<ContainerId>();
    }

    public NodeId getNodeID() {
      return nodeId;
    }

    public String getHostName() {
      return hostName;
    }

    public int getCommandPort() {
      return cmdPort;
    }

    public int getHttpPort() {
      return 0;
    }

    public String getNodeAddress() {
      return nodeAddr;
    }

    public String getHttpAddress() {
      return httpAddress;
    }

    public String getHealthReport() {
      return healthReport;
    }

    public long getLastHealthReportTime() {
      return 0;
    }

    public Resource getTotalCapability() {
      return perNode;
    }

    public String getRackName() {
      return rackName;
    }

    public Node getNode() {
      throw new UnsupportedOperationException("Not supported yet.");
    }

    public NodeState getState() {
      return state;
    }

    public List<ContainerId> getContainersToCleanUp() {
      return toCleanUpContainers;
    }

    public List<ApplicationId> getAppsToCleanup() {
      return toCleanUpApplications;
    }

    public void updateNodeHeartbeatResponseForCleanup(
            NodeHeartbeatResponse response) {
    }

    public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
      return null;
    }

    public List<UpdatedContainerInfo> pullContainerUpdates() {
      ArrayList<UpdatedContainerInfo> list =
              new ArrayList<UpdatedContainerInfo>();

      ArrayList<ContainerStatus> list2 = new ArrayList<ContainerStatus>();
      for (ContainerId cId : this.toCleanUpContainers) {
        list2.add(ContainerStatus.newInstance(cId, ContainerState.RUNNING, "",
                ContainerExitStatus.SUCCESS));
      }
      list.add(new UpdatedContainerInfo(new ArrayList<ContainerStatus>(),
              list2));
      return list;
    }
  }

  public static RMNode newNodeInfo(String rackName, String hostName,
          final Resource resource, int port) {
    final NodeId nodeId = newNodeID(hostName, port);
    final String nodeAddr = hostName + ":" + port;
    final String httpAddress = hostName;

    return new FakeRMNodeImpl(nodeId, nodeAddr, httpAddress,
            resource, rackName, "Me good",
            port, hostName, null);
  }

  public static RMNode newNodeInfo(String rackName, String hostName,
          final Resource resource) {
    return newNodeInfo(rackName, hostName, resource, NODE_ID++);
  }
}
@@ -0,0 +1,31 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.scheduler;

public class CapacitySchedulerMetrics extends SchedulerMetrics {

  public CapacitySchedulerMetrics() {
    super();
  }

  @Override
  public void trackQueue(String queueName) {
    trackedQueues.add(queueName);
  }
}
@@ -0,0 +1,113 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.scheduler;

import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;

public class ContainerSimulator implements Delayed {
  // id
  private ContainerId id;
  // resource allocated
  private Resource resource;
  // end time
  private long endTime;
  // life time (ms)
  private long lifeTime;
  // host name
  private String hostname;
  // priority
  private int priority;
  // type
  private String type;

  /**
   * invoked when the AM schedules a container to be allocated
   */
  public ContainerSimulator(Resource resource, long lifeTime,
          String hostname, int priority, String type) {
    this.resource = resource;
    this.lifeTime = lifeTime;
    this.hostname = hostname;
    this.priority = priority;
    this.type = type;
  }

  /**
   * invoked when the NM schedules a container to run
   */
  public ContainerSimulator(ContainerId id, Resource resource, long endTime,
          long lifeTime) {
    this.id = id;
    this.resource = resource;
    this.endTime = endTime;
    this.lifeTime = lifeTime;
  }

  public Resource getResource() {
    return resource;
  }

  public ContainerId getId() {
    return id;
  }

  @Override
  public int compareTo(Delayed o) {
    if (!(o instanceof ContainerSimulator)) {
      throw new IllegalArgumentException(
              "Parameter must be a ContainerSimulator instance");
    }
    ContainerSimulator other = (ContainerSimulator) o;
    return (int) Math.signum(endTime - other.endTime);
  }

  @Override
  public long getDelay(TimeUnit unit) {
    return unit.convert(endTime - System.currentTimeMillis(),
            TimeUnit.MILLISECONDS);
  }

  public long getLifeTime() {
    return lifeTime;
  }

  public String getHostname() {
    return hostname;
  }

  public long getEndTime() {
    return endTime;
  }

  public int getPriority() {
    return priority;
  }

  public String getType() {
    return type;
  }

  public void setPriority(int p) {
    priority = p;
  }
}
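
ContainerSimulator's Delayed contract is what lets NMSimulator drive
container completion off a DelayQueue: an element only becomes visible to
poll() once its endTime has passed. A small sketch, where containerId and
resource stand in for real values:

    DelayQueue<ContainerSimulator> queue = new DelayQueue<ContainerSimulator>();
    long lifeTimeMS = 3000L;
    queue.add(new ContainerSimulator(containerId, resource,
            System.currentTimeMillis() + lifeTimeMS, lifeTimeMS));
    ContainerSimulator done = queue.poll(); // null until the 3s lifetime elapses
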
@@ -0,0 +1,266 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
        .AppSchedulable;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
        .FairScheduler;

import com.codahale.metrics.Gauge;
import org.apache.hadoop.yarn.sls.SLSRunner;

public class FairSchedulerMetrics extends SchedulerMetrics {

  private int totalMemoryMB = Integer.MAX_VALUE;
  private int totalVCores = Integer.MAX_VALUE;
  private boolean maxReset = false;

  public FairSchedulerMetrics() {
    super();
    appTrackedMetrics.add("demand.memory");
    appTrackedMetrics.add("demand.vcores");
    appTrackedMetrics.add("usage.memory");
    appTrackedMetrics.add("usage.vcores");
    appTrackedMetrics.add("minshare.memory");
    appTrackedMetrics.add("minshare.vcores");
    appTrackedMetrics.add("maxshare.memory");
    appTrackedMetrics.add("maxshare.vcores");
    appTrackedMetrics.add("fairshare.memory");
    appTrackedMetrics.add("fairshare.vcores");
    queueTrackedMetrics.add("demand.memory");
    queueTrackedMetrics.add("demand.vcores");
    queueTrackedMetrics.add("usage.memory");
    queueTrackedMetrics.add("usage.vcores");
    queueTrackedMetrics.add("minshare.memory");
    queueTrackedMetrics.add("minshare.vcores");
    queueTrackedMetrics.add("maxshare.memory");
    queueTrackedMetrics.add("maxshare.vcores");
    queueTrackedMetrics.add("fairshare.memory");
    queueTrackedMetrics.add("fairshare.vcores");
  }

  @Override
  public void trackApp(ApplicationAttemptId appAttemptId, String oldAppId) {
    super.trackApp(appAttemptId, oldAppId);
    FairScheduler fair = (FairScheduler) scheduler;
    final AppSchedulable app = fair.getSchedulerApp(appAttemptId)
            .getAppSchedulable();
    metrics.register("variable.app." + oldAppId + ".demand.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getDemand().getMemory();
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".demand.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getDemand().getVirtualCores();
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".usage.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getResourceUsage().getMemory();
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".usage.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getResourceUsage().getVirtualCores();
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".minshare.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getMinShare().getMemory();
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".minshare.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getMinShare().getVirtualCores();
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".maxshare.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return Math.min(app.getMaxShare().getMemory(), totalMemoryMB);
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".maxshare.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return Math.min(app.getMaxShare().getVirtualCores(), totalVCores);
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".fairshare.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getFairShare().getMemory();
        }
      }
    );
    metrics.register("variable.app." + oldAppId + ".fairshare.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return app.getFairShare().getVirtualCores();
        }
      }
    );
  }

  @Override
  public void trackQueue(String queueName) {
    trackedQueues.add(queueName);
    FairScheduler fair = (FairScheduler) scheduler;
    final FSQueue queue = fair.getQueueManager().getQueue(queueName);
    metrics.register("variable.queue." + queueName + ".demand.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getDemand().getMemory();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".demand.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getDemand().getVirtualCores();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".usage.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getResourceUsage().getMemory();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".usage.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getResourceUsage().getVirtualCores();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".minshare.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getMinShare().getMemory();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".minshare.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getMinShare().getVirtualCores();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".maxshare.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          if (! maxReset &&
                  SLSRunner.simulateInfoMap.containsKey("Number of nodes") &&
                  SLSRunner.simulateInfoMap.containsKey("Node memory (MB)") &&
                  SLSRunner.simulateInfoMap.containsKey("Node VCores")) {
            int numNMs = Integer.parseInt(
                    SLSRunner.simulateInfoMap.get("Number of nodes").toString());
            int numMemoryMB = Integer.parseInt(
                    SLSRunner.simulateInfoMap.get("Node memory (MB)").toString());
            int numVCores = Integer.parseInt(
                    SLSRunner.simulateInfoMap.get("Node VCores").toString());

            totalMemoryMB = numNMs * numMemoryMB;
            totalVCores = numNMs * numVCores;
            // compute the cluster totals only once
            maxReset = true;
          }

          return Math.min(queue.getMaxShare().getMemory(), totalMemoryMB);
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".maxshare.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return Math.min(queue.getMaxShare().getVirtualCores(), totalVCores);
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".fairshare.memory",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getFairShare().getMemory();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".fairshare.vcores",
      new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          return queue.getFairShare().getVirtualCores();
        }
      }
    );
  }

  @Override
  public void untrackQueue(String queueName) {
    trackedQueues.remove(queueName);
    metrics.remove("variable.queue." + queueName + ".demand.memory");
    metrics.remove("variable.queue." + queueName + ".demand.vcores");
    metrics.remove("variable.queue." + queueName + ".usage.memory");
    metrics.remove("variable.queue." + queueName + ".usage.vcores");
    metrics.remove("variable.queue." + queueName + ".minshare.memory");
    metrics.remove("variable.queue." + queueName + ".minshare.vcores");
    metrics.remove("variable.queue." + queueName + ".maxshare.memory");
    metrics.remove("variable.queue." + queueName + ".maxshare.vcores");
    metrics.remove("variable.queue." + queueName + ".fairshare.memory");
    metrics.remove("variable.queue." + queueName + ".fairshare.vcores");
  }
}
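
The gauges registered above surface in the shared registry under names like
variable.queue.<queue>.usage.memory. A hedged sketch of reading one back,
assuming "metrics" is the MetricRegistry held by SchedulerMetrics and
"sls_queue_1" is a queue name taken from the sample SLS configs:

    Gauge<?> usage = metrics.getGauges()
            .get("variable.queue.sls_queue_1.usage.memory");
    Integer usedMB = (Integer) usage.getValue(); // snapshot of current queue usage
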
@@ -0,0 +1,58 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo
        .FifoScheduler;

import com.codahale.metrics.Gauge;

public class FifoSchedulerMetrics extends SchedulerMetrics {

  public FifoSchedulerMetrics() {
    super();
  }

  @Override
  public void trackQueue(String queueName) {
    trackedQueues.add(queueName);
    FifoScheduler fifo = (FifoScheduler) scheduler;
    // for FifoScheduler, only DEFAULT_QUEUE
    // the three parameters don't affect the results here
    final QueueInfo queue = fifo.getQueueInfo(queueName, false, false);
    // track currentCapacity and maximumCapacity (always 1.0f)
    metrics.register("variable.queue." + queueName + ".currentcapacity",
      new Gauge<Float>() {
        @Override
        public Float getValue() {
          return queue.getCurrentCapacity();
        }
      }
    );
    metrics.register("variable.queue." + queueName + ".maximumcapacity",
      new Gauge<Float>() {
        @Override
        public Float getValue() {
          return queue.getMaximumCapacity();
        }
      }
    );
  }
}
@@ -0,0 +1,30 @@
/* Apache License, Version 2.0 header omitted (identical to the header earlier in this patch). */

package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
        .NodeUpdateSchedulerEvent;

public class NodeUpdateSchedulerEventWrapper extends NodeUpdateSchedulerEvent {

  public NodeUpdateSchedulerEventWrapper(NodeUpdateSchedulerEvent event) {
    super(new RMNodeWrapper(event.getRMNode()));
  }

}
@ -0,0 +1,141 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.sls.scheduler;
|
||||||
|
|
||||||
|
import org.apache.hadoop.net.Node;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
|
||||||
|
.UpdatedContainerInfo;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class RMNodeWrapper implements RMNode {
|
||||||
|
private RMNode node;
|
||||||
|
private List<UpdatedContainerInfo> updates;
|
||||||
|
private boolean pulled = false;
|
||||||
|
|
||||||
|
public RMNodeWrapper(RMNode node) {
|
||||||
|
this.node = node;
|
||||||
|
updates = node.pullContainerUpdates();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodeId getNodeID() {
|
||||||
|
return node.getNodeID();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHostName() {
|
||||||
|
return node.getHostName();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getCommandPort() {
|
||||||
|
return node.getCommandPort();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getHttpPort() {
|
||||||
|
return node.getHttpPort();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getNodeAddress() {
|
||||||
|
return node.getNodeAddress();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHttpAddress() {
|
||||||
|
return node.getHttpAddress();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHealthReport() {
|
||||||
|
return node.getHealthReport();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getLastHealthReportTime() {
|
||||||
|
return node.getLastHealthReportTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Resource getTotalCapability() {
|
||||||
|
return node.getTotalCapability();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getRackName() {
|
||||||
|
return node.getRackName();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Node getNode() {
|
||||||
|
return node.getNode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodeState getState() {
|
||||||
|
return node.getState();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ContainerId> getContainersToCleanUp() {
|
||||||
|
return node.getContainersToCleanUp();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ApplicationId> getAppsToCleanup() {
|
||||||
|
return node.getAppsToCleanup();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateNodeHeartbeatResponseForCleanup(
|
||||||
|
NodeHeartbeatResponse nodeHeartbeatResponse) {
|
||||||
|
node.updateNodeHeartbeatResponseForCleanup(nodeHeartbeatResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
|
||||||
|
return node.getLastNodeHeartBeatResponse();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public List<UpdatedContainerInfo> pullContainerUpdates() {
|
||||||
|
List<UpdatedContainerInfo> list = Collections.EMPTY_LIST;
|
||||||
|
if (! pulled) {
|
||||||
|
list = updates;
|
||||||
|
pulled = true;
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<UpdatedContainerInfo> getContainerUpdates() {
|
||||||
|
return updates;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
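RMNodeWrapper hands a node's pending container updates to the scheduler exactly once: the constructor drains them eagerly, pullContainerUpdates() returns the batch on the first call and an empty list afterwards, and the package-private getContainerUpdates() keeps the same batch readable for the SLS queue accounting. A minimal sketch of that contract; the demo method and its argument are illustrative, not part of the patch:

  static void demoOneShotPull(RMNode liveNode) {
    RMNodeWrapper wrapped = new RMNodeWrapper(liveNode);
    List<UpdatedContainerInfo> first = wrapped.pullContainerUpdates();  // the drained batch
    List<UpdatedContainerInfo> again = wrapped.pullContainerUpdates();  // empty list
    // getContainerUpdates() still returns the original batch, so the SLS
    // metrics code can inspect what the scheduler just consumed
    List<UpdatedContainerInfo> kept = wrapped.getContainerUpdates();
  }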
@ -0,0 +1,855 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;
import org.apache.hadoop.yarn.sls.web.SLSWebApp;
import com.codahale.metrics.Counter;
import com.codahale.metrics.CsvReporter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SlidingWindowReservoir;
import com.codahale.metrics.Timer;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.log4j.Logger;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

public class ResourceSchedulerWrapper implements ResourceScheduler,
        Configurable {
  private static final String EOL = System.getProperty("line.separator");
  private static final int SAMPLING_SIZE = 60;
  private ScheduledExecutorService pool;
  // counters for scheduler allocate/handle operations
  private Counter schedulerAllocateCounter;
  private Counter schedulerHandleCounter;
  private Map<SchedulerEventType, Counter> schedulerHandleCounterMap;
  // timers for scheduler allocate/handle operations
  private Timer schedulerAllocateTimer;
  private Timer schedulerHandleTimer;
  private Map<SchedulerEventType, Timer> schedulerHandleTimerMap;
  private List<Histogram> schedulerHistogramList;
  private Map<Histogram, Timer> histogramTimerMap;
  private Lock samplerLock;
  private Lock queueLock;

  private Configuration conf;
  private ResourceScheduler scheduler;
  private Map<ApplicationAttemptId, String> appQueueMap =
          new ConcurrentHashMap<ApplicationAttemptId, String>();
  private BufferedWriter jobRuntimeLogBW;

  // Priority of the ResourceSchedulerWrapper shutdown hook.
  public static final int SHUTDOWN_HOOK_PRIORITY = 30;

  // web app
  private SLSWebApp web;

  private Map<ContainerId, Resource> preemptionContainerMap =
          new ConcurrentHashMap<ContainerId, Resource>();

  // metrics
  private MetricRegistry metrics;
  private SchedulerMetrics schedulerMetrics;
  private boolean metricsON;
  private String metricsOutputDir;
  private BufferedWriter metricsLogBW;
  private boolean running = false;
  private static Map<Class, Class> defaultSchedulerMetricsMap =
          new HashMap<Class, Class>();
  static {
    defaultSchedulerMetricsMap.put(FairScheduler.class,
            FairSchedulerMetrics.class);
    defaultSchedulerMetricsMap.put(FifoScheduler.class,
            FifoSchedulerMetrics.class);
    defaultSchedulerMetricsMap.put(CapacityScheduler.class,
            CapacitySchedulerMetrics.class);
  }
  // must be set from outside
  private Set<String> queueSet;
  private Set<String> trackedAppSet;

  public final Logger LOG = Logger.getLogger(ResourceSchedulerWrapper.class);

  public ResourceSchedulerWrapper() {
    samplerLock = new ReentrantLock();
    queueLock = new ReentrantLock();
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
    // set the scheduler under test
    Class<? extends ResourceScheduler> klass =
            conf.getClass(SLSConfiguration.RM_SCHEDULER, null,
                    ResourceScheduler.class);

    scheduler = ReflectionUtils.newInstance(klass, conf);
    // start metrics
    metricsON = conf.getBoolean(SLSConfiguration.METRICS_SWITCH, true);
    if (metricsON) {
      try {
        initMetrics();
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    ShutdownHookManager.get().addShutdownHook(new Runnable() {
      @Override
      public void run() {
        try {
          if (metricsLogBW != null) {
            metricsLogBW.write("]");
            metricsLogBW.close();
          }
          if (web != null) {
            web.stop();
          }
          tearDown();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    }, SHUTDOWN_HOOK_PRIORITY);
  }
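  // When metrics are enabled, allocate() is timed with schedulerAllocateTimer
  // and the per-queue pending/allocated counters are refreshed in the finally
  // block; otherwise the call passes straight through to the real scheduler.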
  @Override
  public Allocation allocate(ApplicationAttemptId attemptId,
                             List<ResourceRequest> resourceRequests,
                             List<ContainerId> containerIds,
                             List<String> strings, List<String> strings2) {
    if (metricsON) {
      final Timer.Context context = schedulerAllocateTimer.time();
      Allocation allocation = null;
      try {
        allocation = scheduler.allocate(attemptId, resourceRequests,
                containerIds, strings, strings2);
        return allocation;
      } finally {
        context.stop();
        schedulerAllocateCounter.inc();
        try {
          updateQueueWithAllocateRequest(allocation, attemptId,
                  resourceRequests, containerIds);
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    } else {
      return scheduler.allocate(attemptId,
              resourceRequests, containerIds, strings, strings2);
    }
  }
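  // handle() wraps NODE_UPDATE events so the SLS queue accounting can see the
  // completed containers, times every event by type, and maintains the
  // app-attempt-to-queue map on APP_ADDED / APP_REMOVED.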
  @Override
  public void handle(SchedulerEvent schedulerEvent) {
    // metrics off
    if (!metricsON) {
      scheduler.handle(schedulerEvent);
      return;
    }
    if (!running) {
      running = true;
    }

    // metrics on
    Timer.Context handlerTimer = null;
    Timer.Context operationTimer = null;

    NodeUpdateSchedulerEventWrapper eventWrapper;
    try {
      if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE
              && schedulerEvent instanceof NodeUpdateSchedulerEvent) {
        eventWrapper = new NodeUpdateSchedulerEventWrapper(
                (NodeUpdateSchedulerEvent)schedulerEvent);
        schedulerEvent = eventWrapper;
        updateQueueWithNodeUpdate(eventWrapper);
      } else if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
              && schedulerEvent instanceof AppRemovedSchedulerEvent) {
        // if the app still holds its AM container, update the queue's
        // resource usage information before the app is removed
        AppRemovedSchedulerEvent appRemoveEvent =
                (AppRemovedSchedulerEvent) schedulerEvent;
        ApplicationAttemptId appAttemptId =
                appRemoveEvent.getApplicationAttemptID();
        String queue = appQueueMap.get(appAttemptId);
        SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
        if (!app.getLiveContainers().isEmpty()) {  // have 0 or 1
          // should have one container, which is the AM container
          RMContainer rmc = app.getLiveContainers().iterator().next();
          updateQueueMetrics(queue,
                  rmc.getContainer().getResource().getMemory(),
                  rmc.getContainer().getResource().getVirtualCores());
        }
      }

      handlerTimer = schedulerHandleTimer.time();
      operationTimer = schedulerHandleTimerMap
              .get(schedulerEvent.getType()).time();

      scheduler.handle(schedulerEvent);
    } finally {
      if (handlerTimer != null) {
        handlerTimer.stop();
      }
      if (operationTimer != null) {
        operationTimer.stop();
      }
      schedulerHandleCounter.inc();
      schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();

      if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
              && schedulerEvent instanceof AppRemovedSchedulerEvent) {
        SLSRunner.decreaseRemainingApps();
        AppRemovedSchedulerEvent appRemoveEvent =
                (AppRemovedSchedulerEvent) schedulerEvent;
        appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
      } else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
              && schedulerEvent instanceof AppAddedSchedulerEvent) {
        AppAddedSchedulerEvent appAddEvent =
                (AppAddedSchedulerEvent) schedulerEvent;
        String queueName = appAddEvent.getQueue();
        appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
      }
    }
  }
  private void updateQueueWithNodeUpdate(
          NodeUpdateSchedulerEventWrapper eventWrapper) {
    RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode();
    List<UpdatedContainerInfo> containerList = node.getContainerUpdates();
    for (UpdatedContainerInfo info : containerList) {
      for (ContainerStatus status : info.getCompletedContainers()) {
        ContainerId containerId = status.getContainerId();
        SchedulerAppReport app = scheduler.getSchedulerAppInfo(
                containerId.getApplicationAttemptId());

        if (app == null) {
          // this happens for the AM container: the app has already been
          // removed by the time the NM sends the release information
          continue;
        }

        String queue = appQueueMap.get(containerId.getApplicationAttemptId());
        int releasedMemory = 0, releasedVCores = 0;
        if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
          for (RMContainer rmc : app.getLiveContainers()) {
            if (rmc.getContainerId() == containerId) {
              releasedMemory += rmc.getContainer().getResource().getMemory();
              releasedVCores += rmc.getContainer()
                      .getResource().getVirtualCores();
              break;
            }
          }
        } else if (status.getExitStatus() == ContainerExitStatus.ABORTED) {
          if (preemptionContainerMap.containsKey(containerId)) {
            Resource preResource = preemptionContainerMap.get(containerId);
            releasedMemory += preResource.getMemory();
            releasedVCores += preResource.getVirtualCores();
            preemptionContainerMap.remove(containerId);
          }
        }
        // update queue counters
        updateQueueMetrics(queue, releasedMemory, releasedVCores);
      }
    }
  }
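  // Rebuilds the queue's pending/allocated picture after every allocate():
  // pending grows by the ANY resource requests and shrinks by what was just
  // allocated or reserved-then-released; allocated shrinks by AM-released
  // containers. Scheduler-initiated preemptions are remembered in
  // preemptionContainerMap so their resources can be credited back when the
  // ABORTED status arrives in a later node update.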
  private void updateQueueWithAllocateRequest(Allocation allocation,
          ApplicationAttemptId attemptId,
          List<ResourceRequest> resourceRequests,
          List<ContainerId> containerIds) throws IOException {
    // update queue information
    Resource pendingResource = Resources.createResource(0, 0);
    Resource allocatedResource = Resources.createResource(0, 0);
    String queueName = appQueueMap.get(attemptId);
    // containers requested
    for (ResourceRequest request : resourceRequests) {
      if (request.getResourceName().equals(ResourceRequest.ANY)) {
        Resources.addTo(pendingResource,
                Resources.multiply(request.getCapability(),
                        request.getNumContainers()));
      }
    }
    // containers allocated
    for (Container container : allocation.getContainers()) {
      Resources.addTo(allocatedResource, container.getResource());
      Resources.subtractFrom(pendingResource, container.getResource());
    }
    // containers released by the AM
    SchedulerAppReport report = scheduler.getSchedulerAppInfo(attemptId);
    for (ContainerId containerId : containerIds) {
      Container container = null;
      for (RMContainer c : report.getLiveContainers()) {
        if (c.getContainerId().equals(containerId)) {
          container = c.getContainer();
          break;
        }
      }
      if (container != null) {
        // released allocated containers
        Resources.subtractFrom(allocatedResource, container.getResource());
      } else {
        for (RMContainer c : report.getReservedContainers()) {
          if (c.getContainerId().equals(containerId)) {
            container = c.getContainer();
            break;
          }
        }
        if (container != null) {
          // released reserved containers
          Resources.subtractFrom(pendingResource, container.getResource());
        }
      }
    }
    // containers released/preempted by the scheduler
    Set<ContainerId> preemptionContainers = new HashSet<ContainerId>();
    if (allocation.getContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getContainerPreemptions());
    }
    if (allocation.getStrictContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getStrictContainerPreemptions());
    }
    if (!preemptionContainers.isEmpty()) {
      for (ContainerId containerId : preemptionContainers) {
        if (!preemptionContainerMap.containsKey(containerId)) {
          Container container = null;
          for (RMContainer c : report.getLiveContainers()) {
            if (c.getContainerId().equals(containerId)) {
              container = c.getContainer();
              break;
            }
          }
          if (container != null) {
            preemptionContainerMap.put(containerId, container.getResource());
          }
        }
      }
    }

    // update metrics
    SortedMap<String, Counter> counterMap = metrics.getCounters();
    String[] names = new String[]{
            "counter.queue." + queueName + ".pending.memory",
            "counter.queue." + queueName + ".pending.cores",
            "counter.queue." + queueName + ".allocated.memory",
            "counter.queue." + queueName + ".allocated.cores"};
    int[] values = new int[]{pendingResource.getMemory(),
            pendingResource.getVirtualCores(),
            allocatedResource.getMemory(), allocatedResource.getVirtualCores()};
    for (int i = names.length - 1; i >= 0; i--) {
      if (!counterMap.containsKey(names[i])) {
        metrics.counter(names[i]);
        counterMap = metrics.getCounters();
      }
      counterMap.get(names[i]).inc(values[i]);
    }

    queueLock.lock();
    try {
      if (!schedulerMetrics.isTracked(queueName)) {
        schedulerMetrics.trackQueue(queueName);
      }
    } finally {
      queueLock.unlock();
    }
  }
  private void tearDown() throws IOException {
    // close the job runtime writer
    if (jobRuntimeLogBW != null) {
      jobRuntimeLogBW.close();
    }
    // shut down the pool
    if (pool != null) {
      pool.shutdown();
    }
  }

  @SuppressWarnings("unchecked")
  private void initMetrics() throws Exception {
    metrics = new MetricRegistry();
    // configuration
    metricsOutputDir = conf.get(SLSConfiguration.METRICS_OUTPUT_DIR);
    int metricsWebAddressPort = conf.getInt(
            SLSConfiguration.METRICS_WEB_ADDRESS_PORT,
            SLSConfiguration.METRICS_WEB_ADDRESS_PORT_DEFAULT);
    // create SchedulerMetrics for the current scheduler
    String schedulerMetricsType = conf.get(scheduler.getClass().getName());
    Class schedulerMetricsClass = schedulerMetricsType == null ?
            defaultSchedulerMetricsMap.get(scheduler.getClass()) :
            Class.forName(schedulerMetricsType);
    schedulerMetrics = (SchedulerMetrics)ReflectionUtils
            .newInstance(schedulerMetricsClass, new Configuration());
    schedulerMetrics.init(scheduler, metrics);

    // register various metrics
    registerJvmMetrics();
    registerClusterResourceMetrics();
    registerContainerAppNumMetrics();
    registerSchedulerMetrics();

    // .csv output
    initMetricsCSVOutput();

    // start the web app to provide real-time tracking
    web = new SLSWebApp(this, metricsWebAddressPort);
    web.start();

    // a thread to update the histograms from the timers
    pool = new ScheduledThreadPoolExecutor(2);
    pool.scheduleAtFixedRate(new HistogramsRunnable(), 0, 1000,
            TimeUnit.MILLISECONDS);

    // a thread to output metrics for real-time tracking
    pool.scheduleAtFixedRate(new MetricsLogRunnable(), 0, 1000,
            TimeUnit.MILLISECONDS);

    // application running information
    jobRuntimeLogBW = new BufferedWriter(
            new FileWriter(metricsOutputDir + "/jobruntime.csv"));
    jobRuntimeLogBW.write("JobID,real_start_time,real_end_time," +
            "simulate_start_time,simulate_end_time" + EOL);
    jobRuntimeLogBW.flush();
  }
  private void registerJvmMetrics() {
    // add JVM gauges
    metrics.register("variable.jvm.free.memory",
            new Gauge<Long>() {
              @Override
              public Long getValue() {
                return Runtime.getRuntime().freeMemory();
              }
            }
    );
    metrics.register("variable.jvm.max.memory",
            new Gauge<Long>() {
              @Override
              public Long getValue() {
                return Runtime.getRuntime().maxMemory();
              }
            }
    );
    metrics.register("variable.jvm.total.memory",
            new Gauge<Long>() {
              @Override
              public Long getValue() {
                return Runtime.getRuntime().totalMemory();
              }
            }
    );
  }

  private void registerClusterResourceMetrics() {
    metrics.register("variable.cluster.allocated.memory",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
                  return 0;
                } else {
                  return scheduler.getRootQueueMetrics().getAllocatedMB();
                }
              }
            }
    );
    metrics.register("variable.cluster.allocated.vcores",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
                  return 0;
                } else {
                  return scheduler.getRootQueueMetrics().getAllocatedVirtualCores();
                }
              }
            }
    );
    metrics.register("variable.cluster.available.memory",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
                  return 0;
                } else {
                  return scheduler.getRootQueueMetrics().getAvailableMB();
                }
              }
            }
    );
    metrics.register("variable.cluster.available.vcores",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
                  return 0;
                } else {
                  return scheduler.getRootQueueMetrics().getAvailableVirtualCores();
                }
              }
            }
    );
  }

  private void registerContainerAppNumMetrics() {
    metrics.register("variable.running.application",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
                  return 0;
                } else {
                  return scheduler.getRootQueueMetrics().getAppsRunning();
                }
              }
            }
    );
    metrics.register("variable.running.container",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                if (scheduler == null || scheduler.getRootQueueMetrics() == null) {
                  return 0;
                } else {
                  return scheduler.getRootQueueMetrics().getAllocatedContainers();
                }
              }
            }
    );
  }
  private void registerSchedulerMetrics() {
    samplerLock.lock();
    try {
      // counters for scheduler operations
      schedulerAllocateCounter = metrics.counter(
              "counter.scheduler.operation.allocate");
      schedulerHandleCounter = metrics.counter(
              "counter.scheduler.operation.handle");
      schedulerHandleCounterMap = new HashMap<SchedulerEventType, Counter>();
      for (SchedulerEventType e : SchedulerEventType.values()) {
        Counter counter = metrics.counter(
                "counter.scheduler.operation.handle." + e);
        schedulerHandleCounterMap.put(e, counter);
      }
      // timers for scheduler operations
      int timeWindowSize = conf.getInt(
              SLSConfiguration.METRICS_TIMER_WINDOW_SIZE,
              SLSConfiguration.METRICS_TIMER_WINDOW_SIZE_DEFAULT);
      schedulerAllocateTimer = new Timer(
              new SlidingWindowReservoir(timeWindowSize));
      schedulerHandleTimer = new Timer(
              new SlidingWindowReservoir(timeWindowSize));
      schedulerHandleTimerMap = new HashMap<SchedulerEventType, Timer>();
      for (SchedulerEventType e : SchedulerEventType.values()) {
        Timer timer = new Timer(new SlidingWindowReservoir(timeWindowSize));
        schedulerHandleTimerMap.put(e, timer);
      }
      // histograms for scheduler operations (samplers)
      schedulerHistogramList = new ArrayList<Histogram>();
      histogramTimerMap = new HashMap<Histogram, Timer>();
      Histogram schedulerAllocateHistogram = new Histogram(
              new SlidingWindowReservoir(SAMPLING_SIZE));
      metrics.register("sampler.scheduler.operation.allocate.timecost",
              schedulerAllocateHistogram);
      schedulerHistogramList.add(schedulerAllocateHistogram);
      histogramTimerMap.put(schedulerAllocateHistogram, schedulerAllocateTimer);
      Histogram schedulerHandleHistogram = new Histogram(
              new SlidingWindowReservoir(SAMPLING_SIZE));
      metrics.register("sampler.scheduler.operation.handle.timecost",
              schedulerHandleHistogram);
      schedulerHistogramList.add(schedulerHandleHistogram);
      histogramTimerMap.put(schedulerHandleHistogram, schedulerHandleTimer);
      for (SchedulerEventType e : SchedulerEventType.values()) {
        Histogram histogram = new Histogram(
                new SlidingWindowReservoir(SAMPLING_SIZE));
        metrics.register(
                "sampler.scheduler.operation.handle." + e + ".timecost",
                histogram);
        schedulerHistogramList.add(histogram);
        histogramTimerMap.put(histogram, schedulerHandleTimerMap.get(e));
      }
    } finally {
      samplerLock.unlock();
    }
  }

  private void initMetricsCSVOutput() {
    int timeIntervalMS = conf.getInt(
            SLSConfiguration.METRICS_RECORD_INTERVAL_MS,
            SLSConfiguration.METRICS_RECORD_INTERVAL_MS_DEFAULT);
    File dir = new File(metricsOutputDir + "/metrics");
    if (!dir.exists() && !dir.mkdirs()) {
      LOG.error("Cannot create directory " + dir.getAbsoluteFile());
    }
    final CsvReporter reporter = CsvReporter.forRegistry(metrics)
            .formatFor(Locale.US)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.MILLISECONDS)
            .build(new File(metricsOutputDir + "/metrics"));
    reporter.start(timeIntervalMS, TimeUnit.MILLISECONDS);
  }

  class HistogramsRunnable implements Runnable {
    @Override
    public void run() {
      samplerLock.lock();
      try {
        for (Histogram histogram : schedulerHistogramList) {
          Timer timer = histogramTimerMap.get(histogram);
          histogram.update((int) timer.getSnapshot().getMean());
        }
      } finally {
        samplerLock.unlock();
      }
    }
  }

  class MetricsLogRunnable implements Runnable {
    private boolean firstLine = true;

    public MetricsLogRunnable() {
      try {
        metricsLogBW = new BufferedWriter(
                new FileWriter(metricsOutputDir + "/realtimetrack.json"));
        metricsLogBW.write("[");
      } catch (IOException e) {
        e.printStackTrace();
      }
    }

    @Override
    public void run() {
      if (running) {
        // ask the web app for the real-time tracking json
        String metrics = web.generateRealTimeTrackingMetrics();
        // output
        try {
          if (firstLine) {
            metricsLogBW.write(metrics + EOL);
            firstLine = false;
          } else {
            metricsLogBW.write("," + metrics + EOL);
          }
          metricsLogBW.flush();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }
  // the following functions are used by AMSimulator
  public void addAMRuntime(ApplicationId appId,
                           long traceStartTimeMS, long traceEndTimeMS,
                           long simulateStartTimeMS, long simulateEndTimeMS) {
    try {
      // write job runtime information
      StringBuilder sb = new StringBuilder();
      sb.append(appId).append(",").append(traceStartTimeMS).append(",")
              .append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
              .append(",").append(simulateEndTimeMS);
      jobRuntimeLogBW.write(sb.toString() + EOL);
      jobRuntimeLogBW.flush();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  private void updateQueueMetrics(String queue,
                                  int releasedMemory, int releasedVCores) {
    // update queue counters
    SortedMap<String, Counter> counterMap = metrics.getCounters();
    if (releasedMemory != 0) {
      String name = "counter.queue." + queue + ".allocated.memory";
      if (!counterMap.containsKey(name)) {
        metrics.counter(name);
        counterMap = metrics.getCounters();
      }
      counterMap.get(name).inc(-releasedMemory);
    }
    if (releasedVCores != 0) {
      String name = "counter.queue." + queue + ".allocated.cores";
      if (!counterMap.containsKey(name)) {
        metrics.counter(name);
        counterMap = metrics.getCounters();
      }
      counterMap.get(name).inc(-releasedVCores);
    }
  }

  public void setQueueSet(Set<String> queues) {
    this.queueSet = queues;
  }

  public Set<String> getQueueSet() {
    return this.queueSet;
  }

  public void setTrackedAppSet(Set<String> apps) {
    this.trackedAppSet = apps;
  }

  public Set<String> getTrackedAppSet() {
    return this.trackedAppSet;
  }

  public MetricRegistry getMetrics() {
    return metrics;
  }

  public SchedulerMetrics getSchedulerMetrics() {
    return schedulerMetrics;
  }

  // API open to other classes
  public void addTrackedApp(ApplicationAttemptId appAttemptId,
                            String oldAppId) {
    if (metricsON) {
      schedulerMetrics.trackApp(appAttemptId, oldAppId);
    }
  }

  public void removeTrackedApp(ApplicationAttemptId appAttemptId,
                               String oldAppId) {
    if (metricsON) {
      schedulerMetrics.untrackApp(appAttemptId, oldAppId);
    }
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void reinitialize(Configuration entries, RMContext rmContext)
          throws IOException {
    scheduler.reinitialize(entries, rmContext);
  }

  @Override
  public void recover(RMStateStore.RMState rmState) throws Exception {
    scheduler.recover(rmState);
  }

  @Override
  public QueueInfo getQueueInfo(String s, boolean b, boolean b2)
          throws IOException {
    return scheduler.getQueueInfo(s, b, b2);
  }

  @Override
  public List<QueueUserACLInfo> getQueueUserAclInfo() {
    return scheduler.getQueueUserAclInfo();
  }

  @Override
  public Resource getMinimumResourceCapability() {
    return scheduler.getMinimumResourceCapability();
  }

  @Override
  public Resource getMaximumResourceCapability() {
    return scheduler.getMaximumResourceCapability();
  }

  @Override
  public int getNumClusterNodes() {
    return scheduler.getNumClusterNodes();
  }

  @Override
  public SchedulerNodeReport getNodeReport(NodeId nodeId) {
    return scheduler.getNodeReport(nodeId);
  }

  @Override
  public SchedulerAppReport getSchedulerAppInfo(
          ApplicationAttemptId attemptId) {
    return scheduler.getSchedulerAppInfo(attemptId);
  }

  @Override
  public QueueMetrics getRootQueueMetrics() {
    return scheduler.getRootQueueMetrics();
  }
}
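A wiring sketch for the wrapper (an assumption about intended usage, not code from this patch): the RM is configured to instantiate ResourceSchedulerWrapper as its scheduler, and SLSConfiguration.RM_SCHEDULER, which setConf() reads above, names the scheduler actually under test. FairScheduler here is just one possible choice.

  Configuration conf = new Configuration();
  // the RM instantiates the wrapper as its scheduler...
  conf.setClass(YarnConfiguration.RM_SCHEDULER,
          ResourceSchedulerWrapper.class, ResourceScheduler.class);
  // ...and the wrapper instantiates the scheduler actually being measured
  conf.setClass(SLSConfiguration.RM_SCHEDULER,
          FairScheduler.class, ResourceScheduler.class);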
@ -0,0 +1,100 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;

public abstract class SchedulerMetrics {
  protected ResourceScheduler scheduler;
  protected Set<String> trackedQueues;
  protected MetricRegistry metrics;
  protected Set<String> appTrackedMetrics;
  protected Set<String> queueTrackedMetrics;

  public SchedulerMetrics() {
    appTrackedMetrics = new HashSet<String>();
    appTrackedMetrics.add("live.containers");
    appTrackedMetrics.add("reserved.containers");
    queueTrackedMetrics = new HashSet<String>();
  }

  public void init(ResourceScheduler scheduler, MetricRegistry metrics) {
    this.scheduler = scheduler;
    this.trackedQueues = new HashSet<String>();
    this.metrics = metrics;
  }

  public void trackApp(final ApplicationAttemptId appAttemptId,
                       String oldAppId) {
    metrics.register("variable.app." + oldAppId + ".live.containers",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
                return app.getLiveContainers().size();
              }
            }
    );
    metrics.register("variable.app." + oldAppId + ".reserved.containers",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
                return app.getReservedContainers().size();
              }
            }
    );
  }

  public void untrackApp(ApplicationAttemptId appAttemptId,
                         String oldAppId) {
    for (String m : appTrackedMetrics) {
      metrics.remove("variable.app." + oldAppId + "." + m);
    }
  }

  public abstract void trackQueue(String queueName);

  public void untrackQueue(String queueName) {
    for (String m : queueTrackedMetrics) {
      metrics.remove("variable.queue." + queueName + "." + m);
    }
  }

  public boolean isTracked(String queueName) {
    return trackedQueues.contains(queueName);
  }

  public Set<String> getAppTrackedMetrics() {
    return appTrackedMetrics;
  }

  public Set<String> getQueueTrackedMetrics() {
    return queueTrackedMetrics;
  }
}
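Concrete subclasses (FairSchedulerMetrics and friends elsewhere in this patch) supply trackQueue() for the scheduler they instrument. A minimal hypothetical subclass, showing the "variable.queue.<name>.<metric>" naming convention that untrackQueue() relies on; the gauge and its value are placeholders:

  public class ExampleSchedulerMetrics extends SchedulerMetrics {
    @Override
    public void trackQueue(final String queueName) {
      trackedQueues.add(queueName);
      queueTrackedMetrics.add("example.metric");
      // a per-queue gauge, named so untrackQueue() can remove it later
      metrics.register("variable.queue." + queueName + ".example.metric",
              new Gauge<Integer>() {
                @Override
                public Integer getValue() {
                  return 0; // a real subclass would read scheduler state here
                }
              });
    }
  }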
@ -0,0 +1,183 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls.scheduler;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.Queue;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.yarn.exceptions.YarnException;

public class TaskRunner {
  public abstract static class Task implements Runnable, Delayed {
    private long start;
    private long end;
    private long nextRun;
    private long startTime;
    private long endTime;
    private long repeatInterval;
    private Queue<Task> queue;

    public Task(){}

    // values in milliseconds, start/end are milliseconds from now
    public void init(long startTime, long endTime, long repeatInterval) {
      if (endTime - startTime < 0) {
        throw new IllegalArgumentException(MessageFormat.format(
                "endTime[{0}] cannot be smaller than startTime[{1}]", endTime,
                startTime));
      }
      // a zero interval would divide by zero in the modulo check below,
      // so reject anything below 1
      if (repeatInterval <= 0) {
        throw new IllegalArgumentException(MessageFormat.format(
                "repeatInterval[{0}] cannot be less than 1", repeatInterval));
      }
      if ((endTime - startTime) % repeatInterval != 0) {
        throw new IllegalArgumentException(MessageFormat.format(
                "Invalid parameters: (endTime[{0}] - startTime[{1}]) " +
                        "% repeatInterval[{2}] != 0",
                endTime, startTime, repeatInterval));
      }
      start = startTime;
      end = endTime;
      this.repeatInterval = repeatInterval;
    }

    private void timeRebase(long now) {
      startTime = now + start;
      endTime = now + end;
      this.nextRun = startTime;
    }

    // values in milliseconds, start is milliseconds from now
    // it only executes firstStep()
    public void init(long startTime) {
      init(startTime, startTime, 1);
    }

    private void setQueue(Queue<Task> queue) {
      this.queue = queue;
    }

    @Override
    public final void run() {
      try {
        if (nextRun == startTime) {
          firstStep();
          nextRun += repeatInterval;
          if (nextRun <= endTime) {
            queue.add(this);
          }
        } else if (nextRun < endTime) {
          middleStep();
          nextRun += repeatInterval;
          queue.add(this);
        } else {
          lastStep();
        }
      } catch (YarnException e) {
        e.printStackTrace();
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }

    @Override
    public long getDelay(TimeUnit unit) {
      return unit.convert(nextRun - System.currentTimeMillis(),
              TimeUnit.MILLISECONDS);
    }

    @Override
    public int compareTo(Delayed o) {
      if (!(o instanceof Task)) {
        throw new IllegalArgumentException("Parameter must be a Task instance");
      }
      Task other = (Task) o;
      return (int) Math.signum(nextRun - other.nextRun);
    }

    public abstract void firstStep()
            throws YarnException, IOException, InterruptedException;

    public abstract void middleStep()
            throws YarnException, InterruptedException, IOException;

    public abstract void lastStep() throws YarnException;

    public void setEndTime(long et) {
      endTime = et;
    }
  }

  private DelayQueue queue;
  private int threadPoolSize;
  private ThreadPoolExecutor executor;
  private long startTimeMS = 0;

  public TaskRunner() {
    queue = new DelayQueue();
  }

  public void setQueueSize(int threadPoolSize) {
    this.threadPoolSize = threadPoolSize;
  }

  @SuppressWarnings("unchecked")
  public void start() {
    if (executor != null) {
      throw new IllegalStateException("Already started");
    }
    DelayQueue preStartQueue = queue;

    queue = new DelayQueue();
    executor = new ThreadPoolExecutor(threadPoolSize, threadPoolSize, 0,
            TimeUnit.MILLISECONDS, queue);
    executor.prestartAllCoreThreads();

    startTimeMS = System.currentTimeMillis();
    for (Object d : preStartQueue) {
      schedule((Task) d, startTimeMS);
    }
  }

  public void stop() {
    executor.shutdownNow();
  }

  @SuppressWarnings("unchecked")
  private void schedule(Task task, long timeNow) {
    task.timeRebase(timeNow);
    task.setQueue(queue);
    queue.add(task);
  }

  public void schedule(Task task) {
    schedule(task, System.currentTimeMillis());
  }

  public long getStartTimeMS() {
    return this.startTimeMS;
  }
}
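A usage sketch for TaskRunner, with illustrative timings and a hypothetical task body: a Task runs firstStep() once, middleStep() on each repeat, and lastStep() at endTime, re-enqueueing itself on the DelayQueue that doubles as the executor's work queue. Tasks scheduled before start() are parked and rebased against the wall clock when start() is called.

  TaskRunner runner = new TaskRunner();
  runner.setQueueSize(4); // sizes the thread pool, despite the name
  runner.schedule(new TaskRunner.Task() {
    { init(0, 5000, 1000); } // fire now, repeat every 1s, finish at +5s
    @Override
    public void firstStep() { System.out.println("first"); }
    @Override
    public void middleStep() { System.out.println("middle"); }
    @Override
    public void lastStep() { System.out.println("last"); }
  });
  runner.start();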
@ -0,0 +1,133 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
import org.apache.hadoop.tools.rumen.LoggedTaskAttempt;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.List;
import java.util.Iterator;

public class SLSUtils {

  public static String[] getRackHostName(String hostname) {
    hostname = hostname.substring(1);
    return hostname.split("/");
  }

  /**
   * Parse the rumen trace file and return the names of the hosts it uses.
   */
  public static Set<String> parseNodesFromRumenTrace(String jobTrace)
          throws IOException {
    Set<String> nodeSet = new HashSet<String>();

    File fin = new File(jobTrace);
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    JobTraceReader reader = new JobTraceReader(
            new Path(fin.getAbsolutePath()), conf);
    try {
      LoggedJob job = null;
      while ((job = reader.getNext()) != null) {
        for (LoggedTask mapTask : job.getMapTasks()) {
          // select the last attempt
          LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
                  .get(mapTask.getAttempts().size() - 1);
          nodeSet.add(taskAttempt.getHostName().getValue());
        }
        for (LoggedTask reduceTask : job.getReduceTasks()) {
          LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
                  .get(reduceTask.getAttempts().size() - 1);
          nodeSet.add(taskAttempt.getHostName().getValue());
        }
      }
    } finally {
      reader.close();
    }

    return nodeSet;
  }

  /**
   * Parse the sls trace file and return the names of the hosts it uses.
   */
  public static Set<String> parseNodesFromSLSTrace(String jobTrace)
          throws IOException {
    Set<String> nodeSet = new HashSet<String>();
    JsonFactory jsonF = new JsonFactory();
    ObjectMapper mapper = new ObjectMapper();
    Reader input = new FileReader(jobTrace);
    try {
      Iterator<Map> i = mapper.readValues(
              jsonF.createJsonParser(input), Map.class);
      while (i.hasNext()) {
        Map jsonE = i.next();
        List tasks = (List) jsonE.get("job.tasks");
        for (Object o : tasks) {
          Map jsonTask = (Map) o;
          String hostname = jsonTask.get("container.host").toString();
          nodeSet.add(hostname);
        }
      }
    } finally {
      input.close();
    }
    return nodeSet;
  }

  /**
   * Parse the input node file and return the names of the hosts it defines.
   */
  public static Set<String> parseNodesFromNodeFile(String nodeFile)
          throws IOException {
    Set<String> nodeSet = new HashSet<String>();
    JsonFactory jsonF = new JsonFactory();
    ObjectMapper mapper = new ObjectMapper();
    Reader input = new FileReader(nodeFile);
    try {
      Iterator<Map> i = mapper.readValues(
              jsonF.createJsonParser(input), Map.class);
      while (i.hasNext()) {
        Map jsonE = i.next();
        String rack = "/" + jsonE.get("rack");
        List tasks = (List) jsonE.get("nodes");
        for (Object o : tasks) {
          Map jsonNode = (Map) o;
          nodeSet.add(rack + "/" + jsonNode.get("node"));
        }
      }
    } finally {
      input.close();
    }
    return nodeSet;
  }
}
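A usage sketch for the node-file parser; the path is hypothetical, and the JSON shape shown in the comment is inferred from the keys the method reads ("rack", "nodes", "node"):

  // the file is a sequence of JSON objects, e.g.
  //   {"rack": "rack1", "nodes": [{"node": "node1"}, {"node": "node2"}]}
  // which yields entries of the form "/rack1/node1"
  Set<String> nodes = SLSUtils.parseNodesFromNodeFile("/tmp/sls-nodes.json");
  for (String n : nodes) {
    String[] rackHost = SLSUtils.getRackHostName(n); // {"rack1", "node1"}
  }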
@ -0,0 +1,527 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.web;

import java.io.File;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
import org.mortbay.jetty.Handler;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.handler.AbstractHandler;
import org.mortbay.jetty.Request;

import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.scheduler.SchedulerMetrics;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import org.mortbay.jetty.handler.ResourceHandler;

public class SLSWebApp extends HttpServlet {
  private static final long serialVersionUID = 1905162041950251407L;
  private transient Server server;
  private transient ResourceSchedulerWrapper wrapper;
  private transient MetricRegistry metrics;
  private transient SchedulerMetrics schedulerMetrics;
  // metrics objects
  private transient Gauge jvmFreeMemoryGauge;
  private transient Gauge jvmMaxMemoryGauge;
  private transient Gauge jvmTotalMemoryGauge;
  private transient Gauge numRunningAppsGauge;
  private transient Gauge numRunningContainersGauge;
  private transient Gauge allocatedMemoryGauge;
  private transient Gauge allocatedVCoresGauge;
  private transient Gauge availableMemoryGauge;
  private transient Gauge availableVCoresGauge;
  private transient Histogram allocateTimecostHistogram;
  private transient Histogram handleTimecostHistogram;
  private Map<SchedulerEventType, Histogram> handleOperTimecostHistogramMap;
  private Map<String, Counter> queueAllocatedMemoryCounterMap;
  private Map<String, Counter> queueAllocatedVCoresCounterMap;
  private int port;
  private int ajaxUpdateTimeMS = 1000;
  // html page templates
  private String simulateInfoTemplate;
  private String simulateTemplate;
  private String trackTemplate;

  {
    // load templates
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    try {
      simulateInfoTemplate = FileUtils.readFileToString(new File(
              cl.getResource("simulate.info.html.template").getFile()));
      simulateTemplate = FileUtils.readFileToString(new File(
              cl.getResource("simulate.html.template").getFile()));
      trackTemplate = FileUtils.readFileToString(new File(
              cl.getResource("track.html.template").getFile()));
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  public SLSWebApp(ResourceSchedulerWrapper wrapper, int metricsAddressPort) {
    this.wrapper = wrapper;
    metrics = wrapper.getMetrics();
    handleOperTimecostHistogramMap =
            new HashMap<SchedulerEventType, Histogram>();
    queueAllocatedMemoryCounterMap = new HashMap<String, Counter>();
    queueAllocatedVCoresCounterMap = new HashMap<String, Counter>();
    schedulerMetrics = wrapper.getSchedulerMetrics();
    port = metricsAddressPort;
  }

  public void start() throws Exception {
    // static files
    final ResourceHandler staticHandler = new ResourceHandler();
    staticHandler.setResourceBase("html");

    Handler handler = new AbstractHandler() {
      @Override
      public void handle(String target, HttpServletRequest request,
                         HttpServletResponse response, int dispatch) {
        try {
          // time unit: seconds by default (divide milliseconds by 1000),
          // minutes when the request carries "u=m"
          int timeunit = 1000;
          String timeunitLabel = "second";
          if (request.getParameter("u") != null &&
                  request.getParameter("u").equalsIgnoreCase("m")) {
            timeunit = 1000 * 60;
            timeunitLabel = "minute";
          }

          // html page request
          if (target.equals("/")) {
            printPageIndex(request, response);
          } else if (target.equals("/simulate")) {
            printPageSimulate(request, response, timeunit, timeunitLabel);
          } else if (target.equals("/track")) {
            printPageTrack(request, response, timeunit, timeunitLabel);
          } else
          // js/css request
          if (target.startsWith("/js") || target.startsWith("/css")) {
            response.setCharacterEncoding("utf-8");
            staticHandler.handle(target, request, response, dispatch);
          } else
          // json request
          if (target.equals("/simulateMetrics")) {
            printJsonMetrics(request, response);
          } else if (target.equals("/trackMetrics")) {
            printJsonTrack(request, response);
          }
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    };

    server = new Server(port);
    server.setHandler(handler);

    server.start();
  }

  public void stop() throws Exception {
    if (server != null) {
      server.stop();
    }
  }

  /**
   * index html page, shows the simulation info
   * path ""
   * @param request http request
   * @param response http response
   * @throws java.io.IOException
   */
  private void printPageIndex(HttpServletRequest request,
                              HttpServletResponse response) throws IOException {
    response.setContentType("text/html");
    response.setStatus(HttpServletResponse.SC_OK);

    String simulateInfo;
    if (SLSRunner.simulateInfoMap.isEmpty()) {
      String empty = "<tr><td colspan='2' align='center'>" +
              "No information available</td></tr>";
      simulateInfo = MessageFormat.format(simulateInfoTemplate, empty);
    } else {
      StringBuilder info = new StringBuilder();
      for (Map.Entry<String, Object> entry :
              SLSRunner.simulateInfoMap.entrySet()) {
        info.append("<tr>");
        info.append("<td class='td1'>").append(entry.getKey()).append("</td>");
        info.append("<td class='td2'>").append(entry.getValue())
                .append("</td>");
        info.append("</tr>");
      }
|
simulateInfo =
|
||||||
|
MessageFormat.format(simulateInfoTemplate, info.toString());
|
||||||
|
}
|
||||||
|
response.getWriter().println(simulateInfo);
|
||||||
|
|
||||||
|
((Request) request).setHandled(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* simulate html page, show several real-runtime chart
|
||||||
|
* path "/simulate"
|
||||||
|
* use d3.js
|
||||||
|
* @param request http request
|
||||||
|
* @param response http response
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
private void printPageSimulate(HttpServletRequest request,
|
||||||
|
HttpServletResponse response, int timeunit,
|
||||||
|
String timeunitLabel)
|
||||||
|
throws IOException {
|
||||||
|
response.setContentType("text/html");
|
||||||
|
response.setStatus(HttpServletResponse.SC_OK);
|
||||||
|
|
||||||
|
// queues {0}
|
||||||
|
Set<String> queues = wrapper.getQueueSet();
|
||||||
|
StringBuilder queueInfo = new StringBuilder();
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
for (String queue : queues) {
|
||||||
|
queueInfo.append("legends[4][").append(i).append("] = 'queue.")
|
||||||
|
.append(queue).append(".allocated.memory';");
|
||||||
|
queueInfo.append("legends[5][").append(i).append("] = 'queue.")
|
||||||
|
.append(queue).append(".allocated.vcores';");
|
||||||
|
i ++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// time unit label {1}
|
||||||
|
// time unit {2}
|
||||||
|
// ajax update time interval {3}
|
||||||
|
String simulateInfo = MessageFormat.format(simulateTemplate,
|
||||||
|
queueInfo.toString(), timeunitLabel, "" + timeunit,
|
||||||
|
"" + ajaxUpdateTimeMS);
|
||||||
|
response.getWriter().println(simulateInfo);
|
||||||
|
|
||||||
|
((Request) request).setHandled(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* html page for tracking one queue or job
|
||||||
|
* use d3.js
|
||||||
|
* @param request http request
|
||||||
|
* @param response http response
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
private void printPageTrack(HttpServletRequest request,
|
||||||
|
HttpServletResponse response, int timeunit,
|
||||||
|
String timeunitLabel)
|
||||||
|
throws IOException {
|
||||||
|
response.setContentType("text/html");
|
||||||
|
response.setStatus(HttpServletResponse.SC_OK);
|
||||||
|
|
||||||
|
// tracked queues {0}
|
||||||
|
StringBuilder trackedQueueInfo = new StringBuilder();
|
||||||
|
Set<String> trackedQueues = wrapper.getQueueSet();
|
||||||
|
for(String queue : trackedQueues) {
|
||||||
|
trackedQueueInfo.append("<option value='Queue ").append(queue)
|
||||||
|
.append("'>").append(queue).append("</option>");
|
||||||
|
}
|
||||||
|
|
||||||
|
// tracked apps {1}
|
||||||
|
StringBuilder trackedAppInfo = new StringBuilder();
|
||||||
|
Set<String> trackedApps = wrapper.getTrackedAppSet();
|
||||||
|
for(String job : trackedApps) {
|
||||||
|
trackedAppInfo.append("<option value='Job ").append(job)
|
||||||
|
.append("'>").append(job).append("</option>");
|
||||||
|
}
|
||||||
|
|
||||||
|
// timeunit label {2}
|
||||||
|
// time unit {3}
|
||||||
|
// ajax update time {4}
|
||||||
|
// final html
|
||||||
|
String trackInfo = MessageFormat.format(trackTemplate,
|
||||||
|
trackedQueueInfo.toString(), trackedAppInfo.toString(),
|
||||||
|
timeunitLabel, "" + timeunit, "" + ajaxUpdateTimeMS);
|
||||||
|
response.getWriter().println(trackInfo);
|
||||||
|
|
||||||
|
((Request) request).setHandled(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* package metrics information in a json and return
|
||||||
|
* @param request http request
|
||||||
|
* @param response http response
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
private void printJsonMetrics(HttpServletRequest request,
|
||||||
|
HttpServletResponse response)
|
||||||
|
throws IOException {
|
||||||
|
response.setContentType("text/json");
|
||||||
|
response.setStatus(HttpServletResponse.SC_OK);
|
||||||
|
|
||||||
|
response.getWriter().println(generateRealTimeTrackingMetrics());
|
||||||
|
((Request) request).setHandled(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String generateRealTimeTrackingMetrics() {
|
||||||
|
// JVM
|
||||||
|
double jvmFreeMemoryGB, jvmMaxMemoryGB, jvmTotalMemoryGB;
|
||||||
|
if (jvmFreeMemoryGauge == null &&
|
||||||
|
metrics.getGauges().containsKey("variable.jvm.free.memory")) {
|
||||||
|
jvmFreeMemoryGauge = metrics.getGauges().get("variable.jvm.free.memory");
|
||||||
|
}
|
||||||
|
if (jvmMaxMemoryGauge == null &&
|
||||||
|
metrics.getGauges().containsKey("variable.jvm.max.memory")) {
|
||||||
|
jvmMaxMemoryGauge = metrics.getGauges().get("variable.jvm.max.memory");
|
||||||
|
}
|
||||||
|
if (jvmTotalMemoryGauge == null &&
|
||||||
|
metrics.getGauges().containsKey("variable.jvm.total.memory")) {
|
||||||
|
jvmTotalMemoryGauge = metrics.getGauges()
|
||||||
|
.get("variable.jvm.total.memory");
|
||||||
|
}
|
||||||
|
jvmFreeMemoryGB = jvmFreeMemoryGauge == null ? 0 :
|
||||||
|
Double.parseDouble(jvmFreeMemoryGauge.getValue().toString())
|
||||||
|
/1024/1024/1024;
|
||||||
|
jvmMaxMemoryGB = jvmMaxMemoryGauge == null ? 0 :
|
||||||
|
Double.parseDouble(jvmMaxMemoryGauge.getValue().toString())
|
||||||
|
/1024/1024/1024;
|
||||||
|
jvmTotalMemoryGB = jvmTotalMemoryGauge == null ? 0 :
|
||||||
|
Double.parseDouble(jvmTotalMemoryGauge.getValue().toString())
|
||||||
|
/1024/1024/1024;
|
||||||
|
|
||||||
|
// number of running applications/containers
|
||||||
|
String numRunningApps, numRunningContainers;
|
||||||
|
if (numRunningAppsGauge == null &&
|
||||||
|
metrics.getGauges().containsKey("variable.running.application")) {
|
||||||
|
numRunningAppsGauge =
|
||||||
|
metrics.getGauges().get("variable.running.application");
|
||||||
|
}
|
||||||
|
if (numRunningContainersGauge == null &&
|
||||||
|
metrics.getGauges().containsKey("variable.running.container")) {
|
||||||
|
numRunningContainersGauge =
|
||||||
|
metrics.getGauges().get("variable.running.container");
|
||||||
|
}
|
||||||
|
numRunningApps = numRunningAppsGauge == null ? "0" :
|
||||||
|
numRunningAppsGauge.getValue().toString();
|
||||||
|
numRunningContainers = numRunningContainersGauge == null ? "0" :
|
||||||
|
numRunningContainersGauge.getValue().toString();
|
||||||
|
|
||||||
|
// cluster available/allocate resource
|
||||||
|
double allocatedMemoryGB, allocatedVCoresGB,
|
||||||
|
availableMemoryGB, availableVCoresGB;
|
||||||
|
if (allocatedMemoryGauge == null &&
|
||||||
|
metrics.getGauges()
|
||||||
|
.containsKey("variable.cluster.allocated.memory")) {
|
||||||
|
allocatedMemoryGauge = metrics.getGauges()
|
||||||
|
.get("variable.cluster.allocated.memory");
|
||||||
|
}
|
||||||
|
if (allocatedVCoresGauge == null &&
|
||||||
|
metrics.getGauges()
|
||||||
|
.containsKey("variable.cluster.allocated.vcores")) {
|
||||||
|
allocatedVCoresGauge = metrics.getGauges()
|
||||||
|
.get("variable.cluster.allocated.vcores");
|
||||||
|
}
|
||||||
|
if (availableMemoryGauge == null &&
|
||||||
|
metrics.getGauges()
|
||||||
|
.containsKey("variable.cluster.available.memory")) {
|
||||||
|
availableMemoryGauge = metrics.getGauges()
|
||||||
|
.get("variable.cluster.available.memory");
|
||||||
|
}
|
||||||
|
if (availableVCoresGauge == null &&
|
||||||
|
metrics.getGauges()
|
||||||
|
.containsKey("variable.cluster.available.vcores")) {
|
||||||
|
availableVCoresGauge = metrics.getGauges()
|
||||||
|
.get("variable.cluster.available.vcores");
|
||||||
|
}
|
||||||
|
allocatedMemoryGB = allocatedMemoryGauge == null ? 0 :
|
||||||
|
Double.parseDouble(allocatedMemoryGauge.getValue().toString())/1024;
|
||||||
|
allocatedVCoresGB = allocatedVCoresGauge == null ? 0 :
|
||||||
|
Double.parseDouble(allocatedVCoresGauge.getValue().toString());
|
||||||
|
availableMemoryGB = availableMemoryGauge == null ? 0 :
|
||||||
|
Double.parseDouble(availableMemoryGauge.getValue().toString())/1024;
|
||||||
|
availableVCoresGB = availableVCoresGauge == null ? 0 :
|
||||||
|
Double.parseDouble(availableVCoresGauge.getValue().toString());
|
||||||
|
|
||||||
|
// scheduler operation
|
||||||
|
double allocateTimecost, handleTimecost;
|
||||||
|
if (allocateTimecostHistogram == null &&
|
||||||
|
metrics.getHistograms().containsKey(
|
||||||
|
"sampler.scheduler.operation.allocate.timecost")) {
|
||||||
|
allocateTimecostHistogram = metrics.getHistograms()
|
||||||
|
.get("sampler.scheduler.operation.allocate.timecost");
|
||||||
|
}
|
||||||
|
if (handleTimecostHistogram == null &&
|
||||||
|
metrics.getHistograms().containsKey(
|
||||||
|
"sampler.scheduler.operation.handle.timecost")) {
|
||||||
|
handleTimecostHistogram = metrics.getHistograms()
|
||||||
|
.get("sampler.scheduler.operation.handle.timecost");
|
||||||
|
}
|
||||||
|
allocateTimecost = allocateTimecostHistogram == null ? 0.0 :
|
||||||
|
allocateTimecostHistogram.getSnapshot().getMean()/1000000;
|
||||||
|
handleTimecost = handleTimecostHistogram == null ? 0.0 :
|
||||||
|
handleTimecostHistogram.getSnapshot().getMean()/1000000;
|
||||||
|
// various handle operation
|
||||||
|
Map<SchedulerEventType, Double> handleOperTimecostMap =
|
||||||
|
new HashMap<SchedulerEventType, Double>();
|
||||||
|
for (SchedulerEventType e : SchedulerEventType.values()) {
|
||||||
|
String key = "sampler.scheduler.operation.handle." + e + ".timecost";
|
||||||
|
if (! handleOperTimecostHistogramMap.containsKey(e) &&
|
||||||
|
metrics.getHistograms().containsKey(key)) {
|
||||||
|
handleOperTimecostHistogramMap.put(e, metrics.getHistograms().get(key));
|
||||||
|
}
|
||||||
|
double timecost = handleOperTimecostHistogramMap.containsKey(e) ?
|
||||||
|
handleOperTimecostHistogramMap.get(e).getSnapshot().getMean()/1000000
|
||||||
|
: 0;
|
||||||
|
handleOperTimecostMap.put(e, timecost);
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocated resource for each queue
|
||||||
|
Map<String, Double> queueAllocatedMemoryMap = new HashMap<String, Double>();
|
||||||
|
Map<String, Long> queueAllocatedVCoresMap = new HashMap<String, Long>();
|
||||||
|
for (String queue : wrapper.getQueueSet()) {
|
||||||
|
// memory
|
||||||
|
String key = "counter.queue." + queue + ".allocated.memory";
|
||||||
|
if (! queueAllocatedMemoryCounterMap.containsKey(queue) &&
|
||||||
|
metrics.getCounters().containsKey(key)) {
|
||||||
|
queueAllocatedMemoryCounterMap.put(queue,
|
||||||
|
metrics.getCounters().get(key));
|
||||||
|
}
|
||||||
|
double queueAllocatedMemoryGB =
|
||||||
|
queueAllocatedMemoryCounterMap.containsKey(queue) ?
|
||||||
|
queueAllocatedMemoryCounterMap.get(queue).getCount()/1024.0
|
||||||
|
: 0;
|
||||||
|
queueAllocatedMemoryMap.put(queue, queueAllocatedMemoryGB);
|
||||||
|
// vCores
|
||||||
|
key = "counter.queue." + queue + ".allocated.cores";
|
||||||
|
if (! queueAllocatedVCoresCounterMap.containsKey(queue) &&
|
||||||
|
metrics.getCounters().containsKey(key)) {
|
||||||
|
queueAllocatedVCoresCounterMap.put(
|
||||||
|
queue, metrics.getCounters().get(key));
|
||||||
|
}
|
||||||
|
long queueAllocatedVCores =
|
||||||
|
queueAllocatedVCoresCounterMap.containsKey(queue) ?
|
||||||
|
queueAllocatedVCoresCounterMap.get(queue).getCount(): 0;
|
||||||
|
queueAllocatedVCoresMap.put(queue, queueAllocatedVCores);
|
||||||
|
}
|
||||||
|
|
||||||
|
// package results
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("{");
|
||||||
|
sb.append("\"time\":" ).append(System.currentTimeMillis())
|
||||||
|
.append(",\"jvm.free.memory\":").append(jvmFreeMemoryGB)
|
||||||
|
.append(",\"jvm.max.memory\":").append(jvmMaxMemoryGB)
|
||||||
|
.append(",\"jvm.total.memory\":").append(jvmTotalMemoryGB)
|
||||||
|
.append(",\"running.applications\":").append(numRunningApps)
|
||||||
|
.append(",\"running.containers\":").append(numRunningContainers)
|
||||||
|
.append(",\"cluster.allocated.memory\":").append(allocatedMemoryGB)
|
||||||
|
.append(",\"cluster.allocated.vcores\":").append(allocatedVCoresGB)
|
||||||
|
.append(",\"cluster.available.memory\":").append(availableMemoryGB)
|
||||||
|
.append(",\"cluster.available.vcores\":").append(availableVCoresGB);
|
||||||
|
|
||||||
|
for (String queue : wrapper.getQueueSet()) {
|
||||||
|
sb.append(",\"queue.").append(queue).append(".allocated.memory\":")
|
||||||
|
.append(queueAllocatedMemoryMap.get(queue));
|
||||||
|
sb.append(",\"queue.").append(queue).append(".allocated.vcores\":")
|
||||||
|
.append(queueAllocatedVCoresMap.get(queue));
|
||||||
|
}
|
||||||
|
// scheduler allocate & handle
|
||||||
|
sb.append(",\"scheduler.allocate.timecost\":").append(allocateTimecost);
|
||||||
|
sb.append(",\"scheduler.handle.timecost\":").append(handleTimecost);
|
||||||
|
for (SchedulerEventType e : SchedulerEventType.values()) {
|
||||||
|
sb.append(",\"scheduler.handle-").append(e).append(".timecost\":")
|
||||||
|
.append(handleOperTimecostMap.get(e));
|
||||||
|
}
|
||||||
|
sb.append("}");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* package metrics information for one tracked queue/app
|
||||||
|
* only support FairScheduler currently
|
||||||
|
* @throws java.io.IOException
|
||||||
|
*/
|
||||||
|
private void printJsonTrack(HttpServletRequest request,
|
||||||
|
HttpServletResponse response) throws IOException {
|
||||||
|
response.setContentType("text/json");
|
||||||
|
response.setStatus(HttpServletResponse.SC_OK);
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
if(schedulerMetrics instanceof FairSchedulerMetrics) {
|
||||||
|
String para = request.getParameter("t");
|
||||||
|
if (para.startsWith("Job ")) {
|
||||||
|
String appId = para.substring("Job ".length());
|
||||||
|
|
||||||
|
sb.append("{");
|
||||||
|
sb.append("\"time\": ").append(System.currentTimeMillis()).append(",");
|
||||||
|
sb.append("\"appId\": \"").append(appId).append("\"");
|
||||||
|
for(String metric : this.schedulerMetrics.getAppTrackedMetrics()) {
|
||||||
|
String key = "variable.app." + appId + "." + metric;
|
||||||
|
sb.append(",\"").append(metric).append("\": ");
|
||||||
|
if (metrics.getGauges().containsKey(key)) {
|
||||||
|
double memoryGB =
|
||||||
|
Double.parseDouble(
|
||||||
|
metrics.getGauges().get(key).getValue().toString())
|
||||||
|
/ 1024;
|
||||||
|
sb.append(memoryGB);
|
||||||
|
} else {
|
||||||
|
sb.append(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.append("}");
|
||||||
|
|
||||||
|
} else if(para.startsWith("Queue ")) {
|
||||||
|
String queueName = para.substring("Queue ".length());
|
||||||
|
sb.append("{");
|
||||||
|
sb.append("\"time\": ").append(System.currentTimeMillis()).append(",");
|
||||||
|
sb.append("\"queueName\": \"").append(queueName).append("\"");
|
||||||
|
for(String metric : this.schedulerMetrics.getQueueTrackedMetrics()) {
|
||||||
|
String key = "variable.queue." + queueName + "." + metric;
|
||||||
|
sb.append(",\"").append(metric).append("\": ");
|
||||||
|
if (metrics.getGauges().containsKey(key)) {
|
||||||
|
double memoryGB =
|
||||||
|
Double.parseDouble(
|
||||||
|
metrics.getGauges().get(key).getValue().toString())
|
||||||
|
/ 1024;
|
||||||
|
sb.append(memoryGB);
|
||||||
|
} else {
|
||||||
|
sb.append(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.append("}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String output = sb.toString();
|
||||||
|
if (output.isEmpty()) {
|
||||||
|
output = "[]";
|
||||||
|
}
|
||||||
|
response.getWriter().println(output);
|
||||||
|
// package result
|
||||||
|
((Request) request).setHandled(true);
|
||||||
|
}
|
||||||
|
}
|
|
@@ -0,0 +1,67 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!--
  This file contains queue allocations for the Capacity Scheduler.
  Its format is explained in the Capacity Scheduler documentation at
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html.
  The documentation also includes a sample config file.
-->

<configuration>
  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>sls_queue_1,sls_queue_2,sls_queue_3</value>
    <description>The queues at this level (root is the root queue).
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_1.capacity</name>
    <value>25</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_1.maximum-capacity</name>
    <value>100</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_2.capacity</name>
    <value>25</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_2.maximum-capacity</name>
    <value>100</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_3.capacity</name>
    <value>50</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_3.maximum-capacity</name>
    <value>100</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>1000</value>
    <description>Maximum number of applications in the system that
      can be concurrently active, both running and pending.</description>
  </property>
</configuration>

@@ -0,0 +1,50 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!--
  This file contains pool and user allocations for the Fair Scheduler.
  Its format is explained in the Fair Scheduler documentation at
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html
  The documentation also includes a sample config file.
-->

<allocations>
  <user name="jenkins">
    <!-- Limit on running jobs for the user across all pools. If more
      jobs than this are submitted, only the first <maxRunningJobs> will
      be scheduled at any given time. Defaults to infinity or the
      userMaxJobsDefault value set below. -->
    <maxRunningJobs>1000</maxRunningJobs>
  </user>
  <userMaxAppsDefault>1000</userMaxAppsDefault>
  <queue name="sls_queue_1">
    <minResources>1024 mb, 1 vcores</minResources>
    <schedulingMode>fair</schedulingMode>
    <weight>0.25</weight>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
  <queue name="sls_queue_2">
    <minResources>1024 mb, 1 vcores</minResources>
    <schedulingMode>fair</schedulingMode>
    <weight>0.25</weight>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
  <queue name="sls_queue_3">
    <minResources>1024 mb, 1 vcores</minResources>
    <weight>0.5</weight>
    <schedulingMode>fair</schedulingMode>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
</allocations>

@@ -0,0 +1,47 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!--
  This file contains pool and user allocations for the Fair Scheduler.
  Its format is explained in the Fair Scheduler documentation at
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
  The documentation also includes a sample config file.
-->

<configuration>
  <property>
    <description>Absolute path to allocation file. An allocation file is an
      XML manifest describing queues and their properties, in addition to
      certain policy defaults. This file must be in XML format as described in
      http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
    </description>
    <name>yarn.scheduler.fair.allocation.file</name>
    <value>fair-scheduler-allocation.xml</value>
  </property>

  <property>
    <description>Whether to use preemption. Note that preemption is
      experimental in the current version. Defaults to false.</description>
    <name>yarn.scheduler.fair.preemption</name>
    <value>true</value>
  </property>

  <property>
    <description>Whether to allow multiple container assignments in one
      heartbeat. Defaults to false.</description>
    <name>yarn.scheduler.fair.assignmultiple</name>
    <value>true</value>
  </property>
</configuration>

@@ -0,0 +1,19 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
log4j.appender.test=org.apache.log4j.ConsoleAppender
log4j.appender.test.Target=System.out
log4j.appender.test.layout=org.apache.log4j.PatternLayout
log4j.appender.test.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n

log4j.logger=NONE, test

@@ -0,0 +1,81 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>

  <!-- SLSRunner configuration -->
  <property>
    <name>yarn.sls.runner.pool.size</name>
    <value>100</value>
  </property>

  <!-- Nodes configuration -->
  <property>
    <name>yarn.sls.nm.memory.mb</name>
    <value>10240</value>
  </property>
  <property>
    <name>yarn.sls.nm.vcores</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.sls.nm.heartbeat.interval.ms</name>
    <value>1000</value>
  </property>

  <!-- Apps configuration -->
  <property>
    <name>yarn.sls.am.heartbeat.interval.ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.sls.am.type.mapreduce</name>
    <value>org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator</value>
  </property>

  <!-- Containers configuration -->
  <property>
    <name>yarn.sls.container.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.sls.container.vcores</name>
    <value>1</value>
  </property>

  <!-- metrics -->
  <property>
    <name>yarn.sls.metrics.switch</name>
    <value>ON</value>
  </property>
  <property>
    <name>yarn.sls.metrics.web.address.port</name>
    <value>10001</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.FifoSchedulerMetrics</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.CapacitySchedulerMetrics</value>
  </property>

</configuration>

@@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    <!-- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</value> -->
    <!-- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value> -->
  </property>

  <property>
    <description>The address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>localhost:18088</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>localhost:18031</value>
  </property>

  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>localhost:18030</value>
  </property>

  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>localhost:18032</value>
  </property>

  <property>
    <description>The address of the RM admin interface.</description>
    <name>yarn.resourcemanager.admin.address</name>
    <value>localhost:18033</value>
  </property>

  <property>
    <description>Set to false to avoid the IP check.</description>
    <name>hadoop.security.token.service.use_ip</name>
    <value>false</value>
  </property>

</configuration>

@@ -0,0 +1,440 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.

  ---
  Yarn Scheduler Load Simulator (SLS)
  ---
  ---
  ${maven.build.timestamp}

Yarn Scheduler Load Simulator (SLS)

  \[ {{{./index.html}Go Back}} \]

%{toc|section=1|fromDepth=0}

* Overview

** Overview

  The Yarn scheduler is a fertile area of interest with different
  implementations, e.g., Fifo, Capacity and Fair schedulers. Meanwhile,
  several optimizations have also been made to improve scheduler performance
  for different scenarios and workloads. Each scheduler algorithm has its own
  set of features, and drives scheduling decisions by many factors, such as
  fairness, capacity guarantee, resource availability, etc. It is very
  important to evaluate a scheduler algorithm thoroughly before deploying it
  in a production cluster. Unfortunately, it is currently non-trivial to
  evaluate a scheduler algorithm. Evaluating in a real cluster is
  time-consuming and costly, and it is also very hard to find a large-enough
  cluster. Hence, a simulator that can predict how well a scheduler algorithm
  would work for some specific workload would be quite useful.

  The Yarn Scheduler Load Simulator (SLS) is such a tool, which can simulate
  large-scale Yarn clusters and application loads on a single machine. This
  simulator would be invaluable in furthering Yarn by providing a tool for
  researchers and developers to prototype new scheduler features and predict
  their behavior and performance with a reasonable amount of confidence,
  thereby aiding rapid innovation.

  The simulator exercises the real Yarn <<<ResourceManager>>>, removing the
  network factor by simulating <<<NodeManagers>>> and <<<ApplicationMasters>>>
  via handling and dispatching <<<NM>>>/<<<AM>>> heartbeat events from within
  the same JVM. To keep track of scheduler behavior and performance, a
  scheduler wrapper wraps the real scheduler.

  The size of the cluster and the application load can be loaded from
  configuration files, which are generated directly from job history files by
  adopting {{{https://hadoop.apache.org/docs/stable/rumen.html}Apache Rumen}}.

  The simulator produces real-time metrics while executing, including:

  * Resource usage for the whole cluster and each queue, which can be
    utilized to configure the cluster and queue capacities.

  * The detailed application execution trace (recorded in relation to
    simulated time), which can be analyzed to understand/validate the
    scheduler behavior (individual jobs' turnaround time, throughput,
    fairness, capacity guarantee, etc.).

  * Several key metrics of the scheduler algorithm, such as the time cost of
    each scheduler operation (allocate, handle, etc.), which can be utilized
    by Hadoop developers to find code hotspots and scalability limits.

  []

** Goals

  * Exercise the scheduler at scale without a real cluster, using real job
    traces.

  * Being able to simulate real workloads.

  []

** Architecture

  The following figure illustrates the implementation architecture of the
  simulator.

[images/sls_arch.png] The architecture of the simulator

  The simulator takes workload traces as input, and fetches the cluster and
  application information. For each NM and AM, it builds a simulator to mimic
  their running. All NM/AM simulators run in a thread pool. The simulator
  reuses the Yarn Resource Manager, and builds a wrapper around the
  scheduler. The Scheduler Wrapper can track the scheduler behaviors and
  generates several logs, which are the outputs of the simulator and can be
  further analyzed.

** Use cases

  * Engineering

    * Verify correctness of the scheduler algorithm under load.

    * Cheap/practical way for finding code hotspots/critical-path.

    * Validate the impact of changes and new features.

    * Determine what drives the scheduler scalability limits.

  []

  * QA

    * Validate scheduler behavior for "large" clusters and several workload
      profiles.

  * Solutions/Sales

    * Sizing model for predefined/typical workloads.

    * Cluster sizing tool using real customer data (job traces).

    * Determine minimum SLAs under a particular workload.

  []

* Usage

  This section shows how to use the simulator. Here let <<<$HADOOP_ROOT>>>
  represent the Hadoop install directory. If you build Hadoop yourself,
  <<<$HADOOP_ROOT>>> is <<<hadoop-dist/target/hadoop-$VERSION>>>. The
  simulator is located at <<<$HADOOP_ROOT/share/hadoop/tools/sls>>>. The
  folder <<<sls>>> contains four directories: <<<bin>>>, <<<html>>>,
  <<<sample-conf>>>, and <<<sample-data>>>.

  * <<<bin>>>: contains running scripts for the simulator.

  * <<<html>>>: contains several html/css/js files needed for real-time
    tracking.

  * <<<sample-conf>>>: specifies the simulator configurations.

  * <<<sample-data>>>: provides an example rumen trace, which can be used to
    generate inputs for the simulator.

  []

  The following sections describe how to use the simulator step by step.
  Before starting, make sure that the command <<<hadoop>>> is included in
  your <<<$PATH>>> environment variable.

** Step 1: Configure Hadoop and the simulator

  Before we start, make sure Hadoop and the simulator are configured well.
  All configuration files for Hadoop and the simulator should be placed in
  directory <<<$HADOOP_ROOT/etc/hadoop>>>, where the <<<ResourceManager>>>
  and Yarn scheduler load their configurations. Directory
  <<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/>>> provides several
  example configurations that can be used to start a demo.

  For the configuration of Hadoop and the Yarn scheduler, users can refer to
  Yarn's website
  ({{{http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/}
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/}}).

  The simulator loads its configuration information from the file
  <<<$HADOOP_ROOT/etc/hadoop/sls-runner.xml>>>.

  Here we illustrate each configuration parameter in <<<sls-runner.xml>>>.
  Note that <<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/sls-runner.xml>>>
  contains all the default values for these configuration parameters.

  * <<<yarn.sls.runner.pool.size>>>

    The simulator uses a thread pool to simulate the <<<NM>>> and <<<AM>>>
    running, and this parameter specifies the number of threads in the pool.

  * <<<yarn.sls.nm.memory.mb>>>

    The total memory for each <<<NMSimulator>>>.

  * <<<yarn.sls.nm.vcores>>>

    The total vCores for each <<<NMSimulator>>>.

  * <<<yarn.sls.nm.heartbeat.interval.ms>>>

    The heartbeat interval for each <<<NMSimulator>>>.

  * <<<yarn.sls.am.heartbeat.interval.ms>>>

    The heartbeat interval for each <<<AMSimulator>>>.

  * <<<yarn.sls.am.type.mapreduce>>>

    The <<<AMSimulator>>> implementation for MapReduce-like applications.
    Users can specify implementations for other types of applications; see
    the sketch after this list.

  * <<<yarn.sls.container.memory.mb>>>

    The memory required for each container simulator.

  * <<<yarn.sls.container.vcores>>>

    The vCores required for each container simulator.

  * <<<yarn.sls.runner.metrics.switch>>>

    The simulator introduces {{{http://metrics.codahale.com/}Metrics}} to
    measure the behaviors of critical components and operations. This field
    specifies whether the Metrics are turned on (<<<ON>>>) or off (<<<OFF>>>).

  * <<<yarn.sls.metrics.web.address.port>>>

    The port used by the simulator to provide real-time tracking. The default
    value is 10001.

  * <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler>>>

    The implementation of scheduler metrics for the Fifo Scheduler.

  * <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler>>>

    The implementation of scheduler metrics for the Fair Scheduler.

  * <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler>>>

    The implementation of scheduler metrics for the Capacity Scheduler.

  []
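  As a concrete sketch of Step 1, the snippet below overrides the thread pool
  size and registers an <<<AMSimulator>>> for a second application type. Note
  that the <<<yarn.sls.am.type.stream>>> property name and the
  <<<StreamAMSimulator>>> class are hypothetical placeholders rather than
  classes shipped with this patch; only the <<<yarn.sls.am.type.*>>> naming
  pattern follows the <<<mapreduce>>> example above.

+----+
<configuration>
  <!-- more threads for NM/AM simulators -->
  <property>
    <name>yarn.sls.runner.pool.size</name>
    <value>200</value>
  </property>
  <!-- hypothetical AMSimulator for a custom "stream" application type -->
  <property>
    <name>yarn.sls.am.type.stream</name>
    <value>org.apache.hadoop.yarn.sls.appmaster.StreamAMSimulator</value>
  </property>
</configuration>
+----+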
** Step 2: Run the simulator

  The simulator supports two types of input files: the rumen traces and its
  own input traces. The script to start the simulator is <<<slsrun.sh>>>.

+----+
$ $HADOOP_ROOT/share/hadoop/tools/sls/bin/slsrun.sh
    --input-rumen|--input-sls=<TRACE_FILE1,TRACE_FILE2,...>
    --output-dir=<SLS_SIMULATION_OUTPUT_DIRECTORY> [--nodes=<SLS_NODES_FILE>]
    [--track-jobs=<JOBID1,JOBID2,...>] [--print-simulation]
+----+

  * <<<--input-rumen>>>: The input rumen trace files. Users can input
    multiple files, separated by commas. One example trace is provided in
    <<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-data/2jobs2min-rumen-jh.json>>>.

  * <<<--input-sls>>>: The simulator's own input file format. The simulator
    also provides a tool to convert rumen traces to sls traces
    (<<<rumen2sls.sh>>>). Refer to the appendix for an example of an sls
    input json file.

  * <<<--output-dir>>>: The output directory for generated running logs and
    metrics.

  * <<<--nodes>>>: The cluster topology. By default, the simulator uses the
    topology fetched from the input json files. Users can specify a new
    topology by setting this parameter. Refer to the appendix for the
    topology file format.

  * <<<--track-jobs>>>: The particular jobs that will be tracked during the
    simulator run, separated by commas.

  * <<<--print-simulation>>>: Whether to print out simulation information
    before the simulator runs, including the number of nodes, applications,
    and tasks, and information for each application.

  []
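  For instance, the following invocation replays the bundled sample trace and
  prints the simulation summary first (a sketch; the output directory
  <<</tmp/sls-run-1>>> is an arbitrary choice):

+----+
$ cd $HADOOP_ROOT/share/hadoop/tools/sls
$ bin/slsrun.sh --input-rumen=sample-data/2jobs2min-rumen-jh.json \
    --output-dir=/tmp/sls-run-1 --print-simulation
+----+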
  In comparison to the rumen format, the sls format is much simpler and users
  can easily generate various workloads. The simulator also provides a tool
  to convert rumen traces to sls traces.

+----+
$ $HADOOP_ROOT/share/hadoop/tools/sls/bin/rumen2sls.sh
    --rumen-file=<RUMEN_FILE>
    --output-dir=<SLS_OUTPUT_DIRECTORY>
    [--output-prefix=<SLS_FILE_PREFIX>]
+----+

  * <<<--rumen-file>>>: The rumen format file. One example trace is provided
    in directory <<<sample-data>>>.

  * <<<--output-dir>>>: The output directory for the generated simulation
    traces. Two files will be generated in this output directory: one trace
    file containing all the job and task information, and another file
    describing the topology information.

  * <<<--output-prefix>>>: The prefix of the generated files. The default
    value is "sls", and the two generated files are <<<sls-jobs.json>>> and
    <<<sls-nodes.json>>>.

  []
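  Putting the two scripts together, a conversion followed by a run on the
  converted traces could look like the following sketch (the
  <<</tmp/slstraces>>> and <<</tmp/sls-run-2>>> paths are arbitrary choices):

+----+
$ cd $HADOOP_ROOT/share/hadoop/tools/sls
$ bin/rumen2sls.sh --rumen-file=sample-data/2jobs2min-rumen-jh.json \
    --output-dir=/tmp/slstraces
$ bin/slsrun.sh --input-sls=/tmp/slstraces/sls-jobs.json \
    --nodes=/tmp/slstraces/sls-nodes.json --output-dir=/tmp/sls-run-2
+----+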
* Metrics

  The Yarn Scheduler Load Simulator has integrated
  {{{http://metrics.codahale.com/}Metrics}} to measure the behaviors of
  critical components and operations, including running applications and
  containers, available cluster resources, scheduler operation timecost,
  etc. If the switch <<<yarn.sls.runner.metrics.switch>>> is set to
  <<<ON>>>, <<<Metrics>>> will run and output its logs in the
  <<<--output-dir>>> directory specified by users. Users can track this
  information while the simulator runs, and can also analyze these logs
  afterwards to evaluate the scheduler performance.

** Real-time Tracking

  The simulator provides an interface for tracking its running in real time.
  Users can go to <<<http://host:port/simulate>>> to track the whole run,
  and <<<http://host:port/track>>> to track a particular job or queue. Here
  <<<host>>> is the host where we run the simulator, and <<<port>>> is the
  value configured by <<<yarn.sls.metrics.web.address.port>>> (the default
  value is 10001).
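  For example, with the simulator running locally and the default port, the
  two tracking pages would be:

+----+
http://localhost:10001/simulate
http://localhost:10001/track
+----+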
  Here we'll illustrate each chart shown on the webpage.

  The first figure describes the number of running applications and
  containers.

[images/sls_running_apps_containers.png] Number of running applications/containers

  The second figure describes the allocated and available resources (memory)
  in the cluster.

[images/sls_cluster_memory.png] Cluster Resource (Memory)

  The third figure describes the allocated resources for each queue. Here we
  have three queues: sls_queue_1, sls_queue_2, and sls_queue_3. The first two
  queues are configured with a 25% share, while the last one has a 50% share.

[images/sls_queue_allocated_memory.png] Queue Allocated Resource (Memory)

  The fourth figure describes the timecost for each scheduler operation.

[images/sls_scheduler_operation_timecost.png] Scheduler Operation Timecost

  Finally, we measure the memory used by the simulator.

[images/sls_JVM.png] JVM Memory

  The simulator also provides an interface for tracking particular jobs and
  queues. Go to <<<http://<Host>:<Port>/track>>> to get this information.

  Here the first figure illustrates the resource usage information for queue
  <<<sls_queue_3>>>.

[images/sls_track_queue.png] Tracking Queue <<<sls_queue_3>>>

  The second figure illustrates the resource usage information for job
  <<<job_1369942127770_0653>>>.

[images/sls_track_job.png] Tracking Job <<<job_1369942127770_0653>>>

** Offline Analysis

  After the simulator finishes, all logs are saved in the output directory
  specified by <<<--output-dir>>> when invoking
  <<<$HADOOP_ROOT/share/hadoop/tools/sls/bin/slsrun.sh>>>.

  * File <<<realtimetrack.json>>>: records all real-time tracking logs every
    second.

  * File <<<jobruntime.csv>>>: records all jobs' start and end times in the
    simulator.

  * Folder <<<metrics>>>: logs generated by the Metrics.

  []

  Users can also reproduce those real-time tracking charts in offline mode:
  just load the <<<realtimetrack.json>>> into
  <<<$HADOOP_ROOT/share/hadoop/tools/sls/html/showSimulationTrace.html>>>.
  Due to browser security restrictions, the files <<<realtimetrack.json>>>
  and <<<showSimulationTrace.html>>> must be placed in the same directory.
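  Each entry in <<<realtimetrack.json>>> is a JSON record in the same format
  the metrics web app serves (see
  <<<SLSWebApp.generateRealTimeTrackingMetrics()>>> earlier in this patch). A
  record has roughly the following shape for a cluster with a single queue
  <<<sls_queue_1>>>; the values below are purely illustrative, and the full
  record carries one <<<scheduler.handle-*.timecost>>> entry per scheduler
  event type:

+----+
{"time":1380000000000,"jvm.free.memory":0.21,"jvm.max.memory":0.89,
 "jvm.total.memory":0.45,"running.applications":2,"running.containers":5,
 "cluster.allocated.memory":5.0,"cluster.allocated.vcores":5.0,
 "cluster.available.memory":95.0,"cluster.available.vcores":95.0,
 "queue.sls_queue_1.allocated.memory":5.0,
 "queue.sls_queue_1.allocated.vcores":5,
 "scheduler.allocate.timecost":0.35,"scheduler.handle.timecost":0.42,
 "scheduler.handle-NODE_ADDED.timecost":0.1}
+----+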
* Appendix

** Resources

  {{{https://issues.apache.org/jira/browse/YARN-1021}YARN-1021}} is the main
  JIRA that introduces the Yarn Scheduler Load Simulator to the Hadoop Yarn
  project.

** SLS JSON input file format

  Here we provide an example format of the sls json file, which contains 2
  jobs. The first job has 3 map tasks and the second one has 2 map tasks.

+----+
{
  "am.type" : "mapreduce",
  "job.start.ms" : 0,
  "job.end.ms" : 95375,
  "job.queue.name" : "sls_queue_1",
  "job.id" : "job_1",
  "job.user" : "default",
  "job.tasks" : [ {
    "container.host" : "/default-rack/node1",
    "container.start.ms" : 6664,
    "container.end.ms" : 23707,
    "container.priority" : 20,
    "container.type" : "map"
  }, {
    "container.host" : "/default-rack/node3",
    "container.start.ms" : 6665,
    "container.end.ms" : 21593,
    "container.priority" : 20,
    "container.type" : "map"
  }, {
    "container.host" : "/default-rack/node2",
    "container.start.ms" : 68770,
    "container.end.ms" : 86613,
    "container.priority" : 20,
    "container.type" : "map"
  } ]
}
{
  "am.type" : "mapreduce",
  "job.start.ms" : 105204,
  "job.end.ms" : 197256,
  "job.queue.name" : "sls_queue_2",
  "job.id" : "job_2",
  "job.user" : "default",
  "job.tasks" : [ {
    "container.host" : "/default-rack/node1",
    "container.start.ms" : 111822,
    "container.end.ms" : 133985,
    "container.priority" : 20,
    "container.type" : "map"
  }, {
    "container.host" : "/default-rack/node2",
    "container.start.ms" : 111788,
    "container.end.ms" : 131377,
    "container.priority" : 20,
    "container.type" : "map"
  } ]
}
+----+

** Simulator input topology file format

  Here is an example input topology file which has 3 nodes organized in 1
  rack.

+----+
{
  "rack" : "default-rack",
  "nodes" : [ {
    "node" : "node1"
  }, {
    "node" : "node2"
  }, {
    "node" : "node3"
  }]
}
+----+

@@ -0,0 +1,30 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#banner {
  height: 93px;
  background: none;
}

#bannerLeft img {
  margin-left: 30px;
  margin-top: 10px;
}

#bannerRight img {
  margin: 17px;
}

(Ten binary PNG images, 45-110 KiB each, are added by this patch: the architecture diagram and chart screenshots referenced from the SLS documentation above.)

@ -0,0 +1,46 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.sls;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public class TestSLSRunner {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@SuppressWarnings("all")
|
||||||
|
public void testSimulatorRunning() throws Exception {
|
||||||
|
File tempDir = new File("target", UUID.randomUUID().toString());
|
||||||
|
|
||||||
|
// start the simulator
|
||||||
|
File slsOutputDir = new File(tempDir.getAbsolutePath() + "/slsoutput/");
|
||||||
|
String args[] = new String[]{
|
||||||
|
"-inputrumen", "src/main/data/2jobs2min-rumen-jh.json",
|
||||||
|
"-output", slsOutputDir.getAbsolutePath()};
|
||||||
|
SLSRunner.main(args);
|
||||||
|
|
||||||
|
// wait for 45 seconds before stop
|
||||||
|
Thread.sleep(45 * 1000);
|
||||||
|
SLSRunner.getRunner().stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|

@ -0,0 +1,247 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls.scheduler;

import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class TestTaskRunner {
  private TaskRunner runner;

  @Before
  public void setUp() {
    runner = new TaskRunner();
    runner.setQueueSize(5);
  }

  @After
  public void cleanUp() {
    runner.stop();
  }

  public static class SingleTask extends TaskRunner.Task {
    public static CountDownLatch latch = new CountDownLatch(1);
    public static boolean first;

    public SingleTask(long startTime) {
      super.init(startTime);
    }

    @Override
    public void firstStep() {
      if (first) {
        Assert.fail();
      }
      first = true;
      latch.countDown();
    }

    @Override
    public void middleStep() {
      Assert.fail();
    }

    @Override
    public void lastStep() {
      Assert.fail();
    }
  }

  @Test
  public void testSingleTask() throws Exception {
    runner.start();
    runner.schedule(new SingleTask(0));
    SingleTask.latch.await(5000, TimeUnit.MILLISECONDS);
    Assert.assertTrue(SingleTask.first);
  }

  public static class DualTask extends TaskRunner.Task {
    public static CountDownLatch latch = new CountDownLatch(1);
    public static boolean first;
    public static boolean last;

    public DualTask(long startTime, long endTime, long interval) {
      super.init(startTime, endTime, interval);
    }

    @Override
    public void firstStep() {
      if (first) {
        Assert.fail();
      }
      first = true;
    }

    @Override
    public void middleStep() {
      Assert.fail();
    }

    @Override
    public void lastStep() {
      if (last) {
        Assert.fail();
      }
      last = true;
      latch.countDown();
    }
  }

  @Test
  public void testDualTask() throws Exception {
    runner.start();
    runner.schedule(new DualTask(0, 10, 10));
    DualTask.latch.await(5000, TimeUnit.MILLISECONDS);
    Assert.assertTrue(DualTask.first);
    Assert.assertTrue(DualTask.last);
  }

  public static class TriTask extends TaskRunner.Task {
    public static CountDownLatch latch = new CountDownLatch(1);
    public static boolean first;
    public static boolean middle;
    public static boolean last;

    public TriTask(long startTime, long endTime, long interval) {
      super.init(startTime, endTime, interval);
    }

    @Override
    public void firstStep() {
      if (first) {
        Assert.fail();
      }
      first = true;
    }

    @Override
    public void middleStep() {
      if (middle) {
        Assert.fail();
      }
      middle = true;
    }

    @Override
    public void lastStep() {
      if (last) {
        Assert.fail();
      }
      last = true;
      latch.countDown();
    }
  }

  @Test
  public void testTriTask() throws Exception {
    runner.start();
    runner.schedule(new TriTask(0, 10, 5));
    TriTask.latch.await(5000, TimeUnit.MILLISECONDS);
    Assert.assertTrue(TriTask.first);
    Assert.assertTrue(TriTask.middle);
    Assert.assertTrue(TriTask.last);
  }

  public static class MultiTask extends TaskRunner.Task {
    public static CountDownLatch latch = new CountDownLatch(1);
    public static boolean first;
    public static int middle;
    public static boolean last;

    public MultiTask(long startTime, long endTime, long interval) {
      super.init(startTime, endTime, interval);
    }

    @Override
    public void firstStep() {
      if (first) {
        Assert.fail();
      }
      first = true;
    }

    @Override
    public void middleStep() {
      middle++;
    }

    @Override
    public void lastStep() {
      if (last) {
        Assert.fail();
      }
      last = true;
      latch.countDown();
    }
  }

  @Test
  public void testMultiTask() throws Exception {
    runner.start();
    runner.schedule(new MultiTask(0, 20, 5));
    MultiTask.latch.await(5000, TimeUnit.MILLISECONDS);
    Assert.assertTrue(MultiTask.first);
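    // start=0, end=20, interval=5 makes the task fire at t=0,5,10,15,20;
    // the first and last firings map to firstStep/lastStep, so middleStep
    // runs (20 - 0) / 5 - 2 + 1 = 3 times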
    Assert.assertEquals((20 - 0) / 5 - 2 + 1, MultiTask.middle);
    Assert.assertTrue(MultiTask.last);
  }

  public static class PreStartTask extends TaskRunner.Task {
    public static CountDownLatch latch = new CountDownLatch(1);
    public static boolean first;

    public PreStartTask(long startTime) {
      super.init(startTime);
    }

    @Override
    public void firstStep() {
      if (first) {
        Assert.fail();
      }
      first = true;
      latch.countDown();
    }

    @Override
    public void middleStep() {
    }

    @Override
    public void lastStep() {
    }
  }

  @Test
  public void testPreStartQueueing() throws Exception {
    runner.schedule(new PreStartTask(210));
    Thread.sleep(210);
    runner.start();
    long startedAt = System.currentTimeMillis();
    PreStartTask.latch.await(5000, TimeUnit.MILLISECONDS);
    long runAt = System.currentTimeMillis();
    Assert.assertTrue(PreStartTask.first);
    Assert.assertTrue(runAt - startedAt >= 200);
  }

}
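Taken together, these tests pin down the TaskRunner.Task contract: init(startTime) registers a one-shot task, init(startTime, endTime, interval) a repeating one, and the runner calls firstStep() once at the start time, middleStep() at each intermediate tick, and lastStep() once at the end time. A minimal sketch of a custom task written against that contract (the class name and comments are illustrative, not from the source):

public class HeartbeatTask extends TaskRunner.Task {
  public HeartbeatTask(long startTime, long endTime, long intervalMs) {
    // repeating task: fires at startTime, then every intervalMs until endTime
    super.init(startTime, endTime, intervalMs);
  }

  @Override
  public void firstStep() { /* runs once, at startTime */ }

  @Override
  public void middleStep() { /* runs at each tick strictly between start and end */ }

  @Override
  public void lastStep() { /* runs once, at endTime */ }
}

// Scheduled the same way the tests do:
//   TaskRunner runner = new TaskRunner();
//   runner.setQueueSize(5);
//   runner.start();
//   runner.schedule(new HeartbeatTask(0, 60000, 1000));
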
@ -0,0 +1,34 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.utils;

import junit.framework.Assert;
import org.junit.Test;

public class TestSLSUtils {

  @Test
  public void testGetRackHostname() {
    String str = "/rack1/node1";
    String rackHostname[] = SLSUtils.getRackHostName(str);
    Assert.assertEquals(rackHostname[0], "rack1");
    Assert.assertEquals(rackHostname[1], "node1");
  }

}
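The test fixes the contract of SLSUtils.getRackHostName: given a topology path such as "/rack1/node1", it returns a two-element array of rack name and host name. A plausible implementation consistent with that contract (an assumption for illustration; the shipped SLSUtils may differ):

public static String[] getRackHostName(String hostname) {
  hostname = hostname.substring(1);  // "/rack1/node1" -> "rack1/node1"
  return hostname.split("/");        // -> {"rack1", "node1"}
}
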
@ -0,0 +1,121 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.web;

import junit.framework.Assert;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.junit.Test;

import java.io.File;
import java.text.MessageFormat;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class TestSLSWebApp {

  @Test
  public void testSimulateInfoPageHtmlTemplate() throws Exception {
    String simulateInfoTemplate = FileUtils.readFileToString(
        new File("src/main/html/simulate.info.html.template"));

    SLSRunner.simulateInfoMap.put("Number of racks", 10);
    SLSRunner.simulateInfoMap.put("Number of nodes", 100);
    SLSRunner.simulateInfoMap.put("Node memory (MB)", 1024);
    SLSRunner.simulateInfoMap.put("Node VCores", 1);
    SLSRunner.simulateInfoMap.put("Number of applications", 100);
    SLSRunner.simulateInfoMap.put("Number of tasks", 1000);
    SLSRunner.simulateInfoMap.put("Average tasks per application", 10);
    SLSRunner.simulateInfoMap.put("Number of queues", 4);
    SLSRunner.simulateInfoMap.put("Average applications per queue", 25);
    SLSRunner.simulateInfoMap.put("Estimated simulate time (s)", 10000);

    StringBuilder info = new StringBuilder();
    for (Map.Entry<String, Object> entry :
        SLSRunner.simulateInfoMap.entrySet()) {
      info.append("<tr>");
      info.append("<td class='td1'>" + entry.getKey() + "</td>");
      info.append("<td class='td2'>" + entry.getValue() + "</td>");
      info.append("</tr>");
    }

    String simulateInfo =
        MessageFormat.format(simulateInfoTemplate, info.toString());
    Assert.assertTrue("The simulate info html page should not be empty",
        simulateInfo.length() > 0);
    for (Map.Entry<String, Object> entry :
        SLSRunner.simulateInfoMap.entrySet()) {
      Assert.assertTrue("The simulate info html page should have information "
          + "of " + entry.getKey(), simulateInfo.contains("<td class='td1'>"
          + entry.getKey() + "</td><td class='td2'>"
          + entry.getValue() + "</td>"));
    }
  }

  @Test
  public void testSimulatePageHtmlTemplate() throws Exception {
    String simulateTemplate = FileUtils.readFileToString(
        new File("src/main/html/simulate.html.template"));

    Set<String> queues = new HashSet<String>();
    queues.add("sls_queue_1");
    queues.add("sls_queue_2");
    queues.add("sls_queue_3");
    String queueInfo = "";
    int i = 0;
    for (String queue : queues) {
      queueInfo += "legends[4][" + i + "] = 'queue" + queue
          + ".allocated.memory'";
      queueInfo += "legends[5][" + i + "] = 'queue" + queue
          + ".allocated.vcores'";
      i++;
    }
    String simulateInfo = MessageFormat.format(simulateTemplate,
        queueInfo, "s", 1000, 1000);
    Assert.assertTrue("The simulate page html page should not be empty",
        simulateInfo.length() > 0);
  }

  @Test
  public void testTrackPageHtmlTemplate() throws Exception {
    String trackTemplate = FileUtils.readFileToString(
        new File("src/main/html/track.html.template"));
    String trackedQueueInfo = "";
    Set<String> trackedQueues = new HashSet<String>();
    trackedQueues.add("sls_queue_1");
    trackedQueues.add("sls_queue_2");
    trackedQueues.add("sls_queue_3");
    for (String queue : trackedQueues) {
      trackedQueueInfo += "<option value='Queue " + queue + "'>"
          + queue + "</option>";
    }
    String trackedAppInfo = "";
    Set<String> trackedApps = new HashSet<String>();
    trackedApps.add("app_1");
    trackedApps.add("app_2");
    for (String job : trackedApps) {
      trackedAppInfo += "<option value='Job " + job + "'>" + job + "</option>";
    }
    String trackInfo = MessageFormat.format(trackTemplate, trackedQueueInfo,
        trackedAppInfo, "s", 1000, 1000);
    Assert.assertTrue("The queue/app tracking html page should not be empty",
        trackInfo.length() > 0);
  }
}
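These three tests render the SLS web UI's HTML templates (which appear later in this patch) through java.text.MessageFormat, and that is why those templates look odd at first glance: in a MessageFormat pattern, {0}, {1}, ... are argument slots, literal braces must be quoted as '{' and '}', and a doubled single quote '' yields one literal quote in the output. A small self-contained illustration (the pattern string here is invented for the example):

import java.text.MessageFormat;

public class MessageFormatEscapeDemo {
  public static void main(String[] args) {
    // '{' / '}' emit literal braces, '' emits one single quote,
    // and {0} / {1} are replaced by the arguments.
    String pattern = "legends[4][{0}] = ''queue{1}.allocated.memory'';";
    System.out.println(MessageFormat.format(pattern, 0, "sls_queue_1"));
    // prints: legends[4][0] = 'queuesls_queue_1.allocated.memory';
  }
}
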
@ -0,0 +1,50 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!--
  This file contains pool and user allocations for the Fair Scheduler.
  Its format is explained in the Fair Scheduler documentation at
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html
  The documentation also includes a sample config file.
-->

<allocations>
  <user name="jenkins">
    <!-- Limit on running jobs for the user across all pools. If more
      jobs than this are submitted, only the first <maxRunningJobs> will
      be scheduled at any given time. Defaults to infinity or the
      userMaxAppsDefault value set below. -->
    <maxRunningJobs>1000</maxRunningJobs>
  </user>
  <userMaxAppsDefault>1000</userMaxAppsDefault>
  <queue name="sls_queue_1">
    <minResources>1024 mb, 1 vcores</minResources>
    <schedulingMode>fair</schedulingMode>
    <weight>0.25</weight>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
  <queue name="sls_queue_2">
    <minResources>1024 mb, 1 vcores</minResources>
    <schedulingMode>fair</schedulingMode>
    <weight>0.25</weight>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
  <queue name="sls_queue_3">
    <minResources>1024 mb, 1 vcores</minResources>
    <weight>0.5</weight>
    <schedulingMode>fair</schedulingMode>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
</allocations>
@ -0,0 +1,47 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!--
  This file contains configuration properties for the Fair Scheduler.
  They are explained in the Fair Scheduler documentation at
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
  The documentation also includes a sample config file.
-->

<configuration>
  <property>
    <description>Absolute path to allocation file. An allocation file is an XML
      manifest describing queues and their properties, in addition to certain
      policy defaults. This file must be in XML format as described in
      http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
    </description>
    <name>yarn.scheduler.fair.allocation.file</name>
    <value>src/test/resources/fair-scheduler-allocation.xml</value>
  </property>

  <property>
    <description>Whether to use preemption. Note that preemption is experimental
      in the current version. Defaults to false.</description>
    <name>yarn.scheduler.fair.preemption</name>
    <value>true</value>
  </property>

  <property>
    <description>Whether to allow multiple container assignments in one
      heartbeat. Defaults to false.</description>
    <name>yarn.scheduler.fair.assignmultiple</name>
    <value>true</value>
  </property>
</configuration>
@ -0,0 +1,278 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
  body '{' font: 20px sans-serif; '}'
  .axis path,
  .axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
  .axis text '{' font-family: sans-serif; font-size: 20px; '}'
  .line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
  .legend '{'
    padding: 5px;
    font: 18px sans-serif;
    background: yellow;
    box-shadow: 2px 2px 1px #888;
  '}'
  .title '{' font: 24px sans-serif; '}'
  .divborder '{'
    border-width: 1px;
    border-style: solid;
    border-color: black;
    margin-top: 10px
  '}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
  <div class="span10 offset2"><br>
    <input type="button" style="float: right;" value="Stop" onClick="stop()" />
  </div>
</div>
<div class="row">
  <div class="divborder span8" style="margin-left:50px" id="area1"></div>
  <div class="divborder span8" id="area2"></div>
</div>
<div class="row">
  <div class="divborder span8" style="margin-left:50px" id="area3"></div>
  <div class="divborder span8" id="area4"></div>
</div>
<div class="row">
  <div class="divborder span8" style="margin-left:50px" id="area5"></div>
  <div class="divborder span8" id="area6"></div>
</div>
<div class="row">
  <div class="divborder span8" style="margin-left:50px" id="area7"></div>
  <div class="span8" id="area8"></div>
</div><br/><br/>

<script>
  var basetime = 0;
  var running = 1;
  var data = [];
  var width, height;
  var legends = [];
  var titles = [];
  var yLabels = [];
  var isAreas = [];
  var svgs = [];
  var xs = [];
  var ys = [];
  var xAxiss = [];
  var yAxiss = [];
  var lineAreas = [];
  var stacks = [];

  // legends
  legends[0] = [''running.applications'', ''running.containers''];
  legends[1] = [''jvm.free.memory'', ''jvm.max.memory'', ''jvm.total.memory''];
  legends[2] = [''cluster.allocated.memory'', ''cluster.available.memory''];
  legends[3] = [''cluster.allocated.vcores'', ''cluster.available.vcores''];
  legends[4] = [];
  legends[5] = [];
  {0}
  legends[6] = [''scheduler.allocate.timecost'',
    ''scheduler.handle-NODE_ADDED.timecost'',
    ''scheduler.handle-NODE_REMOVED.timecost'',
    ''scheduler.handle-NODE_UPDATE.timecost'',
    ''scheduler.handle-APP_ADDED.timecost'',
    ''scheduler.handle-APP_REMOVED.timecost'',
    ''scheduler.handle-CONTAINER_EXPIRED.timecost''];

  // titles
  titles[0] = ''Cluster running applications & containers'';
  titles[1] = ''JVM memory'';
  titles[2] = ''Cluster allocated & available memory'';
  titles[3] = ''Cluster allocated & available vcores'';
  titles[4] = ''Queue allocated memory'';
  titles[5] = ''Queue allocated vcores'';
  titles[6] = ''Scheduler allocate & handle operation timecost'';

  // y-axis labels
  yLabels[0] = ''Number'';
  yLabels[1] = ''Memory (GB)'';
  yLabels[2] = ''Memory (GB)'';
  yLabels[3] = ''Number'';
  yLabels[4] = ''Memory (GB)'';
  yLabels[5] = ''Number'';
  yLabels[6] = ''Timecost (ms)'';

  // is area?
  isAreas = [0, 0, 0, 0, 1, 1, 0];

  // draw all charts
  for (var i = 0; i < 7; i++) '{'
    drawEachChart(i);
  '}'

  // draw each chart
  function drawEachChart(index) '{'
    var margin = '{'top: 50, right: 250, bottom: 50, left: 70'}';
    width = 750 - margin.left - margin.right;
    height = 420 - margin.top - margin.bottom;

    xs[index] = d3.scale.linear().range([0, width]);
    ys[index] = d3.scale.linear().range([height, 0]);
    xAxiss[index] = d3.svg.axis().scale(xs[index]).orient(''bottom'');
    yAxiss[index] = d3.svg.axis().scale(ys[index]).orient(''left'');

    if (isAreas[index] == 1) '{'
      lineAreas[index] = d3.svg.area()
        .x(function(d) '{' return xs[index](d.time); '}')
        .y0(function(d) '{' return ys[index](d.y0); '}')
        .y1(function(d) '{' return ys[index](d.y0 + d.y); '}');

      stacks[index] = d3.layout.stack()
        .values(function(d) '{' return d.values; '}');
    '}' else '{'
      lineAreas[index] = d3.svg.line()
        .interpolate(''basis'')
        .x(function(d) '{' return xs[index](d.time); '}')
        .y(function(d) '{' return ys[index](d.value); '}');
    '}'

    svgs[index] = d3.select(''#area'' + (index + 1)).append(''svg'')
      .attr(''width'', width + margin.left + margin.right)
      .attr(''height'', height + margin.top + margin.bottom)
      .append(''g'')
      .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');

    // x, y and title
    svgs[index].append(''text'')
      .attr(''transform'', ''translate('' + (width / 2) + '' ,'' +
        (height + margin.bottom - 10 ) + '')'')
      .style(''text-anchor'', ''middle'')
      .text(''Time ({1})'');

    svgs[index].append(''text'')
      .attr(''transform'', ''rotate(-90)'')
      .attr(''y'', 0 - margin.left)
      .attr(''x'', 0 - (height / 2))
      .attr(''dy'', ''1em'')
      .style(''text-anchor'', ''middle'')
      .text(yLabels[index]);

    svgs[index].append(''text'')
      .attr(''x'', (width / 2))
      .attr(''y'', 10 - (margin.top / 2))
      .attr(''text-anchor'', ''middle'')
      .text(titles[index]);
  '}'

  // request data
  function requestData() '{'
    $.ajax('{'url: ''simulateMetrics'',
      success: function(point) '{'
        // update data
        if (basetime == 0) basetime = point.time;
        point.time = (point.time - basetime) / {2};
        data.push(point);

        // clear old
        for (var i = 0; i < 7; i++) '{'
          svgs[i].selectAll(''g.tick'').remove();
          svgs[i].selectAll(''g'').remove();
          var color = d3.scale.category10();
          color.domain(d3.keys(data[0]).filter(function(key) '{'
            return $.inArray(key, legends[i]) !== -1;
          '}'));

          var values;
          if (isAreas[i] == 1) '{'
            values = stacks[i](color.domain().map(function(name) '{'
              return '{'
                name: name,
                values: data.map(function(d) '{'
                  return '{'time: d.time, y: d[name]'}';
                '}')
              '}'
            '}'));
            xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
            ys[i].domain([
              d3.min(values, function(c) '{' return 0; '}'),
              d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
                function(v) '{' return v.y + v.y0; '}'); '}')
            ]);
          '}' else '{'
            values = color.domain().map(function(name) '{'
              return '{'
                name: name,
                values: data.map(function(d) '{'
                  return '{'time: d.time, value: d[name]'}';
                '}')
              '}'
            '}');
            xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
            ys[i].domain([
              d3.min(values, function(c) '{' return 0; '}'),
              d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
                function(v) '{' return v.value; '}'); '}')
            ]);
          '}'

          svgs[i].append(''g'').attr(''class'', ''x axis'')
            .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxiss[i]);

          svgs[i].append(''g'').attr(''class'', ''y axis'').call(yAxiss[i]);

          var value = svgs[i].selectAll(''.path'')
            .data(values).enter().append(''g'').attr(''class'', ''line'');

          if (isAreas[i] == 1) '{'
            value.append(''path'').attr(''class'', ''area'')
              .attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
              .style(''fill'', function(d) '{'return color(d.name); '}');
          '}' else '{'
            value.append(''path'').attr(''class'', ''line'')
              .attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
              .style(''stroke'', function(d) '{'return color(d.name); '}');
          '}'

          // legend
          var legend = svgs[i].append(''g'')
            .attr(''class'', ''legend'')
            .attr(''x'', width + 5)
            .attr(''y'', 25)
            .attr(''height'', 120)
            .attr(''width'', 140);
          legend.selectAll(''g'').data(legends[i])
            .enter()
            .append(''g'')
            .each(function(d, i) '{'
              var g = d3.select(this);
              g.append(''rect'')
                .attr(''x'', width + 5)
                .attr(''y'', i * 20)
                .attr(''width'', 10)
                .attr(''height'', 10)
                .style(''fill'', color(d));
              g.append(''text'')
                .attr(''x'', width + 25)
                .attr(''y'', i * 20 + 8)
                .attr(''height'', 30)
                .attr(''width'', 250)
                .style(''fill'', color(d))
                .text(d);
            '}');
        '}'

        if (running == 1)
          setTimeout(requestData, {3});
      '}',
      cache: false
    '}');
  '}'

  // stop
  function stop() '{'
    running = 0;
  '}'
  requestData();
</script>
</body>
</html>
@ -0,0 +1,50 @@
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
  .td1 '{'
    border-width: 1px;
    padding: 8px;
    border-style: solid;
    border-color: #666666;
    background-color: #dedede;
    width: 50%;
  '}'
  table.gridtable '{'
    font-family: verdana,arial,sans-serif;
    font-size: 11px;
    color: #333333;
    border-width: 1px;
    border-color: #666666;
    border-collapse: collapse;
    margin-top: 80px;
  '}'
  .td2 '{'
    border-width: 1px;
    padding: 8px;
    border-style: solid;
    border-color: #666666;
    background-color: #ffffff;
    width: 50%;
  '}'
</style>
</head>
<body>
  <table class="gridtable" align="center" width="400px">
    <tr>
      <td colspan="2" class="td2" align="center">
        <b>SLS Simulate Information</b>
      </td>
    </tr>
    {0}
    <tr>
      <td align="center" height="80px">
        <a href="simulate">Simulation Charts</a>
      </td>
      <td align="center">
        <a href="track">Tracked Jobs & Queues</a>
      </td>
    </tr>
  </table>
</body>
</html>
@ -0,0 +1,81 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>

  <!-- SLSRunner configuration -->
  <property>
    <name>yarn.sls.runner.pool.size</name>
    <value>100</value>
  </property>

  <!-- Nodes configuration -->
  <property>
    <name>yarn.sls.nm.memory.mb</name>
    <value>10240</value>
  </property>
  <property>
    <name>yarn.sls.nm.vcores</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.sls.nm.heartbeat.interval.ms</name>
    <value>1000</value>
  </property>

  <!-- Apps configuration -->
  <property>
    <name>yarn.sls.am.heartbeat.interval.ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.sls.am.type.mapreduce</name>
    <value>org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator</value>
  </property>

  <!-- Containers configuration -->
  <property>
    <name>yarn.sls.container.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.sls.container.vcores</name>
    <value>1</value>
  </property>

  <!-- metrics -->
  <property>
    <name>yarn.sls.metrics.switch</name>
    <value>ON</value>
  </property>
  <property>
    <name>yarn.sls.metrics.web.address.port</name>
    <value>10001</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.FifoSchedulerMetrics</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.CapacitySchedulerMetrics</value>
  </property>

</configuration>
@ -0,0 +1,193 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
  body '{' font: 20px sans-serif; '}'
  .axis path,
  .axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
  .axis text '{' font-family: sans-serif; font-size: 20px; '}'
  .line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
  .legend '{' padding: 5px; font: 18px sans-serif; background: yellow;
    box-shadow: 2px 2px 1px #888; '}'
  .title '{' font: 24px sans-serif; '}'
  .divborder '{' border-width: 1px; border-style: solid; border-color: black;
    margin-top: 10px '}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
  <div class="offset4 span8"><br/><br/><br/>
    Select Tracked Job/Queue:
    <select id="trackedSelect" onchange="redrawChart()">
      <option>----Queue----</option>
      {0}
      <option>----Job----</option>
      {1}
    </select>
    <input type="button" style="float: right;" value="Stop" onClick="stop()" />
  </div>
</div>
<div class="row">
  <div class="divborder span9 offset4" id="area1"></div>
</div>
<script>
  // global variables
  var basetime = 0;
  var running = 1;
  var para = '''';
  var data = [];
  var path, line, svg;
  var x, y;
  var width, height;
  var xAxis, yAxis;
  var legends = [''usage.memory'', ''demand.memory'', ''maxshare.memory'',
    ''minshare.memory'', ''fairshare.memory''];

  // stop function
  function stop() '{'
    running = 0;
  '}'

  // select changed event
  function redrawChart() '{'
    var value = $(''#trackedSelect'').val();
    if (value.substring(0, ''Job ''.length) === ''Job ''
        || value.substring(0, ''Queue ''.length) === ''Queue '') '{'
      para = value;
      running = 0;
      basetime = 0;
      data = [];
      $(''#area1'').empty();
      drawChart(''Tracking '' + value);
      running = 1;
      requestData();
    '}'
  '}'

  // draw chart
  function drawChart(title) '{'
    // location
    var margin = '{'top: 50, right: 150, bottom: 50, left: 80'}';
    width = 800 - margin.left - margin.right;
    height = 420 - margin.top - margin.bottom;
    x = d3.scale.linear().range([0, width]);
    y = d3.scale.linear().range([height, 0]);
    xAxis = d3.svg.axis().scale(x).orient(''bottom'');
    yAxis = d3.svg.axis().scale(y).orient(''left'');
    // lines
    line = d3.svg.line().interpolate(''basis'')
      .x(function(d) '{' return x(d.time); '}')
      .y(function(d) '{' return y(d.value); '}');
    // create chart
    svg = d3.select(''#area1'').append(''svg'')
      .attr(''width'', width + margin.left + margin.right)
      .attr(''height'', height + margin.top + margin.bottom)
      .append(''g'')
      .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
    // axis labels
    svg.append(''text'')
      .attr(''transform'', ''translate('' + (width / 2) + '','' + (height + margin.bottom - 5 ) + '')'')
      .style(''text-anchor'', ''middle'')
      .text(''Time ({2})'');
    svg.append(''text'')
      .attr(''transform'', ''rotate(-90)'')
      .attr(''y'', 0 - margin.left)
      .attr(''x'', 0 - (height / 2))
      .attr(''dy'', ''1em'')
      .style(''text-anchor'', ''middle'')
      .text(''Memory (GB)'');
    // title
    svg.append(''text'')
      .attr(''x'', (width / 2))
      .attr(''y'', 10 - (margin.top / 2))
      .attr(''text-anchor'', ''middle'')
      .text(title);
  '}'

  // request data
  function requestData() '{'
    $.ajax('{'url: ''trackMetrics?t='' + para,
      success: function(point) '{'
        // clear old
        svg.selectAll(''g.tick'').remove();
        svg.selectAll(''g'').remove();

        if (basetime == 0) basetime = point.time;
        point.time = (point.time - basetime) / {3};
        data.push(point);

        var color = d3.scale.category10();
        color.domain(d3.keys(data[0]).filter(function(key) '{'
          return $.inArray(key, legends) !== -1;
        '}'));

        var values = color.domain().map(function(name) '{'
          return '{'
            name: name,
            values: data.map(function(d) '{'
              return '{' time: d.time, value: d[name]'}';
            '}')
          '}';
        '}');

        // set x/y range
        x.domain(d3.extent(data, function(d) '{' return d.time; '}'));
        y.domain([
          d3.min(values, function(c) '{' return 0 '}'),
          d3.max(values, function(c) '{' return 1.1 * d3.max(c.values, function(v) '{' return v.value; '}'); '}')
        ]);

        svg.append(''g'').attr(''class'', ''x axis'')
          .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxis);
        svg.append(''g'').attr(''class'', ''y axis'').call(yAxis);
        var value = svg.selectAll(''.path'')
          .data(values).enter().append(''g'').attr(''class'', ''line'');

        value.append(''path'').attr(''class'', ''line'')
          .attr(''d'', function(d) '{'return line(d.values); '}')
          .style(''stroke'', function(d) '{'return color(d.name); '}');

        // legend
        var legend = svg.append(''g'')
          .attr(''class'', ''legend'')
          .attr(''x'', width + 5)
          .attr(''y'', 25)
          .attr(''height'', 120)
          .attr(''width'', 180);

        legend.selectAll(''g'').data(legends)
          .enter()
          .append(''g'')
          .each(function(d, i) '{'
            var g = d3.select(this);
            g.append(''rect'')
              .attr(''x'', width + 5)
              .attr(''y'', i * 20)
              .attr(''width'', 10)
              .attr(''height'', 10)
              .style(''fill'', color(d));

            g.append(''text'')
              .attr(''x'', width + 25)
              .attr(''y'', i * 20 + 8)
              .attr(''height'', 30)
              .attr(''width'', 250)
              .style(''fill'', color(d))
              .text(d);
          '}');

        if (running == 1)
          setTimeout(requestData, {4});
      '}',
      cache: false
    '}');
  '}'
</script>
</body>
</html>
@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>

  <property>
    <description>The address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>localhost:18088</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>localhost:18031</value>
  </property>

  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>localhost:18030</value>
  </property>

  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>localhost:18032</value>
  </property>

  <property>
    <description>The address of the RM admin interface.</description>
    <name>yarn.resourcemanager.admin.address</name>
    <value>localhost:18033</value>
  </property>

  <property>
    <description>Set to false to avoid the IP check.</description>
    <name>hadoop.security.token.service.use_ip</name>
    <value>false</value>
  </property>

</configuration>
@ -83,6 +83,11 @@
      <scope>compile</scope>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-sls</artifactId>
      <scope>compile</scope>
    </dependency>
  </dependencies>

  <build>
@ -41,6 +41,7 @@
    <module>hadoop-extras</module>
    <module>hadoop-pipes</module>
    <module>hadoop-openstack</module>
    <module>hadoop-sls</module>
  </modules>

  <build>
@ -9,6 +9,8 @@ Release 2.3.0 - UNRELEASED
    YARN-649. Added a new NM web-service to serve container logs in plain text
    over HTTP. (Sandy Ryza via vinodkv)

    YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu)

  IMPROVEMENTS

    YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza)