hadoop/hadoop-sls/SchedulerLoadSimulator.html

1065 lines
55 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2023-05-24
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop Scheduler Load Simulator &#x2013; YARN Scheduler Load Simulator (SLS)</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20230524" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
&nbsp;| Last Published: 2023-05-24
&nbsp;| Version: 3.4.0-SNAPSHOT
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html">Async Profiler</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
<li class="none">
<a href="../hadoop-huaweicloud/cloud-storage/index.html">Huaweicloud OBS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a>
</li>
<li class="none">
<a href="../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1><a name="Yarn_Scheduler_Load_Simulator_SLS"></a>YARN Scheduler Load Simulator (SLS)</h1>
<ul>
<li><a href="#Yarn_Scheduler_Load_Simulator_SLS">YARN Scheduler Load Simulator (SLS)</a>
<ul>
<li><a href="#Overview">Overview</a>
<ul>
<li><a href="#Overview">Overview</a></li>
<li><a href="#Goals">Goals</a></li>
<li><a href="#Architecture">Architecture</a></li>
<li><a href="#Usecases">Usecases</a></li>
</ul>
</li>
<li><a href="#Usage">Usage</a>
<ul>
<li><a href="#Step_1:_Configure_Hadoop_and_the_simulator">Step 1: Configure Hadoop and the simulator</a></li>
<li><a href="#Step_2:_Run_the_simulator">Step 2: Run the simulator</a></li>
</ul>
</li>
<li><a href="#Metrics">Metrics</a>
<ul>
<li><a href="#Real-time_Tracking">Real-time Tracking</a></li>
<li><a href="#Offline_Analysis">Offline Analysis</a></li>
</ul>
</li>
<li><a href="#Synthetic_Load_Generator">Synthetic Load Generator</a></li>
<li><a href="#Appendix">Appendix</a>
<ul>
<li><a href="#Resources">Resources</a></li>
<li><a href="#SLS_JSON_input_file_format">SLS JSON input file format</a></li>
<li><a href="#SYNTH_JSON_input_file_format">SYNTH JSON input file format</a></li>
<li><a href="#Simulator_input_topology_file_format">Simulator input topology file format</a></li>
</ul>
</li>
</ul>
</li>
</ul><section>
<h2><a name="Overview"></a>Overview</h2><section>
<h3><a name="Overview"></a>Overview</h3>
<p>The YARN scheduler is a fertile area of interest with different implementations, e.g., Fifo, Capacity and Fair schedulers. Meanwhile, several optimizations are also made to improve scheduler performance for different scenarios and workloads. Each scheduler algorithm has its own set of features, and drives scheduling decisions by many factors, such as fairness, capacity guarantee, resource availability, etc. It is very important to evaluate a scheduler algorithm very well before we deploy it in a production cluster. Unfortunately, currently it is non-trivial to evaluate a scheduler algorithm. Evaluating in a real cluster is always time-consuming and costly, and it is also very hard to find a large-enough cluster. Hence, a simulator which can predict how well a scheduler algorithm performs for some specific workload would be quite useful.</p>
<p>The YARN Scheduler Load Simulator (SLS) is such a tool, which can simulate large-scale YARN clusters and application loads on a single machine. This simulator would be invaluable in furthering YARN by providing a tool for researchers and developers to prototype new scheduler features and predict their behavior and performance with a reasonable amount of confidence, thereby aiding rapid innovation. The simulator will exercise the real YARN <code>ResourceManager</code> removing the network factor by simulating <code>NodeManagers</code> and <code>ApplicationMasters</code> via handling and dispatching <code>NM</code>/<code>AMs</code> heartbeat events from within the same JVM. To keep track of scheduler behavior and performance, a scheduler wrapper will wrap the real scheduler.</p>
<p>The size of the cluster and the application load can be loaded from configuration files, which are generated from job history files directly by adopting <a href="../hadoop-rumen/Rumen.html">Apache Rumen</a>.</p>
<p>The simulator will produce real time metrics while executing, including:</p>
<ul>
<li>
<p>Resource usages for whole cluster and each queue, which can be utilized to configure cluster and queue&#x2019;s capacity.</p>
</li>
<li>
<p>The detailed application execution trace (recorded in relation to simulated time), which can be analyzed to understand/validate the scheduler behavior (individual jobs turn around time, throughput, fairness, capacity guarantee, etc.).</p>
</li>
<li>
<p>Several key metrics of the scheduler algorithm, such as the time cost of each scheduler operation (allocate, handle, etc.), which can be utilized by Hadoop developers to find code hotspots and scalability limits.</p>
</li>
</ul></section><section>
<h3><a name="Goals"></a>Goals</h3>
<ul>
<li>
<p>Exercise the scheduler at scale without a real cluster using real job traces.</p>
</li>
<li>
<p>Being able to simulate real workloads.</p>
</li>
</ul></section><section>
<h3><a name="Architecture"></a>Architecture</h3>
<p>The following figure illustrates the implementation architecture of the simulator.</p>
<p><img src="images/sls_arch.png" alt="The architecture of the simulator" /></p>
<p>The simulator takes input of workload traces, or synthetic load distributions, and generates the cluster and applications information. For each NM and AM, the simulator builds a simulator to simulate their running. All NM/AM simulators run in a thread pool. The simulator reuses YARN Resource Manager, and builds a wrapper out of the scheduler. The Scheduler Wrapper can track the scheduler behaviors and generates several logs, which are the outputs of the simulator and can be further analyzed.</p></section><section>
<h3><a name="Usecases"></a>Usecases</h3>
<ul>
<li>
<p>Engineering</p>
<ul>
<li>Verify correctness of scheduler algorithm under load</li>
<li>Cheap/practical way for finding code hotspots/critical-path.</li>
<li>Validate the impact of changes and new features.</li>
<li>Determine what drives the scheduler scalability limits.</li>
</ul>
</li>
<li>
<p>QA</p>
<ul>
<li>Validate scheduler behavior for &#x201c;large&#x201d; clusters and several workload profiles.</li>
</ul>
</li>
<li>
<p>Solutions/Sales.</p>
<ul>
<li>Sizing model for predefined/typical workloads.</li>
<li>Cluster sizing tool using real customer data (job traces).</li>
<li>Determine minimum SLAs under a particular workload.</li>
</ul>
</li>
</ul></section></section><section>
<h2><a name="Usage"></a>Usage</h2>
<p>This section will show how to use the simulator. Here let <code>$HADOOP_ROOT</code> represent the Hadoop install directory. If you build Hadoop yourself, <code>$HADOOP_ROOT</code> is <code>hadoop-dist/target/hadoop-$VERSION</code>. The simulator is located at <code>$HADOOP_ROOT/share/hadoop/tools/sls</code>. The folder <code>sls</code> contains four directories: <code>bin</code>, <code>html</code>, <code>sample-conf</code>, and <code>sample-data</code>.</p>
<ul>
<li>
<p><code>bin</code>: contains running scripts for the simulator.</p>
</li>
<li>
<p><code>html</code>: Users can also reproduce those real-time tracking charts in offline mode. Just upload the <code>realtimetrack.json</code> to <code>$HADOOP_ROOT/share/hadoop/tools/sls/html/showSimulationTrace.html</code>. Because of browser security restrictions, the files <code>realtimetrack.json</code> and <code>showSimulationTrace.html</code> must be placed in the same directory.</p>
</li>
<li>
<p><code>sample-conf</code>: specifies the simulator configurations.</p>
</li>
<li>
<p><code>sample-data</code>: provides an example rumen trace, which can be used to generate inputs of the simulator.</p>
</li>
</ul>
<p>The following sections will describe how to use the simulator step by step. Before starting, make sure that the <code>hadoop</code> command is included in your <code>$PATH</code> environment variable.</p><section>
<h3><a name="Step_1:_Configure_Hadoop_and_the_simulator"></a>Step 1: Configure Hadoop and the simulator</h3>
<p>Before we start, make sure Hadoop and the simulator are configured well. All configuration files for Hadoop and the simulator should be placed in directory <code>$HADOOP_ROOT/etc/hadoop</code>, where the <code>ResourceManager</code> and YARN scheduler load their configurations. Directory <code>$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/</code> provides several example configurations, that can be used to start a demo.</p>
<p>For configuration of Hadoop and YARN scheduler, users can refer to Yarn&#x2019;s website (<a class="externalLink" href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/">http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/</a>).</p>
<p>For the simulator, it loads configuration information from file <code>$HADOOP_ROOT/etc/hadoop/sls-runner.xml</code>.</p>
<p>Here we illustrate each configuration parameter in <code>sls-runner.xml</code>. Note that <code>$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/sls-runner.xml</code> contains all the default values for these configuration parameters.</p>
<ul>
<li>
<p><code>yarn.sls.runner.pool.size</code></p>
<p>The simulator uses a thread pool to simulate the <code>NM</code> and <code>AM</code> running, and this parameter specifies the number of threads in the pool.</p>
</li>
<li>
<p><code>yarn.sls.nm.memory.mb</code></p>
<p>The total memory for each <code>NMSimulator</code>.</p>
</li>
<li>
<p><code>yarn.sls.nm.vcores</code></p>
<p>The total vCores for each <code>NMSimulator</code>.</p>
</li>
<li>
<p><code>yarn.sls.nm.heartbeat.interval.ms</code></p>
<p>The heartbeat interval for each <code>NMSimulator</code>.</p>
</li>
<li>
<p><code>yarn.sls.am.heartbeat.interval.ms</code></p>
<p>The heartbeat interval for each <code>AMSimulator</code>.</p>
</li>
<li>
<p><code>yarn.sls.am.type.mapreduce</code></p>
<p>The <code>AMSimulator</code> implementation for MapReduce-like applications. Users can specify implementations for other types of applications.</p>
</li>
<li>
<p><code>yarn.sls.container.memory.mb</code></p>
<p>The memory required for each container simulator.</p>
</li>
<li>
<p><code>yarn.sls.container.vcores</code></p>
<p>The vCores required for each container simulator.</p>
</li>
<li>
<p><code>yarn.sls.runner.metrics.switch</code></p>
<p>The simulator introduces <a class="externalLink" href="http://metrics.codahale.com/">Metrics</a> to measure the behaviors of critical components and operations. This field specifies whether we open (<code>ON</code>) or close (<code>OFF</code>) the Metrics running.</p>
</li>
<li>
<p><code>yarn.sls.metrics.web.address.port</code></p>
<p>The port used by simulator to provide real-time tracking. The default value is 10001.</p>
</li>
<li>
<p><code>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</code></p>
<p>The implementation of scheduler metrics of Fifo Scheduler.</p>
</li>
<li>
<p><code>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</code></p>
<p>The implementation of scheduler metrics of Fair Scheduler.</p>
</li>
<li>
<p><code>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</code></p>
<p>The implementation of scheduler metrics of Capacity Scheduler.</p>
</li>
</ul></section><section>
<h3><a name="Step_2:_Run_the_simulator"></a>Step 2: Run the simulator</h3>
<p>The simulator supports two types of input files: the rumen traces and its own input traces. The script to start the simulator is <code>slsrun.sh</code>.</p>
<div class="source">
<div class="source">
<pre>$ cd $HADOOP_ROOT/share/hadoop/tools/sls
$ bin/slsrun.sh
Usage: slsrun.sh &lt;OPTIONS&gt;
--tracetype=&lt;SYNTH | SLS | RUMEN&gt;
--tracelocation=&lt;FILE1,FILE2,...&gt;
(deprecated --input-rumen=&lt;FILE1,FILE2,...&gt; | --input-sls=&lt;FILE1,FILE2,...&gt;)
--output-dir=&lt;SLS_SIMULATION_OUTPUT_DIRECTORY&gt;
[--nodes=&lt;SLS_NODES_FILE&gt;]
[--track-jobs=&lt;JOBID1,JOBID2,...&gt;]
[--print-simulation]
</pre></div></div>
<ul>
<li>
<p><code>--input-rumen</code>: The input rumen trace files. Users can input multiple files, separated by comma. One example trace is provided in <code>$HADOOP_ROOT/share/hadoop/tools/sls/sample-data/2jobs2min-rumen-jh.json</code>. This is equivalent to <code>--tracetype=RUMEN --tracelocation=&lt;path_to_trace&gt;</code>.</p>
</li>
<li>
<p><code>--input-sls</code>: The simulator&#x2019;s own file format. The simulator also provides a tool to convert rumen traces to sls traces (<code>rumen2sls.sh</code>). Refer to the appendix for an example of an sls input json file. This is equivalent to <code>--tracetype=SLS --tracelocation=&lt;path_to_trace&gt;</code>.</p>
</li>
<li>
<p><code>--tracetype</code>: This is the new way to configure the trace generation and takes values RUMEN, SLS, or SYNTH, to trigger the three types of load generation.</p>
</li>
<li>
<p><code>--tracelocation</code>: Path to the input file, matching the tracetype above.</p>
</li>
<li>
<p><code>--output-dir</code>: The output directory for generated running logs and metrics.</p>
</li>
<li>
<p><code>--nodes</code>: The cluster topology. By default, the simulator will use the topology fetched from the input json files. Users can specify a new topology by setting this parameter. Refer to the appendix for the topology file format.</p>
</li>
<li>
<p><code>--track-jobs</code>: The particular jobs that will be tracked during simulator running, separated by comma.</p>
</li>
<li>
<p><code>--print-simulation</code>: Whether to print out simulation information before simulator running, including number of nodes, applications, tasks, and information for each application.</p>
<p>In comparison to the rumen format, the sls format is much simpler and users can easily generate various workloads. The simulator also provides a tool to convert rumen traces to sls traces.</p>
<div class="source">
<div class="source">
<pre>$ bin/rumen2sls.sh
--rumen-file=&lt;RUMEN_FILE&gt;
--output-dir=&lt;SLS_OUTPUT_DIRECTORY&gt;
[--output-prefix=&lt;SLS_FILE_PREFIX&gt;]
</pre></div></div>
</li>
<li>
<p><code>--rumen-file</code>: The rumen format file. One example trace is provided in directory <code>sample-data</code>.</p>
</li>
<li>
<p><code>--output-dir</code>: The output directory of generated simulation traces. Two files will be generated in this output directory, including one trace file including all job and task information, and another file showing the topology information.</p>
</li>
<li>
<p><code>--output-prefix</code>: The prefix of the generated files. The default value is &#x201c;sls&#x201d;, and the two generated files are <code>sls-jobs.json</code> and <code>sls-nodes.json</code>.</p>
</li>
</ul></section></section><section>
<h2><a name="Metrics"></a>Metrics</h2>
<p>The YARN Scheduler Load Simulator has integrated <a class="externalLink" href="http://metrics.codahale.com/">Metrics</a> to measure the behaviors of critical components and operations, including running applications and containers, cluster available resources, scheduler operation timecost, etc. If the switch <code>yarn.sls.runner.metrics.switch</code> is set to <code>ON</code>, <code>Metrics</code> will run and output its logs in the <code>--output-dir</code> directory specified by users. Users can track this information during simulator running, and can also analyze these logs after running to evaluate the scheduler performance.</p><section>
<h3><a name="Real-time_Tracking"></a>Real-time Tracking</h3>
<p>The simulator provides an interface for tracking its running in real-time. Users can go to <code>http://host:port/simulate</code> to track the whole run, and <code>http://host:port/track</code> to track a particular job or queue. Here the <code>host</code> is the place where we run the simulator, and <code>port</code> is the value configured by <code>yarn.sls.metrics.web.address.port</code> (default value is 10001).</p>
<p>Here we&#x2019;ll illustrate each chart shown in the webpage.</p>
<p>The first figure describes the number of running applications and containers.</p>
<p><img src="images/sls_running_apps_containers.png" alt="Number of running applications/containers" /></p>
<p>The second figure describes the allocated and available resources (memory) in the cluster.</p>
<p><img src="images/sls_cluster_memory.png" alt="Cluster Resource (Memory)" /></p>
<p>The third figure describes the allocated resource for each queue. Here we have three queues: sls_queue_1, sls_queue_2, and sls_queue_3. The first two queues are configured with 25% share, while the last one has 50% share.</p>
<p><img src="images/sls_queue_allocated_memory.png" alt="Queue Allocated Resource (Memory)" /></p>
<p>The fourth figure describes the timecost for each scheduler operation.</p>
<p><img src="images/sls_scheduler_operation_timecost.png" alt="Scheduler Operation Timecost" /></p>
<p>Finally, we measure the memory used by the simulator.</p>
<p><img src="images/sls_JVM.png" alt="JVM Memory" /></p>
<p>The simulator also provides an interface for tracking some particular jobs and queues. Go to <code>http://&lt;Host&gt;:&lt;Port&gt;/track</code> to get this information.</p>
<p>Here the first figure illustrates the resource usage information for queue <code>SLS_Queue_1</code>.</p>
<p><img src="images/sls_track_queue.png" alt="Tracking Queue sls_queue_3" /></p>
<p>The second figure illustrates the resource usage information for job <code>job_1369942127770_0653</code>.</p>
<p><img src="images/sls_track_job.png" alt="Tracking Job job_1369942127770_0653" /></p></section><section>
<h3><a name="Offline_Analysis"></a>Offline Analysis</h3>
<p>After the simulator finishes, all logs are saved in the output directory specified by <code>--output-dir</code> in <code>$HADOOP_ROOT/share/hadoop/tools/sls/bin/slsrun.sh</code>.</p>
<ul>
<li>
<p>File <code>realtimetrack.json</code>: records all real-time tracking logs every 1 second.</p>
</li>
<li>
<p>File <code>jobruntime.csv</code>: records all jobs&#x2019; start and end time in the simulator.</p>
</li>
<li>
<p>Folder <code>metrics</code>: logs generated by the Metrics.</p>
</li>
</ul>
<p>Users can also reproduce those real-time tracking charts in offline mode. Just upload the <code>realtimetrack.json</code> to <code>$HADOOP_ROOT/share/hadoop/tools/sls/html/showSimulationTrace.html</code>. Due to browser security restrictions, the files <code>realtimetrack.json</code> and <code>showSimulationTrace.html</code> need to be placed in the same directory.</p></section></section><section>
<h2><a name="Synthetic_Load_Generator"></a>Synthetic Load Generator</h2>
<p>The Synthetic Load Generator complements the extensive nature of SLS-native and RUMEN traces, by providing a distribution-driven generation of load. The load generator is organized as a JobStoryProducer (compatible with rumen, and thus gridmix for later integration). We seed the Random number generator so that results are randomized but deterministic&#x2014;hence reproducible. We organize the jobs being generated around a <i>/workloads/job_class</i> hierarchy, which makes it easy to group jobs with similar behaviors and categorize them (e.g., jobs with long running containers, or map-only computations, etc.). The user can control average and standard deviations for many of the important parameters, such as number of mappers/reducers, duration of mapper/reducers, size (mem/cpu) of containers, chance of reservation, etc. We use weighted-random sampling (whenever we pick among a small number of options) or LogNormal distributions (to avoid negative values) when we pick from wide ranges of values&#x2014;see appendix on LogNormal distributions.</p>
<p>The SYNTH mode of SLS is very convenient for generating very large loads without the need for extensive input files. This makes it easy to explore a wide range of use cases (e.g., imagine simulating 100k jobs, and in different runs simply tuning the average number of mappers, or average task duration), in an efficient and compact way.</p></section><section>
<h2><a name="Resource_Type_in_SLS"></a>Resource Type in SLS</h2>
<p>This section talks about how to use resource type in SLS.</p></section><section>
<h2><a name="Configure_Resource_Manager"></a>Configure Resource Manager</h2>
<p>This is the same as configuring resource types for a real cluster. Configure the item <code>yarn.resource-types</code> in yarn-site.xml as in the following example.</p>
<div class="source">
<div class="source">
<pre> &lt;property&gt;
&lt;name&gt;yarn.resource-types&lt;/name&gt;
&lt;value&gt;resource-type1, resource-type2&lt;/value&gt;
&lt;/property&gt;
</pre></div></div>
</section><section>
<h2><a name="Configure_Node_Manager"></a>Configure Node Manager</h2>
<p>Specify the size of each resource in each node by adding the relevant items to sls-runner.xml, as in the following example. The values apply to every node in SLS. The default values for resources other than memory and vcores are 0.</p>
<div class="source">
<div class="source">
<pre> &lt;property&gt;
&lt;name&gt;yarn.sls.nm.resource-type1&lt;/name&gt;
&lt;value&gt;10&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;yarn.sls.nm.resource-type2&lt;/name&gt;
&lt;value&gt;10&lt;/value&gt;
&lt;/property&gt;
</pre></div></div>
</section><section>
<h2><a name="Specify_Resource_in_SLS_JSON_input"></a>Specify Resource in SLS JSON input</h2>
<p>Resource Type is supported in the SLS JSON input format, but not in the other two formats (SYNTH and RUMEN). To make it work in the SLS JSON input format, you can specify resource sizes for both task containers and the AM container. Here is an example.</p>
<div class="source">
<div class="source">
<pre>{
&quot;job.start.ms&quot; : 0,
&quot;am.memory-mb&quot;: 2048,
&quot;am.vcores&quot;: 2,
&quot;am.resource-type1&quot;: 2,
&quot;am.resource-type2&quot;: 2,
&quot;job.tasks&quot; : [ {
&quot;container.duration.ms&quot;: 5000,
&quot;container.memory-mb&quot;: 1024,
&quot;container.vcores&quot;: 1,
&quot;container.resource-type1&quot;: 1,
&quot;container.resource-type2&quot;: 1
} ]
}
</pre></div></div>
</section><section>
<h2><a name="Appendix"></a>Appendix</h2><section>
<h3><a name="Resources"></a>Resources</h3>
<p><a class="externalLink" href="https://issues.apache.org/jira/browse/YARN-1021">YARN-1021</a> is the main JIRA that introduces YARN Scheduler Load Simulator to Hadoop YARN project. <a class="externalLink" href="https://issues.apache.org/jira/browse/YARN-6363">YARN-6363</a> is the main JIRA that introduces the Synthetic Load Generator to SLS.</p></section><section>
<h3><a name="SLS_JSON_input_file_format"></a>SLS JSON input file format</h3>
<p>Here we provide an example format of the sls json file, which contains 2 jobs. The first job has 3 map tasks and the second one has 2 map tasks.</p>
<div class="source">
<div class="source">
<pre>{
&quot;num.nodes&quot;: 3, // total number of nodes in the cluster
&quot;num.racks&quot;: 1 // total number of racks in the cluster, it divides num.nodes into the racks evenly, optional, the default value is 1
}
{
&quot;am.type&quot; : &quot;mapreduce&quot;, // type of AM, optional, the default value is &quot;mapreduce&quot;
&quot;job.start.ms&quot; : 0, // job start time
&quot;job.end.ms&quot; : 95375, // job finish time, optional, the default value is 0
&quot;job.queue.name&quot; : &quot;sls_queue_1&quot;, // the queue job will be submitted to
&quot;job.id&quot; : &quot;job_1&quot;, // the job id used to track the job, optional. The default value, a zero-based integer increasing with number of jobs, is used if this is not specified or job.count &gt; 1
&quot;job.user&quot; : &quot;default&quot;, // user, optional, the default value is &quot;default&quot;
&quot;job.count&quot; : 1, // number of jobs, optional, the default value is 1
&quot;job.tasks&quot; : [ {
&quot;count&quot;: 1, // number of tasks, optional, the default value is 1
&quot;container.host&quot; : &quot;/default-rack/node1&quot;, // host the container asks for
&quot;container.start.ms&quot; : 6664, // container start time, optional
&quot;container.end.ms&quot; : 23707, // container finish time, optional
&quot;container.duration.ms&quot;: 50000, // duration of the container, optional if start and end time are specified
&quot;container.priority&quot; : 20, // priority of the container, optional, the default value is 20
&quot;container.type&quot; : &quot;map&quot; // type of the container, could be &quot;map&quot; or &quot;reduce&quot;, optional, the default value is &quot;map&quot;
}, {
&quot;container.host&quot; : &quot;/default-rack/node3&quot;,
&quot;container.start.ms&quot; : 6665,
&quot;container.end.ms&quot; : 21593,
&quot;container.priority&quot; : 20,
&quot;container.type&quot; : &quot;map&quot;
}, {
&quot;container.host&quot; : &quot;/default-rack/node2&quot;,
&quot;container.start.ms&quot; : 68770,
&quot;container.end.ms&quot; : 86613,
&quot;container.priority&quot; : 20,
&quot;container.type&quot; : &quot;map&quot;
} ]
}
{
&quot;am.type&quot; : &quot;mapreduce&quot;,
&quot;job.start.ms&quot; : 105204,
&quot;job.end.ms&quot; : 197256,
&quot;job.queue.name&quot; : &quot;sls_queue_2&quot;,
&quot;job.id&quot; : &quot;job_2&quot;,
&quot;job.user&quot; : &quot;default&quot;,
&quot;job.tasks&quot; : [ {
&quot;container.host&quot; : &quot;/default-rack/node1&quot;,
&quot;container.start.ms&quot; : 111822,
&quot;container.end.ms&quot; : 133985,
&quot;container.priority&quot; : 20,
&quot;container.type&quot; : &quot;map&quot;
}, {
&quot;container.host&quot; : &quot;/default-rack/node2&quot;,
&quot;container.start.ms&quot; : 111788,
&quot;container.end.ms&quot; : 131377,
&quot;container.priority&quot; : 20,
&quot;container.type&quot; : &quot;map&quot;
} ]
}
</pre></div></div>
</section><section>
<h3><a name="SYNTH_JSON_input_file_format"></a>SYNTH JSON input file format</h3>
<p>Here we provide an example format of the synthetic generator json file. We use <i>(json-non-conforming)</i> inline comments to explain the use of each parameter.</p>
<div class="source">
<div class="source">
<pre>{
&quot;description&quot; : &quot;tiny jobs workload&quot;, //description of the meaning of this collection of workloads
&quot;num_nodes&quot; : 10, //total nodes in the simulated cluster
&quot;nodes_per_rack&quot; : 4, //number of nodes in each simulated rack
&quot;num_jobs&quot; : 10, // total number of jobs being simulated
&quot;rand_seed&quot; : 2, //the random seed used for deterministic randomized runs
// a list of &#x201c;workloads&#x201d;, each of which has job classes, and temporal properties
&quot;workloads&quot; : [
{
&quot;workload_name&quot; : &quot;tiny-test&quot;, // name of the workload
&quot;workload_weight&quot;: 0.5, // used for weighted random selection of which workload to sample from
&quot;queue_name&quot; : &quot;sls_queue_1&quot;, //queue the job will be submitted to
//different classes of jobs for this workload
&quot;job_classes&quot; : [
{
&quot;class_name&quot; : &quot;class_1&quot;, //name of the class
&quot;class_weight&quot; : 1.0, //used for weighted random selection of class within workload
//next group controls average and standard deviation of a LogNormal distribution that
//determines the number of mappers and reducers for the job.
&quot;mtasks_avg&quot; : 5,
&quot;mtasks_stddev&quot; : 1,
&quot;rtasks_avg&quot; : 5,
&quot;rtasks_stddev&quot; : 1,
//average and stdev input param of LogNormal distribution controlling job duration
&quot;dur_avg&quot; : 60,
&quot;dur_stddev&quot; : 5,
//average and stdev input param of LogNormal distribution controlling mappers and reducers durations
&quot;mtime_avg&quot; : 10,
&quot;mtime_stddev&quot; : 2,
&quot;rtime_avg&quot; : 20,
&quot;rtime_stddev&quot; : 4,
//average and stdev input param of LogNormal distribution controlling memory and cores for map and reduce
&quot;map_max_memory_avg&quot; : 1024,
&quot;map_max_memory_stddev&quot; : 0.001,
&quot;reduce_max_memory_avg&quot; : 2048,
&quot;reduce_max_memory_stddev&quot; : 0.001,
&quot;map_max_vcores_avg&quot; : 1,
&quot;map_max_vcores_stddev&quot; : 0.001,
&quot;reduce_max_vcores_avg&quot; : 2,
&quot;reduce_max_vcores_stddev&quot; : 0.001,
//probability of running this job with a reservation
&quot;chance_of_reservation&quot; : 0.5,
//input parameters of LogNormal distribution that determines the deadline slack (as a multiplier of job duration)
&quot;deadline_factor_avg&quot; : 10.0,
&quot;deadline_factor_stddev&quot; : 0.001
}
],
// for each workload determines with what probability each time bucket is picked to choose the job starttime.
// In the example below the jobs are twice as likely to start in the first minute as in the second minute
// of simulation, and then zero chance thereafter.
&quot;time_distribution&quot; : [
{ &quot;time&quot; : 1, &quot;weight&quot; : 66 },
{ &quot;time&quot; : 60, &quot;weight&quot; : 33 },
{ &quot;time&quot; : 120, &quot;jobs&quot; : 0 }
]
}
]
}
</pre></div></div>
</section><section>
<h3><a name="Simulator_input_topology_file_format"></a>Simulator input topology file format</h3>
<p>Here is an example input topology file which has 3 nodes organized in 1 rack.</p>
<div class="source">
<div class="source">
<pre>{
&quot;rack&quot; : &quot;default-rack&quot;,
&quot;nodes&quot; : [ {
&quot;node&quot; : &quot;node1&quot;
}, {
&quot;node&quot; : &quot;node2&quot;
}, {
&quot;node&quot; : &quot;node3&quot;
}]
}
</pre></div></div>
</section><section>
<h3><a name="Notes_on_LogNormal_distribution:"></a>Notes on LogNormal distribution:</h3>
<p>LogNormal distributions represent well many of the parameters we see in practice (e.g., most jobs have a small number of mappers, but a few might be very large, and a few very small, but greater than zero). It is, however, worth noticing that it might be tricky to use, as the average is typically on the right side of the peak (most common value) of the distribution, because the distribution has a one-sided tail.</p></section></section>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2023
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>