hadoop/hadoop-yarn/hadoop-yarn-site/SharedCache.html

715 lines
38 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2023-03-27
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.4.0-SNAPSHOT &#x2013; YARN Shared Cache</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20230327" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2023-03-27
&nbsp;| Version: 3.4.0-SNAPSHOT
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html">Async Profiler</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
<li class="none">
<a href="../../hadoop-huaweicloud/cloud-storage/index.html">Huaweicloud OBS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a>
</li>
<li class="none">
<a href="../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>YARN Shared Cache</h1>
<ul>
<li><a href="#Overview">Overview</a></li>
<li><a href="#Current_Status_and_Future_Plans">Current Status and Future Plans</a></li>
<li><a href="#Architecture">Architecture</a>
<ul>
<li><a href="#The_Shared_Cache_Client">The Shared Cache Client</a></li>
<li><a href="#The_Shared_Cache_HDFS_Directory">The Shared Cache HDFS Directory</a></li>
<li><a href="#Shared_Cache_Manager_.28SCM.29">Shared Cache Manager (SCM)</a></li>
<li><a href="#The_Shared_Cache_uploader_and_localization">The Shared Cache uploader and localization</a></li></ul></li>
<li><a href="#Developing_YARN_applications_with_the_Shared_Cache">Developing YARN applications with the Shared Cache</a></li>
<li><a href="#Administrating_the_Shared_Cache">Administrating the Shared Cache</a>
<ul>
<li><a href="#Setting_up_the_shared_cache">Setting up the shared cache</a></li>
<li><a href="#Configuration_parameters">Configuration parameters</a></li></ul></li></ul>
<section>
<h2><a name="Overview"></a>Overview</h2>
<p>The YARN Shared Cache provides the facility to upload and manage shared application resources to HDFS in a safe and scalable manner. YARN applications can leverage resources uploaded by other applications or previous runs of the same application without having to re&#xad;upload and localize identical files multiple times. This will save network resources and reduce YARN application startup time.</p></section><section>
<h2><a name="Current_Status_and_Future_Plans"></a>Current Status and Future Plans</h2>
<p>Currently the YARN Shared Cache is released and ready to use. The major components are implemented and have been deployed in a large-scale production setting. There are still some pieces missing (i.e. strong authentication). These missing features will be implemented as part of a follow-up phase 2 effort. Please see <a class="externalLink" href="https://issues.apache.org/jira/browse/YARN-7282">YARN-7282</a> for more information.</p></section><section>
<h2><a name="Architecture"></a>Architecture</h2>
<p>The shared cache feature consists of 4 major components:</p>
<ol style="list-style-type: decimal">
<li>The shared cache client.</li>
<li>The HDFS directory that acts as a cache.</li>
<li>The shared cache manager (aka. SCM).</li>
<li>The localization service and uploader.</li>
</ol><section>
<h3><a name="The_Shared_Cache_Client"></a>The Shared Cache Client</h3>
<p>YARN application developers and users, should interact with the shared cache using the shared cache client. This client is responsible for interacting with the shared cache manager, computing the checksum of application resources, and claiming application resources in the shared cache. Once an application has claimed a resource, it is free to use that resource for the life-cycle of the application. Please see the SharedCacheClient.java javadoc for further documentation.</p></section><section>
<h3><a name="The_Shared_Cache_HDFS_Directory"></a>The Shared Cache HDFS Directory</h3>
<p>The shared cache HDFS directory stores all of the shared cache resources. It is protected by HDFS permissions and is globally readable, but writing is restricted to a trusted user. This HDFS directory is only modified by the shared cache manager and the resource uploader on the node manager. Resources are spread across a set of subdirectories using the resources&#x2019;s checksum:</p>
<div class="source">
<div class="source">
<pre>/sharedcache/a/8/9/a896857d078/foo.jar
/sharedcache/5/0/f/50f11b09f87/bar.jar
/sharedcache/a/6/7/a678cb1aa8f/job.jar
</pre></div></div>
</section><section>
<h3><a name="Shared_Cache_Manager_.28SCM.29"></a>Shared Cache Manager (SCM)</h3>
<p>The shared cache manager is responsible for serving requests from the client and managing the contents of the shared cache. It looks after both the meta data as well as the persisted resources in HDFS. It is made up of two major components, a back end store and a cleaner service. The SCM runs as a separate daemon process that can be placed on any node in the cluster. This allows for administrators to start/stop/upgrade the SCM without affecting other YARN components (i.e. the resource manager or node managers).</p>
<p>The back end store is responsible for maintaining and persisting metadata about the shared cache. This includes the resources in the cache, when a resource was last used and a list of applications that are currently using the resource. The implementation for the backing store is pluggable and it currently uses an in-memory store that recreates its state after a restart.</p>
<p>The cleaner service maintains the persisted resources in HDFS by ensuring that resources that are no longer used are removed from the cache. It scans the resources in the cache periodically and evicts resources if they are both stale and there are no live applications currently using the application.</p></section><section>
<h3><a name="The_Shared_Cache_uploader_and_localization"></a>The Shared Cache uploader and localization</h3>
<p>The shared cache uploader is a service that runs on the node manager and adds resources to the shared cache. It is responsible for verifying a resources checksum, uploading the resource to HDFS and notifying the shared cache manager that a resource has been added to the cache. It is important to note that the uploader service is asynchronous from the container launch and does not block the startup of a yarn application. In addition adding things to the cache is done in a best effort way and does not impact running applications. Once the uploader has placed a resource in the shared cache, YARN uses the normal node manager localization mechanism to make resources available to the application.</p></section></section><section>
<h2><a name="Developing_YARN_applications_with_the_Shared_Cache"></a>Developing YARN applications with the Shared Cache</h2>
<p>To support the YARN shared cache, an application must use the shared cache client during application submission. The shared cache client returns a URL corresponding to a resource if it is in the shared cache. To use the cached resource, a YARN application simply uses the cached URL to create a LocalResource object and sets setShouldBeUploadedToSharedCache to true during application submission.</p>
<p>For example, here is how you would create a LocalResource using a cached URL:</p>
<div class="source">
<div class="source">
<pre>String localPathChecksum = sharedCacheClient.getFileChecksum(localPath);
URL cachedResource = sharedCacheClient.use(appId, localPathChecksum);
LocalResource resource = LocalResource.newInstance(cachedResource,
LocalResourceType.FILE, LocalResourceVisibility.PUBLIC
size, timestamp, null, true);
</pre></div></div>
</section><section>
<h2><a name="Administrating_the_Shared_Cache"></a>Administrating the Shared Cache</h2><section>
<h3><a name="Setting_up_the_shared_cache"></a>Setting up the shared cache</h3>
<p>An administrator can initially set up the shared cache by following these steps:</p>
<ol style="list-style-type: decimal">
<li>Create an HDFS directory for the shared cache (default: /sharedcache).</li>
<li>Set the shared cache directory permissions to 0755.</li>
<li>Ensure that the shared cache directory is owned by the user that runs the shared cache manager daemon and the node manager.</li>
<li>In the yarn-site.xml file, set <i>yarn.sharedcache.enabled</i> to true and <i>yarn.sharedcache.root-dir</i> to the directory specified in step 1. For more configuration parameters, see the configuration parameters section.</li>
<li>Start the shared cache manager:</li>
</ol>
<div class="source">
<div class="source">
<pre>/hadoop/bin/yarn --daemon start sharedcachemanager
</pre></div></div>
</section><section>
<h3><a name="Configuration_parameters"></a>Configuration parameters</h3>
<p>The configuration parameters can be found in yarn-default.xml and should be set in the yarn-site.xml file. Here are a list of configuration parameters and their defaults:</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th>Name </th>
<th> Description </th>
<th> Default value</th></tr>
</thead><tbody>
<tr class="b">
<td>yarn.sharedcache.enabled </td>
<td> Whether the shared cache is enabled </td>
<td> false</td></tr>
<tr class="a">
<td>yarn.sharedcache.root-dir </td>
<td> The root directory for the shared cache </td>
<td> /sharedcache</td></tr>
<tr class="b">
<td>yarn.sharedcache.nested-level </td>
<td> The level of nested directories before getting to the checksum directories. It must be non-negative. </td>
<td> 3</td></tr>
<tr class="a">
<td>yarn.sharedcache.store.class </td>
<td> The implementation to be used for the SCM store </td>
<td> org.apache.hadoop.yarn.server.sharedcachemanager.store.InMemorySCMStore</td></tr>
<tr class="b">
<td>yarn.sharedcache.app-checker.class </td>
<td> The implementation to be used for the SCM app-checker </td>
<td> org.apache.hadoop.yarn.server.sharedcachemanager.RemoteAppChecker</td></tr>
<tr class="a">
<td>yarn.sharedcache.store.in-memory.staleness-period-mins </td>
<td> A resource in the in-memory store is considered stale if the time since the last reference exceeds the staleness period. This value is specified in minutes. </td>
<td> 10080</td></tr>
<tr class="b">
<td>yarn.sharedcache.store.in-memory.initial-delay-mins </td>
<td> Initial delay before the in-memory store runs its first check to remove dead initial applications. Specified in minutes. </td>
<td> 10</td></tr>
<tr class="a">
<td>yarn.sharedcache.store.in-memory.check-period-mins </td>
<td> The frequency at which the in-memory store checks to remove dead initial applications. Specified in minutes. </td>
<td> 720</td></tr>
<tr class="b">
<td>yarn.sharedcache.admin.address </td>
<td> The address of the admin interface in the SCM (shared cache manager) </td>
<td> 0.0.0.0:8047</td></tr>
<tr class="a">
<td>yarn.sharedcache.admin.thread-count </td>
<td> The number of threads used to handle SCM admin interface (1 by default) </td>
<td> 1</td></tr>
<tr class="b">
<td>yarn.sharedcache.webapp.address </td>
<td> The address of the web application in the SCM (shared cache manager) </td>
<td> 0.0.0.0:8788</td></tr>
<tr class="a">
<td>yarn.sharedcache.cleaner.period-mins </td>
<td> The frequency at which a cleaner task runs. Specified in minutes. </td>
<td> 1440</td></tr>
<tr class="b">
<td>yarn.sharedcache.cleaner.initial-delay-mins </td>
<td> Initial delay before the first cleaner task is scheduled. Specified in minutes. </td>
<td> 10</td></tr>
<tr class="a">
<td>yarn.sharedcache.cleaner.resource-sleep-ms </td>
<td> The time to sleep between processing each shared cache resource. Specified in milliseconds. </td>
<td> 0</td></tr>
<tr class="b">
<td>yarn.sharedcache.uploader.server.address </td>
<td> The address of the node manager interface in the SCM (shared cache manager) </td>
<td> 0.0.0.0:8046</td></tr>
<tr class="a">
<td>yarn.sharedcache.uploader.server.thread-count </td>
<td> The number of threads used to handle shared cache manager requests from the node manager (50 by default) </td>
<td> 50</td></tr>
<tr class="b">
<td>yarn.sharedcache.client-server.address </td>
<td> The address of the client interface in the SCM (shared cache manager) </td>
<td> 0.0.0.0:8045</td></tr>
<tr class="a">
<td>yarn.sharedcache.client-server.thread-count </td>
<td> The number of threads used to handle shared cache manager requests from clients (50 by default) </td>
<td> 50</td></tr>
<tr class="b">
<td>yarn.sharedcache.checksum.algo.impl </td>
<td> The algorithm used to compute checksums of files (SHA-256 by default) </td>
<td> org.apache.hadoop.yarn.sharedcache.ChecksumSHA256Impl</td></tr>
<tr class="a">
<td>yarn.sharedcache.nm.uploader.replication.factor </td>
<td> The replication factor for the node manager uploader for the shared cache (10 by default) </td>
<td> 10</td></tr>
<tr class="b">
<td>yarn.sharedcache.nm.uploader.thread-count </td>
<td> The number of threads used to upload files from a node manager instance (20 by default) </td>
<td> 20</td></tr>
</tbody>
</table></section></section>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2023
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>