hadoop/hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html

1285 lines
75 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2023-03-05
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.4.0-SNAPSHOT &#x2013; HDFS Router-based Federation</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20230305" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="Apache Hadoop" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="The Apache Software Foundation" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2023-03-05
&nbsp;| Version: 3.4.0-SNAPSHOT
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html">Async Profiler</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
<li class="none">
<a href="../../hadoop-huaweicloud/cloud-storage/index.html">Huaweicloud OBS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a>
</li>
<li class="none">
<a href="../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>HDFS Router-based Federation</h1>
<ul>
<li><a href="#Introduction">Introduction</a></li>
<li><a href="#Architecture">Architecture</a>
<ul>
<li><a href="#Example_flow">Example flow</a></li>
<li><a href="#Router">Router</a>
<ul>
<li><a href="#Federated_interface">Federated interface</a></li>
<li><a href="#Router_heartbeat">Router heartbeat</a></li>
<li><a href="#NameNode_heartbeat">NameNode heartbeat</a></li>
<li><a href="#Availability_and_fault_tolerance">Availability and fault tolerance</a></li>
<li><a href="#Interfaces">Interfaces</a></li></ul></li>
<li><a href="#Quota_management">Quota management</a></li>
<li><a href="#State_Store">State Store</a></li>
<li><a href="#Security">Security</a></li>
<li><a href="#Isolation">Isolation</a></li></ul></li>
<li><a href="#Deployment">Deployment</a>
<ul>
<li><a href="#Mount_table_management">Mount table management</a>
<ul>
<li><a href="#Quotas">Quotas</a></li>
<li><a href="#Multiple_subclusters">Multiple subclusters</a></li></ul></li>
<li><a href="#Disabling_nameservices">Disabling nameservices</a></li>
<li><a href="#Router_server_generically_refresh">Router server generically refresh</a></li>
<li><a href="#Router_state_dump">Router state dump</a></li></ul></li>
<li><a href="#Client_configuration">Client configuration</a></li>
<li><a href="#NameNode_configuration">NameNode configuration</a></li>
<li><a href="#Router_configuration">Router configuration</a>
<ul>
<li><a href="#RPC_server">RPC server</a>
<ul>
<li><a href="#Connection_to_the_Namenodes">Connection to the Namenodes</a></li></ul></li>
<li><a href="#Admin_server">Admin server</a></li>
<li><a href="#HTTP_Server">HTTP Server</a></li>
<li><a href="#State_Store">State Store</a></li>
<li><a href="#Routing">Routing</a></li>
<li><a href="#Namenode_monitoring">Namenode monitoring</a></li>
<li><a href="#Quota">Quota</a></li>
<li><a href="#Security">Security</a></li>
<li><a href="#Isolation">Isolation</a></li></ul></li>
<li><a href="#Metrics">Metrics</a></li>
<li><a href="#Router_Federation_Rename">Router Federation Rename</a></li></ul>
<section>
<h2><a name="Introduction"></a>Introduction</h2>
<p>NameNodes have scalability limits because of the metadata overhead comprised of inodes (files and directories) and file blocks, the number of Datanode heartbeats, and the number of HDFS RPC client requests. The common solution is to split the filesystem into smaller subclusters <a href="../hadoop-hdfs/Federation.html">HDFS Federation</a> and provide a federated view <a href="../hadoop-hdfs/ViewFs.html">ViewFs</a>. The problem is how to maintain the split of the subclusters (e.g., namespace partition), which forces users to connect to multiple subclusters and manage the allocation of folders/files to them.</p></section><section>
<h2><a name="Architecture"></a>Architecture</h2>
<p>A natural extension to this partitioned federation is to add a layer of software responsible for federating the namespaces. This extra layer allows users to access any subcluster transparently, lets subclusters manage their own block pools independently, and will support rebalancing of data across subclusters later (see more info in <a class="externalLink" href="https://issues.apache.org/jira/browse/HDFS-13123">HDFS-13123</a>). The subclusters in RBF are not required to be independent HDFS clusters; a normal federation cluster (with multiple block pools) or a mixed cluster with federation and independent clusters is also allowed. To accomplish these goals, the federation layer directs block accesses to the proper subcluster, maintains the state of the namespaces, and provides mechanisms for data rebalancing. This layer must be scalable, highly available, and fault tolerant.</p>
<p>This federation layer comprises multiple components. The <i>Router</i> component has the same interface as a NameNode and forwards the client requests to the correct subcluster, based on ground-truth information from a State Store. The <i>State Store</i> combines a remote <i>Mount Table</i> (in the flavor of <a href="../hadoop-hdfs/ViewFs.html">ViewFs</a>, but shared between clients) and utilization (load/capacity) information about the subclusters. This approach has the same architecture as <a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN federation</a>.</p>
<p><img src="./images/routerfederation.png" alt="Router-based Federation Sequence Diagram" /></p><section>
<h3><a name="Example_flow"></a>Example flow</h3>
<p>The simplest configuration deploys a Router on each NameNode machine. The Router monitors the local NameNode and its state and heartbeats to the State Store. When a regular DFS client contacts any of the Routers to access a file in the federated filesystem, the Router checks the Mount Table in the State Store (i.e., the local cache) to find out which subcluster contains the file. Then it checks the Membership table in the State Store (i.e., the local cache) for the NameNode responsible for the subcluster. After it has identified the correct NameNode, the Router proxies the request. The client accesses Datanodes directly.</p></section><section>
<h3><a name="Router"></a>Router</h3>
<p>There can be multiple Routers in the system with soft state. Each Router has two roles:</p>
<ul>
<li>Federated interface: expose a single, global NameNode interface to the clients and forward the requests to the active NameNode in the correct subcluster</li>
<li>NameNode heartbeat: maintain the information about a NameNode in the State Store</li>
</ul><section>
<h4><a name="Federated_interface"></a>Federated interface</h4>
<p>The Router receives a client request, checks the State Store for the correct subcluster, and forwards the request to the active NameNode of that subcluster. The reply from the NameNode then flows in the opposite direction. The Routers are stateless and can be behind a load balancer. For health checking, you can use the /isActive endpoint as a health probe (e.g. <a class="externalLink" href="http://ROUTER_HOSTNAME:ROUTER_PORT/isActive">http://ROUTER_HOSTNAME:ROUTER_PORT/isActive</a>). For performance, the Router also caches remote mount table entries and the state of the subclusters. To make sure that changes have been propagated to all Routers, each Router heartbeats its state to the State Store.</p>
<p>The communications between the Routers and the State Store are cached (with timed expiration for freshness). This improves the performance of the system.</p></section><section>
<h4><a name="Router_heartbeat"></a>Router heartbeat</h4>
<p>The Router periodically heartbeats its state to the State Store.</p></section><section>
<h4><a name="NameNode_heartbeat"></a>NameNode heartbeat</h4>
<p>For this role, the Router periodically checks the state of a NameNode (usually on the same server) and reports its high availability (HA) state and load/space status to the State Store. Note that this is an optional role, as a Router can be independent of any subcluster. For performance with NameNode HA, the Router uses the high availability state information in the State Store to forward the request to the NameNode that is most likely to be active. Note that this service can be embedded into the NameNode itself to simplify the operation.</p>
<h4><a name="Availability_and_fault_tolerance"></a>Availability and fault tolerance</h4>
<p>The Router operates with failures at multiple levels.</p>
<ul>
<li>
<p><b>Federated interface HA:</b> The Routers are stateless and metadata operations are atomic at the NameNodes. If a Router becomes unavailable, any Router can take over for it. The clients configure their DFS HA client (e.g., ConfiguredFailoverProvider or RequestHedgingProxyProvider) with all the Routers in the federation as endpoints.</p>
</li>
<li>
<p><b>Unavailable State Store:</b> If a Router cannot contact the State Store, it will enter into a Safe Mode state which disallows it from serving requests. Clients will treat Routers in Safe Mode as if they were Standby NameNodes and try another Router. There is a manual way to manage the Safe Mode for the Router.</p>
</li>
</ul>
<p>The Safe Mode state can be managed by using the following command:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -safemode enter | leave | get
</pre></div></div>
<ul>
<li>
<p><b>NameNode heartbeat HA:</b> For high availability and flexibility, multiple Routers can monitor the same NameNode and heartbeat the information to the State Store. This increases clients&#x2019; resiliency to stale information, should a Router fail. Conflicting NameNode information in the State Store is resolved by each Router via a quorum.</p>
</li>
<li>
<p><b>Unavailable NameNodes:</b> If a Router cannot contact the active NameNode, then it will try the other NameNodes in the subcluster. It will first try those reported as standby and then the unavailable ones. If the Router cannot reach any NameNode, then it throws an exception.</p>
</li>
<li>
<p><b>Expired NameNodes:</b> If a NameNode heartbeat has not been recorded in the State Store for a multiple of the heartbeat interval, the monitoring Router will record that the NameNode has expired and no Routers will attempt to access it. If an updated heartbeat is subsequently recorded for the NameNode, the monitoring Router will restore the NameNode from the expired state.</p>
</li>
</ul></section><section>
<h4><a name="Interfaces"></a>Interfaces</h4>
<p>To interact with the users and the administrators, the Router exposes multiple interfaces.</p>
<ul>
<li>
<p><b>RPC:</b> The Router RPC implements the most common interfaces clients use to interact with HDFS. The current implementation has been tested using analytics workloads written in plain MapReduce, Spark, and Hive (on Tez, Spark, and MapReduce). Advanced functions like snapshot, encryption and tiered storage are left for future versions. All unimplemented functions will throw exceptions.</p>
</li>
<li>
<p><b>Admin:</b> Administrators can query information from clusters and add/remove entries from the mount table over RPC. This interface is also exposed through the command line to get and modify information from the federation.</p>
</li>
<li>
<p><b>Web UI:</b> The Router exposes a Web UI visualizing the state of the federation, mimicking the current NameNode UI. It displays information about the mount table, membership information about each subcluster, and the status of the Routers.</p>
</li>
<li>
<p><b>WebHDFS:</b> The Router provides the HDFS REST interface (WebHDFS) in addition to the RPC one.</p>
</li>
<li>
<p><b>JMX:</b> It exposes metrics through JMX mimicking the NameNode. This is used by the Web UI to get the cluster status.</p>
</li>
</ul>
<p>Some operations are not available in Router-based federation. The Router throws exceptions for those. Examples users may encounter include the following.</p>
<ul>
<li>Rename file/folder in two different nameservices.</li>
<li>Copy file/folder in two different nameservices.</li>
<li>Write into a file/folder being rebalanced.</li>
</ul></section></section><section>
<h3><a name="Quota_management"></a>Quota management</h3>
<p>Federation supports and controls global quota at mount table level. For performance reasons, the Router caches the quota usage and updates it periodically. These quota usage values will be used for quota-verification during each WRITE RPC call invoked in RouterRpcServer. See <a href="../hadoop-hdfs/HdfsQuotaAdminGuide.html">HDFS Quotas Guide</a> for quota details.</p>
<p>Note: When global quota is enabled, setting or clearing sub-cluster&#x2019;s quota directly is not recommended since Router Admin server will override sub-cluster&#x2019;s quota with global quota.</p></section><section>
<h3><a name="State_Store"></a>State Store</h3>
<p>The (logically centralized, but physically distributed) State Store maintains:</p>
<ul>
<li>The state of the subclusters in terms of their block access load, available disk space, HA state, etc.</li>
<li>The mapping between folder/files and subclusters, i.e. the remote mount table.</li>
</ul>
<p>The backend of the State Store is pluggable. We leverage the fault tolerance of the backend implementations. The main information stored in the State Store and its implementation:</p>
<ul>
<li>
<p><b>Membership</b>: The membership information encodes the state of the NameNodes in the federation. This includes information about the subcluster, such as storage capacity and the number of nodes. The Router periodically heartbeats this information about one or more NameNodes. Given that multiple Routers can monitor a single NameNode, the heartbeat from every Router is stored. The Routers apply a quorum of the data when querying this information from the State Store. The Routers discard the entries older than a certain threshold (e.g., ten Router heartbeat periods).</p>
</li>
<li>
<p><b>Mount Table</b>: This table hosts the mapping between folders and subclusters. It is similar to the mount table in <a href="../hadoop-hdfs/ViewFs.html">ViewFs</a> where it specifies the federated folder, the destination subcluster and the path in that folder.</p>
</li>
</ul></section><section>
<h3><a name="Security"></a>Security</h3>
<p>Router supports security similar to <a href="../hadoop-common/SecureMode.html">current security model</a> in HDFS. This feature is available for both RPC and Web based calls. It has the capability to proxy to underlying secure HDFS clusters.</p>
<p>Similar to Namenode, support exists for both Kerberos and token based authentication for clients connecting to routers. Router internally relies on existing security related configs of <code>core-site.xml</code> and <code>hdfs-site.xml</code> to support this feature. In addition to that, each router needs to be configured with its own keytab and principal.</p>
<p>For token based authentication, router issues delegation tokens to upstream clients without communicating with downstream namenodes. Router uses its own credentials to securely proxy to downstream namenode on behalf of upstream real user. Router principal has to be configured as a superuser in all secure downstream namenodes. Refer <a href="../hadoop-common/Superusers.html">here</a> to configure proxy user for namenode. Along with that, user owning router daemons should be configured with the same identity as namenode process itself. Refer <a href="../hadoop-hdfs/HdfsPermissionsGuide.html#The_Super-User">here</a> for details. Router relies on a state store to distribute tokens across all routers. Apart from the default implementation provided, users can plug in their own implementation of state store for token management. Default implementation relies on zookeeper for token management. Since a large router/zookeeper cluster could potentially hold millions of tokens, <code>jute.maxbuffer</code> system property that zookeeper clients rely on should be appropriately configured in router daemons.</p>
<p>See the Apache JIRA ticket <a class="externalLink" href="https://issues.apache.org/jira/browse/HDFS-13532">HDFS-13532</a> for more information on this feature.</p></section><section>
<h3><a name="Isolation"></a>Isolation</h3>
<p>Router supports assignment of the dedicated number of RPC handlers to achieve isolation for all downstream nameservices it is configured to proxy. Since large or busy clusters may have relatively higher RPC traffic to the namenode compared to other clusters&#x2019; namenodes, this feature, if enabled, allows admins to configure a higher number of RPC handlers for busy clusters. If dedicated handlers are not assigned for specific nameservices, equal distribution of RPC handlers is done for all configured nameservices. <b>Note:</b> Fanout calls are treated as targeting a special nameservice, thus can be configured with handlers as well.</p>
<p>If a downstream namenode is slow/busy enough that permits are unavailable, routers would throw a StandbyException to the client. This would in turn trigger a failover behavior at the client side and clients would connect to a different router in the cluster. This offers a positive effect of automatically load balancing RPCs across all router nodes. This is important to ensure that a single router does not become a bottleneck in case of unhealthy namenodes and all handlers available in the entire router cluster are utilized.</p>
<p>Users can configure handlers based on steady state load that individual downstream namenodes expect and can introduce more routers to the cluster to handle more RPCs overall. Because of the bouncing behavior that clients automatically get in this feature in an event where certain namenodes are overloaded, good clients connecting to good namenodes will always continue to use RPC lanes dedicated to them. For badly behaving namenodes or backfill jobs that put spiky loads on namenodes, more routers could potentially be added with a higher than usual handler count to deal with the surge in traffic for specific nameservices if needed.</p>
<p>Overall the isolation feature is exposed via a configuration dfs.federation.router.handler.isolation.enable. The default value of this feature will be &#x201c;false&#x201d;. Users can also introduce their own fairness policy controller for custom allocation of handlers to various nameservices.</p>
<p>See the Apache JIRA ticket <a class="externalLink" href="https://issues.apache.org/jira/browse/HDFS-14090">HDFS-14090</a> for more information on this feature.</p></section></section><section>
<h2><a name="Deployment"></a>Deployment</h2>
<p>By default, the Router is ready to take requests and monitor the NameNode in the local machine. It needs to know the State Store endpoint by setting <code>dfs.federation.router.store.driver.class</code>. The rest of the options are documented in <a href="../hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>.</p>
<p>Once the Router is configured, it can be started:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs --daemon start dfsrouter
</pre></div></div>
<p>And to stop it:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs --daemon stop dfsrouter
</pre></div></div>
<section>
<h3><a name="Mount_table_management"></a>Mount table management</h3>
<p>The mount table entries are pretty much the same as in <a href="../hadoop-hdfs/ViewFs.html">ViewFs</a>. Please make sure the downstream namespace path exists before creating mount table entry pointing to it. A good practice for simplifying the management is to name the federated namespace with the same names as the destination namespaces. For example, if we want to mount <code>/data/app1</code> in the federated namespace, it is recommended to have that same name as in the destination namespace.</p>
<p>The federation admin tool supports managing the mount table. For example, to create three mount points and list them:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /tmp ns1 /tmp
[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /data/app1 ns2 /data/app1
[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /data/app2 ns3 /data/app2
[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -ls
</pre></div></div>
<p>It also supports mount points that disallow writes:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /readonly ns1 / -readonly
</pre></div></div>
<p>If a mount point is not set, the Router will map it to the default namespace <code>dfs.federation.router.default.nameserviceId</code>.</p>
<p>Mount tables have UNIX-like <i>permissions</i>, which restrict which users and groups have access to the mount point. Write permissions allow users to add, update or remove mount points. Read permissions allow users to list mount points. Execute permissions are unused.</p>
<p>Mount table permission can be set by following command:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /tmp ns1 /tmp -owner root -group supergroup -mode 0755
</pre></div></div>
<p>The option mode is UNIX-style permissions for the mount table. Permissions are specified in octal, e.g. 0755. By default, this is set to 0755.</p><section>
<h4><a name="Quotas"></a>Quotas</h4>
<p>Router-based federation supports global quota at mount table level. Mount table entries may span multiple subclusters and the global quota will be accounted across these subclusters.</p>
<p>The federation admin tool supports setting quotas for specified mount table entries:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -setQuota /path -nsQuota 100 -ssQuota 1024
</pre></div></div>
<p>The above command means that we allow the path to have a maximum of 100 files/directories and use at most 1024 bytes of storage space. The parameter for <i>ssQuota</i> supports multiple size-unit suffixes (e.g. 1k is 1KB, 5m is 5MB). If no suffix is specified then bytes is assumed.</p>
<p>Set storage type quota for specified mount table entry:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -setStorageTypeQuota &lt;path&gt; -storageType &lt;storage type&gt;
</pre></div></div>
<p>Remove quota for specified mount table entry:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -clrQuota &lt;path&gt;
</pre></div></div>
<p>Remove storage type quota for specified mount table entry:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -clrStorageTypeQuota &lt;path&gt;
</pre></div></div>
<p>Ls command will show below information for each mount table entry:</p>
<div class="source">
<div class="source">
<pre>Source Destinations Owner Group Mode Quota/Usage
/path ns0-&gt;/path root supergroup rwxr-xr-x [NsQuota: 50/0, SsQuota: 100 B/0 B]
</pre></div></div>
<p>Mount table cache is refreshed periodically but it can also be refreshed by executing refresh command:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -refresh
</pre></div></div>
<p>The above command will refresh cache of the connected router. This command is redundant when mount table refresh service is enabled as the service will always keep the cache updated.</p></section><section>
<h4><a name="Multiple_subclusters"></a>Multiple subclusters</h4>
<p>A mount point also supports mapping multiple subclusters. For example, to create a mount point that stores files in subclusters <code>ns1</code> and <code>ns2</code>.</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /data ns1,ns2 /data -order SPACE
</pre></div></div>
<p>When listing <code>/data</code>, it will show all the folders and files in both subclusters. To decide where to create a new file/folder, it uses the order parameter, which currently supports the following methods:</p>
<ul>
<li>HASH: Follow consistent hashing in the first level. Deeper levels will be in the same subcluster as the parent.</li>
<li>LOCAL: Try to write data in the local subcluster.</li>
<li>RANDOM: Random subcluster. This is usually useful for balancing the load across subclusters. Folders are created in all subclusters.</li>
<li>HASH_ALL: Follow consistent hashing at all the levels. This approach tries to balance the reads and writes evenly across subclusters. Folders are created in all subclusters.</li>
<li>SPACE: Try to write data in the subcluster with the most available space. Folders are created in all subclusters.</li>
</ul>
<p>For the hash-based approaches, the difference is that HASH would make all the files/folders within a folder belong to the same subcluster while HASH_ALL will spread all files under a mount point. For example, assuming we have a HASH mount point for <code>/data/hash</code>, files and folders under <code>/data/hash/folder0</code> will all be in the same subcluster. On the other hand, a HASH_ALL mount point for <code>/data/hash_all</code> will spread files under <code>/data/hash_all/folder0</code> across all the subclusters for that mount point (subfolders will be created in all subclusters).</p>
<p>RANDOM can be used for reading and writing data from/into different subclusters. The common use for this approach is to have the same data in multiple subclusters and balance the reads across subclusters. For example, if thousands of containers need to read the same data (e.g., a library), one can use RANDOM to read the data from any of the subclusters.</p>
<p>To determine which subcluster contains a file:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -getDestination /user/user1/file.txt
</pre></div></div>
<p>Note that consistency of the data across subclusters is not guaranteed by the Router. By default, if one subcluster is unavailable, writes may fail if they target that subcluster. To allow writing in another subcluster, one can make the mount point fault tolerant:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -add /data ns1,ns2 /data -order HASH_ALL -faulttolerant
</pre></div></div>
<p>Note that this can lead to a file being written in multiple subclusters or a folder missing in one. One needs to be aware of the possibility of these inconsistencies and target this <code>faulttolerant</code> approach to resilient paths. An example for this is the <code>/app-logs</code> folder which will mostly write once into a subfolder.</p></section></section><section>
<h3><a name="Disabling_nameservices"></a>Disabling nameservices</h3>
<p>To prevent accessing a nameservice (subcluster), it can be disabled from the federation. For example, one can disable <code>ns1</code>, list it and enable it again:</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -nameservice disable ns1
[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -getDisabledNameservices
[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -nameservice enable ns1
</pre></div></div>
<p>This is useful when decommissioning subclusters or when one subcluster is misbehaving (e.g., low performance or unavailability).</p></section><section>
<h3><a name="Router_server_generically_refresh"></a>Router server generically refresh</h3>
<p>The following command triggers a runtime refresh of the resource specified by &lt;key&gt; on &lt;host:ipc_port&gt;. For example, to enable white list checking, we just need to send a refresh command rather than restarting the router server.</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -refreshRouterArgs &lt;host:ipc_port&gt; &lt;key&gt; [arg1..argn]
</pre></div></div>
</section><section>
<h3><a name="Router_state_dump"></a>Router state dump</h3>
<p>To diagnose the current state of the routers, you can use the dumpState command. It generates a text dump of the records in the State Store. Since it uses the configuration to find and read the state store, it is often easiest to use the machine where the routers run. The command runs locally, so the routers do not have to be up to use this command.</p>
<div class="source">
<div class="source">
<pre>[hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -dumpState
</pre></div></div>
</section></section><section>
<h2><a name="Client_configuration"></a>Client configuration</h2>
<p>For clients to use the federated namespace, they need to create a new one that points to the routers. For example, a cluster with 4 namespaces <b>ns0, ns1, ns2, ns3</b>, can add a new one to <b>hdfs-site.xml</b> called <b>ns-fed</b> which points to two of the routers:</p>
<div class="source">
<div class="source">
<pre>&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;dfs.nameservices&lt;/name&gt;
&lt;value&gt;ns0,ns1,ns2,ns3,ns-fed&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.ha.namenodes.ns-fed&lt;/name&gt;
&lt;value&gt;r1,r2&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.namenode.rpc-address.ns-fed.r1&lt;/name&gt;
&lt;value&gt;router1:rpc-port&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.namenode.rpc-address.ns-fed.r2&lt;/name&gt;
&lt;value&gt;router2:rpc-port&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.client.failover.proxy.provider.ns-fed&lt;/name&gt;
&lt;value&gt;org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider&lt;/value&gt;
&lt;/property&gt;
&lt;property&gt;
&lt;name&gt;dfs.client.failover.random.order&lt;/name&gt;
&lt;value&gt;true&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
<p>The <code>dfs.client.failover.random.order</code> set to <code>true</code> allows distributing the load evenly across the routers.</p>
<p>With this setting a user can interact with <code>ns-fed</code> as a regular namespace:</p>
<div class="source">
<div class="source">
<pre>$ $HADOOP_HOME/bin/hdfs dfs -ls hdfs://ns-fed/
/tmp
/data
</pre></div></div>
<p>This federated namespace can also be set as the default one at <b>core-site.xml</b> using <code>fs.defaultFS</code>.</p></section><section>
<h2><a name="NameNode_configuration"></a>NameNode configuration</h2>
<p>In order for the system to support data-locality, you must configure your NameNodes so that they will trust the routers to supply the user&#x2019;s client IP address. <code>dfs.namenode.ip-proxy-users</code> defines a comma separated list of users that are allowed to provide the client ip address via the caller context.</p>
<div class="source">
<div class="source">
<pre>&lt;configuration&gt;
&lt;property&gt;
&lt;name&gt;dfs.namenode.ip-proxy-users&lt;/name&gt;
&lt;value&gt;hdfs&lt;/value&gt;
&lt;/property&gt;
&lt;/configuration&gt;
</pre></div></div>
</section><section>
<h2><a name="Router_configuration"></a>Router configuration</h2>
<p>One can add the configurations for Router-based federation to <b>hdfs-rbf-site.xml</b>. The main options are documented in <a href="../hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>. The configuration values are described in this section.</p><section>
<h3><a name="RPC_server"></a>RPC server</h3>
<p>The RPC server to receive connections from the clients.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.default.nameserviceId </td>
<td align="left"> </td>
<td align="left"> Nameservice identifier of the default subcluster to monitor. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.rpc.enable </td>
<td align="left"> <code>true</code> </td>
<td align="left"> If <code>true</code>, the RPC service to handle client requests in the router is enabled. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.rpc-address </td>
<td align="left"> 0.0.0.0:8888 </td>
<td align="left"> RPC address that handles all clients requests. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.rpc-bind-host </td>
<td align="left"> 0.0.0.0 </td>
<td align="left"> The actual address the RPC server will bind to. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.handler.count </td>
<td align="left"> 10 </td>
<td align="left"> The number of server threads for the router to handle RPC requests from clients. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.handler.queue.size </td>
<td align="left"> 100 </td>
<td align="left"> The size of the queue for the number of handlers to handle RPC client requests. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.reader.count </td>
<td align="left"> 1 </td>
<td align="left"> The number of readers for the router to handle RPC client requests. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.reader.queue.size </td>
<td align="left"> 100 </td>
<td align="left"> The size of the queue for the number of readers for the router to handle RPC client requests. </td></tr>
</tbody>
</table><section>
<h4><a name="Connection_to_the_Namenodes"></a>Connection to the Namenodes</h4>
<p>The Router forwards the client requests to the NameNodes. It uses a pool of connections to reduce the latency of creating them.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.connection.pool-size </td>
<td align="left"> 1 </td>
<td align="left"> Size of the pool of connections from the router to namenodes. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.connection.clean.ms </td>
<td align="left"> 10000 </td>
<td align="left"> Time interval, in milliseconds, to check if the connection pool should remove unused connections. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.connection.pool.clean.ms </td>
<td align="left"> 60000 </td>
<td align="left"> Time interval, in milliseconds, to check if the connection manager should remove unused connection pools. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.enable.multiple.socket </td>
<td align="left"> false </td>
<td align="left"> If true, ConnectionPool will use a new socket when creating a new connection for the same user. It is best used together with dfs.federation.router.max.concurrency.per.connection. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.max.concurrency.per.connection </td>
<td align="left"> 1 </td>
<td align="left"> The maximum number of requests that a connection can handle concurrently. </td></tr>
</tbody>
</table></section></section><section>
<h3><a name="Admin_server"></a>Admin server</h3>
<p>The administration server to manage the Mount Table.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.admin.enable </td>
<td align="left"> <code>true</code> </td>
<td align="left"> If <code>true</code>, the RPC admin service to handle client requests in the router is enabled. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.admin-address </td>
<td align="left"> 0.0.0.0:8111 </td>
<td align="left"> RPC address that handles the admin requests. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.admin-bind-host </td>
<td align="left"> 0.0.0.0 </td>
<td align="left"> The actual address the RPC admin server will bind to. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.admin.handler.count </td>
<td align="left"> 1 </td>
<td align="left"> The number of server threads for the router to handle RPC requests from admin. </td></tr>
</tbody>
</table></section><section>
<h3><a name="HTTP_Server"></a>HTTP Server</h3>
<p>The HTTP Server to provide Web UI and the HDFS REST interface (<a href="../hadoop-hdfs/WebHDFS.html">WebHDFS</a>) for the clients. The default URL is &#x201c;<code>http://router_host:50071</code>&#x201d;.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.http.enable </td>
<td align="left"> <code>true</code> </td>
<td align="left"> If <code>true</code>, the HTTP service to handle client requests in the router is enabled. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.http-address </td>
<td align="left"> 0.0.0.0:50071 </td>
<td align="left"> HTTP address that handles the web requests to the Router. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.http-bind-host </td>
<td align="left"> 0.0.0.0 </td>
<td align="left"> The actual address the HTTP server will bind to. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.https-address </td>
<td align="left"> 0.0.0.0:50072 </td>
<td align="left"> HTTPS address that handles the web requests to the Router. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.https-bind-host </td>
<td align="left"> 0.0.0.0 </td>
<td align="left"> The actual address the HTTPS server will bind to. </td></tr>
</tbody>
</table></section><section>
<h3><a name="State_Store"></a>State Store</h3>
<p>The connection to the State Store and the internal caching at the Router.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.store.enable </td>
<td align="left"> <code>true</code> </td>
<td align="left"> If <code>true</code>, the Router connects to the State Store. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.store.serializer </td>
<td align="left"> <code>org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreSerializerPBImpl</code> </td>
<td align="left"> Class to serialize State Store records. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.store.driver.class </td>
<td align="left"> <code>org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl</code> </td>
<td align="left"> Class to implement the State Store. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.store.connection.test </td>
<td align="left"> 60000 </td>
<td align="left"> How often to check for the connection to the State Store in milliseconds. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.cache.ttl </td>
<td align="left"> 60000 </td>
<td align="left"> How often to refresh the State Store caches in milliseconds. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.store.membership.expiration </td>
<td align="left"> 300000 </td>
<td align="left"> Expiration time in milliseconds for a membership record. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.mount-table.cache.update </td>
<td align="left"> false </td>
<td align="left"> If true, Mount table cache is updated whenever a mount table entry is added, modified or removed for all the routers. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.mount-table.cache.update.timeout </td>
<td align="left"> 1m </td>
<td align="left"> Max time to wait for all the routers to finish their mount table cache update. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.mount-table.cache.update.client.max.time </td>
<td align="left"> 5m </td>
<td align="left"> Max time a RouterClient connection can be cached. </td></tr>
</tbody>
</table></section><section>
<h3><a name="Routing"></a>Routing</h3>
<p>Forwarding client requests to the right subcluster.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.file.resolver.client.class </td>
<td align="left"> <code>org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver</code> </td>
<td align="left"> Class to resolve files to subclusters. To enable multiple subclusters for a mount point, set to org.apache.hadoop.hdfs.server.federation.resolver.MultipleDestinationMountTableResolver. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.namenode.resolver.client.class </td>
<td align="left"> <code>org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver</code> </td>
<td align="left"> Class to resolve the namenode for a subcluster. </td></tr>
</tbody>
</table></section><section>
<h3><a name="Namenode_monitoring"></a>Namenode monitoring</h3>
<p>Monitor the namenodes in the subclusters for forwarding the client requests.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.heartbeat.enable </td>
<td align="left"> <code>true</code> </td>
<td align="left"> If <code>true</code>, the Router periodically heartbeats its state to the State Store. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.namenode.heartbeat.enable </td>
<td align="left"> </td>
<td align="left"> If <code>true</code>, the Router gets namenode heartbeats and sends them to the State Store. If not explicitly specified, it takes the same value as <code>dfs.federation.router.heartbeat.enable</code>. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.heartbeat.interval </td>
<td align="left"> 5000 </td>
<td align="left"> How often the Router should heartbeat into the State Store in milliseconds. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.monitor.namenode </td>
<td align="left"> </td>
<td align="left"> The identifier of the namenodes to monitor and heartbeat. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.monitor.localnamenode.enable </td>
<td align="left"> <code>true</code> </td>
<td align="left"> If <code>true</code>, the Router should monitor the namenode in the local machine. </td></tr>
</tbody>
</table>
<p>Note: It is recommended to configure <i>dfs.nameservice.id</i> if <i>dfs.federation.router.monitor.localnamenode.enable</i> is enabled. This will allow the Router to find the local node directly. Otherwise, it will find the nameservice Id by matching namenode RPC address with the local node address. If multiple addresses are matched, the Router will fail to start. In addition, if the local node is in HA mode, it is recommended to configure <i>dfs.ha.namenode.id</i>.</p></section><section>
<h3><a name="Quota"></a>Quota</h3>
<p>Global quota supported in federation.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.quota.enable </td>
<td align="left"> <code>false</code> </td>
<td align="left"> If <code>true</code>, the quota system is enabled in the Router. In that case, setting or clearing sub-cluster&#x2019;s quota directly is not recommended since Router Admin server will override sub-cluster&#x2019;s quota with global quota.</td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.quota-cache.update.interval </td>
<td align="left"> 60s </td>
<td align="left"> How often the Router updates quota cache. This setting supports multiple time unit suffixes. If no suffix is specified then milliseconds is assumed. </td></tr>
</tbody>
</table></section><section>
<h3><a name="Security"></a>Security</h3>
<p>Kerberos and Delegation token supported in federation.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.keytab.file </td>
<td align="left"> </td>
<td align="left"> The keytab file used by router to login as its service principal. The principal name is configured with &#x2018;dfs.federation.router.kerberos.principal&#x2019;.</td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.kerberos.principal </td>
<td align="left"> </td>
<td align="left"> The Router service principal. This is typically set to <a class="externalLink" href="mailto:router/_HOST@REALM.TLD">router/_HOST@REALM.TLD</a>. Each Router will substitute _HOST with its own fully qualified hostname at startup. The _HOST placeholder allows using the same configuration setting on all Routers in an HA setup. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.kerberos.principal.hostname </td>
<td align="left"> </td>
<td align="left"> The hostname for the Router containing this configuration file. Will be different for each machine. Defaults to current hostname. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.kerberos.internal.spnego.principal </td>
<td align="left"> <code>${dfs.web.authentication.kerberos.principal}</code> </td>
<td align="left"> The server principal used by the Router for web UI SPNEGO authentication when Kerberos security is enabled. This is typically set to <a class="externalLink" href="mailto:HTTP/_HOST@REALM.TLD">HTTP/_HOST@REALM.TLD</a>. The SPNEGO server principal begins with the prefix HTTP/ by convention. If the value is &#x2018;*&#x2019;, the web server will attempt to login with every principal specified in the keytab file &#x2018;dfs.web.authentication.kerberos.keytab&#x2019;. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.secret.manager.class </td>
<td align="left"> <code>org.apache.hadoop.hdfs.server.federation.router.security.token.ZKDelegationTokenSecretManagerImpl</code> </td>
<td align="left"> Class to implement the state store for delegation tokens. Default implementation uses zookeeper as the backend to store delegation tokens. </td></tr>
</tbody>
</table></section><section>
<h3><a name="Isolation"></a>Isolation</h3>
<p>Isolation and dedicated assignment of RPC handlers across all configured downstream nameservices. The sum of these numbers must be strictly smaller than the total number of router handlers (configured by dfs.federation.router.handler.count).</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.fairness.policy.controller.class </td>
<td align="left"> <code>org.apache.hadoop.hdfs.server.federation.fairness.NoRouterRpcFairnessPolicyController</code> </td>
<td align="left"> Default handler allocation model to be used if the isolation feature is enabled. It is recommended to use <code>org.apache.hadoop.hdfs.server.federation.fairness.StaticRouterRpcFairnessPolicyController</code> to fully use the feature. </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.fairness.handler.count.<i>EXAMPLENAMESERVICE</i> </td>
<td align="left"> </td>
<td align="left"> Dedicated handlers assigned to a specific nameservice. If none is specified, equal allocation is done across all nameservices. </td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.fairness.handler.count.concurrent </td>
<td align="left"> </td>
<td align="left"> Dedicated handler assigned to fan out calls such as <code>renewLease</code>. </td></tr>
</tbody>
</table></section></section><section>
<h2><a name="Metrics"></a>Metrics</h2>
<p>The Router and State Store statistics are exposed in metrics/JMX. This information is very useful for monitoring. For more information about the metrics, see <a href="../../hadoop-project-dist/hadoop-common/Metrics.html#RBFMetrics">RBF Metrics</a>, <a href="../../hadoop-project-dist/hadoop-common/Metrics.html#RouterRPCMetrics">Router RPC Metrics</a> and <a href="../../hadoop-project-dist/hadoop-common/Metrics.html#StateStoreMetrics">State Store Metrics</a>.</p></section><section>
<h2><a name="Router_Federation_Rename"></a>Router Federation Rename</h2>
<p>Enable Router to rename across namespaces. Currently it is implemented based on <a href="../../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a> and has some limitations compared with a normal rename:</p>
<ol>
<li>It is much slower than a normal rename, so it needs a longer RPC timeout configuration. See <code>ipc.client.rpc-timeout.ms</code> and its description for more information about RPC timeout.</li>
<li>It doesn&#x2019;t support snapshot paths.</li>
<li>It doesn&#x2019;t support renaming paths with multiple destinations.</li>
</ol>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> Property </th>
<th align="left"> Default </th>
<th align="left"> Description</th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> dfs.federation.router.federation.rename.option </td>
<td align="left"> NONE </td>
<td align="left"> Specify the action to take when renaming across namespaces. The option can be NONE (reject renames across namespaces) or DISTCP (rename across namespaces with distcp). </td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.federation.rename.force.close.open.file </td>
<td align="left"> true </td>
<td align="left"> Force close all open files when there is no diff in the DIFF_DISTCP stage.</td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.federation.rename.map </td>
<td align="left"> </td>
<td align="left"> Max number of concurrent maps to use for copy.</td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.federation.rename.bandwidth </td>
<td align="left"> </td>
<td align="left"> Specify bandwidth per map in MB.</td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.federation.rename.delay </td>
<td align="left"> 1000 </td>
<td align="left"> Specify the delay duration (milliseconds) when the job needs to retry.</td></tr>
<tr class="a">
<td align="left"> dfs.federation.router.federation.rename.diff </td>
<td align="left"> 0 </td>
<td align="left"> Specify the threshold of the diff entries that is used in the incremental copy stage.</td></tr>
<tr class="b">
<td align="left"> dfs.federation.router.federation.rename.trash </td>
<td align="left"> trash </td>
<td align="left"> This option has 3 values: trash (move the source path to trash), delete (delete the source path directly) and skip (skip both trash and deletion).</td></tr>
</tbody>
</table></section>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2023
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>