<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2023-05-26
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.4.0-SNAPSHOT &#x2013; HDFS Permissions Guide</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20230526" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2023-05-26
&nbsp;| Version: 3.4.0-SNAPSHOT
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html">Async Profiler</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
<li class="none">
<a href="../../hadoop-huaweicloud/cloud-storage/index.html">Huaweicloud OBS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a>
</li>
<li class="none">
<a href="../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>HDFS Permissions Guide</h1>
<ul>
<li><a href="#Overview">Overview</a></li>
<li><a href="#User_Identity">User Identity</a></li>
<li><a href="#Group_Mapping">Group Mapping</a></li>
<li><a href="#Permission_Checks">Permission Checks</a></li>
<li><a href="#Understanding_the_Implementation">Understanding the Implementation</a></li>
<li><a href="#Changes_to_the_File_System_API">Changes to the File System API</a></li>
<li><a href="#Changes_to_the_Application_Shell">Changes to the Application Shell</a></li>
<li><a href="#The_Super-User">The Super-User</a></li>
<li><a href="#The_Web_Server">The Web Server</a></li>
<li><a href="#ACLs_.28Access_Control_Lists.29">ACLs (Access Control Lists)</a></li>
<li><a href="#ACLs_File_System_API">ACLs File System API</a></li>
<li><a href="#ACLs_Shell_Commands">ACLs Shell Commands</a></li>
<li><a href="#Configuration_Parameters">Configuration Parameters</a></li></ul>
<section>
<h2><a name="Overview"></a>Overview</h2>
<p>The Hadoop Distributed File System (HDFS) implements a permissions model for files and directories that shares much of the POSIX model. Each file and directory is associated with an owner and a group. The file or directory has separate permissions for the user that is the owner, for other users that are members of the group, and for all other users. For files, the r permission is required to read the file, and the w permission is required to write or append to the file. For directories, the r permission is required to list the contents of the directory, the w permission is required to create or delete files or directories, and the x permission is required to access a child of the directory.</p>
<p>In contrast to the POSIX model, there are no setuid or setgid bits for files, as there is no notion of executable files. For directories, there are no setuid or setgid bits, as a simplification. The sticky bit can be set on directories, preventing anyone except the superuser, directory owner or file owner from deleting or moving the files within the directory. Setting the sticky bit for a file has no effect. Collectively, the permissions of a file or directory are its mode. In general, Unix customs for representing and displaying modes will be used, including the use of octal numbers in this description. When a file or directory is created, its owner is the user identity of the client process, and its group is the group of the parent directory (the BSD rule).</p>
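<p>As a concrete illustration of the model (a minimal sketch, assuming a reachable HDFS configured via <code>fs.defaultFS</code>; the path <code>/user/alice/data.txt</code> is hypothetical), the owner, group and mode of a path can be read back through the Java <code>FileSystem</code> API:</p>
<div class="source">
<div class="source">
<pre>import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ShowMode {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // FileStatus carries the owner, the group and the permission bits (mode).
    // The path below is a hypothetical example.
    FileStatus st = fs.getFileStatus(new Path("/user/alice/data.txt"));
    System.out.println(st.getOwner() + ":" + st.getGroup()
        + " " + st.getPermission());   // e.g. alice:hadoop rw-r--r--
  }
}
</pre></div></div>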
<p>HDFS also provides optional support for POSIX ACLs (Access Control Lists) to augment file permissions with finer-grained rules for specific named users or named groups. ACLs are discussed in greater detail later in this document.</p>
<p>Each client process that accesses HDFS has a two-part identity composed of the user name and the groups list. Whenever HDFS must do a permissions check for a file or directory <code>foo</code> accessed by a client process,</p>
<ul>
<li>If the user name matches the owner of <code>foo</code>, then the owner permissions are tested;</li>
<li>Else if the group of <code>foo</code> matches any member of the groups list, then the group permissions are tested;</li>
<li>Otherwise the other permissions of <code>foo</code> are tested.</li>
</ul>
<p>If a permissions check fails, the client operation fails.</p></section><section>
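<p>The check order above can be restated compactly in Java. The following is an illustrative sketch only, not the NameNode&#x2019;s actual implementation; <code>FsPermission</code> exposes the three permission triads as <code>FsAction</code> values:</p>
<div class="source">
<div class="source">
<pre>import java.util.List;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

class SimplePermissionCheck {
  /** Returns true if (user, groups) is granted 'access' on an inode. */
  static boolean check(String user, List&lt;String&gt; groups,
      String owner, String group, FsPermission mode, FsAction access) {
    if (user.equals(owner)) {
      return mode.getUserAction().implies(access);    // owner permissions
    } else if (groups.contains(group)) {
      return mode.getGroupAction().implies(access);   // group permissions
    } else {
      return mode.getOtherAction().implies(access);   // other permissions
    }
  }
}
</pre></div></div>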
<h2><a name="User_Identity"></a>User Identity</h2>
<p>As of Hadoop 0.22, Hadoop supports two different modes of operation to determine the user&#x2019;s identity, specified by the hadoop.security.authentication property:</p>
<ul>
<li>
<p><b>simple</b></p>
<p>In this mode of operation, the identity of a client process is determined by the host operating system. On Unix-like systems, the user name is the equivalent of <code>whoami</code>.</p>
</li>
<li>
<p><b>kerberos</b></p>
<p>In Kerberized operation, the identity of a client process is determined by its Kerberos credentials. For example, in a Kerberized environment, a user may use the <code>kinit</code> utility to obtain a Kerberos ticket-granting-ticket (TGT) and use <code>klist</code> to determine their current principal. When mapping a Kerberos principal to an HDFS username, all components except for the primary are dropped. For example, a principal <code>todd/foobar@CORP.COMPANY.COM</code> will act as the simple username <code>todd</code> on HDFS.</p>
</li>
</ul>
<p>Regardless of the mode of operation, the user identity mechanism is extrinsic to HDFS itself. There is no provision within HDFS for creating user identities, establishing groups, or processing user credentials.</p></section><section>
<h2><a name="Group_Mapping"></a>Group Mapping</h2>
<p>Once a username has been determined as described above, the list of groups is determined by a group mapping service, configured by the <code>hadoop.security.group.mapping</code> property. See <a href="../hadoop-common/GroupsMapping.html">Hadoop Groups Mapping</a> for details.</p></section><section>
<h2><a name="Permission_Checks"></a>Permission Checks</h2>
<p>Each HDFS operation demands that the user has specific permissions (some combination of READ, WRITE and EXECUTE), granted through file ownership, group membership or the other permissions. An operation may perform permission checks at multiple components of the path, not only the final component. Additionally, some operations depend on a check of the owner of a path.</p>
<p>All operations require traversal access. Traversal access demands the EXECUTE permission on all existing components of the path, except for the final path component. For example, for any operation accessing <code>/foo/bar/baz</code>, the caller must have EXECUTE permission on <code>/</code>, <code>/foo</code> and <code>/foo/bar</code>.</p>
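<p>In client terms (a sketch, reusing the hypothetical path <code>/foo/bar/baz</code>), traversal access can be probed with <code>FileSystem#access</code>, which throws <code>AccessControlException</code> when a check fails:</p>
<div class="source">
<div class="source">
<pre>import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;

public class TraversalCheck {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // EXECUTE is required on every existing component except the final one.
    for (String dir : new String[] {"/", "/foo", "/foo/bar"}) {
      fs.access(new Path(dir), FsAction.EXECUTE);   // throws if denied
    }
    System.out.println("caller may traverse to /foo/bar/baz");
  }
}
</pre></div></div>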
<p>The following table describes the permission checks performed by HDFS for each component of the path.</p>
<ul>
<li><b>Ownership:</b> Whether or not to check if the caller is the owner of the path. Typically, operations that change the ownership or permission metadata demand that the caller is the owner.</li>
<li><b>Parent:</b> The parent directory of the requested path. For example, for the path <code>/foo/bar/baz</code>, the parent is <code>/foo/bar</code>.</li>
<li><b>Ancestor:</b> The last <b>existing</b> component of the requested path. For example, for the path <code>/foo/bar/baz</code>, the ancestor path is <code>/foo/bar</code> if <code>/foo/bar</code> exists. The ancestor path is <code>/foo</code> if <code>/foo</code> exists but <code>/foo/bar</code> does not exist.</li>
<li><b>Final:</b> The final component of the requested path. For example, for the path <code>/foo/bar/baz</code>, the final path component is <code>/foo/bar/baz</code>.</li>
<li><b>Sub-tree:</b> For a path that is a directory, the directory itself and all of its child sub-directories, recursively. For example, for the path <code>/foo/bar/baz</code>, which has 2 sub-directories named <code>buz</code> and <code>boo</code>, the sub-tree is <code>/foo/bar/baz</code>, <code>/foo/bar/baz/buz</code> and <code>/foo/bar/baz/boo</code>.</li>
</ul>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th>Operation </th>
<th> Ownership </th>
<th> Parent </th>
<th> Ancestor </th>
<th> Final </th>
<th> Sub-tree</th></tr>
</thead><tbody>
<tr class="b">
<td>append </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A</td></tr>
<tr class="a">
<td>concat </td>
<td> NO [2] </td>
<td> WRITE (sources) </td>
<td> N/A </td>
<td> READ (sources), WRITE (destination) </td>
<td> N/A</td></tr>
<tr class="b">
<td>create </td>
<td> NO </td>
<td> N/A </td>
<td> WRITE </td>
<td> WRITE [1] </td>
<td> N/A</td></tr>
<tr class="a">
<td>createSnapshot </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>delete </td>
<td> NO [2] </td>
<td> WRITE </td>
<td> N/A </td>
<td> N/A </td>
<td> READ, WRITE, EXECUTE</td></tr>
<tr class="a">
<td>deleteSnapshot </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>getAclStatus </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="a">
<td>getBlockLocations </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> READ </td>
<td> N/A</td></tr>
<tr class="b">
<td>getContentSummary </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> READ, EXECUTE</td></tr>
<tr class="a">
<td>getFileInfo </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>getFileLinkInfo </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="a">
<td>getLinkTarget </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>getListing </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> READ, EXECUTE </td>
<td> N/A</td></tr>
<tr class="a">
<td>getSnapshotDiffReport </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> READ </td>
<td> READ</td></tr>
<tr class="b">
<td>getStoragePolicy </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> READ </td>
<td> N/A</td></tr>
<tr class="a">
<td>getXAttrs </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> READ </td>
<td> N/A</td></tr>
<tr class="b">
<td>listXAttrs </td>
<td> NO </td>
<td> EXECUTE </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="a">
<td>mkdirs </td>
<td> NO </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>modifyAclEntries </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="a">
<td>removeAcl </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>removeAclEntries </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="a">
<td>removeDefaultAcl </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>removeXAttr </td>
<td> NO [2] </td>
<td> N/A </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A</td></tr>
<tr class="a">
<td>rename </td>
<td> NO [2] </td>
<td> WRITE (source) </td>
<td> WRITE (destination) </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>renameSnapshot </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="a">
<td>setAcl </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>setOwner </td>
<td> YES [3] </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="a">
<td>setPermission </td>
<td> YES </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A </td>
<td> N/A</td></tr>
<tr class="b">
<td>setReplication </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A</td></tr>
<tr class="a">
<td>setStoragePolicy </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A</td></tr>
<tr class="b">
<td>setTimes </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A</td></tr>
<tr class="a">
<td>setXAttr </td>
<td> NO [2] </td>
<td> N/A </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A</td></tr>
<tr class="b">
<td>truncate </td>
<td> NO </td>
<td> N/A </td>
<td> N/A </td>
<td> WRITE </td>
<td> N/A</td></tr>
</tbody>
</table>
<p>[1] WRITE access on the final path component during <code>create</code> is only required if the call uses the overwrite option and there is an existing file at the path.</p>
<p>[2] Any operation that checks WRITE permission on the parent directory also checks ownership if the <a href="#Overview">sticky bit</a> is set.</p>
<p>[3] Calling <code>setOwner</code> to change the user that owns a file requires <a href="#The_Super-User">HDFS super-user</a> access. HDFS super-user access is not required to change the group, but the caller must be the owner of the file and a member of the specified group.</p></section><section>
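<p>Footnote [3] in practice (a sketch; the path and group name are hypothetical): the owner of a file may change its group to another group the owner belongs to without super-user access, and passing <code>null</code> for a <code>setOwner</code> argument leaves that field unchanged:</p>
<div class="source">
<div class="source">
<pre>import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChangeGroup {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // Change only the group; the owning user is left unchanged (null), so
    // this does not require super-user access if the caller owns the file
    // and is a member of "sales". Path and group are hypothetical.
    fs.setOwner(new Path("/data/report.csv"), null, "sales");
  }
}
</pre></div></div>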
<h2><a name="Understanding_the_Implementation"></a>Understanding the Implementation</h2>
<p>Each file or directory operation passes the full path name to the NameNode, and the permissions checks are applied along the path for each operation. The client framework will implicitly associate the user identity with the connection to the NameNode, reducing the need for changes to the existing client API. It has always been the case that when one operation on a file succeeds, the operation might fail when repeated because the file, or some directory on the path, no longer exists. For instance, when the client first begins reading a file, it makes a first request to the NameNode to discover the location of the first blocks of the file. A second request made to find additional blocks may fail. On the other hand, deleting a file does not revoke access by a client that already knows the blocks of the file. With the addition of permissions, a client&#x2019;s access to a file may be withdrawn between requests. Again, changing permissions does not revoke the access of a client that already knows the file&#x2019;s blocks.</p></section><section>
<h2><a name="Changes_to_the_File_System_API"></a>Changes to the File System API</h2>
<p>All methods that use a path parameter will throw <code>AccessControlException</code> if permission checking fails.</p>
<p>New methods:</p>
<ul>
<li><code>public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException;</code></li>
<li><code>public boolean mkdirs(Path f, FsPermission permission) throws IOException;</code></li>
<li><code>public void setPermission(Path p, FsPermission permission) throws IOException;</code></li>
<li><code>public void setOwner(Path p, String username, String groupname) throws IOException;</code></li>
<li><code>public FileStatus getFileStatus(Path f) throws IOException;</code> will additionally return the user, group and mode associated with the path.</li>
</ul>
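<p>A minimal sketch of these methods in use (the paths, replication factor and mode values are illustrative assumptions); any of the calls throws <code>AccessControlException</code> if a permission check fails along the path:</p>
<div class="source">
<div class="source">
<pre>import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;

public class CreateWithPermission {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    try {
      // Request mode 750 for the directory and 640 for the file; the
      // effective mode is further restricted by the configured umask.
      fs.mkdirs(new Path("/user/alice/reports"), new FsPermission((short) 0750));
      FSDataOutputStream out = fs.create(new Path("/user/alice/reports/r1"),
          new FsPermission((short) 0640), true /* overwrite */,
          4096, (short) 3, 134217728L, null /* no progress callback */);
      out.close();
      fs.setPermission(new Path("/user/alice/reports/r1"),
          new FsPermission((short) 0600));
    } catch (AccessControlException e) {
      System.err.println("permission check failed: " + e.getMessage());
    }
  }
}
</pre></div></div>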
<p>The mode of a new file or directory is restricted by the umask set as a configuration parameter. When the existing <code>create(path, &#x2026;)</code> method (without the permission parameter) is used, the mode of the new file is <code>0666 &amp; ^umask</code>. When the new <code>create(path, permission, &#x2026;)</code> method (with the permission parameter P) is used, the mode of the new file is <code>P &amp; ^umask &amp; 0666</code>. When a new directory is created with the existing <code>mkdirs(path)</code> method (without the permission parameter), the mode of the new directory is <code>0777 &amp; ^umask</code>. When the new <code>mkdirs(path, permission)</code> method (with the permission parameter P) is used, the mode of the new directory is <code>P &amp; ^umask &amp; 0777</code>.</p>
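<p>The arithmetic can be verified with <code>FsPermission#applyUMask</code> (a small sketch, assuming the default umask of 022): <code>0666 &amp; ^022 = 0644</code> for files and <code>0777 &amp; ^022 = 0755</code> for directories.</p>
<div class="source">
<div class="source">
<pre>import org.apache.hadoop.fs.permission.FsPermission;

public class UmaskDemo {
  public static void main(String[] args) {
    FsPermission umask = new FsPermission((short) 0022);
    // 0666 masked by 022 -&gt; rw-r--r-- (0644)
    System.out.println(new FsPermission((short) 0666).applyUMask(umask));
    // 0777 masked by 022 -&gt; rwxr-xr-x (0755)
    System.out.println(new FsPermission((short) 0777).applyUMask(umask));
  }
}
</pre></div></div></section><section>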
<h2><a name="Changes_to_the_Application_Shell"></a>Changes to the Application Shell</h2>
<p>New operations:</p>
<ul>
<li>
<p><code>chmod [-R] mode file ...</code></p>
<p>Only the owner of a file or the super-user is permitted to change the mode of a file.</p>
</li>
<li>
<p><code>chgrp [-R] group file ...</code></p>
<p>The user invoking chgrp must belong to the specified group and be the owner of the file, or be the super-user.</p>
</li>
<li>
<p><code>chown [-R] [owner][:[group]] file ...</code></p>
<p>The owner of a file may only be altered by a super-user.</p>
</li>
<li>
<p><code>ls file ...</code></p>
</li>
<li>
<p><code>lsr file ...</code></p>
<p>The output is reformatted to display the owner, group and mode.</p>
</li>
</ul></section><section>
<h2><a name="The_Super-User"></a>The Super-User</h2>
<p>The super-user is the user with the same identity as the NameNode process itself. Loosely, if you started the NameNode, then you are the super-user. The super-user can do anything, in the sense that permission checks never fail for the super-user. There is no persistent notion of who the super-user is; when the NameNode is started, its process identity determines the super-user for the duration of that run. The HDFS super-user does not have to be the super-user of the NameNode host, nor is it necessary that all clusters have the same super-user. Also, an experimenter running HDFS on a personal workstation conveniently becomes that installation&#x2019;s super-user without any configuration.</p>
<p>In addition, the administrator may identify a distinguished group using a configuration parameter. If set, members of this group are also super-users.</p></section><section>
<h2><a name="The_Web_Server"></a>The Web Server</h2>
<p>By default, the identity of the web server is a configuration parameter. That is, the NameNode has no notion of the identity of the real user, but the web server behaves as if it has the identity (user and groups) of a user chosen by the administrator. Unless the chosen identity matches the super-user, parts of the name space may be inaccessible to the web server.</p></section><section>
<h2><a name="ACLs_.28Access_Control_Lists.29"></a>ACLs (Access Control Lists)</h2>
<p>In addition to the traditional POSIX permissions model, HDFS also supports POSIX ACLs (Access Control Lists). ACLs are useful for implementing permission requirements that differ from the natural organizational hierarchy of users and groups. An ACL provides a way to set different permissions for specific named users or named groups, not only the file&#x2019;s owner and the file&#x2019;s group.</p>
<p>By default, support for ACLs is enabled, and the NameNode allows creation of ACLs. To disable support for ACLs, set <code>dfs.namenode.acls.enabled</code> to false in the NameNode configuration.</p>
<p>An ACL consists of a set of ACL entries. Each ACL entry names a specific user or group and grants or denies read, write and execute permissions for that specific user or group. For example:</p>
<div class="source">
<div class="source">
<pre> user::rw-
user:bruce:rwx #effective:r--
group::r-x #effective:r--
group:sales:rwx #effective:r--
mask::r--
other::r--
</pre></div></div>
<p>ACL entries consist of a type, an optional name and a permission string. For display purposes, &#x2018;:&#x2019; is used as the delimiter between each field. In this example ACL, the file owner has read-write access, the file group has read-execute access and others have read access. So far, this is equivalent to setting the file&#x2019;s permission bits to 654.</p>
<p>Additionally, there are 2 extended ACL entries for the named user bruce and the named group sales, both granted full access. The mask is a special ACL entry that filters the permissions granted to all named user entries and named group entries, and also the unnamed group entry. In the example, the mask has only read permissions, and we can see that the effective permissions of several ACL entries have been filtered accordingly.</p>
<p>Every ACL must have a mask. If the user doesn&#x2019;t supply a mask while setting an ACL, then a mask is inserted automatically by calculating the union of permissions on all entries that would be filtered by the mask.</p>
<p>Running <code>chmod</code> on a file that has an ACL actually changes the permissions of the mask. Since the mask acts as a filter, this effectively constrains the permissions of all extended ACL entries instead of changing just the group entry and possibly missing other extended ACL entries.</p>
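<p>A sketch of adding the two extended entries from the example above through the Java API (the path is a hypothetical assumption; the principal names follow the example); when no mask entry is supplied, the NameNode computes one automatically as described:</p>
<div class="source">
<div class="source">
<pre>import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclEntryScope;
import org.apache.hadoop.fs.permission.AclEntryType;
import org.apache.hadoop.fs.permission.FsAction;

public class AddAclEntries {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // Add user:bruce:rwx and group:sales:rwx to the access ACL of a
    // hypothetical path; no mask entry is supplied here.
    fs.modifyAclEntries(new Path("/project/data"), Arrays.asList(
        new AclEntry.Builder().setScope(AclEntryScope.ACCESS)
            .setType(AclEntryType.USER).setName("bruce")
            .setPermission(FsAction.ALL).build(),
        new AclEntry.Builder().setScope(AclEntryScope.ACCESS)
            .setType(AclEntryType.GROUP).setName("sales")
            .setPermission(FsAction.ALL).build()));
  }
}
</pre></div></div>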
<p>The model also differentiates between an &#x201c;access ACL&#x201d;, which defines the rules to enforce during permission checks, and a &#x201c;default ACL&#x201d;, which defines the ACL entries that new child files or sub-directories receive automatically during creation. For example:</p>
<div class="source">
<div class="source">
<pre> user::rwx
group::r-x
other::r-x
default:user::rwx
default:user:bruce:rwx #effective:r-x
default:group::r-x
default:group:sales:rwx #effective:r-x
default:mask::r-x
default:other::r-x
</pre></div></div>
<p>Only directories may have a default ACL. When a new file or sub-directory is created, it automatically copies the default ACL of its parent into its own access ACL. A new sub-directory also copies it to its own default ACL. In this way, the default ACL will be copied down through arbitrarily deep levels of the file system tree as new sub-directories get created.</p>
<p>The exact permission values in the new child&#x2019;s access ACL are subject to filtering by the mode parameter. Considering the default umask of 022, this is typically 755 for new directories and 644 for new files. The mode parameter filters the copied permission values for the unnamed user (file owner), the mask and other. Using this particular example ACL, and creating a new sub-directory with 755 for the mode, this mode filtering has no effect on the final result. However, if we consider creation of a file with 644 for the mode, then mode filtering causes the new file&#x2019;s ACL to receive read-write for the unnamed user (file owner), read for the mask and read for others. This mask also means that effective permissions for named user bruce and named group sales are only read.</p>
<p>Note that the copy occurs at time of creation of the new file or sub-directory. Subsequent changes to the parent&#x2019;s default ACL do not change existing children.</p>
<p>The default ACL must have all minimum required ACL entries, including the unnamed user (file owner), unnamed group (file group) and other entries. If the user doesn&#x2019;t supply one of these entries while setting a default ACL, then the entries are inserted automatically by copying the corresponding permissions from the access ACL, or the permission bits if there is no access ACL. The default ACL must also have a mask. As described above, if the mask is unspecified, then a mask is inserted automatically by calculating the union of permissions on all entries that would be filtered by the mask.</p>
<p>Note that a file or directory cannot have an unlimited number of ACL entries. The maximum is 32 access entries and 32 default entries, for 64 in total.</p>
<p>When considering a file that has an ACL, the algorithm for permission checks changes to:</p>
<ul>
<li>
<p>If the user name matches the owner of file, then the owner permissions are tested;</p>
</li>
<li>
<p>Else if the user name matches the name in one of the named user entries, then these permissions are tested, filtered by the mask permissions;</p>
</li>
<li>
<p>Else if the group of file matches any member of the groups list, and if these permissions filtered by the mask grant access, then these permissions are used;</p>
</li>
<li>
<p>Else if there is a named group entry matching a member of the groups list, and if these permissions filtered by the mask grant access, then these permissions are used;</p>
</li>
<li>
<p>Else if the file group or any named group entry matches a member of the groups list, but access was not granted by any of those permissions, then access is denied;</p>
</li>
<li>
<p>Otherwise the other permissions of file are tested.</p>
</li>
</ul>
<p>Best practice is to rely on traditional permission bits to implement most permission requirements, and define a smaller number of ACLs to augment the permission bits with a few exceptional rules. A file with an ACL incurs an additional cost in memory in the NameNode compared to a file that has only permission bits.</p></section><section>
<h2><a name="ACLs_File_System_API"></a>ACLs File System API</h2>
<p>New methods:</p>
<ul>
<li><code>public void modifyAclEntries(Path path, List&lt;AclEntry&gt; aclSpec) throws IOException;</code></li>
<li><code>public void removeAclEntries(Path path, List&lt;AclEntry&gt; aclSpec) throws IOException;</code></li>
<li><code>public void removeDefaultAcl(Path path) throws IOException;</code></li>
<li><code>public void removeAcl(Path path) throws IOException;</code></li>
<li><code>public void setAcl(Path path, List&lt;AclEntry&gt; aclSpec) throws IOException;</code></li>
<li><code>public AclStatus getAclStatus(Path path) throws IOException;</code></li>
</ul></section><section>
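<p>For example (a sketch; the path is a hypothetical assumption), an ACL can be read back with <code>getAclStatus</code>, whose <code>AclStatus</code> result exposes the owner, group, sticky bit and the list of extended ACL entries:</p>
<div class="source">
<div class="source">
<pre>import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclStatus;

public class ShowAcl {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // Hypothetical path; see AddAclEntries above.
    AclStatus acl = fs.getAclStatus(new Path("/project/data"));
    System.out.println("owner: " + acl.getOwner()
        + ", group: " + acl.getGroup());
    for (AclEntry e : acl.getEntries()) {
      System.out.println(e);   // e.g. user:bruce:rwx
    }
  }
}
</pre></div></div>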
<h2><a name="ACLs_Shell_Commands"></a>ACLs Shell Commands</h2>
<ul>
<li>
<p><code>hdfs dfs -getfacl [-R] &lt;path&gt;</code></p>
<p>Displays the Access Control Lists (ACLs) of files and directories. If a directory has a default ACL, then getfacl also displays the default ACL.</p>
</li>
<li>
<p><code>hdfs dfs -setfacl [-R] [{-b|-k} {-m|-x &lt;acl_spec&gt;} &lt;path&gt;]|[--set &lt;acl_spec&gt; &lt;path&gt;]</code></p>
<p>Sets Access Control Lists (ACLs) of files and directories.</p>
</li>
<li>
<p><code>hdfs dfs -ls &lt;args&gt;</code></p>
<p>The output of <code>ls</code> will append a &#x2018;+&#x2019; character to the permissions string of any file or directory that has an ACL.</p>
<p>See the <a href="../hadoop-common/FileSystemShell.html">File System Shell</a> documentation for full coverage of these commands.</p>
</li>
</ul></section><section>
<h2><a name="Configuration_Parameters"></a>Configuration Parameters</h2>
<ul>
<li>
<p><code>dfs.permissions.enabled = true</code></p>
<p>If true, use the permissions system as described here. If false, permission checking is turned off, but all other behavior is unchanged. Switching from one parameter value to the other does not change the mode, owner or group of files or directories. Regardless of whether permissions are on or off, <code>chmod</code>, <code>chgrp</code>, <code>chown</code> and <code>setfacl</code> always check permissions. These functions are only useful in the permissions context, and so there is no backwards compatibility issue. Furthermore, this allows administrators to reliably set owners and permissions in advance of turning on regular permission checking.</p>
</li>
<li>
<p><code>dfs.web.ugi = webuser,webgroup</code></p>
<p>The user name to be used by the web server. Setting this to the name of the super-user allows any web client to see everything. Changing this to an otherwise unused identity allows web clients to see only those things visible using &#x201c;other&#x201d; permissions. Additional groups may be added to the comma-separated list.</p>
</li>
<li>
<p><code>dfs.permissions.superusergroup = supergroup</code></p>
<p>The name of the group of super-users.</p>
</li>
<li>
<p><code>fs.permissions.umask-mode = 0022</code></p>
<p>The umask used when creating files and directories. In configuration files, the equivalent decimal value 18 (octal 0022) may also be used.</p>
</li>
<li>
<p><code>dfs.cluster.administrators = ACL-for-admins</code></p>
<p>The administrators for the cluster, specified as an ACL. This controls who can access the default servlets, etc., in HDFS.</p>
</li>
<li>
<p><code>dfs.namenode.acls.enabled = true</code></p>
<p>Set to true to enable support for HDFS ACLs (Access Control Lists). By default, ACLs are enabled. When ACLs are disabled, the NameNode rejects all attempts to set an ACL.</p>
</li>
<li>
<p><code>dfs.namenode.posix.acl.inheritance.enabled</code></p>
<p>Set to true to enable POSIX style ACL inheritance. Enabled by default. When it is enabled and the create request comes from a compatible client, the NameNode will apply default ACLs from the parent directory to the create mode and ignore the client umask. If no default ACL is found, it will apply the client umask.</p>
</li>
</ul></section>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2023
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>