hadoop/hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html

914 lines
55 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2023-02-28
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop 3.4.0-SNAPSHOT &#x2013; Transparent Encryption in HDFS</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20230228" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="Apache Hadoop" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="The Apache Software Foundation" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
|
<a href="http://hadoop.apache.org/" class="externalLink">Apache Hadoop</a>
&nbsp;| Last Published: 2023-02-28
&nbsp;| Version: 3.4.0-SNAPSHOT
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../../index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html">Async Profiler</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
<li class="none">
<a href="../../hadoop-huaweicloud/cloud-storage/index.html">Huaweicloud OBS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a>
</li>
<li class="none">
<a href="../../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>Transparent Encryption in HDFS</h1>
<ul>
<li><a href="#Overview">Overview</a></li>
<li><a href="#Background">Background</a></li>
<li><a href="#Use_Cases">Use Cases</a></li>
<li><a href="#Architecture">Architecture</a>
<ul>
<li><a href="#Architecture">Overview</a></li>
<li><a href="#Accessing_data_within_an_encryption_zone">Accessing data within an encryption zone</a></li>
<li><a href="#Key_Management_Server.2C_KeyProvider.2C_EDEKs">Key Management Server, KeyProvider, EDEKs</a></li></ul></li>
<li><a href="#Configuration">Configuration</a>
<ul>
<li><a href="#Configuring_the_cluster_KeyProvider">Configuring the cluster KeyProvider</a></li>
<li><a href="#Selecting_an_encryption_algorithm_and_codec">Selecting an encryption algorithm and codec</a></li>
<li><a href="#Namenode_configuration">Namenode configuration</a></li></ul></li>
<li><a href="#crypto_command-line_interface">crypto command-line interface</a>
<ul>
<li><a href="#createZone">createZone</a></li>
<li><a href="#listZones">listZones</a></li>
<li><a href="#provisionTrash">provisionTrash</a></li>
<li><a href="#getFileEncryptionInfo">getFileEncryptionInfo</a></li>
<li><a href="#reencryptZone">reencryptZone</a></li>
<li><a href="#listReencryptionStatus">listReencryptionStatus</a></li></ul></li>
<li><a href="#Example_usage">Example usage</a></li>
<li><a href="#Distcp_considerations">Distcp considerations</a>
<ul>
<li><a href="#Running_as_the_superuser">Running as the superuser</a></li>
<li><a href="#Copying_into_encrypted_locations">Copying into encrypted locations</a></li></ul></li>
<li><a href="#Rename_and_Trash_considerations">Rename and Trash considerations</a></li>
<li><a href="#Attack_vectors">Attack vectors</a>
<ul>
<li><a href="#Hardware_access_exploits">Hardware access exploits</a></li>
<li><a href="#Root_access_exploits">Root access exploits</a></li>
<li><a href="#HDFS_admin_exploits">HDFS admin exploits</a></li>
<li><a href="#Rogue_user_exploits">Rogue user exploits</a></li></ul></li></ul>
<section>
<h2><a name="Overview"></a>Overview</h2>
<p>HDFS implements <i>transparent</i>, <i>end-to-end</i> encryption. Once configured, data read from and written to special HDFS directories is <i>transparently</i> encrypted and decrypted without requiring changes to user application code. This encryption is also <i>end-to-end</i>, which means the data can only be encrypted and decrypted by the client. HDFS never stores or has access to unencrypted data or unencrypted data encryption keys. This satisfies two typical requirements for encryption: <i>at-rest encryption</i> (meaning data on persistent media, such as a disk) as well as <i>in-transit encryption</i> (e.g. when data is travelling over the network).</p></section><section>
<h2><a name="Background"></a>Background</h2>
<p>Encryption can be done at different layers in a traditional data management software/hardware stack. Choosing to encrypt at a given layer comes with different advantages and disadvantages.</p>
<ul>
<li>
<p><b>Application-level encryption</b>. This is the most secure and most flexible approach. The application has ultimate control over what is encrypted and can precisely reflect the requirements of the user. However, writing applications to do this is hard. This is also not an option for customers of existing applications that do not support encryption.</p>
</li>
<li>
<p><b>Database-level encryption</b>. Similar to application-level encryption in terms of its properties. Most database vendors offer some form of encryption. However, there can be performance issues. One example is that indexes cannot be encrypted.</p>
</li>
<li>
<p><b>Filesystem-level encryption</b>. This option offers high performance, application transparency, and is typically easy to deploy. However, it is unable to model some application-level policies. For instance, multi-tenant applications might want to encrypt based on the end user. A database might want different encryption settings for each column stored within a single file.</p>
</li>
<li>
<p><b>Disk-level encryption</b>. Easy to deploy and high performance, but also quite inflexible. Only really protects against physical theft.</p>
</li>
</ul>
<p>HDFS-level encryption fits between database-level and filesystem-level encryption in this stack. This has a lot of positive effects. HDFS encryption is able to provide good performance and existing Hadoop applications are able to run transparently on encrypted data. HDFS also has more context than traditional filesystems when it comes to making policy decisions.</p>
<p>HDFS-level encryption also prevents attacks at the filesystem-level and below (so-called &#x201c;OS-level attacks&#x201d;). The operating system and disk only interact with encrypted bytes, since the data is already encrypted by HDFS.</p></section><section>
<h2><a name="Use_Cases"></a>Use Cases</h2>
<p>Data encryption is required by a number of different government, financial, and regulatory entities. For example, the health-care industry has HIPAA regulations, the card payment industry has PCI DSS regulations, and the US government has FISMA regulations. Having transparent encryption built into HDFS makes it easier for organizations to comply with these regulations.</p>
<p>Encryption can also be performed at the application-level, but by integrating it into HDFS, existing applications can operate on encrypted data without changes. This integrated architecture implies stronger encrypted file semantics and better coordination with other HDFS functions.</p></section><section>
<h2><a name="Architecture"></a>Architecture</h2><section>
<h3><a name="Overview"></a>Overview</h3>
<p>For transparent encryption, we introduce a new abstraction to HDFS: the <i>encryption zone</i>. An encryption zone is a special directory whose contents will be transparently encrypted upon write and transparently decrypted upon read. Each encryption zone is associated with a single <i>encryption zone key</i> which is specified when the zone is created. Each file within an encryption zone has its own unique <i>data encryption key (DEK)</i>. DEKs are never handled directly by HDFS. Instead, HDFS only ever handles an <i>encrypted data encryption key (EDEK)</i>. Clients decrypt an EDEK, and then use the subsequent DEK to read and write data. HDFS datanodes simply see a stream of encrypted bytes.</p>
<p>A very important use case of encryption is to &#x201c;switch it on&#x201d; and ensure all files across the entire filesystem are encrypted. To support this strong guarantee without losing the flexibility of using different encryption zone keys in different parts of the filesystem, HDFS allows <i>nested encryption zones</i>. After an encryption zone is created (e.g. on the root directory <code>/</code>), a user can create more encryption zones on its descendant directories (e.g. <code>/home/alice</code>) with different keys. The EDEK of a file will be generated using the encryption zone key from the closest ancestor encryption zone.</p>
<p>A new cluster service is required to manage encryption keys: the Hadoop Key Management Server (KMS). In the context of HDFS encryption, the KMS performs three basic responsibilities:</p>
<ol style="list-style-type: decimal">
<li>
<p>Providing access to stored encryption zone keys</p>
</li>
<li>
<p>Generating new encrypted data encryption keys for storage on the NameNode</p>
</li>
<li>
<p>Decrypting encrypted data encryption keys for use by HDFS clients</p>
</li>
</ol>
<p>The KMS will be described in more detail below.</p></section><section>
<h3><a name="Accessing_data_within_an_encryption_zone"></a>Accessing data within an encryption zone</h3>
<p>When creating a new file in an encryption zone, the NameNode asks the KMS to generate a new EDEK encrypted with the encryption zone&#x2019;s key. The EDEK is then stored persistently as part of the file&#x2019;s metadata on the NameNode.</p>
<p>When reading a file within an encryption zone, the NameNode provides the client with the file&#x2019;s EDEK and the encryption zone key version used to encrypt the EDEK. The client then asks the KMS to decrypt the EDEK, which involves checking that the client has permission to access the encryption zone key version. Assuming that is successful, the client uses the DEK to decrypt the file&#x2019;s contents.</p>
<p>All of the above steps for the read and write path happen automatically through interactions between the DFSClient, the NameNode, and the KMS.</p>
<p>Access to encrypted file data and metadata is controlled by normal HDFS filesystem permissions. This means that if HDFS is compromised (for example, by gaining unauthorized access to an HDFS superuser account), a malicious user only gains access to ciphertext and encrypted keys. However, since access to encryption zone keys is controlled by a separate set of permissions on the KMS and key store, this does not pose a security threat.</p></section><section>
<h3><a name="Key_Management_Server.2C_KeyProvider.2C_EDEKs"></a>Key Management Server, KeyProvider, EDEKs</h3>
<p>The KMS is a proxy that interfaces with a backing key store on behalf of HDFS daemons and clients. Both the backing key store and the KMS implement the Hadoop KeyProvider API. See the <a href="../../hadoop-kms/index.html">KMS documentation</a> for more information.</p>
<p>In the KeyProvider API, each encryption key has a unique <i>key name</i>. Because keys can be rolled, a key can have multiple <i>key versions</i>, where each key version has its own <i>key material</i> (the actual secret bytes used during encryption and decryption). An encryption key can be fetched by either its key name, returning the latest version of the key, or by a specific key version.</p>
<p>The KMS implements additional functionality which enables creation and decryption of <i>encrypted encryption keys (EEKs)</i>. Creation and decryption of EEKs happen entirely on the KMS. Importantly, the client requesting creation or decryption of an EEK never handles the EEK&#x2019;s encryption key. To create a new EEK, the KMS generates a new random key, encrypts it with the specified key, and returns the EEK to the client. To decrypt an EEK, the KMS checks that the user has access to the encryption key, uses it to decrypt the EEK, and returns the decrypted encryption key.</p>
<p>In the context of HDFS encryption, EEKs are <i>encrypted data encryption keys (EDEKs)</i>, where a <i>data encryption key (DEK)</i> is what is used to encrypt and decrypt file data. Typically, the key store is configured to only allow end users access to the keys used to encrypt DEKs. This means that EDEKs can be safely stored and handled by HDFS, since the HDFS user will not have access to unencrypted encryption keys.</p></section></section><section>
<h2><a name="Configuration"></a>Configuration</h2>
<p>A necessary prerequisite is an instance of the KMS, as well as a backing key store for the KMS. See the <a href="../../hadoop-kms/index.html">KMS documentation</a> for more information.</p>
<p>Once a KMS has been set up and the NameNode and HDFS clients have been correctly configured, an admin can use the <code>hadoop key</code> and <code>hdfs crypto</code> command-line tools to create encryption keys and set up new encryption zones. Existing data can be encrypted by copying it into the new encryption zones using tools like distcp.</p><section>
<h3><a name="Configuring_the_cluster_KeyProvider"></a>Configuring the cluster KeyProvider</h3><section>
<h4><a name="hadoop.security.key.provider.path"></a>hadoop.security.key.provider.path</h4>
<p>The KeyProvider to use when interacting with encryption keys used when reading and writing to an encryption zone. HDFS clients will use the provider path returned from the NameNode via getServerDefaults. If the NameNode doesn&#x2019;t support returning the key provider URI, then the client&#x2019;s configuration will be used.</p></section></section><section>
<h3><a name="Selecting_an_encryption_algorithm_and_codec"></a>Selecting an encryption algorithm and codec</h3><section>
<h4><a name="hadoop.security.crypto.codec.classes.EXAMPLECIPHERSUITE"></a>hadoop.security.crypto.codec.classes.EXAMPLECIPHERSUITE</h4>
<p>The prefix for a given crypto codec (e.g. EXAMPLECIPHERSUITE); the property contains a comma-separated list of implementation classes for that crypto codec. The first implementation will be used if available, others are fallbacks.</p></section><section>
<h4><a name="hadoop.security.crypto.codec.classes.aes.ctr.nopadding"></a>hadoop.security.crypto.codec.classes.aes.ctr.nopadding</h4>
<p>Default: <code>org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec, org.apache.hadoop.crypto.JceAesCtrCryptoCodec</code></p>
<p>Comma-separated list of crypto codec implementations for AES/CTR/NoPadding. The first implementation will be used if available, others are fallbacks.</p></section><section>
<h4><a name="hadoop.security.crypto.codec.classes.sm4.ctr.nopadding"></a>hadoop.security.crypto.codec.classes.sm4.ctr.nopadding</h4>
<p>Default: <code>org.apache.hadoop.crypto.OpensslSm4CtrCryptoCodec, org.apache.hadoop.crypto.JceSm4CtrCryptoCodec</code></p>
<p>Comma-separated list of crypto codec implementations for SM4/CTR/NoPadding. The first implementation will be used if available, others are fallbacks.</p></section><section>
<h4><a name="hadoop.security.crypto.cipher.suite"></a>hadoop.security.crypto.cipher.suite</h4>
<p>Default: <code>AES/CTR/NoPadding</code></p>
<p>Cipher suite for the crypto codec. Currently AES/CTR/NoPadding and SM4/CTR/NoPadding are supported.</p></section><section>
<h4><a name="hadoop.security.crypto.jce.provider"></a>hadoop.security.crypto.jce.provider</h4>
<p>Default: None</p>
<p>The JCE provider name used in CryptoCodec.</p></section><section>
<h4><a name="hadoop.security.crypto.buffer.size"></a>hadoop.security.crypto.buffer.size</h4>
<p>Default: <code>8192</code></p>
<p>The buffer size used by CryptoInputStream and CryptoOutputStream.</p></section></section><section>
<h3><a name="Namenode_configuration"></a>Namenode configuration</h3><section>
<h4><a name="dfs.namenode.list.encryption.zones.num.responses"></a>dfs.namenode.list.encryption.zones.num.responses</h4>
<p>Default: <code>100</code></p>
<p>When listing encryption zones, the maximum number of zones that will be returned in a batch. Fetching the list incrementally in batches improves namenode performance.</p></section></section></section><section>
<h2><a name="crypto_command-line_interface"></a><code>crypto</code> command-line interface</h2><section>
<h3><a name="createZone"></a>createZone</h3>
<p>Usage: <code>[-createZone -keyName &lt;keyName&gt; -path &lt;path&gt;]</code></p>
<p>Create a new encryption zone.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> </th>
<th align="left"> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <i>path</i> </td>
<td align="left"> The path of the encryption zone to create. It must be an empty directory. A trash directory is provisioned under this path.</td></tr>
<tr class="a">
<td align="left"> <i>keyName</i> </td>
<td align="left"> Name of the key to use for the encryption zone. Uppercase key names are unsupported. </td></tr>
</tbody>
</table></section><section>
<h3><a name="listZones"></a>listZones</h3>
<p>Usage: <code>[-listZones]</code></p>
<p>List all encryption zones. Requires superuser permissions.</p></section><section>
<h3><a name="provisionTrash"></a>provisionTrash</h3>
<p>Usage: <code>[-provisionTrash -path &lt;path&gt;]</code></p>
<p>Provision a trash directory for an encryption zone.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> </th>
<th align="left"> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <i>path</i> </td>
<td align="left"> The path to the root of the encryption zone. </td></tr>
</tbody>
</table></section><section>
<h3><a name="getFileEncryptionInfo"></a>getFileEncryptionInfo</h3>
<p>Usage: <code>[-getFileEncryptionInfo -path &lt;path&gt;]</code></p>
<p>Get encryption information from a file. This can be used to find out whether a file is being encrypted, and the key name / key version used to encrypt it.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> </th>
<th align="left"> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <i>path</i> </td>
<td align="left"> The path of the file to get encryption information. </td></tr>
</tbody>
</table></section><section>
<h3><a name="reencryptZone"></a>reencryptZone</h3>
<p>Usage: <code>[-reencryptZone &lt;action&gt; -path &lt;zone&gt;]</code></p>
<p>Re-encrypts an encryption zone, by iterating through the encryption zone, and calling the KeyProvider&#x2019;s reencryptEncryptedKeys interface to batch-re-encrypt all files&#x2019; EDEKs with the latest version encryption zone key in the key provider. Requires superuser permissions.</p>
<p>Note that re-encryption does not apply to snapshots, due to snapshots&#x2019; immutable nature.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> </th>
<th align="left"> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <i>action</i> </td>
<td align="left"> The re-encrypt action to perform. Must be either <code>-start</code> or <code>-cancel</code>. </td></tr>
<tr class="a">
<td align="left"> <i>path</i> </td>
<td align="left"> The path to the root of the encryption zone. </td></tr>
</tbody>
</table>
<p>Re-encryption is a NameNode-only operation in HDFS, so it could potentially put an intensive load on the NameNode. The following configurations can be changed to control the stress on the NameNode, depending on the acceptable throughput impact to the cluster.</p>
<table border="0" class="bodyTable">
<thead>
<tr class="a">
<th align="left"> </th>
<th align="left"> </th></tr>
</thead><tbody>
<tr class="b">
<td align="left"> <i>dfs.namenode.reencrypt.batch.size</i> </td>
<td align="left"> The number of EDEKs in a batch to be sent to the KMS for re-encryption. Each batch is processed while holding the name system read/write lock, with throttling happening between batches. See configs below. </td></tr>
<tr class="a">
<td align="left"> <i>dfs.namenode.reencrypt.throttle.limit.handler.ratio</i> </td>
<td align="left"> Ratio of read locks to be held during re-encryption. 1.0 means no throttling. 0.5 means re-encryption can hold the readlock at most 50% of its total processing time. Negative values and 0 are invalid. </td></tr>
<tr class="b">
<td align="left"> <i>dfs.namenode.reencrypt.throttle.limit.updater.ratio</i> </td>
<td align="left"> Ratio of write locks to be held during re-encryption. 1.0 means no throttling. 0.5 means re-encryption can hold the writelock at most 50% of its total processing time. Negative values and 0 are invalid. </td></tr>
</tbody>
</table></section><section>
<h3><a name="listReencryptionStatus"></a>listReencryptionStatus</h3>
<p>Usage: <code>[-listReencryptionStatus]</code></p>
<p>List re-encryption information for all encryption zones. Requires superuser permissions.</p></section></section><section>
<h2><a name="Example_usage"></a>Example usage</h2>
<p>These instructions assume that you are running as the normal user or HDFS superuser as is appropriate. Use <code>sudo</code> as needed for your environment.</p>
<div class="source">
<div class="source">
<pre># As the normal user, create a new encryption key
hadoop key create mykey
# As the super user, create a new empty directory and make it an encryption zone
hadoop fs -mkdir /zone
hdfs crypto -createZone -keyName mykey -path /zone
# chown it to the normal user
hadoop fs -chown myuser:myuser /zone
# As the normal user, put a file in, read it out
hadoop fs -put helloWorld /zone
hadoop fs -cat /zone/helloWorld
# As the normal user, get encryption information from the file
hdfs crypto -getFileEncryptionInfo -path /zone/helloWorld
# console output: {cipherSuite: {name: AES/CTR/NoPadding, algorithmBlockSize: 16}, cryptoProtocolVersion: CryptoProtocolVersion{description='Encryption zones', version=1, unknownValue=null}, edek: 2010d301afbd43b58f10737ce4e93b39, iv: ade2293db2bab1a2e337f91361304cb3, keyName: mykey, ezKeyVersionName: mykey@0}
</pre></div></div>
</section><section>
<h2><a name="Distcp_considerations"></a>Distcp considerations</h2><section>
<h3><a name="Running_as_the_superuser"></a>Running as the superuser</h3>
<p>One common usecase for distcp is to replicate data between clusters for backup and disaster recovery purposes. This is typically performed by the cluster administrator, who is an HDFS superuser.</p>
<p>To enable this same workflow when using HDFS encryption, we introduced a new virtual path prefix, <code>/.reserved/raw/</code>, that gives superusers direct access to the underlying block data in the filesystem. This allows superusers to distcp data without needing to have access to encryption keys, and also avoids the overhead of decrypting and re-encrypting data. It also means the source and destination data will be byte-for-byte identical, which would not be true if the data was being re-encrypted with a new EDEK.</p>
<p>When using <code>/.reserved/raw</code> to distcp encrypted data, it&#x2019;s important to preserve extended attributes with the <a href="../../hadoop-distcp/DistCp.html#Command_Line_Options">-px</a> flag. This is because encrypted file attributes (such as the EDEK) are exposed through extended attributes within <code>/.reserved/raw</code>, and must be preserved to be able to decrypt the file. This means that if the distcp is initiated at or above the encryption zone root, it will automatically create an encryption zone at the destination if it does not already exist. However, it&#x2019;s still recommended that the admin first create identical encryption zones on the destination cluster to avoid any potential mishaps.</p></section><section>
<h3><a name="Copying_into_encrypted_locations"></a>Copying into encrypted locations</h3>
<p>By default, distcp compares checksums provided by the filesystem to verify that the data was successfully copied to the destination. When copying from an unencrypted or encrypted location into an encrypted location, the filesystem checksums will not match since the underlying block data is different because a new EDEK will be used to encrypt at destination. In this case, specify the <a href="../../hadoop-distcp/DistCp.html#Command_Line_Options">-skipcrccheck</a> and <a href="../../hadoop-distcp/DistCp.html#Command_Line_Options">-update</a> distcp flags to avoid verifying checksums.</p></section></section><section>
<h2><a name="Rename_and_Trash_considerations"></a>Rename and Trash considerations</h2>
<p>HDFS restricts file and directory renames across encryption zone boundaries. This includes renaming an encrypted file / directory into an unencrypted directory (e.g., <code>hdfs dfs -mv /zone/encryptedFile /home/bob</code>), renaming an unencrypted file or directory into an encryption zone (e.g., <code>hdfs dfs -mv /home/bob/unEncryptedFile /zone</code>), and renaming between two different encryption zones (e.g., <code>hdfs dfs -mv /home/alice/zone1/foo /home/alice/zone2</code>). In these examples, <code>/zone</code>, <code>/home/alice/zone1</code>, and <code>/home/alice/zone2</code> are encryption zones, while <code>/home/bob</code> is not. A rename is only allowed if the source and destination paths are in the same encryption zone, or both paths are unencrypted (not in any encryption zone).</p>
<p>This restriction enhances security and eases system management significantly. All file EDEKs under an encryption zone are encrypted with the encryption zone key. Therefore, if the encryption zone key is compromised, it is important to identify all vulnerable files and re-encrypt them. This is fundamentally difficult if a file initially created in an encryption zone can be renamed to an arbitrary location in the filesystem.</p>
<p>To comply with the above rule, each encryption zone has its own <code>.Trash</code> directory under the &#x201c;zone directory&#x201d;. E.g., after <code>hdfs dfs -rm /zone/encryptedFile</code>, <code>encryptedFile</code> will be moved to <code>/zone/.Trash</code>, instead of the <code>.Trash</code> directory under the user&#x2019;s home directory. When the entire encryption zone is deleted, the &#x201c;zone directory&#x201d; will be moved to the <code>.Trash</code> directory under the user&#x2019;s home directory.</p>
<p>If the encryption zone is the root directory (e.g., <code>/</code> directory), the trash path of root directory is <code>/.Trash</code>, not the <code>.Trash</code> directory under the user&#x2019;s home directory, and the behavior of renaming sub-directories or sub-files in root directory will keep consistent with the behavior in a general encryption zone, such as <code>/zone</code> which is mentioned at the top of this section.</p>
<p>The <code>crypto</code> command before Hadoop 2.8.0 does not provision the <code>.Trash</code> directory automatically. If an encryption zone is created before Hadoop 2.8.0, and then the cluster is upgraded to Hadoop 2.8.0 or above, the trash directory can be provisioned using <code>-provisionTrash</code> option (e.g., <code>hdfs crypto -provisionTrash -path /zone</code>).</p></section><section>
<h2><a name="Attack_vectors"></a>Attack vectors</h2><section>
<h3><a name="Hardware_access_exploits"></a>Hardware access exploits</h3>
<p>These exploits assume that the attacker has gained physical access to hard drives from cluster machines, i.e. datanodes and namenodes.</p>
<ol style="list-style-type: decimal">
<li>
<p>Access to swap files of processes containing data encryption keys.</p>
<ul>
<li>
<p>By itself, this does not expose cleartext, as it also requires access to encrypted block files.</p>
</li>
<li>
<p>This can be mitigated by disabling swap, using encrypted swap, or using mlock to prevent keys from being swapped out.</p>
</li>
</ul>
</li>
<li>
<p>Access to encrypted block files.</p>
<ul>
<li>By itself, this does not expose cleartext, as it also requires access to DEKs.</li>
</ul>
</li>
</ol></section><section>
<h3><a name="Root_access_exploits"></a>Root access exploits</h3>
<p>These exploits assume that the attacker has gained root shell access to cluster machines, i.e. datanodes and namenodes. Many of these exploits cannot be addressed in HDFS, since a malicious root user has access to the in-memory state of processes holding encryption keys and cleartext. For these exploits, the only mitigation technique is carefully restricting and monitoring root shell access.</p>
<ol style="list-style-type: decimal">
<li>
<p>Access to encrypted block files.</p>
<ul>
<li>By itself, this does not expose cleartext, as it also requires access to encryption keys.</li>
</ul>
</li>
<li>
<p>Dump memory of client processes to obtain DEKs, delegation tokens, cleartext.</p>
<ul>
<li>No mitigation.</li>
</ul>
</li>
<li>
<p>Recording network traffic to sniff encryption keys and encrypted data in transit.</p>
<ul>
<li>By itself, insufficient to read cleartext without the EDEK encryption key.</li>
</ul>
</li>
<li>
<p>Dump memory of datanode process to obtain encrypted block data.</p>
<ul>
<li>By itself, insufficient to read cleartext without the DEK.</li>
</ul>
</li>
<li>
<p>Dump memory of namenode process to obtain encrypted data encryption keys.</p>
<ul>
<li>By itself, insufficient to read cleartext without the EDEK&#x2019;s encryption key and encrypted block files.</li>
</ul>
</li>
</ol></section><section>
<h3><a name="HDFS_admin_exploits"></a>HDFS admin exploits</h3>
<p>These exploits assume that the attacker has compromised HDFS, but does not have root or <code>hdfs</code> user shell access.</p>
<ol style="list-style-type: decimal">
<li>
<p>Access to encrypted block files.</p>
<ul>
<li>By itself, insufficient to read cleartext without the EDEK and EDEK encryption key.</li>
</ul>
</li>
<li>
<p>Access to encryption zone and encrypted file metadata (including encrypted data encryption keys), via -fetchImage.</p>
<ul>
<li>By itself, insufficient to read cleartext without EDEK encryption keys.</li>
</ul>
</li>
</ol></section><section>
<h3><a name="Rogue_user_exploits"></a>Rogue user exploits</h3>
<p>A rogue user can collect keys of files they have access to, and use them later to decrypt the encrypted data of those files. As the user had access to those files, they already had access to the file contents. This can be mitigated through periodic key rolling policies. The <a href="#reencryptZone">reencryptZone</a> command is usually required after key rolling, to make sure the EDEKs on existing files use the new version key.</p>
<p>Manual steps to complete a key rolling and re-encryption are listed below. These instructions assume that you are running as the key admin or HDFS superuser as is appropriate.</p>
<div class="source">
<div class="source">
<pre># As the key admin, roll the key to a new version
hadoop key roll exposedKey
# As the super user, re-encrypt the encryption zone. Possibly list zones first.
hdfs crypto -listZones
hdfs crypto -reencryptZone -start -path /zone
# As the super user, periodically check the status of re-encryption
hdfs crypto -listReencryptionStatus
# As the super user, get encryption information from the file and double check its encryption key version
hdfs crypto -getFileEncryptionInfo -path /zone/helloWorld
# console output: {cipherSuite: {name: AES/CTR/NoPadding, algorithmBlockSize: 16}, cryptoProtocolVersion: CryptoProtocolVersion{description='Encryption zones', version=2, unknownValue=null}, edek: 2010d301afbd43b58f10737ce4e93b39, iv: ade2293db2bab1a2e337f91361304cb3, keyName: exposedKey, ezKeyVersionName: exposedKey@1}
</pre></div></div></section></section>
</div>
</div>
<div class="clear">
<hr/>
</div>
<div id="footer">
<div class="xright">
&#169; 2008-2023
Apache Software Foundation
- <a href="http://maven.apache.org/privacy-policy.html">Privacy Policy</a>.
Apache Maven, Maven, Apache, the Apache feather logo, and the Apache Maven project logos are trademarks of The Apache Software Foundation.
</div>
<div class="clear">
<hr/>
</div>
</div>
</body>
</html>