HDFS-7147. Update archival storage user documentation. Contributed by Tsz Wo Nicholas Sze.

This commit is contained in:
Haohui Mai 2014-11-03 15:10:22 -08:00
parent 277141b82d
commit d2d2c84a10
5 changed files with 74 additions and 254 deletions

View File

@ -848,6 +848,9 @@ Release 2.6.0 - UNRELEASED
HDFS-7291. Persist in-memory replicas with appropriate unbuffered copy API HDFS-7291. Persist in-memory replicas with appropriate unbuffered copy API
on POSIX and Windows. (Xiaoyu Yao via cnauroth) on POSIX and Windows. (Xiaoyu Yao via cnauroth)
HDFS-7147. Update archival storage user documentation.
(Tsz Wo Nicholas Sze via wheat9)
BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS
HDFS-6387. HDFS CLI admin tool for creating & deleting an HDFS-6387. HDFS CLI admin tool for creating & deleting an

View File

@ -18,7 +18,9 @@
package org.apache.hadoop.hdfs.server.blockmanagement; package org.apache.hadoop.hdfs.server.blockmanagement;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.XAttr;
import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.hdfs.XAttrHelper;
@ -104,9 +106,11 @@ public class BlockStoragePolicySuite {
} }
public BlockStoragePolicy getPolicy(String policyName) { public BlockStoragePolicy getPolicy(String policyName) {
Preconditions.checkNotNull(policyName);
if (policies != null) { if (policies != null) {
for (BlockStoragePolicy policy : policies) { for (BlockStoragePolicy policy : policies) {
if (policy != null && policy.getName().equals(policyName)) { if (policy != null && policy.getName().equalsIgnoreCase(policyName)) {
return policy; return policy;
} }
} }

View File

@ -1,118 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Do not modify this file directly. Instead, copy entries that you wish -->
<!-- to modify from this file into blockStoragePolicy-site.xml and change -->
<!-- there. If blockStoragePolicy-site.xml does not exist, create it. -->
<configuration>
<property>
<name>dfs.block.storage.policies</name>
<value>HOT:12, WARM:8, COLD:4</value>
<description>
A list of block storage policy names and IDs. The syntax is
NAME_1:ID_1, NAME_2:ID_2, ..., NAME_n:ID_n
where ID is an integer in the range [1,15] and NAME is case insensitive.
The first element is the default policy. Empty list is not allowed.
</description>
</property>
<!-- Block Storage Policy HOT:12 -->
<property>
<name>dfs.block.storage.policy.12</name>
<value>DISK</value>
<description>
A list of storage types for storing the block replicas such as
STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n
When creating a block, the i-th replica is stored using i-th storage type
for i less than or equal to n, and
the j-th replica is stored using n-th storage type for j greater than n.
Empty list is not allowed.
Examples:
DISK : all replicas stored using DISK.
DISK, ARCHIVE : the first replica is stored using DISK and all the
remaining replicas are stored using ARCHIVE.
</description>
</property>
<property>
<name>dfs.block.storage.policy.creation-fallback.12</name>
<value></value>
<description>
A list of storage types for creation fallback storage.
STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n
When creating a block, if a particular storage type specified in the policy
is unavailable, the fallback STORAGE_TYPE_1 is used. Further, if
STORAGE_TYPE_i is also unavailable, the fallback STORAGE_TYPE_(i+1) is used.
In case that all fallback storages are unavailable, the block will be created
with number of replicas less than the specified replication factor.
An empty list indicates that there is no fallback storage.
</description>
</property>
<property>
<name>dfs.block.storage.policy.replication-fallback.12</name>
<value>ARCHIVE</value>
<description>
Similar to dfs.block.storage.policy.creation-fallback.x but for replication.
</description>
</property>
<!-- Block Storage Policy WARM:8 -->
<property>
<name>dfs.block.storage.policy.8</name>
<value>DISK, ARCHIVE</value>
</property>
<property>
<name>dfs.block.storage.policy.creation-fallback.8</name>
<value>DISK, ARCHIVE</value>
</property>
<property>
<name>dfs.block.storage.policy.replication-fallback.8</name>
<value>DISK, ARCHIVE</value>
</property>
<!-- Block Storage Policy COLD:4 -->
<property>
<name>dfs.block.storage.policy.4</name>
<value>ARCHIVE</value>
</property>
<property>
<name>dfs.block.storage.policy.creation-fallback.4</name>
<value></value>
</property>
<property>
<name>dfs.block.storage.policy.replication-fallback.4</name>
<value></value>
</property>
</configuration>

View File

@ -11,12 +11,12 @@
~~ limitations under the License. See accompanying LICENSE file. ~~ limitations under the License. See accompanying LICENSE file.
--- ---
HDFS Archival Storage Archival Storage, SSD & Memory
--- ---
--- ---
${maven.build.timestamp} ${maven.build.timestamp}
HDFS Archival Storage Archival Storage, SSD & Memory
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
@ -29,9 +29,13 @@ HDFS Archival Storage
Adding more nodes to the cold storage can grow the storage independent of the compute capacity Adding more nodes to the cold storage can grow the storage independent of the compute capacity
in the cluster. in the cluster.
The frameworks provided by Heterogeneous Storage and Archival Storage generalize the HDFS architecture
to include other kinds of storage media including <SSD> and <memory>.
Users may choose to store their data in SSD or memory for a better performance.
* {Storage Types and Storage Policies} * {Storage Types and Storage Policies}
** {Storage Types: DISK, SSD and ARCHIVE} ** {Storage Types: ARCHIVE, DISK, SSD and RAM_DISK}
The first phase of The first phase of
{{{https://issues.apache.org/jira/browse/HDFS-2832}Heterogeneous Storage (HDFS-2832)}} {{{https://issues.apache.org/jira/browse/HDFS-2832}Heterogeneous Storage (HDFS-2832)}}
@ -45,7 +49,9 @@ HDFS Archival Storage
which has high storage density (petabyte of storage) but little compute power, which has high storage density (petabyte of storage) but little compute power,
is added for supporting archival storage. is added for supporting archival storage.
** {Storage Policies: Hot, Warm and Cold} Another new storage type <RAM_DISK> is added for supporting writing single replica files in memory.
** {Storage Policies: Hot, Warm, Cold, All_SSD, One_SSD and Lazy_Persist}
A new concept of storage policies is introduced in order to allow files to be stored A new concept of storage policies is introduced in order to allow files to be stored
in different storage types according to the storage policy. in different storage types according to the storage policy.
@ -65,6 +71,14 @@ HDFS Archival Storage
When a block is warm, some of its replicas are stored in DISK When a block is warm, some of its replicas are stored in DISK
and the remaining replicas are stored in ARCHIVE. and the remaining replicas are stored in ARCHIVE.
* <<All_SSD>> - for storing all replicas in SSD.
* <<One_SSD>> - for storing one of the replicas in SSD.
The remaining replicas are stored in DISK.
* <<Lazy_Persist>> - for writing blocks with single replica in memory.
The replica is first written in RAM_DISK and then it is lazily persisted in DISK.
[] []
More formally, a storage policy consists of the following fields: More formally, a storage policy consists of the following fields:
@ -89,149 +103,54 @@ HDFS Archival Storage
The following is a typical storage policy table. The following is a typical storage policy table.
*--------+---------------+-------------------------+-----------------------+-----------------------+ *--------+---------------+--------------------------+-----------------------+-----------------------+
| <<Policy>> | <<Policy>>| <<Block Placement>> | <<Fallback storages>> | <<Fallback storages>> | | <<Policy>> | <<Policy>>| <<Block Placement>> | <<Fallback storages>> | <<Fallback storages>> |
| <<ID>> | <<Name>> | <<(n\ replicas)>> | <<for creation>> | <<for replication>> | | <<ID>> | <<Name>> | <<(n\ replicas)>> | <<for creation>> | <<for replication>> |
*--------+---------------+-------------------------+-----------------------+-----------------------+ *--------+---------------+--------------------------+-----------------------+-----------------------+
| 12 | Hot (default) | DISK: <n> | \<none\> | ARCHIVE | | 15 | Lazy_Persist | RAM_DISK: 1, DISK: <n>-1 | DISK | DISK |
*--------+---------------+-------------------------+-----------------------+-----------------------+ *--------+---------------+--------------------------+-----------------------+-----------------------+
| 8 | Warm | DISK: 1, ARCHIVE: <n>-1 | ARCHIVE, DISK | ARCHIVE, DISK | | 12 | All_SSD | SSD: <n> | DISK | DISK |
*--------+---------------+-------------------------+-----------------------+-----------------------+ *--------+---------------+--------------------------+-----------------------+-----------------------+
| 4 | Cold | ARCHIVE: <n> | \<none\> | \<none\> | | 10 | One_SSD | SSD: 1, DISK: <n>-1 | SSD, DISK | SSD, DISK |
*--------+---------------+-------------------------+-----------------------+-----------------------+ *--------+---------------+--------------------------+-----------------------+-----------------------+
| 7 | Hot (default) | DISK: <n> | \<none\> | ARCHIVE |
*--------+---------------+--------------------------+-----------------------+-----------------------+
| 5 | Warm | DISK: 1, ARCHIVE: <n>-1 | ARCHIVE, DISK | ARCHIVE, DISK |
*--------+---------------+--------------------------+-----------------------+-----------------------+
| 2 | Cold | ARCHIVE: <n> | \<none\> | \<none\> |
*--------+---------------+--------------------------+-----------------------+-----------------------+
Note that cluster administrators may change the storage policy table Note that the Lazy_Persist policy is useful only for single replica blocks.
according to the characteristic of the cluster. For blocks with more than one replica, all the replicas will be written to DISK
For example, in order to prevent losing archival data, since writing only one of the replicas to RAM_DISK does not improve the overall performance.
administrators may want to use DISK as fallback storage for replication in the Cold policy.
A drawback of such setting is that the DISK storages could be filled up with archival data.
As a result, the entire cluster may become full and cannot serve hot data anymore.
** {Configurations} ** {Storage Policy Resolution}
*** {Setting The List of All Storage Policies} When a file or directory is created, its storage policy is <unspecified>.
The storage policy can be specified using
the "<<<{{{Set Storage Policy}dfsadmin -setStoragePolicy}}>>>" command.
The effective storage policy of a file or directory is resolved by the following rules.
* <<dfs.block.storage.policies>> [[1]] If the file or directory is specified with a storage policy, return it.
- a list of block storage policy names and IDs.
The syntax is
NAME_1:ID_1, NAME_2:ID_2, ..., NAME_<n>:ID_<n> [[2]] For an unspecified file or directory,
if it is the root directory, return the <default storage policy>.
where ID is an integer in the closed range [1,15] and NAME is case insensitive. Otherwise, return its parent's effective storage policy.
The first element is the <default policy>. Empty list is not allowed.
The default value is shown below.
+------------------------------------------+
<property>
<name>dfs.block.storage.policies</name>
<value>HOT:12, WARM:8, COLD:4</value>
</property>
+------------------------------------------+
[] []
*** {Setting Storage Policy Details} The effective storage policy can be retrieved by
the "<<<{{{Get Storage Policy}dfsadmin -getStoragePolicy}}>>>" command.
The following configuration properties are for setting the details of each storage policy,
where <<<\<ID\>>>> is the actual policy ID.
* <<dfs.block.storage.policy.\<ID\>>> ** {Configuration}
- a list of storage types for storing the block replicas.
The syntax is
STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_<n> * <<dfs.storage.policy.enabled>>
- for enabling/disabling the storage policy feature.
When creating a block, the <i>-th replica is stored using <i>-th storage type The default value is <<<true>>>.
for <i> less than or equal to <n>, and
the <j>-th replica is stored using <n>-th storage type for <j> greater than <n>.
Empty list is not allowed.
Examples:
+------------------------------------------+
DISK : all replicas stored using DISK.
DISK, ARCHIVE : the first replica is stored using DISK and all the
remaining replicas are stored using ARCHIVE.
+------------------------------------------+
* <<dfs.block.storage.policy.creation-fallback.\<ID\>>>
- a list of storage types for creation fallback storage.
The syntax is
STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n
When creating a block, if a particular storage type specified in the policy
is unavailable, the fallback STORAGE_TYPE_1 is used. Further, if
STORAGE_TYPE_<i> is also unavailable, the fallback STORAGE_TYPE_<(i+1)> is used.
In case all fallback storages are unavailable, the block will be created
with number of replicas less than the specified replication factor.
An empty list indicates that there is no fallback storage.
* <<dfs.block.storage.policy.replication-fallback.\<ID\>>>
- a list of storage types for replication fallback storage.
The usage of this configuration property is similar to
<<<dfs.block.storage.policy.creation-fallback.\<ID\>>>>
except that it takes effect on replication but not block creation.
[] []
The following are the default configuration values for Hot, Warm and Cold storage policies.
* Block Storage Policy <<HOT:12>>
+------------------------------------------+
<property>
<name>dfs.block.storage.policy.12</name>
<value>DISK</value>
</property>
<property>
<name>dfs.block.storage.policy.creation-fallback.12</name>
<value></value>
</property>
<property>
<name>dfs.block.storage.policy.replication-fallback.12</name>
<value>ARCHIVE</value>
</property>
+------------------------------------------+
* Block Storage Policy <<WARM:8>>
+------------------------------------------+
<property>
<name>dfs.block.storage.policy.8</name>
<value>DISK, ARCHIVE</value>
</property>
<property>
<name>dfs.block.storage.policy.creation-fallback.8</name>
<value>DISK, ARCHIVE</value>
</property>
<property>
<name>dfs.block.storage.policy.replication-fallback.8</name>
<value>DISK, ARCHIVE</value>
</property>
+------------------------------------------+
* Block Storage Policy <<COLD:4>>
+------------------------------------------+
<property>
<name>dfs.block.storage.policy.4</name>
<value>ARCHIVE</value>
</property>
<property>
<name>dfs.block.storage.policy.creation-fallback.4</name>
<value></value>
</property>
<property>
<name>dfs.block.storage.policy.replication-fallback.4</name>
<value></value>
</property>
+------------------------------------------+
[]
* {Mover - A New Data Migration Tool} * {Mover - A New Data Migration Tool}
@ -261,7 +180,19 @@ hdfs mover [-p <files/dirs> | -f <local file name>]
[] []
* {<<<DFSAdmin>>> Commands} * {Storage Policy Commands}
** {List Storage Policies}
List out all the storage policies.
* Command:
+------------------------------------------+
hdfs storagepolicies
+------------------------------------------+
* Arguments: none.
** {Set Storage Policy} ** {Set Storage Policy}

View File

@ -93,7 +93,7 @@
<item name="Extended Attributes" href="hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html"/> <item name="Extended Attributes" href="hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html"/>
<item name="Transparent Encryption" href="hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html"/> <item name="Transparent Encryption" href="hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html"/>
<item name="HDFS Support for Multihoming" href="hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html"/> <item name="HDFS Support for Multihoming" href="hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html"/>
<item name="Archival Storage" href="hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html"/> <item name="Archival Storage, SSD &amp; Memory" href="hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html"/>
</menu> </menu>
<menu name="MapReduce" inherit="top"> <menu name="MapReduce" inherit="top">