From b7b8db51dec5e14aa13ff473b5fbc21d661ded6c Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 23 Jul 2012 16:27:59 +0000 Subject: [PATCH] HDFS-3697. Enable fadvise readahead by default. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1364698 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 +- .../src/main/resources/hdfs-default.xml | 74 +++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index eb3e60ebe40..2ea4cf39716 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -363,6 +363,8 @@ Branch-2 ( Unreleased changes ) HDFS-3110. Use directRead API to reduce the number of buffer copies in libhdfs (Henry Robinson via todd) + HDFS-3697. Enable fadvise readahead by default. (todd) + BUG FIXES HDFS-3385. 
The last block of INodeFileUnderConstruction is not diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index ddadbdd44f7..f97c9034e4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -74,7 +74,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY = "dfs.datanode.balance.bandwidthPerSec"; public static final long DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_DEFAULT = 1024*1024; public static final String DFS_DATANODE_READAHEAD_BYTES_KEY = "dfs.datanode.readahead.bytes"; - public static final long DFS_DATANODE_READAHEAD_BYTES_DEFAULT = 0; + public static final long DFS_DATANODE_READAHEAD_BYTES_DEFAULT = 4 * 1024 * 1024; // 4MB public static final String DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_KEY = "dfs.datanode.drop.cache.behind.writes"; public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_DEFAULT = false; public static final String DFS_DATANODE_SYNC_BEHIND_WRITES_KEY = "dfs.datanode.sync.behind.writes"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 00c5e8bc446..4815ea147ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -714,6 +714,80 @@ + + dfs.datanode.readahead.bytes + 4194304 + + While reading block files, if the Hadoop native libraries are available, + the datanode can use the posix_fadvise system call to explicitly + page data into the operating system buffer cache ahead of the current + reader's position. 
This can improve performance especially when + disks are highly contended. + + This configuration specifies the number of bytes ahead of the current + read position which the datanode will attempt to read ahead. This + feature may be disabled by configuring this property to 0. + + If the native libraries are not available, this configuration has no + effect. + + + + + dfs.datanode.drop.cache.behind.reads + false + + In some workloads, the data read from HDFS is known to be significantly + large enough that it is unlikely to be useful to cache it in the + operating system buffer cache. In this case, the DataNode may be + configured to automatically purge all data from the buffer cache + after it is delivered to the client. This behavior is automatically + disabled for workloads which read only short sections of a block + (e.g. HBase random-IO workloads). + + This may improve performance for some workloads by freeing buffer + cache space usage for more cacheable data. + + If the Hadoop native libraries are not available, this configuration + has no effect. + + + + + dfs.datanode.drop.cache.behind.writes + false + + In some workloads, the data written to HDFS is known to be significantly + large enough that it is unlikely to be useful to cache it in the + operating system buffer cache. In this case, the DataNode may be + configured to automatically purge all data from the buffer cache + after it is written to disk. + + This may improve performance for some workloads by freeing buffer + cache space usage for more cacheable data. + + If the Hadoop native libraries are not available, this configuration + has no effect. + + + + + dfs.datanode.sync.behind.writes + false + + If this configuration is enabled, the datanode will instruct the + operating system to enqueue all written data to the disk immediately + after it is written. This differs from the usual OS policy which + may wait for up to 30 seconds before triggering writeback. 
+ + This may improve performance for some workloads by smoothing the + IO profile for data written to disk. + + If the Hadoop native libraries are not available, this configuration + has no effect. + + + dfs.client.failover.max.attempts 15