HDFS-6046. add dfs.client.mmap.enabled (cmccabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1573887 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Colin McCabe 2014-03-04 06:46:08 +00:00
parent 33500fd304
commit c94e43c6df
6 changed files with 86 additions and 6 deletions

View File

@ -373,6 +373,8 @@ Release 2.4.0 - UNRELEASED
HDFS-5950. The DFSClient and DataNode should use shared memory segments to HDFS-5950. The DFSClient and DataNode should use shared memory segments to
communicate short-circuit information. (cmccabe) communicate short-circuit information. (cmccabe)
HDFS-6046. add dfs.client.mmap.enabled (cmccabe)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery

View File

@ -279,6 +279,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory {
final long shortCircuitStreamsCacheExpiryMs; final long shortCircuitStreamsCacheExpiryMs;
final int shortCircuitSharedMemoryWatcherInterruptCheckMs; final int shortCircuitSharedMemoryWatcherInterruptCheckMs;
final boolean shortCircuitMmapEnabled;
final int shortCircuitMmapCacheSize; final int shortCircuitMmapCacheSize;
final long shortCircuitMmapCacheExpiryMs; final long shortCircuitMmapCacheExpiryMs;
final long shortCircuitMmapCacheRetryTimeout; final long shortCircuitMmapCacheRetryTimeout;
@ -398,6 +399,9 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory {
shortCircuitStreamsCacheExpiryMs = conf.getLong( shortCircuitStreamsCacheExpiryMs = conf.getLong(
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY, DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT); DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT);
shortCircuitMmapEnabled = conf.getBoolean(
DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED,
DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED_DEFAULT);
shortCircuitMmapCacheSize = conf.getInt( shortCircuitMmapCacheSize = conf.getInt(
DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE, DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE,
DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT); DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);

View File

@ -429,6 +429,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024; public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic"; public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false; public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
public static final String DFS_CLIENT_MMAP_ENABLED= "dfs.client.mmap.enabled";
public static final boolean DFS_CLIENT_MMAP_ENABLED_DEFAULT = true;
public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size"; public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size";
public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 256; public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 256;
public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms"; public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms";

View File

@ -1571,7 +1571,10 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
"at position " + pos); "at position " + pos);
} }
} }
ByteBuffer buffer = tryReadZeroCopy(maxLength, opts); ByteBuffer buffer = null;
if (dfsClient.getConf().shortCircuitMmapEnabled) {
buffer = tryReadZeroCopy(maxLength, opts);
}
if (buffer != null) { if (buffer != null) {
return buffer; return buffer;
} }

View File

@ -1520,26 +1520,34 @@
</description> </description>
</property> </property>
<property>
<name>dfs.client.mmap.enabled</name>
<value>true</value>
<description>
If this is set to false, the client won't attempt to perform memory-mapped reads.
</description>
</property>
<property> <property>
<name>dfs.client.mmap.cache.size</name> <name>dfs.client.mmap.cache.size</name>
<value>1024</value> <value>256</value>
<description> <description>
When zero-copy reads are used, the DFSClient keeps a cache of recently used When zero-copy reads are used, the DFSClient keeps a cache of recently used
memory mapped regions. This parameter controls the maximum number of memory mapped regions. This parameter controls the maximum number of
entries that we will keep in that cache. entries that we will keep in that cache.
If this is set to 0, we will not allow mmap.
The larger this number is, the more file descriptors we will potentially The larger this number is, the more file descriptors we will potentially
use for memory-mapped files. mmaped files also use virtual address space. use for memory-mapped files. mmaped files also use virtual address space.
You may need to increase your ulimit virtual address space limits before You may need to increase your ulimit virtual address space limits before
increasing the client mmap cache size. increasing the client mmap cache size.
Note that you can still do zero-copy reads when this size is set to 0.
</description> </description>
</property> </property>
<property> <property>
<name>dfs.client.mmap.cache.timeout.ms</name> <name>dfs.client.mmap.cache.timeout.ms</name>
<value>900000</value> <value>3600000</value>
<description> <description>
The minimum length of time that we will keep an mmap entry in the cache The minimum length of time that we will keep an mmap entry in the cache
between uses. If an entry is in the cache longer than this, and nobody between uses. If an entry is in the cache longer than this, and nobody
@ -1558,7 +1566,7 @@
<property> <property>
<name>dfs.client.short.circuit.replica.stale.threshold.ms</name> <name>dfs.client.short.circuit.replica.stale.threshold.ms</name>
<value>3000000</value> <value>1800000</value>
<description> <description>
The maximum amount of time that we will consider a short-circuit replica to The maximum amount of time that we will consider a short-circuit replica to
be valid, if there is no communication from the DataNode. After this time be valid, if there is no communication from the DataNode. After this time

View File

@ -21,6 +21,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
@ -698,4 +700,63 @@ public class TestEnhancedByteBufferAccess {
} }
}, 10, 60000); }, 10, 60000);
} }
@Test
public void testClientMmapDisable() throws Exception {
HdfsConfiguration conf = initZeroCopyTest();
conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, false);
MiniDFSCluster cluster = null;
final Path TEST_PATH = new Path("/a");
final int TEST_FILE_LENGTH = 16385;
final int RANDOM_SEED = 23453;
final String CONTEXT = "testClientMmapDisable";
FSDataInputStream fsIn = null;
DistributedFileSystem fs = null;
conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);
try {
// With DFS_CLIENT_MMAP_ENABLED set to false, we should not do memory
// mapped reads.
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
fsIn = fs.open(TEST_PATH);
try {
fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
Assert.fail("expected zero-copy read to fail when client mmaps " +
"were disabled.");
} catch (UnsupportedOperationException e) {
}
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
fsIn = null;
fs = null;
cluster = null;
try {
// Now try again with DFS_CLIENT_MMAP_CACHE_SIZE == 0. It should work.
conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, true);
conf.setInt(DFS_CLIENT_MMAP_CACHE_SIZE, 0);
conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT + ".1");
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
fsIn = fs.open(TEST_PATH);
ByteBuffer buf = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
fsIn.releaseBuffer(buf);
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
}
} }