diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java index b49aef462cd..77e67a140f0 100644 --- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java +++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java @@ -206,6 +206,12 @@ public class Nfs3Constant { public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs"; public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump"; public static final boolean ENABLE_FILE_DUMP_DEFAULT = true; + public static final String MAX_READ_TRANSFER_SIZE_KEY = "dfs.nfs.rtmax"; + public static final int MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024; + public static final String MAX_WRITE_TRANSFER_SIZE_KEY = "dfs.nfs.wtmax"; + public static final int MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024; + public static final String MAX_READDIR_TRANSFER_SIZE_KEY = "dfs.nfs.dtmax"; + public static final int MAX_READDIR_TRANSFER_SIZE_DEFAULT = 64 * 1024; public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files"; public static final int MAX_OPEN_FILES_DEFAULT = 256; public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index e809b72d608..f5fa03e7eeb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -142,9 +142,6 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { (short) DEFAULT_UMASK); static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class); - private static final int MAX_READ_TRANSFER_SIZE = 64 * 1024; - private static final int MAX_WRITE_TRANSFER_SIZE = 64 * 1024; - private static final int MAX_READDIR_TRANSFER_SIZE = 64 * 1024; private final Configuration config = new Configuration(); private final WriteManager writeManager; @@ -553,7 +550,11 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { + handle.getFileId()); return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT); } - if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) { + int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY, + Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT); + if (rtmax < target.getBytes().length) { + LOG.error("Link size: " + target.getBytes().length + + " is larger than max transfer size: " + rtmax); return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr, new byte[0]); } @@ -649,7 +650,9 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } try { - int buffSize = Math.min(MAX_READ_TRANSFER_SIZE, count); + int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY, + Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT); + int buffSize = Math.min(rtmax, count); byte[] readbuffer = new byte[buffSize]; int readCount = 0; @@ -1714,9 +1717,12 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { } try { - int rtmax = MAX_READ_TRANSFER_SIZE; - int wtmax = MAX_WRITE_TRANSFER_SIZE; - int dtperf = MAX_READDIR_TRANSFER_SIZE; + int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY, + Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT); + int wtmax = config.getInt(Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_KEY, + Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_DEFAULT); + int dtperf = config.getInt(Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_KEY, + Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_DEFAULT); Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient, Nfs3Utils.getFileIdPath(handle), iug); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index fd30f9bddee..bb64b295244 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -156,6 +156,9 @@ Release 2.4.0 - UNRELEASED HDFS-6072. Clean up dead code of FSImage. (wheat9) + HDFS-6080. Improve NFS gateway performance by making rtmax and wtmax + configurable. (Abin Shahab via brandonli) + OPTIMIZATIONS HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm index d99692d9cec..fe1b4dbbef0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm @@ -48,35 +48,48 @@ HDFS NFS Gateway The user running the NFS-gateway must be able to proxy all the users using the NFS mounts. For instance, if user 'nfsserver' is running the gateway, and users belonging to the groups 'nfs-users1' - and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set: + and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set + (NOTE: replace 'nfsserver' with the user name starting the gateway in your cluster): + ---- hadoop.proxyuser.nfsserver.groups nfs-users1,nfs-users2 - The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and 'nfs-users2' groups. Set this to '*' to allow nfsserver user to proxy any group. + The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and + 'nfs-users2' groups. Set this to '*' to allow nfsserver user to proxy any group. +---- + +---- hadoop.proxyuser.nfsserver.hosts nfs-client-host1.com - This is the host where the nfs gateway is running. Set this to '*' to allow requests from any hosts to be proxied. + This is the host where the nfs gateway is running. Set this to '*' to allow + requests from any hosts to be proxied. ---- + The above are the only required configuration for the NFS gateway in non-secure mode. For Kerberized hadoop clusters, the following configurations need to be added to hdfs-site.xml: + ---- - - dfs.nfsgateway.keytab.file - /etc/hadoop/conf/nfsserver.keytab - - - dfs.nfsgateway.kerberos.principal - nfsserver/_HOST@YOUR-REALM.COM - + + dfs.nfsgateway.keytab.file + /etc/hadoop/conf/nfsserver.keytab + ---- + +---- + + dfs.nfsgateway.kerberos.principal + nfsserver/_HOST@YOUR-REALM.COM + +---- + It's strongly recommended for the users to update a few configuration properties based on their use cases. All the related configuration properties can be added or updated in hdfs-site.xml. @@ -90,31 +103,61 @@ HDFS NFS Gateway dfs.namenode.accesstime.precision 3600000 - The access time for HDFS file is precise upto this value. + The access time for HDFS file is precise upto this value. The default value is 1 hour. Setting a value of 0 disables access times for HDFS. ---- - * Users are expected to update the file dump directory. NFS client often + * Users are expected to update the file dump directory. NFS client often reorders writes. Sequential writes can arrive at the NFS gateway at random order. This directory is used to temporarily save out-of-order writes - before writing to HDFS. For each file, the out-of-order writes are dumped after + before writing to HDFS. For each file, the out-of-order writes are dumped after they are accumulated to exceed certain threshold (e.g., 1MB) in memory. One needs to make sure the directory has enough - space. For example, if the application uploads 10 files with each having + space. For example, if the application uploads 10 files with each having 100MB, it is recommended for this directory to have roughly 1GB space in case if a - worst-case write reorder happens to every file. Only NFS gateway needs to restart after + worst-case write reorder happens to every file. Only NFS gateway needs to restart after this property is updated. ---- - dfs.nfs3.dump.dir + dfs.nfs3.dump.dir /tmp/.hdfs-nfs ---- + * For optimal performance, it is recommended that rtmax be updated to + 1MB. However, note that this 1MB is a per client allocation, and not + from a shared memory pool, and therefore a larger value may adversely + affect small reads, consuming a lot of memory. The maximum value of + this property is 1MB. + +---- + + dfs.nfs.rtmax + 1048576 + This is the maximum size in bytes of a READ request + supported by the NFS gateway. If you change this, make sure you + also update the nfs mount's rsize(add rsize= # of bytes to the + mount directive). + + +---- + +---- + + dfs.nfs.wtmax + 65536 + This is the maximum size in bytes of a WRITE request + supported by the NFS gateway. If you change this, make sure you + also update the nfs mount's wsize(add wsize= # of bytes to the + mount directive). + + +---- + * By default, the export can be mounted by any client. To better control the access, users can update the following property. The value string contains machine name and access privilege, separated by whitespace