From 456426fe6b0a7d5ef9bea2a5c4c06ce3023caf8a Mon Sep 17 00:00:00 2001
From: Zhe Zhang
Date: Tue, 1 Dec 2015 10:24:31 -0800
Subject: [PATCH] HDFS-9269. Update the documentation and wrapper for
 fuse-dfs. Contributed by Wei-Chiu Chuang.

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

Change-Id: Idebd3b5d2ab09910c71cfde19e4b4d88f7ce1efb
---
 .../src/main/native/fuse-dfs/doc/README            | 26 ++++++--------
 .../main/native/fuse-dfs/fuse_dfs_wrapper.sh       | 34 +++++++++++--------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt        |  3 ++
 3 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README
index 1744892bd82..672265e1a71 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README
@@ -16,45 +16,43 @@
 
 # Fuse-DFS
 
-Supports reads, writes, and directory operations (e.g., cp, ls, more, cat, find, less, rm, mkdir, mv, rmdir). Things like touch, chmod, chown, and permissions are in the works. Fuse-dfs currently shows all files as owned by nobody.
+Fuse-DFS allows HDFS to be mounted as a local file system.
+It currently supports reads, writes, and directory operations (e.g., cp, ls, more, cat, find, less, rm, mkdir, mv, rmdir, touch, chmod, chown and permissions). Random access writing is not supported.
 
 Contributing
 
-It's pretty straightforward to add functionality to fuse-dfs as fuse makes things relatively simple. Some other tasks require also augmenting libhdfs to expose more hdfs functionality to C. See [http://issues.apache.org/jira/secure/IssueNavigator.jspa?reset=true&mode=hide&pid=12310240&sorter/order=DESC&sorter/field=priority&resolution=-1&component=12312376 contrib/fuse-dfs JIRAs]
+It's pretty straightforward to add functionality to fuse-dfs as fuse makes things relatively simple. Some other tasks require also augmenting libhdfs to expose more hdfs functionality to C. See [https://issues.apache.org/jira/issues/?jql=text%20~%20%22fuse-dfs%22 fuse-dfs JIRAs]
 
 Requirements
 
 * Hadoop with compiled libhdfs.so
 * Linux kernel > 2.6.9 with fuse, which is the default or Fuse 2.7.x, 2.8.x installed. See: [http://fuse.sourceforge.net/]
 * modprobe fuse to load it
- * fuse-dfs executable (see below)
+ * fuse_dfs executable (see below)
 * fuse_dfs_wrapper.sh installed in /bin or other appropriate location (see below)
 
 BUILDING
 
-   1. in HADOOP_PREFIX: `ant compile-libhdfs -Dlibhdfs=1
-   2. in HADOOP_PREFIX: `ant package` to deploy libhdfs
-   3. in HADOOP_PREFIX: `ant compile-contrib -Dlibhdfs=1 -Dfusedfs=1`
+   fuse-dfs executable can be built by setting `require.fuse` option to true using Maven. For example:
+   in HADOOP_PREFIX: `mvn package -Pnative -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true`
 
-NOTE: for amd64 architecture, libhdfs will not compile unless you edit
-the Makefile in src/c++/libhdfs/Makefile and set OS_ARCH=amd64
-(probably the same for others too). See [https://issues.apache.org/jira/browse/HADOOP-3344 HADOOP-3344]
+   The executable `fuse_dfs` will be located at HADOOP_PREFIX/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/
 
 Common build problems include not finding the libjvm.so in JAVA_HOME/jre/lib/OS_ARCH/server or not finding fuse in FUSE_HOME or /usr/local.
 
 CONFIGURING
 
-Look at all the paths in fuse_dfs_wrapper.sh and either correct them or set them in your environment before running. (note for automount and mount as root, you probably cannot control the environment, so best to set them in the wrapper)
+fuse_dfs_wrapper.sh may not work out of the box. To use it, look at all the paths in fuse_dfs_wrapper.sh and either correct them or set them in your environment before running. (note for automount and mount as root, you probably cannot control the environment, so best to set them in the wrapper)
 
 INSTALLING
 
 1. `mkdir /export/hdfs` (or wherever you want to mount it)
 
-2. `fuse_dfs_wrapper.sh dfs://hadoop_server1.foo.com:9000 /export/hdfs -d` and from another terminal, try `ls /export/hdfs`
+2. `fuse_dfs_wrapper.sh dfs://hadoop_server1.foo.com:9000 /export/hdfs -odebug` and from another terminal, try `ls /export/hdfs`
 
-If 2 works, try again dropping the debug mode, i.e., -d
+If 2 works, try again dropping the debug mode, i.e., -odebug
 
 (note - common problems are that you don't have libhdfs.so or libjvm.so or libfuse.so on your LD_LIBRARY_PATH, and your CLASSPATH does not contain hadoop and other required jars.)
 
@@ -111,7 +109,7 @@ NOTE - you cannot export this with a FUSE module built into the kernel
 
 RECOMMENDATIONS
 
-1. From /bin, `ln -s $HADOOP_PREFIX/contrib/fuse-dfs/fuse_dfs* .`
+1. From /bin, `ln -s HADOOP_PREFIX/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs* .`
 
 2. Always start with debug on so you can see if you are missing a classpath or something like that.
 
@@ -127,5 +125,3 @@ this is very slow. see [https://issues.apache.org/jira/browse/HADOOP-3797 HADOOP
 2. Writes are approximately 33% slower than the DFSClient. TBD how to optimize this. see: [https://issues.apache.org/jira/browse/HADOOP-3805 HADOOP-3805] - try using -obig_writes if on a >2.6.26 kernel, should perform much better since bigger writes implies less context switching.
 
 3. Reads are ~20-30% slower even with the read buffering.
-
-4. fuse-dfs and underlying libhdfs have no support for permissions. See [https://issues.apache.org/jira/browse/HADOOP-3536 HADOOP-3536]
diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs_wrapper.sh b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs_wrapper.sh
index 97239cc480a..26dfd19005f 100755
--- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs_wrapper.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs_wrapper.sh
@@ -16,7 +16,12 @@
 # limitations under the License.
 #
 
-export HADOOP_PREFIX=${HADOOP_PREFIX:-/usr/local/share/hadoop}
+if [ "$HADOOP_PREFIX" = "" ]; then
+  echo "HADOOP_PREFIX is empty. Set it to the root directory of Hadoop source code"
+  exit 1
+fi
+export FUSEDFS_PATH="$HADOOP_PREFIX/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs"
+export LIBHDFS_PATH="$HADOOP_PREFIX/hadoop-hdfs-project/hadoop-hdfs-native-client/target/usr/local/lib"
 
 if [ "$OS_ARCH" = "" ]; then
   export OS_ARCH=amd64
@@ -30,17 +35,18 @@ if [ "$LD_LIBRARY_PATH" = "" ]; then
   export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/$OS_ARCH/server:/usr/local/lib
 fi
 
-# If dev build set paths accordingly
-if [ -d $HADOOP_PREFIX/build ]; then
-  export HADOOP_PREFIX=$HADOOP_PREFIX
-  for f in ${HADOOP_PREFIX}/build/*.jar ; do
-    export CLASSPATH=$CLASSPATH:$f
-  done
-  for f in $HADOOP_PREFIX/build/ivy/lib/hadoop-hdfs/common/*.jar ; do
-    export CLASSPATH=$CLASSPATH:$f
-  done
-  export PATH=$HADOOP_PREFIX/build/contrib/fuse-dfs:$PATH
-  export LD_LIBRARY_PATH=$HADOOP_PREFIX/build/c++/lib:$JAVA_HOME/jre/lib/$OS_ARCH/server
-fi
+while IFS= read -r -d '' file
+do
+  export CLASSPATH=$CLASSPATH:$file
+done < <(find "$HADOOP_PREFIX/hadoop-client" -name "*.jar" -print0)
 
-fuse_dfs $@
+while IFS= read -r -d '' file
+do
+  export CLASSPATH=$CLASSPATH:$file
+done < <(find "$HADOOP_PREFIX/hadoop-hdfs-project" -name "*.jar" -print0)
+
+export CLASSPATH=$HADOOP_CONF_DIR:$CLASSPATH
+export PATH=$FUSEDFS_PATH:$PATH
+export LD_LIBRARY_PATH=$LIBHDFS_PATH:$JAVA_HOME/jre/lib/$OS_ARCH/server
+
+fuse_dfs "$@"
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 12428488a15..586790f7314 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -826,6 +826,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9485. Make BlockManager#removeFromExcessReplicateMap accept
     BlockInfo instead of Block. (Mingliang Liu via jing9)
 
+    HDFS-9269. Update the documentation and wrapper for fuse-dfs.
+    (Wei-Chiu Chuang via zhz)
+
   OPTIMIZATIONS
 
     HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
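
For reference, an end-to-end session following the updated README and wrapper might look like the sketch below. It is illustrative only and not part of the patch: the mount point /export/hdfs and the NameNode address hadoop_server1.foo.com:9000 are the placeholder values from the README above, /path/to/hadoop is a hypothetical checkout location for HADOOP_PREFIX, and `fusermount -u` is the standard FUSE unmount tool rather than anything introduced by this change.

  # Point HADOOP_PREFIX at the root of the Hadoop source tree;
  # the updated wrapper exits if it is unset.
  export HADOOP_PREFIX=/path/to/hadoop
  cd "$HADOOP_PREFIX"

  # Build libhdfs and the fuse_dfs executable (native profile, fuse required).
  mvn package -Pnative -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true

  # Create a mount point and mount HDFS with debug output first.
  mkdir -p /export/hdfs
  hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/fuse_dfs_wrapper.sh \
    dfs://hadoop_server1.foo.com:9000 /export/hdfs -odebug

  # From another terminal: verify the mount, then unmount when finished.
  ls /export/hdfs
  fusermount -u /export/hdfs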