HADOOP-4687. move test dirs

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/HADOOP-4687/core@776186 13f79535-47bb-0310-9956-ffa450edef68
Owen O'Malley 2009-05-19 04:56:52 +00:00
parent c967cef4d3
commit 95a0db602b
36 changed files with 7550 additions and 0 deletions

59
src/saveVersion.sh Executable file

@ -0,0 +1,59 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is used to generate the package-info.java class that
# records the version, revision, branch, user, timestamp, and url
unset LANG
unset LC_CTYPE
version=$1
user=`whoami`
date=`date`
cwd=`pwd`
if [ -d .git ]; then
  revision=`git log -1 --pretty=format:"%H"`
  hostname=`hostname`
  branch=`git branch | sed -n -e 's/^* //p'`
  url="git://${hostname}${cwd}"
elif [ -d .svn ]; then
  revision=`svn info | sed -n -e 's/Last Changed Rev: \(.*\)/\1/p'`
  url=`svn info | sed -n -e 's/URL: \(.*\)/\1/p'`
  # Get canonical branch (branches/X, tags/X, or trunk)
  branch=`echo $url | sed -n -e 's,.*\(branches/.*\)$,\1,p' \
                             -e 's,.*\(tags/.*\)$,\1,p' \
                             -e 's,.*trunk$,trunk,p'`
else
  revision="Unknown"
  branch="Unknown"
  url="file://$cwd"
fi
srcChecksum=`find src -name '*.java' | LC_ALL=C sort | xargs md5sum | md5sum | cut -d ' ' -f 1`
mkdir -p build/src/org/apache/hadoop
cat << EOF | \
  sed -e "s/VERSION/$version/" -e "s/USER/$user/" -e "s/DATE/$date/" \
      -e "s|URL|$url|" -e "s/REV/$revision/" \
      -e "s|BRANCH|$branch|" -e "s/SRCCHECKSUM/$srcChecksum/" \
      > build/src/org/apache/hadoop/package-info.java
/*
* Generated by src/saveVersion.sh
*/
@HadoopVersionAnnotation(version="VERSION", revision="REV", branch="BRANCH",
user="USER", date="DATE", url="URL",
srcChecksum="SRCCHECKSUM")
package org.apache.hadoop;
EOF
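
The build calls this script with the release version as its single argument and expects to run from the source root (the find and mkdir paths above are relative). A minimal manual run, with a placeholder version string, looks like this:

# Hypothetical invocation from the top of the source tree; "0.21.0-dev" is a stand-in version.
sh src/saveVersion.sh 0.21.0-dev
# The generated annotation source then appears at:
#   build/src/org/apache/hadoop/package-info.java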

692
src/test/bin/test-patch.sh Executable file

@ -0,0 +1,692 @@
#!/usr/bin/env bash
#set -x
ulimit -n 1024
### Setup some variables.
### JOB_NAME, SVN_REVISION, and BUILD_NUMBER are set by Hudson when this script is run by the patch process
###############################################################################
parseArgs() {
case "$1" in
HUDSON)
### Set HUDSON to true to indicate that this script is being run by Hudson
HUDSON=true
if [[ $# != 19 ]] ; then
echo "ERROR: usage $0 HUDSON <PATCH_DIR> <SUPPORT_DIR> <PS_CMD> <WGET_CMD> <JIRACLI> <SVN_CMD> <GREP_CMD> <PATCH_CMD> <FINDBUGS_HOME> <FORREST_HOME> <ECLIPSE_HOME> <PYTHON_HOME> <WORKSPACE_BASEDIR> <TRIGGER_BUILD> <JIRA_PASSWD> <JAVA5_HOME> <CURL_CMD> <DEFECT> "
cleanupAndExit 0
fi
PATCH_DIR=$2
SUPPORT_DIR=$3
PS=$4
WGET=$5
JIRACLI=$6
SVN=$7
GREP=$8
PATCH=$9
FINDBUGS_HOME=${10}
FORREST_HOME=${11}
ECLIPSE_HOME=${12}
PYTHON_HOME=${13}
BASEDIR=${14}
TRIGGER_BUILD_URL=${15}
JIRA_PASSWD=${16}
JAVA5_HOME=${17}
CURL=${18}
defect=${19}
### Retrieve the defect number
if [ -z "$defect" ] ; then
echo "Could not determine the patch to test. Exiting."
cleanupAndExit 0
fi
if [ ! -e "$PATCH_DIR" ] ; then
mkdir -p $PATCH_DIR
fi
ECLIPSE_PROPERTY="-Declipse.home=$ECLIPSE_HOME"
PYTHON_PROPERTY="-Dpython.home=$PYTHON_HOME"
;;
DEVELOPER)
### Set HUDSON to false to indicate that this script is being run by a developer
HUDSON=false
if [[ $# != 10 ]] ; then
echo "ERROR: usage $0 DEVELOPER <PATCH_FILE> <SCRATCH_DIR> <SVN_CMD> <GREP_CMD> <PATCH_CMD> <FINDBUGS_HOME> <FORREST_HOME> <WORKSPACE_BASEDIR> <JAVA5_HOME>"
cleanupAndExit 0
fi
### PATCH_FILE contains the location of the patchfile
PATCH_FILE=$2
if [[ ! -e "$PATCH_FILE" ]] ; then
echo "Unable to locate the patch file $PATCH_FILE"
cleanupAndExit 0
fi
PATCH_DIR=$3
### Check if $PATCH_DIR exists. If it does not exist, create a new directory
if [[ ! -e "$PATCH_DIR" ]] ; then
mkdir "$PATCH_DIR"
if [[ $? == 0 ]] ; then
echo "$PATCH_DIR has been created"
else
echo "Unable to create $PATCH_DIR"
cleanupAndExit 0
fi
fi
SVN=$4
GREP=$5
PATCH=$6
FINDBUGS_HOME=$7
FORREST_HOME=$8
BASEDIR=$9
JAVA5_HOME=${10}
### Obtain the patch filename to append it to the version number
defect=`basename $PATCH_FILE`
;;
*)
echo "ERROR: usage $0 HUDSON [args] | DEVELOPER [args]"
cleanupAndExit 0
;;
esac
}
###############################################################################
checkout () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Testing patch for ${defect}."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
### When run by a developer, if the workspace contains modifications, do not continue
status=`$SVN stat`
if [[ $HUDSON == "false" ]] ; then
if [[ "$status" != "" ]] ; then
echo "ERROR: can't run in a workspace that contains the following modifications"
echo "$status"
cleanupAndExit 1
fi
else
cd $BASEDIR
$SVN revert -R .
rm -rf `$SVN status --no-ignore`
$SVN update
fi
return $?
}
###############################################################################
setup () {
### Download latest patch file (ignoring .htm and .html) when run from patch process
if [[ $HUDSON == "true" ]] ; then
$WGET -q -O $PATCH_DIR/jira http://issues.apache.org/jira/browse/$defect
if [[ `$GREP -c 'Patch Available' $PATCH_DIR/jira` == 0 ]] ; then
echo "$defect is not \"Patch Available\". Exiting."
cleanupAndExit 0
fi
relativePatchURL=`$GREP -o '"/jira/secure/attachment/[0-9]*/[^"]*' $PATCH_DIR/jira | $GREP -v -e 'htm[l]*$' | sort | tail -1 | $GREP -o '/jira/secure/attachment/[0-9]*/[^"]*'`
patchURL="http://issues.apache.org${relativePatchURL}"
patchNum=`echo $patchURL | $GREP -o '[0-9]*/' | $GREP -o '[0-9]*'`
echo "$defect patch is being downloaded at `date` from"
echo "$patchURL"
$WGET -q -O $PATCH_DIR/patch $patchURL
VERSION=${SVN_REVISION}_${defect}_PATCH-${patchNum}
JIRA_COMMENT="Here are the results of testing the latest attachment
$patchURL
against trunk revision ${SVN_REVISION}."
### Copy in any supporting files needed by this process
cp -r $SUPPORT_DIR/lib/* ./lib
#PENDING: cp -f $SUPPORT_DIR/etc/checkstyle* ./src/test
### Copy the patch file to $PATCH_DIR
else
VERSION=PATCH-${defect}
cp $PATCH_FILE $PATCH_DIR/patch
if [[ $? == 0 ]] ; then
echo "Patch file $PATCH_FILE copied to $PATCH_DIR"
else
echo "Could not copy $PATCH_FILE to $PATCH_DIR"
cleanupAndExit 0
fi
fi
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Pre-building trunk to determine trunk number"
echo " of release audit, javac, and Findbugs warnings."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= releaseaudit > $PATCH_DIR/trunkReleaseAuditWarnings.txt 2>&1"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= releaseaudit > $PATCH_DIR/trunkReleaseAuditWarnings.txt 2>&1
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djavac.args="-Xlint -Xmaxwarns 1000" $ECLIPSE_PROPERTY -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= clean tar > $PATCH_DIR/trunkJavacWarnings.txt 2>&1"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djavac.args="-Xlint -Xmaxwarns 1000" $ECLIPSE_PROPERTY -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= clean tar > $PATCH_DIR/trunkJavacWarnings.txt 2>&1
if [[ $? != 0 ]] ; then
echo "Trunk compilation is broken?"
cleanupAndExit 1
fi
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -Dfindbugs.home=$FINDBUGS_HOME -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= findbugs > /dev/null 2>&1"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -Dfindbugs.home=$FINDBUGS_HOME -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= findbugs > /dev/null 2>&1
if [[ $? != 0 ]] ; then
echo "Trunk findbugs is broken?"
cleanupAndExit 1
fi
cp $BASEDIR/build/test/findbugs/*.xml $PATCH_DIR/trunkFindbugsWarnings.xml
}
###############################################################################
### Check for @author tags in the patch
checkAuthor () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Checking there are no @author tags in the patch."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
authorTags=`$GREP -c -i '@author' $PATCH_DIR/patch`
echo "There appear to be $authorTags @author tags in the patch."
if [[ $authorTags != 0 ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 @author. The patch appears to contain $authorTags @author tags which the Hadoop community has agreed to not allow in code contributions."
return 1
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 @author. The patch does not contain any @author tags."
return 0
}
###############################################################################
### Check for tests in the patch
checkTests () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Checking there are new or changed tests in the patch."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
testReferences=`$GREP -c -i '/test' $PATCH_DIR/patch`
echo "There appear to be $testReferences test files referenced in the patch."
if [[ $testReferences == 0 ]] ; then
if [[ $HUDSON == "true" ]] ; then
patchIsDoc=`$GREP -c -i 'title="documentation' $PATCH_DIR/jira`
if [[ $patchIsDoc != 0 ]] ; then
echo "The patch appears to be a documentation patch that doesn't require tests."
JIRA_COMMENT="$JIRA_COMMENT
+0 tests included. The patch appears to be a documentation patch that doesn't require tests."
return 0
fi
fi
JIRA_COMMENT="$JIRA_COMMENT
-1 tests included. The patch doesn't appear to include any new or modified tests.
Please justify why no tests are needed for this patch."
return 1
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 tests included. The patch appears to include $testReferences new or modified tests."
return 0
}
###############################################################################
### Attempt to apply the patch
applyPatch () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Applying patch."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
$PATCH -E -p0 < $PATCH_DIR/patch
if [[ $? != 0 ]] ; then
echo "PATCH APPLICATION FAILED"
JIRA_COMMENT="$JIRA_COMMENT
-1 patch. The patch command could not apply the patch."
return 1
fi
return 0
}
###############################################################################
### Check there are no javadoc warnings
checkJavadocWarnings () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Determining number of patched javadoc warnings."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= clean javadoc | tee $PATCH_DIR/patchJavadocWarnings.txt"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= clean javadoc | tee $PATCH_DIR/patchJavadocWarnings.txt
javadocWarnings=`$GREP -c '\[javadoc\] [0-9]* warning' $PATCH_DIR/patchJavadocWarnings.txt`
echo ""
echo ""
echo "There appear to be $javadocWarnings javadoc warnings generated by the patched build."
if [[ $javadocWarnings != 0 ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 javadoc. The javadoc tool appears to have generated $javadocWarnings warning messages."
return 1
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 javadoc. The javadoc tool did not generate any warning messages."
return 0
}
###############################################################################
### Check there are no changes in the number of Javac warnings
checkJavacWarnings () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Determining number of patched javac warnings."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djavac.args="-Xlint -Xmaxwarns 1000" $ECLIPSE_PROPERTY -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= tar > $PATCH_DIR/patchJavacWarnings.txt 2>&1"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djavac.args="-Xlint -Xmaxwarns 1000" $ECLIPSE_PROPERTY -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= tar > $PATCH_DIR/patchJavacWarnings.txt 2>&1
### Compare trunk and patch javac warning numbers
if [[ -f $PATCH_DIR/patchJavacWarnings.txt ]] ; then
trunkJavacWarnings=`$GREP -o '\[javac\] [0-9]* warning' $PATCH_DIR/trunkJavacWarnings.txt | awk '{total += $2} END {print total}'`
patchJavacWarnings=`$GREP -o '\[javac\] [0-9]* warning' $PATCH_DIR/patchJavacWarnings.txt | awk '{total += $2} END {print total}'`
echo "There appear to be $trunkJavacWarnings javac compiler warnings before the patch and $patchJavacWarnings javac compiler warnings after applying the patch."
if [[ $patchJavacWarnings != "" && $trunkJavacWarnings != "" ]] ; then
if [[ $patchJavacWarnings > $trunkJavacWarnings ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 javac. The applied patch generated $patchJavacWarnings javac compiler warnings (more than the trunk's current $trunkJavacWarnings warnings)."
return 1
fi
fi
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 javac. The applied patch does not increase the total number of javac compiler warnings."
return 0
}
###############################################################################
### Check there are no changes in the number of release audit (RAT) warnings
checkReleaseAuditWarnings () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Determining number of patched release audit warnings."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= releaseaudit > $PATCH_DIR/patchReleaseAuditWarnings.txt 2>&1"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= releaseaudit > $PATCH_DIR/patchReleaseAuditWarnings.txt 2>&1
### Compare trunk and patch release audit warning numbers
if [[ -f $PATCH_DIR/patchReleaseAuditWarnings.txt ]] ; then
trunkReleaseAuditWarnings=`$GREP -c '\!?????' $PATCH_DIR/trunkReleaseAuditWarnings.txt`
patchReleaseAuditWarnings=`$GREP -c '\!?????' $PATCH_DIR/patchReleaseAuditWarnings.txt`
echo ""
echo ""
echo "There appear to be $trunkReleaseAuditWarnings release audit warnings before the patch and $patchReleaseAuditWarnings release audit warnings after applying the patch."
if [[ $patchReleaseAuditWarnings != "" && $trunkReleaseAuditWarnings != "" ]] ; then
if [[ $patchReleaseAuditWarnings > $trunkReleaseAuditWarnings ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 release audit. The applied patch generated $patchReleaseAuditWarnings release audit warnings (more than the trunk's current $trunkReleaseAuditWarnings warnings)."
$GREP '\!?????' $PATCH_DIR/patchReleaseAuditWarnings.txt > $PATCH_DIR/patchReleaseAuditProblems.txt
$GREP '\!?????' $PATCH_DIR/trunkReleaseAuditWarnings.txt > $PATCH_DIR/trunkReleaseAuditProblems.txt
echo "A diff of patched release audit warnings with trunk release audit warnings." > $PATCH_DIR/releaseAuditDiffWarnings.txt
echo "Lines that start with ????? in the release audit report indicate files that do not have an Apache license header." > $PATCH_DIR/releaseAuditDiffWarnings.txt
echo "" > $PATCH_DIR/releaseAuditDiffWarnings.txt
diff $PATCH_DIR/patchReleaseAuditProblems.txt $PATCH_DIR/trunkReleaseAuditProblems.txt >> $PATCH_DIR/releaseAuditDiffWarnings.txt
JIRA_COMMENT_FOOTER="Release audit warnings: http://hudson.zones.apache.org/hudson/job/$JOB_NAME/$BUILD_NUMBER/artifact/trunk/current/releaseAuditDiffWarnings.txt
$JIRA_COMMENT_FOOTER"
return 1
fi
fi
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 release audit. The applied patch does not increase the total number of release audit warnings."
return 0
}
###############################################################################
### Check there are no changes in the number of Checkstyle warnings
checkStyle () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Determining number of patched checkstyle warnings."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
echo "THIS IS NOT IMPLEMENTED YET"
echo ""
echo ""
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= checkstyle"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= checkstyle
JIRA_COMMENT_FOOTER="Checkstyle results: http://hudson.zones.apache.org/hudson/job/$JOB_NAME/$BUILD_NUMBER/artifact/trunk/build/test/checkstyle-errors.html
$JIRA_COMMENT_FOOTER"
### TODO: calculate actual patchStyleErrors
# patchStyleErrors=0
# if [[ $patchStyleErrors != 0 ]] ; then
# JIRA_COMMENT="$JIRA_COMMENT
#
# -1 checkstyle. The patch generated $patchStyleErrors code style errors."
# return 1
# fi
# JIRA_COMMENT="$JIRA_COMMENT
#
# +1 checkstyle. The patch generated 0 code style errors."
return 0
}
###############################################################################
### Check there are no changes in the number of Findbugs warnings
checkFindbugsWarnings () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Determining number of patched Findbugs warnings."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -Dfindbugs.home=$FINDBUGS_HOME -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= findbugs"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -Dfindbugs.home=$FINDBUGS_HOME -Djava5.home=${JAVA5_HOME} -Dforrest.home=${FORREST_HOME} -DHadoopPatchProcess= findbugs
if [ $? != 0 ] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 findbugs. The patch appears to cause Findbugs to fail."
return 1
fi
JIRA_COMMENT_FOOTER="Findbugs warnings: http://hudson.zones.apache.org/hudson/job/$JOB_NAME/$BUILD_NUMBER/artifact/trunk/build/test/findbugs/newPatchFindbugsWarnings.html
$JIRA_COMMENT_FOOTER"
cp $BASEDIR/build/test/findbugs/*.xml $PATCH_DIR/patchFindbugsWarnings.xml
$FINDBUGS_HOME/bin/setBugDatabaseInfo -timestamp "01/01/1999" \
$PATCH_DIR/trunkFindbugsWarnings.xml \
$PATCH_DIR/trunkFindbugsWarnings.xml
$FINDBUGS_HOME/bin/setBugDatabaseInfo -timestamp "01/01/2000" \
$PATCH_DIR/patchFindbugsWarnings.xml \
$PATCH_DIR/patchFindbugsWarnings.xml
$FINDBUGS_HOME/bin/computeBugHistory -output $PATCH_DIR/findbugsMerge.xml \
$PATCH_DIR/trunkFindbugsWarnings.xml \
$PATCH_DIR/patchFindbugsWarnings.xml
findbugsWarnings=`$FINDBUGS_HOME/bin/filterBugs -first "01/01/2000" $PATCH_DIR/findbugsMerge.xml \
$BASEDIR/build/test/findbugs/newPatchFindbugsWarnings.xml | /usr/bin/awk '{print $1}'`
$FINDBUGS_HOME/bin/convertXmlToText -html \
$BASEDIR/build/test/findbugs/newPatchFindbugsWarnings.xml \
$BASEDIR/build/test/findbugs/newPatchFindbugsWarnings.html
cp $BASEDIR/build/test/findbugs/newPatchFindbugsWarnings.html $PATCH_DIR/newPatchFindbugsWarnings.html
cp $BASEDIR/build/test/findbugs/newPatchFindbugsWarnings.xml $PATCH_DIR/newPatchFindbugsWarnings.xml
if [[ $findbugsWarnings != 0 ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 findbugs. The patch appears to introduce $findbugsWarnings new Findbugs warnings."
return 1
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 findbugs. The patch does not introduce any new Findbugs warnings."
return 0
}
###############################################################################
### Run the test-core target
runCoreTests () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Running core tests."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
### Kill any rogue build processes from the last attempt
$PS -auxwww | $GREP HadoopPatchProcess | /usr/bin/nawk '{print $2}' | /usr/bin/xargs -t -I {} /usr/bin/kill -9 {} > /dev/null
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= -Dtest.junit.output.format=xml -Dtest.output=yes -Dcompile.c++=yes -Dforrest.home=$FORREST_HOME -Djava5.home=$JAVA5_HOME create-c++-configure test-core"
$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= -Dtest.junit.output.format=xml -Dtest.output=yes -Dcompile.c++=yes -Dforrest.home=$FORREST_HOME -Djava5.home=$JAVA5_HOME create-c++-configure test-core
if [[ $? != 0 ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 core tests. The patch failed core unit tests."
return 1
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 core tests. The patch passed core unit tests."
return 0
}
###############################################################################
### Tests parts of contrib specific to the eclipse files
checkJarFilesDeclaredInEclipse () {
export DECLARED_JARS=$(sed -n 's@.*kind="lib".*path="\(.*jar\)".*@\1@p' < .eclipse.templates/.classpath)
export PRESENT_JARS=$(find build/ivy/lib/Hadoop/common/ lib/ src/test/lib/ -name '*.jar' |sort)
# When run by Hudson, consider libs from ${SUPPORT_DIR} declared
if [[ ${HUDSON} == "true" ]]; then
DECLARED_JARS="${DECLARED_JARS} $(cd "${SUPPORT_DIR}"; find lib -name '*.jar')"
fi
DECLARED_JARS=$(sed 'y/ /\n/' <<< ${DECLARED_JARS} | sort)
export ECLIPSE_DECLARED_SRC=$(sed -n 's@.*kind="src".*path="\(.*\)".*@\1@p' < .eclipse.templates/.classpath |sort)
if [ "${DECLARED_JARS}" != "${PRESENT_JARS}" ]; then
echo "
FAILED. Some jars are not declared in the Eclipse project.
Declared jars: ${DECLARED_JARS}
Present jars: ${PRESENT_JARS}"
return 1
fi
for dir in $ECLIPSE_DECLARED_SRC; do
[ '!' -d $dir ] && echo "
FAILED: $dir is referenced in the Eclipse project although it doesn't exist anymore." && return 1
done
return 0
}
checkEclipse () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Running Eclipse classpath verification."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
checkJarFilesDeclaredInEclipse
if [[ $? != 0 ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 Eclipse classpath. The patch causes the Eclipse classpath to differ from the contents of the lib directories."
return 1
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 Eclipse classpath. The patch retains Eclipse classpath integrity."
return 0
}
###############################################################################
### Run the test-contrib target
runContribTests () {
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Running contrib tests."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
### Kill any rogue build processes from the last attempt
$PS -auxwww | $GREP HadoopPatchProcess | /usr/bin/nawk '{print $2}' | /usr/bin/xargs -t -I {} /usr/bin/kill -9 {} > /dev/null
echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" $ECLIPSE_PROPERTY $PYTHON_PROPERTY -DHadoopPatchProcess= -Dtest.junit.output.format=xml -Dtest.output=yes test-contrib"
$ANT_HOME/bin/ant -Dversion="${VERSION}" $ECLIPSE_PROPERTY $PYTHON_PROPERTY -DHadoopPatchProcess= -Dtest.junit.output.format=xml -Dtest.output=yes test-contrib
if [[ $? != 0 ]] ; then
JIRA_COMMENT="$JIRA_COMMENT
-1 contrib tests. The patch failed contrib unit tests."
return 1
fi
JIRA_COMMENT="$JIRA_COMMENT
+1 contrib tests. The patch passed contrib unit tests."
return 0
}
###############################################################################
### Submit a comment to the defect's Jira
submitJiraComment () {
local result=$1
### Do not output the value of JIRA_COMMENT_FOOTER when run by a developer
if [[ $HUDSON == "false" ]] ; then
JIRA_COMMENT_FOOTER=""
fi
if [[ $result == 0 ]] ; then
comment="+1 overall. $JIRA_COMMENT
$JIRA_COMMENT_FOOTER"
else
comment="-1 overall. $JIRA_COMMENT
$JIRA_COMMENT_FOOTER"
fi
### Output the test result to the console
echo "
$comment"
if [[ $HUDSON == "true" ]] ; then
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Adding comment to Jira."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
### Update Jira with a comment
export USER=hudson
$JIRACLI -s issues.apache.org/jira login hadoopqa $JIRA_PASSWD
$JIRACLI -s issues.apache.org/jira comment $defect "$comment"
$JIRACLI -s issues.apache.org/jira logout
fi
}
###############################################################################
### Cleanup files
cleanupAndExit () {
local result=$1
if [[ $HUDSON == "true" ]] ; then
if [ -e "$PATCH_DIR" ] ; then
mv $PATCH_DIR $BASEDIR
fi
CALLER=`hostname`
$CURL $PATCH_ADMIN_URL'&CALLER='$CALLER
fi
echo ""
echo ""
echo "======================================================================"
echo "======================================================================"
echo " Finished build."
echo "======================================================================"
echo "======================================================================"
echo ""
echo ""
exit $result
}
###############################################################################
###############################################################################
###############################################################################
JIRA_COMMENT=""
JIRA_COMMENT_FOOTER="Console output: http://hudson.zones.apache.org/hudson/job/$JOB_NAME/$BUILD_NUMBER/console
This message is automatically generated."
### Check if arguments to the script have been specified properly or not
parseArgs $@
cd $BASEDIR
checkout
RESULT=$?
if [[ $HUDSON == "true" ]] ; then
if [[ $RESULT != 0 ]] ; then
### Resubmit build.
$CURL $TRIGGER_BUILD_URL'&DEFECTNUM='$defect
exit 100
fi
fi
setup
checkAuthor
RESULT=$?
checkTests
(( RESULT = RESULT + $? ))
applyPatch
if [[ $? != 0 ]] ; then
submitJiraComment 1
cleanupAndExit 1
fi
checkJavadocWarnings
(( RESULT = RESULT + $? ))
checkJavacWarnings
(( RESULT = RESULT + $? ))
checkStyle
(( RESULT = RESULT + $? ))
checkFindbugsWarnings
(( RESULT = RESULT + $? ))
checkEclipse
(( RESULT = RESULT + $? ))
checkReleaseAuditWarnings
(( RESULT = RESULT + $? ))
### Do not call these when run by a developer
if [[ $HUDSON == "true" ]] ; then
runCoreTests
(( RESULT = RESULT + $? ))
runContribTests
(( RESULT = RESULT + $? ))
fi
JIRA_COMMENT_FOOTER="Test results: http://hudson.zones.apache.org/hudson/job/$JOB_NAME/$BUILD_NUMBER/testReport/
$JIRA_COMMENT_FOOTER"
submitJiraComment $RESULT
cleanupAndExit $RESULT
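
For a local run, the DEVELOPER usage string above expects the patch file, a scratch directory, tool paths, and the workspace; ANT_HOME must also be set, since the targets are invoked through $ANT_HOME/bin/ant. A sketch with placeholder paths (none of these values come from this commit):

# Hypothetical DEVELOPER-mode invocation; every path below is a placeholder.
bash src/test/bin/test-patch.sh DEVELOPER \
  /tmp/HADOOP-9999.patch \
  /tmp/patch-scratch \
  /usr/bin/svn /bin/grep /usr/bin/patch \
  /opt/findbugs /opt/forrest \
  /path/to/hadoop-trunk /opt/jdk1.5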


@ -0,0 +1,178 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="html" indent="yes"/>
<xsl:decimal-format decimal-separator="." grouping-separator="," />
<xsl:key name="files" match="file" use="@name" />
<!-- Checkstyle XML Style Sheet by Stephane Bailliez <sbailliez@apache.org> -->
<!-- Part of the Checkstyle distribution found at http://checkstyle.sourceforge.net -->
<!-- Usage (generates checkstyle_report.html): -->
<!-- <checkstyle failonviolation="false" config="${check.config}"> -->
<!-- <fileset dir="${src.dir}" includes="**/*.java"/> -->
<!-- <formatter type="xml" toFile="${doc.dir}/checkstyle_report.xml"/> -->
<!-- </checkstyle> -->
<!-- <style basedir="${doc.dir}" destdir="${doc.dir}" -->
<!-- includes="checkstyle_report.xml" -->
<!-- style="${doc.dir}/checkstyle-noframes-sorted.xsl"/> -->
<xsl:template match="checkstyle">
<html>
<head>
<style type="text/css">
.bannercell {
border: 0px;
padding: 0px;
}
body {
margin-left: 10;
margin-right: 10;
font:normal 80% arial,helvetica,sans-serif;
background-color:#FFFFFF;
color:#000000;
}
.a td {
background: #efefef;
}
.b td {
background: #fff;
}
th, td {
text-align: left;
vertical-align: top;
}
th {
font-weight:bold;
background: #ccc;
color: black;
}
table, th, td {
font-size:100%;
border: none
}
table.log tr td, tr th {
}
h2 {
font-weight:bold;
font-size:140%;
margin-bottom: 5;
}
h3 {
font-size:100%;
font-weight:bold;
background: #525D76;
color: white;
text-decoration: none;
padding: 5px;
margin-right: 2px;
margin-left: 2px;
margin-bottom: 0;
}
</style>
</head>
<body>
<a name="top"></a>
<!-- jakarta logo -->
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td class="bannercell" rowspan="2">
<!--a href="http://jakarta.apache.org/">
<img src="http://jakarta.apache.org/images/jakarta-logo.gif" alt="http://jakarta.apache.org" align="left" border="0"/>
</a-->
</td>
<td class="text-align:right"><h2>CheckStyle Audit</h2></td>
</tr>
<tr>
<td class="text-align:right">Designed for use with <a href='http://checkstyle.sourceforge.net/'>CheckStyle</a> and <a href='http://jakarta.apache.org'>Ant</a>.</td>
</tr>
</table>
<hr size="1"/>
<!-- Summary part -->
<xsl:apply-templates select="." mode="summary"/>
<hr size="1" width="100%" align="left"/>
<!-- Package List part -->
<xsl:apply-templates select="." mode="filelist"/>
<hr size="1" width="100%" align="left"/>
<!-- For each package create its part -->
<xsl:apply-templates select="file[@name and generate-id(.) = generate-id(key('files', @name))]" />
<hr size="1" width="100%" align="left"/>
</body>
</html>
</xsl:template>
<xsl:template match="checkstyle" mode="filelist">
<h3>Files</h3>
<table class="log" border="0" cellpadding="5" cellspacing="2" width="100%">
<tr>
<th>Name</th>
<th>Errors</th>
</tr>
<xsl:for-each select="file[@name and generate-id(.) = generate-id(key('files', @name))]">
<xsl:sort data-type="number" order="descending" select="count(key('files', @name)/error)"/>
<xsl:variable name="errorCount" select="count(error)"/>
<tr>
<xsl:call-template name="alternated-row"/>
<td><a href="#f-{@name}"><xsl:value-of select="@name"/></a></td>
<td><xsl:value-of select="$errorCount"/></td>
</tr>
</xsl:for-each>
</table>
</xsl:template>
<xsl:template match="file">
<a name="f-{@name}"></a>
<h3>File <xsl:value-of select="@name"/></h3>
<table class="log" border="0" cellpadding="5" cellspacing="2" width="100%">
<tr>
<th>Error Description</th>
<th>Line</th>
</tr>
<xsl:for-each select="key('files', @name)/error">
<xsl:sort data-type="number" order="ascending" select="@line"/>
<tr>
<xsl:call-template name="alternated-row"/>
<td><xsl:value-of select="@message"/></td>
<td><xsl:value-of select="@line"/></td>
</tr>
</xsl:for-each>
</table>
<a href="#top">Back to top</a>
</xsl:template>
<xsl:template match="checkstyle" mode="summary">
<h3>Summary</h3>
<xsl:variable name="fileCount" select="count(file[@name and generate-id(.) = generate-id(key('files', @name))])"/>
<xsl:variable name="errorCount" select="count(file/error)"/>
<table class="log" border="0" cellpadding="5" cellspacing="2" width="100%">
<tr>
<th>Files</th>
<th>Errors</th>
</tr>
<tr>
<xsl:call-template name="alternated-row"/>
<td><xsl:value-of select="$fileCount"/></td>
<td><xsl:value-of select="$errorCount"/></td>
</tr>
</table>
</xsl:template>
<xsl:template name="alternated-row">
<xsl:attribute name="class">
<xsl:if test="position() mod 2 = 1">a</xsl:if>
<xsl:if test="position() mod 2 = 0">b</xsl:if>
</xsl:attribute>
</xsl:template>
</xsl:stylesheet>
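
The usage comment near the top of this stylesheet shows how the Ant style task renders the Checkstyle XML report with it. For a quick preview outside Ant, something like the following works, assuming xsltproc is installed and the stylesheet is saved as checkstyle-noframes-sorted.xsl as that comment suggests (both assumptions are mine, not part of this commit):

# Hypothetical local rendering of a Checkstyle XML report to HTML.
xsltproc src/test/checkstyle-noframes-sorted.xsl checkstyle_report.xml > checkstyle_report.html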

170
src/test/checkstyle.xml Normal file

@ -0,0 +1,170 @@
<?xml version="1.0"?>
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.2//EN"
"http://www.puppycrawl.com/dtds/configuration_1_2.dtd">
<!--
Checkstyle configuration for Hadoop that is based on the sun_checks.xml file
that is bundled with Checkstyle and includes checks for:
- the Java Language Specification at
http://java.sun.com/docs/books/jls/second_edition/html/index.html
- the Sun Code Conventions at http://java.sun.com/docs/codeconv/
- the Javadoc guidelines at
http://java.sun.com/j2se/javadoc/writingdoccomments/index.html
- the JDK Api documentation http://java.sun.com/j2se/docs/api/index.html
- some best practices
Checkstyle is very configurable. Be sure to read the documentation at
http://checkstyle.sf.net (or in your downloaded distribution).
Most Checks are configurable, be sure to consult the documentation.
To completely disable a check, just comment it out or delete it from the file.
Finally, it is worth reading the documentation.
-->
<module name="Checker">
<!-- Checks that a package.html file exists for each package. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html#PackageHtml -->
<module name="PackageHtml"/>
<!-- Checks whether files end with a new line. -->
<!-- See http://checkstyle.sf.net/config_misc.html#NewlineAtEndOfFile -->
<!-- module name="NewlineAtEndOfFile"/-->
<!-- Checks that property files contain the same keys. -->
<!-- See http://checkstyle.sf.net/config_misc.html#Translation -->
<module name="Translation"/>
<module name="TreeWalker">
<!-- Checks for Javadoc comments. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
<module name="JavadocType">
<property name="scope" value="public"/>
<property name="allowMissingParamTags" value="true"/>
</module>
<module name="JavadocStyle"/>
<!-- Checks for Naming Conventions. -->
<!-- See http://checkstyle.sf.net/config_naming.html -->
<module name="ConstantName"/>
<module name="LocalFinalVariableName"/>
<module name="LocalVariableName"/>
<module name="MemberName"/>
<module name="MethodName"/>
<module name="PackageName"/>
<module name="ParameterName"/>
<module name="StaticVariableName"/>
<module name="TypeName"/>
<!-- Checks for Headers -->
<!-- See http://checkstyle.sf.net/config_header.html -->
<!-- <module name="Header"> -->
<!-- The follow property value demonstrates the ability -->
<!-- to have access to ANT properties. In this case it uses -->
<!-- the ${basedir} property to allow Checkstyle to be run -->
<!-- from any directory within a project. See property -->
<!-- expansion, -->
<!-- http://checkstyle.sf.net/config.html#properties -->
<!-- <property -->
<!-- name="headerFile" -->
<!-- value="${basedir}/java.header"/> -->
<!-- </module> -->
<!-- Following interprets the header file as regular expressions. -->
<!-- <module name="RegexpHeader"/> -->
<!-- Checks for imports -->
<!-- See http://checkstyle.sf.net/config_import.html -->
<module name="IllegalImport"/> <!-- defaults to sun.* packages -->
<module name="RedundantImport"/>
<module name="UnusedImports"/>
<!-- Checks for Size Violations. -->
<!-- See http://checkstyle.sf.net/config_sizes.html -->
<module name="FileLength"/>
<module name="LineLength"/>
<module name="MethodLength"/>
<module name="ParameterNumber"/>
<!-- Checks for whitespace -->
<!-- See http://checkstyle.sf.net/config_whitespace.html -->
<module name="EmptyForIteratorPad"/>
<module name="MethodParamPad"/>
<module name="NoWhitespaceAfter"/>
<module name="NoWhitespaceBefore"/>
<module name="ParenPad"/>
<module name="TypecastParenPad"/>
<module name="TabCharacter"/>
<module name="WhitespaceAfter">
<property name="tokens" value="COMMA, SEMI"/>
</module>
<!-- Modifier Checks -->
<!-- See http://checkstyle.sf.net/config_modifiers.html -->
<module name="ModifierOrder"/>
<module name="RedundantModifier"/>
<!-- Checks for blocks. You know, those {}'s -->
<!-- See http://checkstyle.sf.net/config_blocks.html -->
<module name="AvoidNestedBlocks"/>
<module name="EmptyBlock"/>
<module name="LeftCurly"/>
<module name="NeedBraces"/>
<module name="RightCurly"/>
<!-- Checks for common coding problems -->
<!-- See http://checkstyle.sf.net/config_coding.html -->
<!-- module name="AvoidInlineConditionals"/-->
<module name="DoubleCheckedLocking"/>
<module name="EmptyStatement"/>
<module name="EqualsHashCode"/>
<module name="HiddenField">
<property name="ignoreConstructorParameter" value="true"/>
</module>
<module name="IllegalInstantiation"/>
<module name="InnerAssignment"/>
<module name="MissingSwitchDefault"/>
<module name="RedundantThrows"/>
<module name="SimplifyBooleanExpression"/>
<module name="SimplifyBooleanReturn"/>
<!-- Checks for class design -->
<!-- See http://checkstyle.sf.net/config_design.html -->
<module name="FinalClass"/>
<module name="HideUtilityClassConstructor"/>
<module name="InterfaceIsType"/>
<module name="VisibilityModifier"/>
<!-- Miscellaneous other checks. -->
<!-- See http://checkstyle.sf.net/config_misc.html -->
<module name="ArrayTypeStyle"/>
<module name="Indentation">
<property name="basicOffset" value="2" />
<property name="caseIndent" value="0" />
</module>
<module name="TodoComment"/>
<module name="UpperEll"/>
</module>
</module>
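
This configuration is driven by the checkstyle Ant target that test-patch.sh logs above; run by hand it is simply (the version value is a placeholder):

# Same target test-patch.sh invokes; the version string is a placeholder.
$ANT_HOME/bin/ant -Dversion="mypatch-1" -DHadoopPatchProcess= checkstyle
# The generated report is published from build/test/checkstyle-errors.html.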

50
src/test/core-site.xml Normal file

@ -0,0 +1,50 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Values used when running unit tests. This is mostly empty, to -->
<!-- allow use of the default values, overriding the potentially -->
<!-- user-edited core-site.xml in the conf/ directory. -->
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>build/test</value>
<description>A base for other temporary directories.</description>
<final>true</final>
</property>
<property>
<name>test.fs.s3.name</name>
<value>s3:///</value>
<description>The name of the s3 file system for testing.</description>
</property>
<property>
<name>fs.s3.block.size</name>
<value>128</value>
<description>Size of a block in bytes.</description>
</property>
<property>
<name>fs.ftp.user.localhost</name>
<value>user</value>
<description>The username for connecting to FTP server running on localhost.
This is required by FTPFileSystem</description>
</property>
<property>
<name>fs.ftp.password.localhost</name>
<value>password</value>
<description>The password for connecting to FTP server running on localhost.
This is required by FTPFileSystem</description>
</property>
<property>
<name>test.fs.s3n.name</name>
<value>s3n:///</value>
<description>The name of the s3n file system for testing.</description>
</property>
</configuration>

6
src/test/ddl/buffer.jr Normal file

@ -0,0 +1,6 @@
module org.apache.hadoop.record {
class RecBuffer {
buffer data;
}
}

6
src/test/ddl/int.jr Normal file

@ -0,0 +1,6 @@
module org.apache.hadoop.record {
class RecInt {
int data;
}
}

6
src/test/ddl/string.jr Normal file

@ -0,0 +1,6 @@
module org.apache.hadoop.record {
class RecString {
ustring data;
}
}

46
src/test/ddl/test.jr Normal file

@ -0,0 +1,46 @@
module org.apache.hadoop.record {
class RecRecord0 {
ustring stringVal;
}
class RecRecord1 {
boolean boolVal;
byte byteVal;
int intVal;
long longVal;
float floatVal; // testing inline comment
double doubleVal; /* testing comment */
ustring stringVal; /* testing multi-line
* comment */
buffer bufferVal; // testing another // inline comment
vector<ustring> vectorVal;
map<ustring, ustring> mapVal;
RecRecord0 recordVal;
}
class RecRecordOld {
ustring name;
vector<long> ivec;
vector<vector<RecRecord0>> svec;
RecRecord0 inner;
vector<vector<vector<ustring>>> strvec;
float i1;
map<byte, ustring> map1;
vector<map<int, long>> mvec1;
vector<map<int, long>> mvec2;
}
/* RecRecordNew is a lot like RecRecordOld. Helps test for versioning. */
class RecRecordNew {
ustring name2;
RecRecord0 inner;
vector<int> ivec;
vector<vector<int>> svec;
vector<vector<vector<ustring>>> strvec;
int i1;
map<long, ustring> map1;
vector<map<int, long>> mvec2;
}
}
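
These .jr files are Hadoop record DDL consumed by the record-compiler tests. As a hedged sketch, the rcc launcher that ships with Hadoop can translate them into generated Java record classes roughly as follows; the launcher name and flag are recalled from the record package's tooling, not defined anywhere in this commit:

# Hypothetical: generate Java sources for the test records from the DDL.
bin/rcc --language java src/test/ddl/test.jr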


@ -0,0 +1,216 @@
<FindBugsFilter>
<Match>
<Package name="org.apache.hadoop.record.compiler.generated" />
</Match>
<Match>
<Bug pattern="EI_EXPOSE_REP" />
</Match>
<Match>
<Bug pattern="EI_EXPOSE_REP2" />
</Match>
<Match>
<Bug pattern="SE_COMPARATOR_SHOULD_BE_SERIALIZABLE" />
</Match>
<Match>
<Class name="~.*_jsp" />
<Bug pattern="DLS_DEAD_LOCAL_STORE" />
</Match>
<Match>
<Field name="_jspx_dependants" />
<Bug pattern="UWF_UNWRITTEN_FIELD" />
</Match>
<!--
Inconsistent synchronization for Client.Connection.out is
intentional, so that a connection can be closed instantly.
-->
<Match>
<Class name="org.apache.hadoop.ipc.Client$Connection" />
<Field name="out" />
<Bug pattern="IS2_INCONSISTENT_SYNC" />
</Match>
<!--
Ignore Cross Scripting Vulnerabilities
-->
<Match>
<Package name="~org.apache.hadoop.mapred.*" />
<Bug code="XSS" />
</Match>
<Match>
<Class name="org.apache.hadoop.mapred.taskdetails_jsp" />
<Bug code="HRS" />
</Match>
<!--
Ignore warnings where a child class has the same name as its
super class. Classes based on the old API shadow names from the
new API. These should go away after HADOOP-1.0.
-->
<Match>
<Class name="~org.apache.hadoop.mapred.*" />
<Bug pattern="NM_SAME_SIMPLE_NAME_AS_SUPERCLASS" />
</Match>
<!--
Ignore warnings for usage of System.exit. This is
required and has been well thought out.
-->
<Match>
<Class name="org.apache.hadoop.mapred.Child$2" />
<Method name="run" />
<Bug pattern="DM_EXIT" />
</Match>
<Match>
<Class name="org.apache.hadoop.mapred.JobTracker" />
<Method name="addHostToNodeMapping" />
<Bug pattern="DM_EXIT" />
</Match>
<Match>
<Class name="org.apache.hadoop.mapred.Task" />
<Or>
<Method name="done" />
<Method name="commit" />
<Method name="statusUpdate" />
</Or>
<Bug pattern="DM_EXIT" />
</Match>
<Match>
<Class name="org.apache.hadoop.mapred.Task$TaskReporter" />
<Method name="run" />
<Bug pattern="DM_EXIT" />
</Match>
<!--
We need to cast objects between old and new api objects
-->
<Match>
<Class name="org.apache.hadoop.mapred.OutputCommitter" />
<Bug pattern="BC_UNCONFIRMED_CAST" />
</Match>
<!--
We intentionally get the name from the inner class
-->
<Match>
<Class name="org.apache.hadoop.mapred.TaskTracker$MapEventsFetcherThread" />
<Method name="run" />
<Bug pattern="IA_AMBIGUOUS_INVOCATION_OF_INHERITED_OR_OUTER_METHOD" />
</Match>
<Match>
<Class name="org.apache.hadoop.mapred.FileOutputCommitter" />
<Bug pattern="NM_WRONG_PACKAGE_INTENTIONAL" />
</Match>
<!--
Ignoring this warning as resolving this would need a non-trivial change in code
-->
<Match>
<Class name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor" />
<Method name="configure" />
<Field name="maxNumItems" />
<Bug pattern="ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD" />
</Match>
<!--
Comes from org.apache.jasper.runtime.ResourceInjector. Cannot do much.
-->
<Match>
<Class name="org.apache.hadoop.mapred.jobqueue_005fdetails_jsp" />
<Field name="_jspx_resourceInjector" />
<Bug pattern="SE_BAD_FIELD" />
</Match>
<!--
Storing textInputFormat and then passing it as a parameter. Safe to ignore.
-->
<Match>
<Class name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob" />
<Method name="createValueAggregatorJob" />
<Bug pattern="DLS_DEAD_STORE_OF_CLASS_LITERAL" />
</Match>
<!--
Can remove this after the upgrade to findbugs1.3.8
-->
<Match>
<Class name="org.apache.hadoop.mapred.lib.db.DBInputFormat" />
<Method name="getSplits" />
<Bug pattern="DLS_DEAD_LOCAL_STORE" />
</Match>
<!--
This is a spurious warning. Just ignore
-->
<Match>
<Class name="org.apache.hadoop.mapred.MapTask$MapOutputBuffer" />
<Field name="kvindex" />
<Bug pattern="IS2_INCONSISTENT_SYNC" />
</Match>
<!--
core changes
-->
<Match>
<Class name="~org.apache.hadoop.*" />
<Bug code="MS" />
</Match>
<Match>
<Class name="org.apache.hadoop.fs.FileSystem" />
<Method name="checkPath" />
<Bug pattern="ES_COMPARING_STRINGS_WITH_EQ" />
</Match>
<Match>
<Class name="org.apache.hadoop.fs.kfs.KFSOutputStream" />
<Field name="path" />
<Bug pattern="URF_UNREAD_FIELD" />
</Match>
<Match>
<Class name="org.apache.hadoop.fs.kfs.KosmosFileSystem" />
<Method name="initialize" />
<Bug pattern="DM_EXIT" />
</Match>
<Match>
<Class name="org.apache.hadoop.io.Closeable" />
<Bug pattern="NM_SAME_SIMPLE_NAME_AS_INTERFACE" />
</Match>
<Match>
<Class name="org.apache.hadoop.security.AccessControlException" />
<Bug pattern="NM_SAME_SIMPLE_NAME_AS_SUPERCLASS" />
</Match>
<Match>
<Class name="org.apache.hadoop.record.meta.Utils" />
<Method name="skip" />
<Bug pattern="BC_UNCONFIRMED_CAST" />
</Match>
<!--
The compareTo method is actually a dummy method that just
throws exceptions. So, no need to override equals. Ignore.
-->
<Match>
<Class name="org.apache.hadoop.record.meta.RecordTypeInfo" />
<Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS" />
</Match>
<Match>
<Class name="org.apache.hadoop.util.ProcfsBasedProcessTree" />
<Bug pattern="DMI_HARDCODED_ABSOLUTE_FILENAME" />
</Match>
<!--
Streaming, Examples
-->
<Match>
<Class name="org.apache.hadoop.streaming.StreamUtil$TaskId" />
<Bug pattern="URF_UNREAD_FIELD" />
</Match>
<Match>
<Class name="org.apache.hadoop.examples.DBCountPageView" />
<Method name="verify" />
<Bug pattern="OBL_UNSATISFIED_OBLIGATION" />
</Match>
<Match>
<Class name="org.apache.hadoop.examples.ContextFactory" />
<Method name="setAttributes" />
<Bug pattern="OBL_UNSATISFIED_OBLIGATION" />
</Match>
</FindBugsFilter>
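
test-patch.sh runs Findbugs through the findbugs Ant target shown earlier, and the build is presumably wired to feed it this exclude file (that wiring lives in build.xml, which is not part of this hunk). A manual run mirrors the command the script logs, with placeholder paths:

# Same target test-patch.sh invokes; FINDBUGS_HOME, JAVA5_HOME, FORREST_HOME and the version are placeholders.
$ANT_HOME/bin/ant -Dversion="mypatch-1" -Dfindbugs.home=/opt/findbugs \
  -Djava5.home=/opt/jdk1.5 -Dforrest.home=/opt/forrest \
  -DHadoopPatchProcess= findbugs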


@ -0,0 +1,97 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>security.client.protocol.acl</name>
<value>*</value>
<description>ACL for ClientProtocol, which is used by user code
via the DistributedFileSystem.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.client.datanode.protocol.acl</name>
<value>*</value>
<description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol
for block recovery.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.datanode.protocol.acl</name>
<value>*</value>
<description>ACL for DatanodeProtocol, which is used by datanodes to
communicate with the namenode.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.inter.datanode.protocol.acl</name>
<value>*</value>
<description>ACL for InterDatanodeProtocol, the inter-datanode protocol
for updating generation timestamp.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.namenode.protocol.acl</name>
<value>*</value>
<description>ACL for NamenodeProtocol, the protocol used by the secondary
namenode to communicate with the namenode.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.inter.tracker.protocol.acl</name>
<value>*</value>
<description>ACL for InterTrackerProtocol, used by the tasktrackers to
communicate with the jobtracker.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.job.submission.protocol.acl</name>
<value>*</value>
<description>ACL for JobSubmissionProtocol, used by job clients to
communicate with the jobtracker for job submission, querying job status etc.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.task.umbilical.protocol.acl</name>
<value>*</value>
<description>ACL for TaskUmbilicalProtocol, used by the map and reduce
tasks to communicate with the parent tasktracker.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
<property>
<name>security.refresh.policy.protocol.acl</name>
<value>${user.name}</value>
<description>ACL for RefreshAuthorizationPolicyProtocol, used by the
dfsadmin and mradmin commands to refresh the security policy in-effect.
The ACL is a comma-separated list of user and group names. The user and
group list is separated by a blank. For e.g. "alice,bob users,wheel".
A special value of "*" means all users are allowed.</description>
</property>
</configuration>
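
The security.refresh.policy.protocol.acl entry above governs who may reload these ACLs on a live cluster via the dfsadmin and mradmin commands mentioned in its description. As a hedged sketch, that reload is roughly:

# Hypothetical refresh of the service-level ACLs; run as a user matching the refresh ACL.
bin/hadoop dfsadmin -refreshServiceAcl
bin/hadoop mradmin -refreshServiceAcl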

14
src/test/hadoop-site.xml Normal file

@ -0,0 +1,14 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
<!-- DO NOT PUT ANY PROPERTY IN THIS FILE. INSTEAD USE -->
<!-- core-site.xml, mapred-site.xml OR hdfs-site.xml -->
<!-- This empty file is here to avoid picking up properties from -->
<!-- conf/hadoop-site.xml. It will be removed once support -->
<!-- for hadoop-site.xml is removed. -->
<configuration>
</configuration>

9
src/test/hdfs-site.xml Normal file

@ -0,0 +1,9 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
</configuration>


@ -0,0 +1,103 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
/**
* Reducer that accumulates values based on their type.
* <p>
* The type is specified in the key part of the key-value pair
* as a prefix to the key in the following way
* <p>
* <tt>type:key</tt>
* <p>
* The values are accumulated according to the types:
* <ul>
* <li><tt>s:</tt> - string, concatenate</li>
* <li><tt>f:</tt> - float, sum</li>
* <li><tt>l:</tt> - long, sum</li>
* </ul>
*
*/
public class AccumulatingReducer extends MapReduceBase
implements Reducer<Text, Text, Text, Text> {
static final String VALUE_TYPE_LONG = "l:";
static final String VALUE_TYPE_FLOAT = "f:";
static final String VALUE_TYPE_STRING = "s:";
private static final Log LOG = LogFactory.getLog(AccumulatingReducer.class);
protected String hostName;
public AccumulatingReducer () {
LOG.info("Starting AccumulatingReducer !!!");
try {
hostName = java.net.InetAddress.getLocalHost().getHostName();
} catch(Exception e) {
hostName = "localhost";
}
LOG.info("Starting AccumulatingReducer on " + hostName);
}
public void reduce(Text key,
Iterator<Text> values,
OutputCollector<Text, Text> output,
Reporter reporter
) throws IOException {
String field = key.toString();
reporter.setStatus("starting " + field + " ::host = " + hostName);
// concatenate strings
if (field.startsWith(VALUE_TYPE_STRING)) {
String sSum = "";
while (values.hasNext())
sSum += values.next().toString() + ";";
output.collect(key, new Text(sSum));
reporter.setStatus("finished " + field + " ::host = " + hostName);
return;
}
// sum float values
if (field.startsWith(VALUE_TYPE_FLOAT)) {
float fSum = 0;
while (values.hasNext())
fSum += Float.parseFloat(values.next().toString());
output.collect(key, new Text(String.valueOf(fSum)));
reporter.setStatus("finished " + field + " ::host = " + hostName);
return;
}
// sum long values
if (field.startsWith(VALUE_TYPE_LONG)) {
long lSum = 0;
while (values.hasNext()) {
lSum += Long.parseLong(values.next().toString());
}
output.collect(key, new Text(String.valueOf(lSum)));
}
reporter.setStatus("finished " + field + " ::host = " + hostName);
}
}


@ -0,0 +1,551 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Date;
import java.util.StringTokenizer;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.*;
/**
* Distributed i/o benchmark.
* <p>
* This test writes into or reads from a specified number of files.
* File size is specified as a parameter to the test.
* Each file is accessed in a separate map task.
* <p>
* The reducer collects the following statistics:
* <ul>
* <li>number of tasks completed</li>
* <li>number of bytes written/read</li>
* <li>execution time</li>
* <li>io rate</li>
* <li>io rate squared</li>
* </ul>
*
* Finally, the following information is appended to a local file
* <ul>
* <li>read or write test</li>
* <li>date and time the test finished</li>
* <li>number of files</li>
* <li>total number of bytes processed</li>
* <li>throughput in mb/sec (total number of bytes / sum of processing times)</li>
* <li>average i/o rate in mb/sec per file</li>
* <li>standard deviation of the i/o rate</li>
* </ul>
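* <p>
* A typical invocation might look like the following (illustrative only;
* how the class is launched depends on the local build and classpath):
* <pre>
*   hadoop org.apache.hadoop.fs.DFSCIOTest -write -nrFiles 10 -fileSize 10
*   hadoop org.apache.hadoop.fs.DFSCIOTest -read -nrFiles 10 -fileSize 10
*   hadoop org.apache.hadoop.fs.DFSCIOTest -clean
* </pre>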
*/
public class DFSCIOTest extends TestCase {
// Constants
private static final Log LOG = LogFactory.getLog(DFSCIOTest.class);
private static final int TEST_TYPE_READ = 0;
private static final int TEST_TYPE_WRITE = 1;
private static final int TEST_TYPE_CLEANUP = 2;
private static final int DEFAULT_BUFFER_SIZE = 1000000;
private static final String BASE_FILE_NAME = "test_io_";
private static final String DEFAULT_RES_FILE_NAME = "DFSCIOTest_results.log";
private static Configuration fsConfig = new Configuration();
private static final long MEGA = 0x100000;
private static String TEST_ROOT_DIR = System.getProperty("test.build.data","/benchmarks/DFSCIOTest");
private static Path CONTROL_DIR = new Path(TEST_ROOT_DIR, "io_control");
private static Path WRITE_DIR = new Path(TEST_ROOT_DIR, "io_write");
private static Path READ_DIR = new Path(TEST_ROOT_DIR, "io_read");
private static Path DATA_DIR = new Path(TEST_ROOT_DIR, "io_data");
private static Path HDFS_TEST_DIR = new Path("/tmp/DFSCIOTest");
private static String HDFS_LIB_VERSION = System.getProperty("libhdfs.version", "1");
private static final String CHMOD = "chmod";
private static Path HDFS_SHLIB = new Path(HDFS_TEST_DIR + "/libhdfs.so." + HDFS_LIB_VERSION);
private static Path HDFS_READ = new Path(HDFS_TEST_DIR + "/hdfs_read");
private static Path HDFS_WRITE = new Path(HDFS_TEST_DIR + "/hdfs_write");
/**
* Run the test with default parameters.
*
* @throws Exception
*/
public void testIOs() throws Exception {
testIOs(10, 10);
}
/**
* Run the test with the specified parameters.
*
* @param fileSize file size
* @param nrFiles number of files
* @throws IOException
*/
public static void testIOs(int fileSize, int nrFiles)
throws IOException {
FileSystem fs = FileSystem.get(fsConfig);
createControlFile(fs, fileSize, nrFiles);
writeTest(fs);
readTest(fs);
}
private static void createControlFile(
FileSystem fs,
int fileSize, // in MB
int nrFiles
) throws IOException {
LOG.info("creating control file: "+fileSize+" mega bytes, "+nrFiles+" files");
fs.delete(CONTROL_DIR, true);
for(int i=0; i < nrFiles; i++) {
String name = getFileName(i);
Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(fs, fsConfig, controlFile,
Text.class, LongWritable.class,
CompressionType.NONE);
writer.append(new Text(name), new LongWritable(fileSize));
} catch(Exception e) {
throw new IOException(e.getLocalizedMessage());
} finally {
if (writer != null)
writer.close();
writer = null;
}
}
LOG.info("created control files for: "+nrFiles+" files");
}
private static String getFileName(int fIdx) {
return BASE_FILE_NAME + Integer.toString(fIdx);
}
/**
* Write/Read mapper base class.
* <p>
* Collects the following statistics per task:
* <ul>
* <li>number of tasks completed</li>
* <li>number of bytes written/read</li>
* <li>execution time</li>
* <li>i/o rate</li>
* <li>i/o rate squared</li>
* </ul>
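* <p>
* For instance, a single task that processed <tt>totalSize</tt> bytes in
* <tt>execTime</tt> msec emits (shown symbolically, mirroring collectStats below):
* <pre>
*   l:tasks  -> 1
*   l:size   -> totalSize
*   l:time   -> execTime
*   f:rate   -> ioRateMbSec * 1000
*   f:sqrate -> ioRateMbSec * ioRateMbSec * 1000
* </pre>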
*/
private abstract static class IOStatMapper extends IOMapperBase {
IOStatMapper() {
super(fsConfig);
}
void collectStats(OutputCollector<Text, Text> output,
String name,
long execTime,
Object objSize) throws IOException {
long totalSize = ((Long)objSize).longValue();
float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
LOG.info("Number of bytes processed = " + totalSize);
LOG.info("Exec time = " + execTime);
LOG.info("IO rate = " + ioRateMbSec);
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
new Text(String.valueOf(1)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
new Text(String.valueOf(totalSize)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
new Text(String.valueOf(execTime)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
new Text(String.valueOf(ioRateMbSec*1000)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
new Text(String.valueOf(ioRateMbSec*ioRateMbSec*1000)));
}
}
/**
* Write mapper class.
*/
public static class WriteMapper extends IOStatMapper {
public WriteMapper() {
super();
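// fill the write buffer with a repeating run of the 50 printable characters '0'..'a'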
for(int i=0; i < bufferSize; i++)
buffer[i] = (byte)('0' + i % 50);
}
public Object doIO(Reporter reporter,
String name,
long totalSize
) throws IOException {
// create file
totalSize *= MEGA;
// create instance of local filesystem
FileSystem localFS = FileSystem.getLocal(fsConfig);
try {
// native runtime
Runtime runTime = Runtime.getRuntime();
// copy the dso and executable from dfs and chmod them
synchronized (this) {
localFS.delete(HDFS_TEST_DIR, true);
if (!(localFS.mkdirs(HDFS_TEST_DIR))) {
throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem");
}
}
synchronized (this) {
if (!localFS.exists(HDFS_SHLIB)) {
FileUtil.copy(fs, HDFS_SHLIB, localFS, HDFS_SHLIB, false, fsConfig);
String chmodCmd = new String(CHMOD + " a+x " + HDFS_SHLIB);
Process process = runTime.exec(chmodCmd);
int exitStatus = process.waitFor();
if (exitStatus != 0) {
throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
}
}
}
synchronized (this) {
if (!localFS.exists(HDFS_WRITE)) {
FileUtil.copy(fs, HDFS_WRITE, localFS, HDFS_WRITE, false, fsConfig);
String chmodCmd = new String(CHMOD + " a+x " + HDFS_WRITE);
Process process = runTime.exec(chmodCmd);
int exitStatus = process.waitFor();
if (exitStatus != 0) {
throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
}
}
}
// exec the C program
Path outFile = new Path(DATA_DIR, name);
String writeCmd = new String(HDFS_WRITE + " " + outFile + " " + totalSize + " " + bufferSize);
Process process = runTime.exec(writeCmd, null, new File(HDFS_TEST_DIR.toString()));
int exitStatus = process.waitFor();
if (exitStatus != 0) {
throw new IOException(writeCmd + ": Failed with exitStatus: " + exitStatus);
}
} catch (InterruptedException interruptedException) {
reporter.setStatus(interruptedException.toString());
} finally {
localFS.close();
}
return new Long(totalSize);
}
}
private static void writeTest(FileSystem fs)
throws IOException {
fs.delete(DATA_DIR, true);
fs.delete(WRITE_DIR, true);
runIOTest(WriteMapper.class, WRITE_DIR);
}
private static void runIOTest( Class<? extends Mapper> mapperClass,
Path outputDir
) throws IOException {
JobConf job = new JobConf(fsConfig, DFSCIOTest.class);
FileInputFormat.setInputPaths(job, CONTROL_DIR);
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(mapperClass);
job.setReducerClass(AccumulatingReducer.class);
FileOutputFormat.setOutputPath(job, outputDir);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
/**
* Read mapper class.
*/
public static class ReadMapper extends IOStatMapper {
public ReadMapper() {
super();
}
public Object doIO(Reporter reporter,
String name,
long totalSize
) throws IOException {
totalSize *= MEGA;
// create instance of local filesystem
FileSystem localFS = FileSystem.getLocal(fsConfig);
try {
// native runtime
Runtime runTime = Runtime.getRuntime();
// copy the dso and executable from dfs
synchronized (this) {
localFS.delete(HDFS_TEST_DIR, true);
if (!(localFS.mkdirs(HDFS_TEST_DIR))) {
throw new IOException("Failed to create " + HDFS_TEST_DIR + " on local filesystem");
}
}
synchronized (this) {
if (!localFS.exists(HDFS_SHLIB)) {
if (!FileUtil.copy(fs, HDFS_SHLIB, localFS, HDFS_SHLIB, false, fsConfig)) {
throw new IOException("Failed to copy " + HDFS_SHLIB + " to local filesystem");
}
String chmodCmd = new String(CHMOD + " a+x " + HDFS_SHLIB);
Process process = runTime.exec(chmodCmd);
int exitStatus = process.waitFor();
if (exitStatus != 0) {
throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
}
}
}
synchronized (this) {
if (!localFS.exists(HDFS_READ)) {
if (!FileUtil.copy(fs, HDFS_READ, localFS, HDFS_READ, false, fsConfig)) {
throw new IOException("Failed to copy " + HDFS_READ + " to local filesystem");
}
String chmodCmd = new String(CHMOD + " a+x " + HDFS_READ);
Process process = runTime.exec(chmodCmd);
int exitStatus = process.waitFor();
if (exitStatus != 0) {
throw new IOException(chmodCmd + ": Failed with exitStatus: " + exitStatus);
}
}
}
// exec the C program
Path inFile = new Path(DATA_DIR, name);
String readCmd = new String(HDFS_READ + " " + inFile + " " + totalSize + " " +
bufferSize);
Process process = runTime.exec(readCmd, null, new File(HDFS_TEST_DIR.toString()));
int exitStatus = process.waitFor();
if (exitStatus != 0) {
throw new IOException(readCmd + ": Failed with exitStatus: " + exitStatus);
}
} catch (InterruptedException interruptedException) {
reporter.setStatus(interruptedException.toString());
} finally {
localFS.close();
}
return new Long(totalSize);
}
}
private static void readTest(FileSystem fs) throws IOException {
fs.delete(READ_DIR, true);
runIOTest(ReadMapper.class, READ_DIR);
}
private static void sequentialTest(
FileSystem fs,
int testType,
int fileSize,
int nrFiles
) throws Exception {
IOStatMapper ioer = null;
if (testType == TEST_TYPE_READ)
ioer = new ReadMapper();
else if (testType == TEST_TYPE_WRITE)
ioer = new WriteMapper();
else
return;
for(int i=0; i < nrFiles; i++)
ioer.doIO(Reporter.NULL,
BASE_FILE_NAME+Integer.toString(i),
MEGA*fileSize);
}
public static void main(String[] args) {
int testType = TEST_TYPE_READ;
int bufferSize = DEFAULT_BUFFER_SIZE;
int fileSize = 1;
int nrFiles = 1;
String resFileName = DEFAULT_RES_FILE_NAME;
boolean isSequential = false;
String version="DFSCIOTest.0.0.1";
String usage = "Usage: DFSCIOTest -read | -write | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] ";
System.out.println(version);
if (args.length == 0) {
System.err.println(usage);
System.exit(-1);
}
for (int i = 0; i < args.length; i++) { // parse command line
if (args[i].startsWith("-r")) {
testType = TEST_TYPE_READ;
} else if (args[i].startsWith("-w")) {
testType = TEST_TYPE_WRITE;
} else if (args[i].startsWith("-clean")) {
testType = TEST_TYPE_CLEANUP;
} else if (args[i].startsWith("-seq")) {
isSequential = true;
} else if (args[i].equals("-nrFiles")) {
nrFiles = Integer.parseInt(args[++i]);
} else if (args[i].equals("-fileSize")) {
fileSize = Integer.parseInt(args[++i]);
} else if (args[i].equals("-bufferSize")) {
bufferSize = Integer.parseInt(args[++i]);
} else if (args[i].equals("-resFile")) {
resFileName = args[++i];
}
}
LOG.info("nrFiles = " + nrFiles);
LOG.info("fileSize (MB) = " + fileSize);
LOG.info("bufferSize = " + bufferSize);
try {
fsConfig.setInt("test.io.file.buffer.size", bufferSize);
FileSystem fs = FileSystem.get(fsConfig);
if (testType != TEST_TYPE_CLEANUP) {
fs.delete(HDFS_TEST_DIR, true);
if (!fs.mkdirs(HDFS_TEST_DIR)) {
throw new IOException("Mkdirs failed to create " +
HDFS_TEST_DIR.toString());
}
//Copy the executables over to the remote filesystem
String hadoopHome = System.getenv("HADOOP_HOME");
fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/libhdfs.so." + HDFS_LIB_VERSION),
HDFS_SHLIB);
fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_read"), HDFS_READ);
fs.copyFromLocalFile(new Path(hadoopHome + "/libhdfs/hdfs_write"), HDFS_WRITE);
}
if (isSequential) {
long tStart = System.currentTimeMillis();
sequentialTest(fs, testType, fileSize, nrFiles);
long execTime = System.currentTimeMillis() - tStart;
String resultLine = "Seq Test exec time sec: " + (float)execTime / 1000;
LOG.info(resultLine);
return;
}
if (testType == TEST_TYPE_CLEANUP) {
cleanup(fs);
return;
}
createControlFile(fs, fileSize, nrFiles);
long tStart = System.currentTimeMillis();
if (testType == TEST_TYPE_WRITE)
writeTest(fs);
if (testType == TEST_TYPE_READ)
readTest(fs);
long execTime = System.currentTimeMillis() - tStart;
analyzeResult(fs, testType, execTime, resFileName);
} catch(Exception e) {
System.err.print(e.getLocalizedMessage());
System.exit(-1);
}
}
private static void analyzeResult( FileSystem fs,
int testType,
long execTime,
String resFileName
) throws IOException {
Path reduceFile;
if (testType == TEST_TYPE_WRITE)
reduceFile = new Path(WRITE_DIR, "part-00000");
else
reduceFile = new Path(READ_DIR, "part-00000");
DataInputStream in;
in = new DataInputStream(fs.open(reduceFile));
BufferedReader lines;
lines = new BufferedReader(new InputStreamReader(in));
long tasks = 0;
long size = 0;
long time = 0;
float rate = 0;
float sqrate = 0;
String line;
while((line = lines.readLine()) != null) {
StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
String attr = tokens.nextToken();
if (attr.endsWith(":tasks"))
tasks = Long.parseLong(tokens.nextToken());
else if (attr.endsWith(":size"))
size = Long.parseLong(tokens.nextToken());
else if (attr.endsWith(":time"))
time = Long.parseLong(tokens.nextToken());
else if (attr.endsWith(":rate"))
rate = Float.parseFloat(tokens.nextToken());
else if (attr.endsWith(":sqrate"))
sqrate = Float.parseFloat(tokens.nextToken());
}
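// "rate" holds the sum over tasks of (per-task MB/sec * 1000) and "sqrate" the
// sum of (per-task MB/sec)^2 * 1000 (see IOStatMapper.collectStats), so the
// mean rate is rate/1000/tasks and the standard deviation follows from
// E[x^2] - E[x]^2.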
double med = rate / 1000 / tasks;
double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med*med));
String resultLines[] = {
"----- DFSCIOTest ----- : " + ((testType == TEST_TYPE_WRITE) ? "write" :
(testType == TEST_TYPE_READ) ? "read" :
"unknown"),
" Date & time: " + new Date(System.currentTimeMillis()),
" Number of files: " + tasks,
"Total MBytes processed: " + size/MEGA,
" Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
"Average IO rate mb/sec: " + med,
" Std IO rate deviation: " + stdDev,
" Test exec time sec: " + (float)execTime / 1000,
"" };
PrintStream res = new PrintStream(
new FileOutputStream(
new File(resFileName), true));
for(int i = 0; i < resultLines.length; i++) {
LOG.info(resultLines[i]);
res.println(resultLines[i]);
}
}
private static void cleanup(FileSystem fs) throws Exception {
LOG.info("Cleaning up test files");
fs.delete(new Path(TEST_ROOT_DIR), true);
fs.delete(HDFS_TEST_DIR, true);
}
}

View File

@@ -0,0 +1,353 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Date;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.Vector;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.*;
/**
* Distributed checkup of the file system consistency.
* <p>
* Test file system consistency by reading each block of each file
* of the specified file tree.
* Report corrupted blocks and general file statistics.
* <p>
* Optionally displays statistics on read performance.
*
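* <p>
* A typical invocation might look like the following (illustrative only;
* how the class is launched depends on the local build and classpath):
* <pre>
*   hadoop org.apache.hadoop.fs.DistributedFSCheck -root /user -stats
*   hadoop org.apache.hadoop.fs.DistributedFSCheck -clean
* </pre>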
*/
public class DistributedFSCheck extends TestCase {
// Constants
private static final Log LOG = LogFactory.getLog(DistributedFSCheck.class);
private static final int TEST_TYPE_READ = 0;
private static final int TEST_TYPE_CLEANUP = 2;
private static final int DEFAULT_BUFFER_SIZE = 1000000;
private static final String DEFAULT_RES_FILE_NAME = "DistributedFSCheck_results.log";
private static final long MEGA = 0x100000;
private static Configuration fsConfig = new Configuration();
private static Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data","/benchmarks/DistributedFSCheck"));
private static Path MAP_INPUT_DIR = new Path(TEST_ROOT_DIR, "map_input");
private static Path READ_DIR = new Path(TEST_ROOT_DIR, "io_read");
private FileSystem fs;
private long nrFiles;
DistributedFSCheck(Configuration conf) throws Exception {
fsConfig = conf;
this.fs = FileSystem.get(conf);
}
/**
* Run distributed checkup for the entire file system.
*
* @throws Exception
*/
public void testFSBlocks() throws Exception {
testFSBlocks("/");
}
/**
* Run distributed checkup for the specified directory.
*
* @param rootName root directory name
* @throws Exception
*/
public void testFSBlocks(String rootName) throws Exception {
createInputFile(rootName);
runDistributedFSCheck();
cleanup(); // clean up after all to restore the system state
}
private void createInputFile(String rootName) throws IOException {
cleanup(); // clean up if previous run failed
Path inputFile = new Path(MAP_INPUT_DIR, "in_file");
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, fsConfig, inputFile,
Text.class, LongWritable.class, CompressionType.NONE);
try {
nrFiles = 0;
listSubtree(new Path(rootName), writer);
} finally {
writer.close();
}
LOG.info("Created map input files.");
}
private void listSubtree(Path rootFile,
SequenceFile.Writer writer
) throws IOException {
FileStatus rootStatus = fs.getFileStatus(rootFile);
listSubtree(rootStatus, writer);
}
private void listSubtree(FileStatus rootStatus,
SequenceFile.Writer writer
) throws IOException {
Path rootFile = rootStatus.getPath();
if (!rootStatus.isDir()) {
nrFiles++;
// For a regular file generate <fName,offset> pairs
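// e.g. (illustrative numbers) a 160 MB file with a 64 MB default block size
// yields offsets 0, 64 MB and 128 MB: one map input record per block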
long blockSize = fs.getDefaultBlockSize();
long fileLength = rootStatus.getLen();
for(long offset = 0; offset < fileLength; offset += blockSize)
writer.append(new Text(rootFile.toString()), new LongWritable(offset));
return;
}
FileStatus children[] = fs.listStatus(rootFile);
if (children == null)
throw new IOException("Could not get listing for " + rootFile);
for (int i = 0; i < children.length; i++)
listSubtree(children[i], writer);
}
/**
* DistributedFSCheck mapper class.
*/
public static class DistributedFSCheckMapper extends IOMapperBase {
public DistributedFSCheckMapper() {
super(fsConfig);
}
public Object doIO(Reporter reporter,
String name,
long offset
) throws IOException {
// open file
FSDataInputStream in = null;
try {
in = fs.open(new Path(name));
} catch(IOException e) {
return name + "@(missing)";
}
in.seek(offset);
long actualSize = 0;
try {
long blockSize = fs.getDefaultBlockSize();
reporter.setStatus("reading " + name + "@" +
offset + "/" + blockSize);
for( int curSize = bufferSize;
curSize == bufferSize && actualSize < blockSize;
actualSize += curSize) {
curSize = in.read(buffer, 0, bufferSize);
}
} catch(IOException e) {
LOG.info("Corrupted block detected in \"" + name + "\" at " + offset);
return name + "@" + offset;
} finally {
in.close();
}
return new Long(actualSize);
}
void collectStats(OutputCollector<Text, Text> output,
String name,
long execTime,
Object corruptedBlock) throws IOException {
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "blocks"),
new Text(String.valueOf(1)));
if (corruptedBlock.getClass().getName().endsWith("String")) {
output.collect(
new Text(AccumulatingReducer.VALUE_TYPE_STRING + "badBlocks"),
new Text((String)corruptedBlock));
return;
}
long totalSize = ((Long)corruptedBlock).longValue();
float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
LOG.info("Number of bytes processed = " + totalSize);
LOG.info("Exec time = " + execTime);
LOG.info("IO rate = " + ioRateMbSec);
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
new Text(String.valueOf(totalSize)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
new Text(String.valueOf(execTime)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
new Text(String.valueOf(ioRateMbSec*1000)));
}
}
private void runDistributedFSCheck() throws Exception {
JobConf job = new JobConf(fs.getConf(), DistributedFSCheck.class);
FileInputFormat.setInputPaths(job, MAP_INPUT_DIR);
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(DistributedFSCheckMapper.class);
job.setReducerClass(AccumulatingReducer.class);
FileOutputFormat.setOutputPath(job, READ_DIR);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
public static void main(String[] args) throws Exception {
int testType = TEST_TYPE_READ;
int bufferSize = DEFAULT_BUFFER_SIZE;
String resFileName = DEFAULT_RES_FILE_NAME;
String rootName = "/";
boolean viewStats = false;
String usage = "Usage: DistributedFSCheck [-root name] [-clean] [-resFile resultFileName] [-bufferSize Bytes] [-stats] ";
if (args.length == 1 && args[0].startsWith("-h")) {
System.err.println(usage);
System.exit(-1);
}
for(int i = 0; i < args.length; i++) { // parse command line
if (args[i].equals("-root")) {
rootName = args[++i];
} else if (args[i].startsWith("-clean")) {
testType = TEST_TYPE_CLEANUP;
} else if (args[i].equals("-bufferSize")) {
bufferSize = Integer.parseInt(args[++i]);
} else if (args[i].equals("-resFile")) {
resFileName = args[++i];
} else if (args[i].startsWith("-stat")) {
viewStats = true;
}
}
LOG.info("root = " + rootName);
LOG.info("bufferSize = " + bufferSize);
Configuration conf = new Configuration();
conf.setInt("test.io.file.buffer.size", bufferSize);
DistributedFSCheck test = new DistributedFSCheck(conf);
if (testType == TEST_TYPE_CLEANUP) {
test.cleanup();
return;
}
test.createInputFile(rootName);
long tStart = System.currentTimeMillis();
test.runDistributedFSCheck();
long execTime = System.currentTimeMillis() - tStart;
test.analyzeResult(execTime, resFileName, viewStats);
// test.cleanup(); // clean up after all to restore the system state
}
private void analyzeResult(long execTime,
String resFileName,
boolean viewStats
) throws IOException {
Path reduceFile= new Path(READ_DIR, "part-00000");
DataInputStream in;
in = new DataInputStream(fs.open(reduceFile));
BufferedReader lines;
lines = new BufferedReader(new InputStreamReader(in));
long blocks = 0;
long size = 0;
long time = 0;
float rate = 0;
StringTokenizer badBlocks = null;
long nrBadBlocks = 0;
String line;
while((line = lines.readLine()) != null) {
StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
String attr = tokens.nextToken();
if (attr.endsWith("blocks"))
blocks = Long.parseLong(tokens.nextToken());
else if (attr.endsWith("size"))
size = Long.parseLong(tokens.nextToken());
else if (attr.endsWith("time"))
time = Long.parseLong(tokens.nextToken());
else if (attr.endsWith("rate"))
rate = Float.parseFloat(tokens.nextToken());
else if (attr.endsWith("badBlocks")) {
badBlocks = new StringTokenizer(tokens.nextToken(), ";");
nrBadBlocks = badBlocks.countTokens();
}
}
Vector<String> resultLines = new Vector<String>();
resultLines.add( "----- DistributedFSCheck ----- : ");
resultLines.add( " Date & time: " + new Date(System.currentTimeMillis()));
resultLines.add( " Total number of blocks: " + blocks);
resultLines.add( " Total number of files: " + nrFiles);
resultLines.add( "Number of corrupted blocks: " + nrBadBlocks);
int nrBadFilesPos = resultLines.size();
TreeSet<String> badFiles = new TreeSet<String>();
long nrBadFiles = 0;
if (nrBadBlocks > 0) {
resultLines.add("");
resultLines.add("----- Corrupted Blocks (file@offset) ----- : ");
while(badBlocks.hasMoreTokens()) {
String curBlock = badBlocks.nextToken();
resultLines.add(curBlock);
badFiles.add(curBlock.substring(0, curBlock.indexOf('@')));
}
nrBadFiles = badFiles.size();
}
resultLines.insertElementAt(" Number of corrupted files: " + nrBadFiles, nrBadFilesPos);
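// per-block rates were scaled by 1000 in collectStats, hence the division by 1000 below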
if (viewStats) {
resultLines.add("");
resultLines.add("----- Performance ----- : ");
resultLines.add(" Total MBytes read: " + size/MEGA);
resultLines.add(" Throughput mb/sec: " + (float)size * 1000.0 / (time * MEGA));
resultLines.add(" Average IO rate mb/sec: " + rate / 1000 / blocks);
resultLines.add(" Test exec time sec: " + (float)execTime / 1000);
}
PrintStream res = new PrintStream(
new FileOutputStream(
new File(resFileName), true));
for(int i = 0; i < resultLines.size(); i++) {
String cur = resultLines.get(i);
LOG.info(cur);
res.println(cur);
}
}
private void cleanup() throws IOException {
LOG.info("Cleaning up test files");
fs.delete(TEST_ROOT_DIR, true);
}
}

View File

@@ -0,0 +1,129 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.net.InetAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
* Base mapper class for IO operations.
* <p>
* Two abstract methods, {@link #doIO(Reporter, String, long)} and
* {@link #collectStats(OutputCollector,String,long,Object)}, must be
* overridden in derived classes to define the IO operation and the
* statistics data to be collected by subsequent reducers.
*
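* <p>
* A minimal, purely illustrative subclass might look like:
* <pre>
*   class NoopMapper extends IOMapperBase {
*     NoopMapper(Configuration conf) { super(conf); }
*     Object doIO(Reporter reporter, String name, long value) {
*       return Long.valueOf(value);   // pretend "value" bytes were processed
*     }
*     void collectStats(OutputCollector&lt;Text, Text&gt; output, String name,
*                       long execTime, Object result) throws IOException {
*       output.collect(new Text("l:time"), new Text(String.valueOf(execTime)));
*     }
*   }
* </pre>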
*/
public abstract class IOMapperBase extends Configured
implements Mapper<Text, LongWritable, Text, Text> {
protected byte[] buffer;
protected int bufferSize;
protected FileSystem fs;
protected String hostName;
public IOMapperBase(Configuration conf) {
super(conf);
try {
fs = FileSystem.get(conf);
} catch (Exception e) {
throw new RuntimeException("Cannot create file system.", e);
}
bufferSize = conf.getInt("test.io.file.buffer.size", 4096);
buffer = new byte[bufferSize];
try {
hostName = InetAddress.getLocalHost().getHostName();
} catch(Exception e) {
hostName = "localhost";
}
}
public void configure(JobConf job) {
setConf(job);
}
public void close() throws IOException {
}
/**
* Perform an I/O operation, usually a read or a write.
*
* @param reporter
* @param name file name
* @param value offset within the file
* @return object that is passed as a parameter to
* {@link #collectStats(OutputCollector,String,long,Object)}
* @throws IOException
*/
abstract Object doIO(Reporter reporter,
String name,
long value) throws IOException;
/**
* Collect stat data to be combined by a subsequent reducer.
*
* @param output
* @param name file name
* @param execTime IO execution time
* @param doIOReturnValue value returned by {@link #doIO(Reporter,String,long)}
* @throws IOException
*/
abstract void collectStats(OutputCollector<Text, Text> output,
String name,
long execTime,
Object doIOReturnValue) throws IOException;
/**
* Map file name and offset into statistical data.
* <p>
* The map task receives the
* <tt>key</tt>, which contains the file name, and the
* <tt>value</tt>, which is the offset within the file.
*
* The parameters are passed to the abstract method
* {@link #doIO(Reporter,String,long)}, which performs the io operation,
* usually read or write data, and then
* {@link #collectStats(OutputCollector,String,long,Object)}
* is called to prepare stat data for a subsequent reducer.
*/
public void map(Text key,
LongWritable value,
OutputCollector<Text, Text> output,
Reporter reporter) throws IOException {
String name = key.toString();
long longValue = value.get();
reporter.setStatus("starting " + name + " ::host = " + hostName);
long tStart = System.currentTimeMillis();
Object statValue = doIO(reporter, name, longValue);
long tEnd = System.currentTimeMillis();
long execTime = tEnd - tStart;
collectStats(output, name, execTime, statValue);
reporter.setStatus("finished " + name + " ::host = " + hostName);
}
}

View File

@@ -0,0 +1,853 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.StringTokenizer;
import junit.framework.TestCase;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
/**
* A JUnit test for copying files recursively.
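* <p>
* Each case drives DistCp through ToolRunner with an argument list such as
* the following (mirroring the tests below):
* <pre>
*   ToolRunner.run(new DistCp(conf),
*       new String[] {"-log", namenode + "/logs",
*                     namenode + "/srcdat", namenode + "/destdat"});
* </pre>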
*/
public class TestCopyFiles extends TestCase {
{
((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.StateChange")
).getLogger().setLevel(Level.OFF);
((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.OFF);
((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.OFF);
((Log4JLogger)DistCp.LOG).getLogger().setLevel(Level.ALL);
}
static final URI LOCAL_FS = URI.create("file:///");
private static final Random RAN = new Random();
private static final int NFILES = 20;
private static String TEST_ROOT_DIR =
new Path(System.getProperty("test.build.data","/tmp"))
.toString().replace(' ', '+');
/** class MyFile contains enough information to recreate the contents of
* a single file.
*/
private static class MyFile {
private static Random gen = new Random();
private static final int MAX_LEVELS = 3;
private static final int MAX_SIZE = 8*1024;
private static String[] dirNames = {
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"
};
private final String name;
private int size = 0;
private long seed = 0L;
MyFile() {
this(gen.nextInt(MAX_LEVELS));
}
MyFile(int nLevels) {
String xname = "";
if (nLevels != 0) {
int[] levels = new int[nLevels];
for (int idx = 0; idx < nLevels; idx++) {
levels[idx] = gen.nextInt(10);
}
StringBuffer sb = new StringBuffer();
for (int idx = 0; idx < nLevels; idx++) {
sb.append(dirNames[levels[idx]]);
sb.append("/");
}
xname = sb.toString();
}
long fidx = gen.nextLong() & Long.MAX_VALUE;
name = xname + Long.toString(fidx);
reset();
}
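/** Pick a new size and seed, guaranteed to differ from the previous values,
 *  so that an overwritten file is detectably different from the original. */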
void reset() {
final int oldsize = size;
do { size = gen.nextInt(MAX_SIZE); } while (oldsize == size);
final long oldseed = seed;
do { seed = gen.nextLong() & Long.MAX_VALUE; } while (oldseed == seed);
}
String getName() { return name; }
int getSize() { return size; }
long getSeed() { return seed; }
}
private static MyFile[] createFiles(URI fsname, String topdir)
throws IOException {
return createFiles(FileSystem.get(fsname, new Configuration()), topdir);
}
/** create NFILES with random names and directory hierarchies
* with random (but reproducible) data in them.
*/
private static MyFile[] createFiles(FileSystem fs, String topdir)
throws IOException {
Path root = new Path(topdir);
MyFile[] files = new MyFile[NFILES];
for (int i = 0; i < NFILES; i++) {
files[i] = createFile(root, fs);
}
return files;
}
static MyFile createFile(Path root, FileSystem fs, int levels)
throws IOException {
MyFile f = levels < 0 ? new MyFile() : new MyFile(levels);
Path p = new Path(root, f.getName());
FSDataOutputStream out = fs.create(p);
byte[] toWrite = new byte[f.getSize()];
new Random(f.getSeed()).nextBytes(toWrite);
out.write(toWrite);
out.close();
FileSystem.LOG.info("created: " + p + ", size=" + f.getSize());
return f;
}
static MyFile createFile(Path root, FileSystem fs) throws IOException {
return createFile(root, fs, -1);
}
private static boolean checkFiles(FileSystem fs, String topdir, MyFile[] files
) throws IOException {
return checkFiles(fs, topdir, files, false);
}
private static boolean checkFiles(FileSystem fs, String topdir, MyFile[] files,
boolean existingOnly) throws IOException {
Path root = new Path(topdir);
for (int idx = 0; idx < files.length; idx++) {
Path fPath = new Path(root, files[idx].getName());
try {
fs.getFileStatus(fPath);
FSDataInputStream in = fs.open(fPath);
byte[] toRead = new byte[files[idx].getSize()];
byte[] toCompare = new byte[files[idx].getSize()];
Random rb = new Random(files[idx].getSeed());
rb.nextBytes(toCompare);
assertEquals("Cannnot read file.", toRead.length, in.read(toRead));
in.close();
for (int i = 0; i < toRead.length; i++) {
if (toRead[i] != toCompare[i]) {
return false;
}
}
toRead = null;
toCompare = null;
}
catch(FileNotFoundException fnfe) {
if (!existingOnly) {
throw fnfe;
}
}
}
return true;
}
private static void updateFiles(FileSystem fs, String topdir, MyFile[] files,
int nupdate) throws IOException {
assert nupdate <= NFILES;
Path root = new Path(topdir);
for (int idx = 0; idx < nupdate; ++idx) {
Path fPath = new Path(root, files[idx].getName());
// overwrite file
assertTrue(fPath.toString() + " does not exist", fs.exists(fPath));
FSDataOutputStream out = fs.create(fPath);
files[idx].reset();
byte[] toWrite = new byte[files[idx].getSize()];
Random rb = new Random(files[idx].getSeed());
rb.nextBytes(toWrite);
out.write(toWrite);
out.close();
}
}
private static FileStatus[] getFileStatus(FileSystem fs,
String topdir, MyFile[] files) throws IOException {
return getFileStatus(fs, topdir, files, false);
}
private static FileStatus[] getFileStatus(FileSystem fs,
String topdir, MyFile[] files, boolean existingOnly) throws IOException {
Path root = new Path(topdir);
List<FileStatus> statuses = new ArrayList<FileStatus>();
for (int idx = 0; idx < NFILES; ++idx) {
try {
statuses.add(fs.getFileStatus(new Path(root, files[idx].getName())));
} catch(FileNotFoundException fnfe) {
if (!existingOnly) {
throw fnfe;
}
}
}
return statuses.toArray(new FileStatus[statuses.size()]);
}
private static boolean checkUpdate(FileSystem fs, FileStatus[] old,
String topdir, MyFile[] upd, final int nupdate) throws IOException {
Path root = new Path(topdir);
// overwrote updated files
for (int idx = 0; idx < nupdate; ++idx) {
final FileStatus stat =
fs.getFileStatus(new Path(root, upd[idx].getName()));
if (stat.getModificationTime() <= old[idx].getModificationTime()) {
return false;
}
}
// did not overwrite files not updated
for (int idx = nupdate; idx < NFILES; ++idx) {
final FileStatus stat =
fs.getFileStatus(new Path(root, upd[idx].getName()));
if (stat.getModificationTime() != old[idx].getModificationTime()) {
return false;
}
}
return true;
}
/** delete directory and everything underneath it.*/
private static void deldir(FileSystem fs, String topdir) throws IOException {
fs.delete(new Path(topdir), true);
}
/** copy files from local file system to local file system */
public void testCopyFromLocalToLocal() throws Exception {
Configuration conf = new Configuration();
FileSystem localfs = FileSystem.get(LOCAL_FS, conf);
MyFile[] files = createFiles(LOCAL_FS, TEST_ROOT_DIR+"/srcdat");
ToolRunner.run(new DistCp(new Configuration()),
new String[] {"file:///"+TEST_ROOT_DIR+"/srcdat",
"file:///"+TEST_ROOT_DIR+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(localfs, TEST_ROOT_DIR+"/destdat", files));
deldir(localfs, TEST_ROOT_DIR+"/destdat");
deldir(localfs, TEST_ROOT_DIR+"/srcdat");
}
/** copy files from dfs file system to dfs file system */
public void testCopyFromDfsToDfs() throws Exception {
String namenode = null;
MiniDFSCluster cluster = null;
try {
Configuration conf = new Configuration();
cluster = new MiniDFSCluster(conf, 2, true, null);
final FileSystem hdfs = cluster.getFileSystem();
namenode = FileSystem.getDefaultUri(conf).toString();
if (namenode.startsWith("hdfs://")) {
MyFile[] files = createFiles(URI.create(namenode), "/srcdat");
ToolRunner.run(new DistCp(conf), new String[] {
"-log",
namenode+"/logs",
namenode+"/srcdat",
namenode+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(hdfs, "/destdat", files));
FileSystem fs = FileSystem.get(URI.create(namenode+"/logs"), conf);
assertTrue("Log directory does not exist.",
fs.exists(new Path(namenode+"/logs")));
deldir(hdfs, "/destdat");
deldir(hdfs, "/srcdat");
deldir(hdfs, "/logs");
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
/** copy files from local file system to dfs file system */
public void testCopyFromLocalToDfs() throws Exception {
MiniDFSCluster cluster = null;
try {
Configuration conf = new Configuration();
cluster = new MiniDFSCluster(conf, 1, true, null);
final FileSystem hdfs = cluster.getFileSystem();
final String namenode = hdfs.getUri().toString();
if (namenode.startsWith("hdfs://")) {
MyFile[] files = createFiles(LOCAL_FS, TEST_ROOT_DIR+"/srcdat");
ToolRunner.run(new DistCp(conf), new String[] {
"-log",
namenode+"/logs",
"file:///"+TEST_ROOT_DIR+"/srcdat",
namenode+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(cluster.getFileSystem(), "/destdat", files));
assertTrue("Log directory does not exist.",
hdfs.exists(new Path(namenode+"/logs")));
deldir(hdfs, "/destdat");
deldir(hdfs, "/logs");
deldir(FileSystem.get(LOCAL_FS, conf), TEST_ROOT_DIR+"/srcdat");
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
/** copy files from dfs file system to local file system */
public void testCopyFromDfsToLocal() throws Exception {
MiniDFSCluster cluster = null;
try {
Configuration conf = new Configuration();
final FileSystem localfs = FileSystem.get(LOCAL_FS, conf);
cluster = new MiniDFSCluster(conf, 1, true, null);
final FileSystem hdfs = cluster.getFileSystem();
final String namenode = FileSystem.getDefaultUri(conf).toString();
if (namenode.startsWith("hdfs://")) {
MyFile[] files = createFiles(URI.create(namenode), "/srcdat");
ToolRunner.run(new DistCp(conf), new String[] {
"-log",
"/logs",
namenode+"/srcdat",
"file:///"+TEST_ROOT_DIR+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(localfs, TEST_ROOT_DIR+"/destdat", files));
assertTrue("Log directory does not exist.",
hdfs.exists(new Path("/logs")));
deldir(localfs, TEST_ROOT_DIR+"/destdat");
deldir(hdfs, "/logs");
deldir(hdfs, "/srcdat");
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
public void testCopyDfsToDfsUpdateOverwrite() throws Exception {
MiniDFSCluster cluster = null;
try {
Configuration conf = new Configuration();
cluster = new MiniDFSCluster(conf, 2, true, null);
final FileSystem hdfs = cluster.getFileSystem();
final String namenode = hdfs.getUri().toString();
if (namenode.startsWith("hdfs://")) {
MyFile[] files = createFiles(URI.create(namenode), "/srcdat");
ToolRunner.run(new DistCp(conf), new String[] {
"-p",
"-log",
namenode+"/logs",
namenode+"/srcdat",
namenode+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(hdfs, "/destdat", files));
FileSystem fs = FileSystem.get(URI.create(namenode+"/logs"), conf);
assertTrue("Log directory does not exist.",
fs.exists(new Path(namenode+"/logs")));
FileStatus[] dchkpoint = getFileStatus(hdfs, "/destdat", files);
final int nupdate = NFILES>>2;
updateFiles(cluster.getFileSystem(), "/srcdat", files, nupdate);
deldir(hdfs, "/logs");
ToolRunner.run(new DistCp(conf), new String[] {
"-p",
"-update",
"-log",
namenode+"/logs",
namenode+"/srcdat",
namenode+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(hdfs, "/destdat", files));
assertTrue("Update failed to replicate all changes in src",
checkUpdate(hdfs, dchkpoint, "/destdat", files, nupdate));
deldir(hdfs, "/logs");
ToolRunner.run(new DistCp(conf), new String[] {
"-p",
"-overwrite",
"-log",
namenode+"/logs",
namenode+"/srcdat",
namenode+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(hdfs, "/destdat", files));
assertTrue("-overwrite didn't.",
checkUpdate(hdfs, dchkpoint, "/destdat", files, NFILES));
deldir(hdfs, "/destdat");
deldir(hdfs, "/srcdat");
deldir(hdfs, "/logs");
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
public void testCopyDuplication() throws Exception {
final FileSystem localfs = FileSystem.get(LOCAL_FS, new Configuration());
try {
MyFile[] files = createFiles(localfs, TEST_ROOT_DIR+"/srcdat");
ToolRunner.run(new DistCp(new Configuration()),
new String[] {"file:///"+TEST_ROOT_DIR+"/srcdat",
"file:///"+TEST_ROOT_DIR+"/src2/srcdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(localfs, TEST_ROOT_DIR+"/src2/srcdat", files));
assertEquals(DistCp.DuplicationException.ERROR_CODE,
ToolRunner.run(new DistCp(new Configuration()),
new String[] {"file:///"+TEST_ROOT_DIR+"/srcdat",
"file:///"+TEST_ROOT_DIR+"/src2/srcdat",
"file:///"+TEST_ROOT_DIR+"/destdat",}));
}
finally {
deldir(localfs, TEST_ROOT_DIR+"/destdat");
deldir(localfs, TEST_ROOT_DIR+"/srcdat");
deldir(localfs, TEST_ROOT_DIR+"/src2");
}
}
public void testCopySingleFile() throws Exception {
FileSystem fs = FileSystem.get(LOCAL_FS, new Configuration());
Path root = new Path(TEST_ROOT_DIR+"/srcdat");
try {
MyFile[] files = {createFile(root, fs)};
//copy a dir with a single file
ToolRunner.run(new DistCp(new Configuration()),
new String[] {"file:///"+TEST_ROOT_DIR+"/srcdat",
"file:///"+TEST_ROOT_DIR+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, TEST_ROOT_DIR+"/destdat", files));
//copy a single file
String fname = files[0].getName();
Path p = new Path(root, fname);
FileSystem.LOG.info("fname=" + fname + ", exists? " + fs.exists(p));
ToolRunner.run(new DistCp(new Configuration()),
new String[] {"file:///"+TEST_ROOT_DIR+"/srcdat/"+fname,
"file:///"+TEST_ROOT_DIR+"/dest2/"+fname});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, TEST_ROOT_DIR+"/dest2", files));
//copy single file to existing dir
deldir(fs, TEST_ROOT_DIR+"/dest2");
fs.mkdirs(new Path(TEST_ROOT_DIR+"/dest2"));
MyFile[] files2 = {createFile(root, fs, 0)};
String sname = files2[0].getName();
ToolRunner.run(new DistCp(new Configuration()),
new String[] {"-update",
"file:///"+TEST_ROOT_DIR+"/srcdat/"+sname,
"file:///"+TEST_ROOT_DIR+"/dest2/"});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, TEST_ROOT_DIR+"/dest2", files2));
updateFiles(fs, TEST_ROOT_DIR+"/srcdat", files2, 1);
//copy single file to existing dir w/ dst name conflict
ToolRunner.run(new DistCp(new Configuration()),
new String[] {"-update",
"file:///"+TEST_ROOT_DIR+"/srcdat/"+sname,
"file:///"+TEST_ROOT_DIR+"/dest2/"});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, TEST_ROOT_DIR+"/dest2", files2));
}
finally {
deldir(fs, TEST_ROOT_DIR+"/destdat");
deldir(fs, TEST_ROOT_DIR+"/dest2");
deldir(fs, TEST_ROOT_DIR+"/srcdat");
}
}
public void testPreserveOption() throws Exception {
Configuration conf = new Configuration();
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster(conf, 2, true, null);
String nnUri = FileSystem.getDefaultUri(conf).toString();
FileSystem fs = FileSystem.get(URI.create(nnUri), conf);
{//test preserving user
MyFile[] files = createFiles(URI.create(nnUri), "/srcdat");
FileStatus[] srcstat = getFileStatus(fs, "/srcdat", files);
for(int i = 0; i < srcstat.length; i++) {
fs.setOwner(srcstat[i].getPath(), "u" + i, null);
}
ToolRunner.run(new DistCp(conf),
new String[]{"-pu", nnUri+"/srcdat", nnUri+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, "/destdat", files));
FileStatus[] dststat = getFileStatus(fs, "/destdat", files);
for(int i = 0; i < dststat.length; i++) {
assertEquals("i=" + i, "u" + i, dststat[i].getOwner());
}
deldir(fs, "/destdat");
deldir(fs, "/srcdat");
}
{//test preserving group
MyFile[] files = createFiles(URI.create(nnUri), "/srcdat");
FileStatus[] srcstat = getFileStatus(fs, "/srcdat", files);
for(int i = 0; i < srcstat.length; i++) {
fs.setOwner(srcstat[i].getPath(), null, "g" + i);
}
ToolRunner.run(new DistCp(conf),
new String[]{"-pg", nnUri+"/srcdat", nnUri+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, "/destdat", files));
FileStatus[] dststat = getFileStatus(fs, "/destdat", files);
for(int i = 0; i < dststat.length; i++) {
assertEquals("i=" + i, "g" + i, dststat[i].getGroup());
}
deldir(fs, "/destdat");
deldir(fs, "/srcdat");
}
{//test preserving mode
MyFile[] files = createFiles(URI.create(nnUri), "/srcdat");
FileStatus[] srcstat = getFileStatus(fs, "/srcdat", files);
FsPermission[] permissions = new FsPermission[srcstat.length];
for(int i = 0; i < srcstat.length; i++) {
permissions[i] = new FsPermission((short)(i & 0666));
fs.setPermission(srcstat[i].getPath(), permissions[i]);
}
ToolRunner.run(new DistCp(conf),
new String[]{"-pp", nnUri+"/srcdat", nnUri+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, "/destdat", files));
FileStatus[] dststat = getFileStatus(fs, "/destdat", files);
for(int i = 0; i < dststat.length; i++) {
assertEquals("i=" + i, permissions[i], dststat[i].getPermission());
}
deldir(fs, "/destdat");
deldir(fs, "/srcdat");
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
public void testMapCount() throws Exception {
String namenode = null;
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
try {
Configuration conf = new Configuration();
dfs = new MiniDFSCluster(conf, 3, true, null);
FileSystem fs = dfs.getFileSystem();
final FsShell shell = new FsShell(conf);
namenode = fs.getUri().toString();
mr = new MiniMRCluster(3, namenode, 1);
MyFile[] files = createFiles(fs.getUri(), "/srcdat");
long totsize = 0;
for (MyFile f : files) {
totsize += f.getSize();
}
Configuration job = mr.createJobConf();
job.setLong("distcp.bytes.per.map", totsize / 3);
ToolRunner.run(new DistCp(job),
new String[] {"-m", "100",
"-log",
namenode+"/logs",
namenode+"/srcdat",
namenode+"/destdat"});
assertTrue("Source and destination directories do not match.",
checkFiles(fs, "/destdat", files));
String logdir = namenode + "/logs";
System.out.println(execCmd(shell, "-lsr", logdir));
FileStatus[] logs = fs.listStatus(new Path(logdir));
// rare case where splits are exact, logs.length can be 4
assertTrue("Unexpected map count, logs.length=" + logs.length,
logs.length == 5 || logs.length == 4);
deldir(fs, "/destdat");
deldir(fs, "/logs");
ToolRunner.run(new DistCp(job),
new String[] {"-m", "1",
"-log",
namenode+"/logs",
namenode+"/srcdat",
namenode+"/destdat"});
System.out.println(execCmd(shell, "-lsr", logdir));
logs = fs.listStatus(new Path(namenode+"/logs"));
assertTrue("Unexpected map count, logs.length=" + logs.length,
logs.length == 2);
} finally {
if (dfs != null) { dfs.shutdown(); }
if (mr != null) { mr.shutdown(); }
}
}
public void testLimits() throws Exception {
Configuration conf = new Configuration();
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster(conf, 2, true, null);
final String nnUri = FileSystem.getDefaultUri(conf).toString();
final FileSystem fs = FileSystem.get(URI.create(nnUri), conf);
final DistCp distcp = new DistCp(conf);
final FsShell shell = new FsShell(conf);
final String srcrootdir = "/src_root";
final Path srcrootpath = new Path(srcrootdir);
final String dstrootdir = "/dst_root";
final Path dstrootpath = new Path(dstrootdir);
{//test -filelimit
MyFile[] files = createFiles(URI.create(nnUri), srcrootdir);
int filelimit = files.length / 2;
System.out.println("filelimit=" + filelimit);
ToolRunner.run(distcp,
new String[]{"-filelimit", ""+filelimit, nnUri+srcrootdir, nnUri+dstrootdir});
String results = execCmd(shell, "-lsr", dstrootdir);
results = removePrefix(results, dstrootdir);
System.out.println("results=" + results);
FileStatus[] dststat = getFileStatus(fs, dstrootdir, files, true);
assertEquals(filelimit, dststat.length);
deldir(fs, dstrootdir);
deldir(fs, srcrootdir);
}
{//test -sizelimit
createFiles(URI.create(nnUri), srcrootdir);
long sizelimit = fs.getContentSummary(srcrootpath).getLength()/2;
System.out.println("sizelimit=" + sizelimit);
ToolRunner.run(distcp,
new String[]{"-sizelimit", ""+sizelimit, nnUri+srcrootdir, nnUri+dstrootdir});
ContentSummary summary = fs.getContentSummary(dstrootpath);
System.out.println("summary=" + summary);
assertTrue(summary.getLength() <= sizelimit);
deldir(fs, dstrootdir);
deldir(fs, srcrootdir);
}
{//test update
final MyFile[] srcs = createFiles(URI.create(nnUri), srcrootdir);
final long totalsize = fs.getContentSummary(srcrootpath).getLength();
System.out.println("src.length=" + srcs.length);
System.out.println("totalsize =" + totalsize);
fs.mkdirs(dstrootpath);
final int parts = RAN.nextInt(NFILES/3 - 1) + 2;
final int filelimit = srcs.length/parts;
final long sizelimit = totalsize/parts;
System.out.println("filelimit=" + filelimit);
System.out.println("sizelimit=" + sizelimit);
System.out.println("parts =" + parts);
final String[] args = {"-filelimit", ""+filelimit, "-sizelimit", ""+sizelimit,
"-update", nnUri+srcrootdir, nnUri+dstrootdir};
int dstfilecount = 0;
long dstsize = 0;
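// each -update pass may copy at most "filelimit" additional files and
// "sizelimit" additional bytes beyond what the destination already holds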
for(int i = 0; i <= parts; i++) {
ToolRunner.run(distcp, args);
FileStatus[] dststat = getFileStatus(fs, dstrootdir, srcs, true);
System.out.println(i + ") dststat.length=" + dststat.length);
assertTrue(dststat.length - dstfilecount <= filelimit);
ContentSummary summary = fs.getContentSummary(dstrootpath);
System.out.println(i + ") summary.getLength()=" + summary.getLength());
assertTrue(summary.getLength() - dstsize <= sizelimit);
assertTrue(checkFiles(fs, dstrootdir, srcs, true));
dstfilecount = dststat.length;
dstsize = summary.getLength();
}
deldir(fs, dstrootdir);
deldir(fs, srcrootdir);
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
static final long now = System.currentTimeMillis();
static UnixUserGroupInformation createUGI(String name, boolean issuper) {
String username = name + now;
String group = issuper? "supergroup": username;
return UnixUserGroupInformation.createImmutable(
new String[]{username, group});
}
static Path createHomeDirectory(FileSystem fs, UserGroupInformation ugi
) throws IOException {
final Path home = new Path("/user/" + ugi.getUserName());
fs.mkdirs(home);
fs.setOwner(home, ugi.getUserName(), ugi.getGroupNames()[0]);
fs.setPermission(home, new FsPermission((short)0700));
return home;
}
public void testHftpAccessControl() throws Exception {
MiniDFSCluster cluster = null;
try {
final UnixUserGroupInformation DFS_UGI = createUGI("dfs", true);
final UnixUserGroupInformation USER_UGI = createUGI("user", false);
//start cluster by DFS_UGI
final Configuration dfsConf = new Configuration();
UnixUserGroupInformation.saveToConf(dfsConf,
UnixUserGroupInformation.UGI_PROPERTY_NAME, DFS_UGI);
cluster = new MiniDFSCluster(dfsConf, 2, true, null);
cluster.waitActive();
final String httpAdd = dfsConf.get("dfs.http.address");
final URI nnURI = FileSystem.getDefaultUri(dfsConf);
final String nnUri = nnURI.toString();
final Path home = createHomeDirectory(FileSystem.get(nnURI, dfsConf), USER_UGI);
//now, login as USER_UGI
final Configuration userConf = new Configuration();
UnixUserGroupInformation.saveToConf(userConf,
UnixUserGroupInformation.UGI_PROPERTY_NAME, USER_UGI);
final FileSystem fs = FileSystem.get(nnURI, userConf);
final Path srcrootpath = new Path(home, "src_root");
final String srcrootdir = srcrootpath.toString();
final Path dstrootpath = new Path(home, "dst_root");
final String dstrootdir = dstrootpath.toString();
final DistCp distcp = new DistCp(userConf);
FileSystem.mkdirs(fs, srcrootpath, new FsPermission((short)0700));
final String[] args = {"hftp://"+httpAdd+srcrootdir, nnUri+dstrootdir};
{ //copy with permission 000, should fail
fs.setPermission(srcrootpath, new FsPermission((short)0));
assertEquals(-3, ToolRunner.run(distcp, args));
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
/** test -delete */
public void testDelete() throws Exception {
final Configuration conf = new Configuration();
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster(conf, 2, true, null);
final URI nnURI = FileSystem.getDefaultUri(conf);
final String nnUri = nnURI.toString();
final FileSystem fs = FileSystem.get(URI.create(nnUri), conf);
final DistCp distcp = new DistCp(conf);
final FsShell shell = new FsShell(conf);
final String srcrootdir = "/src_root";
final String dstrootdir = "/dst_root";
{
//create source files
createFiles(nnURI, srcrootdir);
String srcresults = execCmd(shell, "-lsr", srcrootdir);
srcresults = removePrefix(srcresults, srcrootdir);
System.out.println("srcresults=" + srcresults);
//create some files in dst
createFiles(nnURI, dstrootdir);
System.out.println("dstrootdir=" + dstrootdir);
shell.run(new String[]{"-lsr", dstrootdir});
//run distcp
ToolRunner.run(distcp,
new String[]{"-delete", "-update", "-log", "/log",
nnUri+srcrootdir, nnUri+dstrootdir});
//make sure src and dst contains the same files
String dstresults = execCmd(shell, "-lsr", dstrootdir);
dstresults = removePrefix(dstresults, dstrootdir);
System.out.println("first dstresults=" + dstresults);
assertEquals(srcresults, dstresults);
//create additional file in dst
create(fs, new Path(dstrootdir, "foo"));
create(fs, new Path(dstrootdir, "foobar"));
//run distcp again
ToolRunner.run(distcp,
new String[]{"-delete", "-update", "-log", "/log2",
nnUri+srcrootdir, nnUri+dstrootdir});
//make sure src and dst contains the same files
dstresults = execCmd(shell, "-lsr", dstrootdir);
dstresults = removePrefix(dstresults, dstrootdir);
System.out.println("second dstresults=" + dstresults);
assertEquals(srcresults, dstresults);
//cleanup
deldir(fs, dstrootdir);
deldir(fs, srcrootdir);
}
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
static void create(FileSystem fs, Path f) throws IOException {
FSDataOutputStream out = fs.create(f);
try {
byte[] b = new byte[1024 + RAN.nextInt(1024)];
RAN.nextBytes(b);
out.write(b);
} finally {
if (out != null) out.close();
}
}
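/** Run an FsShell command, capturing and returning whatever it prints to stdout. */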
static String execCmd(FsShell shell, String... args) throws Exception {
ByteArrayOutputStream baout = new ByteArrayOutputStream();
PrintStream out = new PrintStream(baout, true);
PrintStream old = System.out;
System.setOut(out);
shell.run(args);
out.close();
System.setOut(old);
return baout.toString();
}
private static String removePrefix(String lines, String prefix) {
final int prefixlen = prefix.length();
final StringTokenizer t = new StringTokenizer(lines, "\n");
final StringBuffer results = new StringBuffer();
for(; t.hasMoreTokens(); ) {
String s = t.nextToken();
results.append(s.substring(s.indexOf(prefix) + prefixlen) + "\n");
}
return results.toString();
}
}

View File

@@ -0,0 +1,445 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Date;
import java.util.StringTokenizer;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.StringUtils;
/**
* Distributed i/o benchmark.
* <p>
* This test writes into or reads from a specified number of files.
* File size is specified as a parameter to the test.
* Each file is accessed in a separate map task.
* <p>
* The reducer collects the following statistics:
* <ul>
* <li>number of tasks completed</li>
* <li>number of bytes written/read</li>
* <li>execution time</li>
* <li>io rate</li>
* <li>io rate squared</li>
* </ul>
*
* Finally, the following information is appended to a local file
* <ul>
* <li>read or write test</li>
* <li>date and time the test finished</li>
* <li>number of files</li>
* <li>total number of bytes processed</li>
* <li>throughput in mb/sec (total number of bytes / sum of processing times)</li>
* <li>average i/o rate in mb/sec per file</li>
* <li>standard deviation of i/o rate </li>
* </ul>
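 * <p>
 * A typical command line (illustrative only; the exact jar name and the way
 * the driver class is invoked depend on how the test jar is built) might be:
 * <pre>
 *   hadoop jar hadoop-test.jar TestDFSIO -write -nrFiles 10 -fileSize 100
 *   hadoop jar hadoop-test.jar TestDFSIO -read -nrFiles 10 -fileSize 100
 *   hadoop jar hadoop-test.jar TestDFSIO -clean
 * </pre>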
*/
public class TestDFSIO extends TestCase {
// Constants
private static final Log LOG = LogFactory.getLog(TestDFSIO.class);
private static final int TEST_TYPE_READ = 0;
private static final int TEST_TYPE_WRITE = 1;
private static final int TEST_TYPE_CLEANUP = 2;
private static final int DEFAULT_BUFFER_SIZE = 1000000;
private static final String BASE_FILE_NAME = "test_io_";
private static final String DEFAULT_RES_FILE_NAME = "TestDFSIO_results.log";
private static Configuration fsConfig = new Configuration();
private static final long MEGA = 0x100000;
private static String TEST_ROOT_DIR = System.getProperty("test.build.data","/benchmarks/TestDFSIO");
private static Path CONTROL_DIR = new Path(TEST_ROOT_DIR, "io_control");
private static Path WRITE_DIR = new Path(TEST_ROOT_DIR, "io_write");
private static Path READ_DIR = new Path(TEST_ROOT_DIR, "io_read");
private static Path DATA_DIR = new Path(TEST_ROOT_DIR, "io_data");
/**
* Run the test with default parameters.
*
* @throws Exception
*/
public void testIOs() throws Exception {
testIOs(10, 10);
}
/**
* Run the test with the specified parameters.
*
* @param fileSize file size
* @param nrFiles number of files
* @throws IOException
*/
public static void testIOs(int fileSize, int nrFiles)
throws IOException {
FileSystem fs = FileSystem.get(fsConfig);
createControlFile(fs, fileSize, nrFiles);
writeTest(fs);
readTest(fs);
cleanup(fs);
}
private static void createControlFile(
FileSystem fs,
int fileSize, // in MB
int nrFiles
) throws IOException {
LOG.info("creating control file: "+fileSize+" mega bytes, "+nrFiles+" files");
fs.delete(CONTROL_DIR, true);
for(int i=0; i < nrFiles; i++) {
String name = getFileName(i);
Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(fs, fsConfig, controlFile,
Text.class, LongWritable.class,
CompressionType.NONE);
writer.append(new Text(name), new LongWritable(fileSize));
} catch(Exception e) {
throw new IOException(e.getLocalizedMessage());
} finally {
if (writer != null)
writer.close();
writer = null;
}
}
LOG.info("created control files for: "+nrFiles+" files");
}
private static String getFileName(int fIdx) {
return BASE_FILE_NAME + Integer.toString(fIdx);
}
/**
* Write/Read mapper base class.
* <p>
* Collects the following statistics per task:
* <ul>
* <li>number of tasks completed</li>
* <li>number of bytes written/read</li>
* <li>execution time</li>
* <li>i/o rate</li>
* <li>i/o rate squared</li>
* </ul>
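 * <p>
 * For example (purely illustrative numbers), a task that processes 100 MB in
 * 20,000 ms reports an i/o rate of 100 * 1000 / 20000 = 5 MB/sec; the reducer
 * accumulates these per-task values so that the average rate and its standard
 * deviation can be derived when the results are analyzed.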
*/
private abstract static class IOStatMapper extends IOMapperBase {
IOStatMapper() {
super(fsConfig);
}
void collectStats(OutputCollector<Text, Text> output,
String name,
long execTime,
Object objSize) throws IOException {
long totalSize = ((Long)objSize).longValue();
float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
LOG.info("Number of bytes processed = " + totalSize);
LOG.info("Exec time = " + execTime);
LOG.info("IO rate = " + ioRateMbSec);
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
new Text(String.valueOf(1)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
new Text(String.valueOf(totalSize)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
new Text(String.valueOf(execTime)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
new Text(String.valueOf(ioRateMbSec*1000)));
output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
new Text(String.valueOf(ioRateMbSec*ioRateMbSec*1000)));
}
}
/**
* Write mapper class.
*/
public static class WriteMapper extends IOStatMapper {
public WriteMapper() {
super();
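      // fill the shared buffer once with a repeating 50-character pattern
      // starting at '0'; every chunk written by doIO() reuses this
      // deterministic data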
for(int i=0; i < bufferSize; i++)
buffer[i] = (byte)('0' + i % 50);
}
public Object doIO(Reporter reporter,
String name,
long totalSize
) throws IOException {
// create file
totalSize *= MEGA;
OutputStream out;
out = fs.create(new Path(DATA_DIR, name), true, bufferSize);
try {
// write to the file
long nrRemaining;
for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) {
int curSize = (bufferSize < nrRemaining) ? bufferSize : (int)nrRemaining;
out.write(buffer, 0, curSize);
reporter.setStatus("writing " + name + "@" +
(totalSize - nrRemaining) + "/" + totalSize
+ " ::host = " + hostName);
}
} finally {
out.close();
}
return new Long(totalSize);
}
}
private static void writeTest(FileSystem fs)
throws IOException {
fs.delete(DATA_DIR, true);
fs.delete(WRITE_DIR, true);
runIOTest(WriteMapper.class, WRITE_DIR);
}
private static void runIOTest( Class<? extends Mapper> mapperClass,
Path outputDir
) throws IOException {
JobConf job = new JobConf(fsConfig, TestDFSIO.class);
FileInputFormat.setInputPaths(job, CONTROL_DIR);
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(mapperClass);
job.setReducerClass(AccumulatingReducer.class);
FileOutputFormat.setOutputPath(job, outputDir);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
/**
* Read mapper class.
*/
public static class ReadMapper extends IOStatMapper {
public ReadMapper() {
super();
}
public Object doIO(Reporter reporter,
String name,
long totalSize
) throws IOException {
totalSize *= MEGA;
// open file
DataInputStream in = fs.open(new Path(DATA_DIR, name));
try {
long actualSize = 0;
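        // keep reading full buffers; the loop ends on the first short read
        // (or a return value of -1 at end of file)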
for(int curSize = bufferSize; curSize == bufferSize;) {
curSize = in.read(buffer, 0, bufferSize);
actualSize += curSize;
reporter.setStatus("reading " + name + "@" +
actualSize + "/" + totalSize
+ " ::host = " + hostName);
}
} finally {
in.close();
}
return new Long(totalSize);
}
}
private static void readTest(FileSystem fs) throws IOException {
fs.delete(READ_DIR, true);
runIOTest(ReadMapper.class, READ_DIR);
}
private static void sequentialTest(
FileSystem fs,
int testType,
int fileSize,
int nrFiles
) throws Exception {
IOStatMapper ioer = null;
if (testType == TEST_TYPE_READ)
ioer = new ReadMapper();
else if (testType == TEST_TYPE_WRITE)
ioer = new WriteMapper();
else
return;
for(int i=0; i < nrFiles; i++)
ioer.doIO(Reporter.NULL,
BASE_FILE_NAME+Integer.toString(i),
MEGA*fileSize);
}
public static void main(String[] args) {
int testType = TEST_TYPE_READ;
int bufferSize = DEFAULT_BUFFER_SIZE;
int fileSize = 1;
int nrFiles = 1;
String resFileName = DEFAULT_RES_FILE_NAME;
boolean isSequential = false;
String className = TestDFSIO.class.getSimpleName();
String version = className + ".0.0.4";
String usage = "Usage: " + className + " -read | -write | -clean [-nrFiles N] [-fileSize MB] [-resFile resultFileName] [-bufferSize Bytes] ";
System.out.println(version);
if (args.length == 0) {
System.err.println(usage);
System.exit(-1);
}
for (int i = 0; i < args.length; i++) { // parse command line
if (args[i].startsWith("-read")) {
testType = TEST_TYPE_READ;
} else if (args[i].equals("-write")) {
testType = TEST_TYPE_WRITE;
} else if (args[i].equals("-clean")) {
testType = TEST_TYPE_CLEANUP;
} else if (args[i].startsWith("-seq")) {
isSequential = true;
} else if (args[i].equals("-nrFiles")) {
nrFiles = Integer.parseInt(args[++i]);
} else if (args[i].equals("-fileSize")) {
fileSize = Integer.parseInt(args[++i]);
} else if (args[i].equals("-bufferSize")) {
bufferSize = Integer.parseInt(args[++i]);
} else if (args[i].equals("-resFile")) {
resFileName = args[++i];
}
}
LOG.info("nrFiles = " + nrFiles);
LOG.info("fileSize (MB) = " + fileSize);
LOG.info("bufferSize = " + bufferSize);
try {
fsConfig.setInt("test.io.file.buffer.size", bufferSize);
FileSystem fs = FileSystem.get(fsConfig);
if (isSequential) {
long tStart = System.currentTimeMillis();
sequentialTest(fs, testType, fileSize, nrFiles);
long execTime = System.currentTimeMillis() - tStart;
String resultLine = "Seq Test exec time sec: " + (float)execTime / 1000;
LOG.info(resultLine);
return;
}
if (testType == TEST_TYPE_CLEANUP) {
cleanup(fs);
return;
}
createControlFile(fs, fileSize, nrFiles);
long tStart = System.currentTimeMillis();
if (testType == TEST_TYPE_WRITE)
writeTest(fs);
if (testType == TEST_TYPE_READ)
readTest(fs);
long execTime = System.currentTimeMillis() - tStart;
analyzeResult(fs, testType, execTime, resFileName);
} catch(Exception e) {
System.err.print(StringUtils.stringifyException(e));
System.exit(-1);
}
}
private static void analyzeResult( FileSystem fs,
int testType,
long execTime,
String resFileName
) throws IOException {
Path reduceFile;
if (testType == TEST_TYPE_WRITE)
reduceFile = new Path(WRITE_DIR, "part-00000");
else
reduceFile = new Path(READ_DIR, "part-00000");
DataInputStream in;
in = new DataInputStream(fs.open(reduceFile));
BufferedReader lines;
lines = new BufferedReader(new InputStreamReader(in));
long tasks = 0;
long size = 0;
long time = 0;
float rate = 0;
float sqrate = 0;
String line;
while((line = lines.readLine()) != null) {
StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
String attr = tokens.nextToken();
if (attr.endsWith(":tasks"))
tasks = Long.parseLong(tokens.nextToken());
else if (attr.endsWith(":size"))
size = Long.parseLong(tokens.nextToken());
else if (attr.endsWith(":time"))
time = Long.parseLong(tokens.nextToken());
else if (attr.endsWith(":rate"))
rate = Float.parseFloat(tokens.nextToken());
else if (attr.endsWith(":sqrate"))
sqrate = Float.parseFloat(tokens.nextToken());
}
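    // collectStats() emits rate and sqrate scaled by 1000 and the reducer
    // accumulates them over all tasks, so dividing by 1000 and by the task
    // count yields the mean i/o rate and the mean squared rate; the standard
    // deviation then follows from sqrt(E[x^2] - E[x]^2), with Math.abs
    // guarding against tiny negative values caused by floating point rounding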
double med = rate / 1000 / tasks;
double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med*med));
String resultLines[] = {
"----- TestDFSIO ----- : " + ((testType == TEST_TYPE_WRITE) ? "write" :
(testType == TEST_TYPE_READ) ? "read" :
"unknown"),
" Date & time: " + new Date(System.currentTimeMillis()),
" Number of files: " + tasks,
"Total MBytes processed: " + size/MEGA,
" Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
"Average IO rate mb/sec: " + med,
" IO rate std deviation: " + stdDev,
" Test exec time sec: " + (float)execTime / 1000,
"" };
PrintStream res = new PrintStream(
new FileOutputStream(
new File(resFileName), true));
for(int i = 0; i < resultLines.length; i++) {
LOG.info(resultLines[i]);
res.println(resultLines[i]);
}
}
private static void cleanup(FileSystem fs) throws IOException {
LOG.info("Cleaning up test files");
fs.delete(new Path(TEST_ROOT_DIR), true);
}
}


@ -0,0 +1,629 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Random;
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.HashMap;
import java.net.InetSocketAddress;
import java.net.URI;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.fs.shell.CommandFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.LongSumReducer;
import org.apache.hadoop.security.UnixUserGroupInformation;
public class TestFileSystem extends TestCase {
private static final Log LOG = FileSystem.LOG;
private static Configuration conf = new Configuration();
private static int BUFFER_SIZE = conf.getInt("io.file.buffer.size", 4096);
private static final long MEGA = 1024 * 1024;
private static final int SEEKS_PER_FILE = 4;
private static String ROOT = System.getProperty("test.build.data","fs_test");
private static Path CONTROL_DIR = new Path(ROOT, "fs_control");
private static Path WRITE_DIR = new Path(ROOT, "fs_write");
private static Path READ_DIR = new Path(ROOT, "fs_read");
private static Path DATA_DIR = new Path(ROOT, "fs_data");
public void testFs() throws Exception {
testFs(10 * MEGA, 100, 0);
}
public static void testFs(long megaBytes, int numFiles, long seed)
throws Exception {
FileSystem fs = FileSystem.get(conf);
if (seed == 0)
seed = new Random().nextLong();
LOG.info("seed = "+seed);
createControlFile(fs, megaBytes, numFiles, seed);
writeTest(fs, false);
readTest(fs, false);
seekTest(fs, false);
fs.delete(CONTROL_DIR, true);
fs.delete(DATA_DIR, true);
fs.delete(WRITE_DIR, true);
fs.delete(READ_DIR, true);
}
public static void testCommandFormat() throws Exception {
// This should go to TestFsShell.java when it is added.
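    // CommandFormat(name, min, max, opt...) accepts between min and max
    // positional arguments plus the listed "-opt" flags; parse(args, pos)
    // starts at index pos, strips the recognized flags, and returns the
    // remaining positional arguments in order, as the assertions below
    // illustrate.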
CommandFormat cf;
cf= new CommandFormat("copyToLocal", 2,2,"crc","ignoreCrc");
assertEquals(cf.parse(new String[] {"-get","file", "-"}, 1).get(1), "-");
assertEquals(cf.parse(new String[] {"-get","file","-ignoreCrc","/foo"}, 1).get(1),"/foo");
cf = new CommandFormat("tail", 1, 1, "f");
assertEquals(cf.parse(new String[] {"-tail","fileName"}, 1).get(0),"fileName");
assertEquals(cf.parse(new String[] {"-tail","-f","fileName"}, 1).get(0),"fileName");
cf = new CommandFormat("setrep", 2, 2, "R", "w");
assertEquals(cf.parse(new String[] {"-setrep","-R","2","/foo/bar"}, 1).get(1), "/foo/bar");
cf = new CommandFormat("put", 2, 10000);
assertEquals(cf.parse(new String[] {"-put", "-", "dest"}, 1).get(1), "dest");
}
public static void createControlFile(FileSystem fs,
long megaBytes, int numFiles,
long seed) throws Exception {
LOG.info("creating control file: "+megaBytes+" bytes, "+numFiles+" files");
Path controlFile = new Path(CONTROL_DIR, "files");
fs.delete(controlFile, true);
Random random = new Random(seed);
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, conf, controlFile,
Text.class, LongWritable.class, CompressionType.NONE);
long totalSize = 0;
long maxSize = ((megaBytes / numFiles) * 2) + 1;
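    // maxSize caps each random file size at roughly twice the average,
    // so the sizes average out to about megaBytes/numFiles bytes per file
    // and the loop below produces close to numFiles files in total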
try {
while (totalSize < megaBytes) {
Text name = new Text(Long.toString(random.nextLong()));
long size = random.nextLong();
if (size < 0)
size = -size;
size = size % maxSize;
//LOG.info(" adding: name="+name+" size="+size);
writer.append(name, new LongWritable(size));
totalSize += size;
}
} finally {
writer.close();
}
LOG.info("created control file for: "+totalSize+" bytes");
}
public static class WriteMapper extends Configured
implements Mapper<Text, LongWritable, Text, LongWritable> {
private Random random = new Random();
private byte[] buffer = new byte[BUFFER_SIZE];
private FileSystem fs;
private boolean fastCheck;
// a random suffix per task
private String suffix = "-"+random.nextLong();
{
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public WriteMapper() { super(null); }
public WriteMapper(Configuration conf) { super(conf); }
public void configure(JobConf job) {
setConf(job);
fastCheck = job.getBoolean("fs.test.fastCheck", false);
}
public void map(Text key, LongWritable value,
OutputCollector<Text, LongWritable> collector,
Reporter reporter)
throws IOException {
String name = key.toString();
long size = value.get();
long seed = Long.parseLong(name);
random.setSeed(seed);
reporter.setStatus("creating " + name);
// write to temp file initially to permit parallel execution
Path tempFile = new Path(DATA_DIR, name+suffix);
OutputStream out = fs.create(tempFile);
long written = 0;
try {
while (written < size) {
if (fastCheck) {
Arrays.fill(buffer, (byte)random.nextInt(Byte.MAX_VALUE));
} else {
random.nextBytes(buffer);
}
long remains = size - written;
int length = (remains<=buffer.length) ? (int)remains : buffer.length;
out.write(buffer, 0, length);
written += length;
reporter.setStatus("writing "+name+"@"+written+"/"+size);
}
} finally {
out.close();
}
// rename to final location
fs.rename(tempFile, new Path(DATA_DIR, name));
collector.collect(new Text("bytes"), new LongWritable(written));
reporter.setStatus("wrote " + name);
}
public void close() {
}
}
public static void writeTest(FileSystem fs, boolean fastCheck)
throws Exception {
fs.delete(DATA_DIR, true);
fs.delete(WRITE_DIR, true);
JobConf job = new JobConf(conf, TestFileSystem.class);
job.setBoolean("fs.test.fastCheck", fastCheck);
FileInputFormat.setInputPaths(job, CONTROL_DIR);
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(WriteMapper.class);
job.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(job, WRITE_DIR);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
public static class ReadMapper extends Configured
implements Mapper<Text, LongWritable, Text, LongWritable> {
private Random random = new Random();
private byte[] buffer = new byte[BUFFER_SIZE];
private byte[] check = new byte[BUFFER_SIZE];
private FileSystem fs;
private boolean fastCheck;
{
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public ReadMapper() { super(null); }
public ReadMapper(Configuration conf) { super(conf); }
public void configure(JobConf job) {
setConf(job);
fastCheck = job.getBoolean("fs.test.fastCheck", false);
}
public void map(Text key, LongWritable value,
OutputCollector<Text, LongWritable> collector,
Reporter reporter)
throws IOException {
String name = key.toString();
long size = value.get();
long seed = Long.parseLong(name);
random.setSeed(seed);
reporter.setStatus("opening " + name);
DataInputStream in =
new DataInputStream(fs.open(new Path(DATA_DIR, name)));
long read = 0;
try {
while (read < size) {
long remains = size - read;
int n = (remains<=buffer.length) ? (int)remains : buffer.length;
in.readFully(buffer, 0, n);
read += n;
if (fastCheck) {
Arrays.fill(check, (byte)random.nextInt(Byte.MAX_VALUE));
} else {
random.nextBytes(check);
}
if (n != buffer.length) {
Arrays.fill(buffer, n, buffer.length, (byte)0);
Arrays.fill(check, n, check.length, (byte)0);
}
assertTrue(Arrays.equals(buffer, check));
reporter.setStatus("reading "+name+"@"+read+"/"+size);
}
} finally {
in.close();
}
collector.collect(new Text("bytes"), new LongWritable(read));
reporter.setStatus("read " + name);
}
public void close() {
}
}
public static void readTest(FileSystem fs, boolean fastCheck)
throws Exception {
fs.delete(READ_DIR, true);
JobConf job = new JobConf(conf, TestFileSystem.class);
job.setBoolean("fs.test.fastCheck", fastCheck);
FileInputFormat.setInputPaths(job, CONTROL_DIR);
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(ReadMapper.class);
job.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(job, READ_DIR);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
public static class SeekMapper<K> extends Configured
implements Mapper<Text, LongWritable, K, LongWritable> {
private Random random = new Random();
private byte[] check = new byte[BUFFER_SIZE];
private FileSystem fs;
private boolean fastCheck;
{
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public SeekMapper() { super(null); }
public SeekMapper(Configuration conf) { super(conf); }
public void configure(JobConf job) {
setConf(job);
fastCheck = job.getBoolean("fs.test.fastCheck", false);
}
public void map(Text key, LongWritable value,
OutputCollector<K, LongWritable> collector,
Reporter reporter)
throws IOException {
String name = key.toString();
long size = value.get();
long seed = Long.parseLong(name);
if (size == 0) return;
reporter.setStatus("opening " + name);
FSDataInputStream in = fs.open(new Path(DATA_DIR, name));
try {
for (int i = 0; i < SEEKS_PER_FILE; i++) {
// generate a random position
long position = Math.abs(random.nextLong()) % size;
// seek file to that position
reporter.setStatus("seeking " + name);
in.seek(position);
byte b = in.readByte();
// check that byte matches
byte checkByte = 0;
// advance random state to that position
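          // (the writer generated the file from the same seed in chunks of
          // check.length bytes, so replaying the generator here in the same
          // chunk sizes reproduces the data chunk that covers 'position',
          // from which the expected byte is derived)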
random.setSeed(seed);
for (int p = 0; p <= position; p+= check.length) {
reporter.setStatus("generating data for " + name);
if (fastCheck) {
checkByte = (byte)random.nextInt(Byte.MAX_VALUE);
} else {
random.nextBytes(check);
checkByte = check[(int)(position % check.length)];
}
}
assertEquals(b, checkByte);
}
} finally {
in.close();
}
}
public void close() {
}
}
public static void seekTest(FileSystem fs, boolean fastCheck)
throws Exception {
fs.delete(READ_DIR, true);
JobConf job = new JobConf(conf, TestFileSystem.class);
job.setBoolean("fs.test.fastCheck", fastCheck);
FileInputFormat.setInputPaths(job,CONTROL_DIR);
job.setInputFormat(SequenceFileInputFormat.class);
job.setMapperClass(SeekMapper.class);
job.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(job, READ_DIR);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setNumReduceTasks(1);
JobClient.runJob(job);
}
public static void main(String[] args) throws Exception {
int megaBytes = 10;
int files = 100;
boolean noRead = false;
boolean noWrite = false;
boolean noSeek = false;
boolean fastCheck = false;
long seed = new Random().nextLong();
String usage = "Usage: TestFileSystem -files N -megaBytes M [-noread] [-nowrite] [-noseek] [-fastcheck]";
if (args.length == 0) {
System.err.println(usage);
System.exit(-1);
}
for (int i = 0; i < args.length; i++) { // parse command line
if (args[i].equals("-files")) {
files = Integer.parseInt(args[++i]);
} else if (args[i].equals("-megaBytes")) {
megaBytes = Integer.parseInt(args[++i]);
} else if (args[i].equals("-noread")) {
noRead = true;
} else if (args[i].equals("-nowrite")) {
noWrite = true;
} else if (args[i].equals("-noseek")) {
noSeek = true;
} else if (args[i].equals("-fastcheck")) {
fastCheck = true;
}
}
LOG.info("seed = "+seed);
LOG.info("files = " + files);
LOG.info("megaBytes = " + megaBytes);
FileSystem fs = FileSystem.get(conf);
if (!noWrite) {
createControlFile(fs, megaBytes*MEGA, files, seed);
writeTest(fs, fastCheck);
}
if (!noRead) {
readTest(fs, fastCheck);
}
if (!noSeek) {
seekTest(fs, fastCheck);
}
}
static Configuration createConf4Testing(String username) throws Exception {
Configuration conf = new Configuration();
UnixUserGroupInformation.saveToConf(conf,
UnixUserGroupInformation.UGI_PROPERTY_NAME,
new UnixUserGroupInformation(username, new String[]{"group"}));
return conf;
}
public void testFsCache() throws Exception {
{
long now = System.currentTimeMillis();
Configuration[] conf = {new Configuration(),
createConf4Testing("foo" + now), createConf4Testing("bar" + now)};
FileSystem[] fs = new FileSystem[conf.length];
for(int i = 0; i < conf.length; i++) {
fs[i] = FileSystem.get(conf[i]);
assertEquals(fs[i], FileSystem.get(conf[i]));
for(int j = 0; j < i; j++) {
assertFalse(fs[j] == fs[i]);
}
}
FileSystem.closeAll();
}
{
try {
runTestCache(NameNode.DEFAULT_PORT);
} catch(java.net.BindException be) {
LOG.warn("Cannot test NameNode.DEFAULT_PORT (="
+ NameNode.DEFAULT_PORT + ")", be);
}
runTestCache(0);
}
}
static void runTestCache(int port) throws Exception {
Configuration conf = new Configuration();
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster(port, conf, 2, true, true, null, null);
URI uri = cluster.getFileSystem().getUri();
LOG.info("uri=" + uri);
{
FileSystem fs = FileSystem.get(uri, new Configuration());
checkPath(cluster, fs);
for(int i = 0; i < 100; i++) {
assertTrue(fs == FileSystem.get(uri, new Configuration()));
}
}
if (port == NameNode.DEFAULT_PORT) {
//test explicit default port
URI uri2 = new URI(uri.getScheme(), uri.getUserInfo(),
uri.getHost(), NameNode.DEFAULT_PORT, uri.getPath(),
uri.getQuery(), uri.getFragment());
LOG.info("uri2=" + uri2);
FileSystem fs = FileSystem.get(uri2, conf);
checkPath(cluster, fs);
for(int i = 0; i < 100; i++) {
assertTrue(fs == FileSystem.get(uri2, new Configuration()));
}
}
} finally {
if (cluster != null) cluster.shutdown();
}
}
static void checkPath(MiniDFSCluster cluster, FileSystem fileSys) throws IOException {
InetSocketAddress add = cluster.getNameNode().getNameNodeAddress();
// Test upper/lower case
fileSys.checkPath(new Path("hdfs://" + add.getHostName().toUpperCase() + ":" + add.getPort()));
}
public void testFsClose() throws Exception {
{
Configuration conf = new Configuration();
new Path("file:///").getFileSystem(conf);
UnixUserGroupInformation.login(conf, true);
FileSystem.closeAll();
}
{
Configuration conf = new Configuration();
new Path("hftp://localhost:12345/").getFileSystem(conf);
UnixUserGroupInformation.login(conf, true);
FileSystem.closeAll();
}
{
Configuration conf = new Configuration();
FileSystem fs = new Path("hftp://localhost:12345/").getFileSystem(conf);
UnixUserGroupInformation.login(fs.getConf(), true);
FileSystem.closeAll();
}
}
public void testCacheKeysAreCaseInsensitive()
throws Exception
{
Configuration conf = new Configuration();
// check basic equality
FileSystem.Cache.Key lowercaseCachekey1 = new FileSystem.Cache.Key(new URI("hftp://localhost:12345/"), conf);
FileSystem.Cache.Key lowercaseCachekey2 = new FileSystem.Cache.Key(new URI("hftp://localhost:12345/"), conf);
assertEquals( lowercaseCachekey1, lowercaseCachekey2 );
// check insensitive equality
FileSystem.Cache.Key uppercaseCachekey = new FileSystem.Cache.Key(new URI("HFTP://Localhost:12345/"), conf);
assertEquals( lowercaseCachekey2, uppercaseCachekey );
// check behaviour with collections
List<FileSystem.Cache.Key> list = new ArrayList<FileSystem.Cache.Key>();
list.add(uppercaseCachekey);
assertTrue(list.contains(uppercaseCachekey));
assertTrue(list.contains(lowercaseCachekey2));
Set<FileSystem.Cache.Key> set = new HashSet<FileSystem.Cache.Key>();
set.add(uppercaseCachekey);
assertTrue(set.contains(uppercaseCachekey));
assertTrue(set.contains(lowercaseCachekey2));
Map<FileSystem.Cache.Key, String> map = new HashMap<FileSystem.Cache.Key, String>();
map.put(uppercaseCachekey, "");
assertTrue(map.containsKey(uppercaseCachekey));
assertTrue(map.containsKey(lowercaseCachekey2));
}
public static void testFsUniqueness(long megaBytes, int numFiles, long seed)
throws Exception {
// multiple invocations of FileSystem.get return the same object.
FileSystem fs1 = FileSystem.get(conf);
FileSystem fs2 = FileSystem.get(conf);
assertTrue(fs1 == fs2);
// multiple invocations of FileSystem.newInstance return different objects
fs1 = FileSystem.newInstance(conf);
fs2 = FileSystem.newInstance(conf);
assertTrue(fs1 != fs2 && !fs1.equals(fs2));
fs1.close();
fs2.close();
}
}


@ -0,0 +1,213 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.util.Iterator;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;
/**
 * Test the har file system:
 * create a har file system,
 * run fs shell commands against it,
 * and then run a map/reduce job over it.
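 * <p>
 * The archive is created by driving {@link org.apache.hadoop.tools.HadoopArchives}
 * through ToolRunner with arguments of the form
 * "-archiveName foo.har &lt;source dir&gt; &lt;destination dir&gt;"
 * (the paths shown are placeholders; the test uses directories under the
 * user's home directory on a mini DFS cluster).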
*/
public class TestHarFileSystem extends TestCase {
private Path inputPath;
private MiniDFSCluster dfscluster;
private MiniMRCluster mapred;
private FileSystem fs;
private Path filea, fileb, filec;
private Path archivePath;
protected void setUp() throws Exception {
super.setUp();
dfscluster = new MiniDFSCluster(new JobConf(), 2, true, null);
fs = dfscluster.getFileSystem();
mapred = new MiniMRCluster(2, fs.getUri().toString(), 1);
inputPath = new Path(fs.getHomeDirectory(), "test");
filea = new Path(inputPath,"a");
fileb = new Path(inputPath,"b");
filec = new Path(inputPath,"c");
archivePath = new Path(fs.getHomeDirectory(), "tmp");
}
protected void tearDown() throws Exception {
try {
if (mapred != null) {
mapred.shutdown();
}
if (dfscluster != null) {
dfscluster.shutdown();
}
} catch(Exception e) {
System.err.println(e);
}
super.tearDown();
}
static class TextMapperReducer implements Mapper<LongWritable, Text, Text, Text>,
Reducer<Text, Text, Text, Text> {
public void configure(JobConf conf) {
//do nothing
}
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
output.collect(value, new Text(""));
}
public void close() throws IOException {
// do nothing
}
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
while(values.hasNext()) {
values.next();
output.collect(key, null);
}
}
}
public void testArchives() throws Exception {
fs.mkdirs(inputPath);
FSDataOutputStream out = fs.create(filea);
out.write("a".getBytes());
out.close();
out = fs.create(fileb);
out.write("b".getBytes());
out.close();
out = fs.create(filec);
out.write("c".getBytes());
out.close();
Configuration conf = mapred.createJobConf();
HadoopArchives har = new HadoopArchives(conf);
String[] args = new String[3];
    //check for destination not specified
args[0] = "-archiveName";
args[1] = "foo.har";
args[2] = inputPath.toString();
int ret = ToolRunner.run(har, args);
assertTrue(ret != 0);
args = new String[4];
//check for wrong archiveName
args[0] = "-archiveName";
args[1] = "/d/foo.har";
args[2] = inputPath.toString();
args[3] = archivePath.toString();
ret = ToolRunner.run(har, args);
assertTrue(ret != 0);
    // see if dest is a file
args[1] = "foo.har";
args[3] = filec.toString();
ret = ToolRunner.run(har, args);
assertTrue(ret != 0);
//this is a valid run
args[0] = "-archiveName";
args[1] = "foo.har";
args[2] = inputPath.toString();
args[3] = archivePath.toString();
ret = ToolRunner.run(har, args);
    //check for the existence of the archive
assertTrue(ret == 0);
    //try running it again. it should not
    // overwrite the existing directory
ret = ToolRunner.run(har, args);
assertTrue(ret != 0);
Path finalPath = new Path(archivePath, "foo.har");
Path fsPath = new Path(inputPath.toUri().getPath());
String relative = fsPath.toString().substring(1);
Path filePath = new Path(finalPath, relative);
//make it a har path
Path harPath = new Path("har://" + filePath.toUri().getPath());
assertTrue(fs.exists(new Path(finalPath, "_index")));
assertTrue(fs.exists(new Path(finalPath, "_masterindex")));
assertTrue(!fs.exists(new Path(finalPath, "_logs")));
//creation tested
//check if the archive is same
// do ls and cat on all the files
FsShell shell = new FsShell(conf);
args = new String[2];
args[0] = "-ls";
args[1] = harPath.toString();
ret = ToolRunner.run(shell, args);
// ls should work.
assertTrue((ret == 0));
//now check for contents of filea
// fileb and filec
Path harFilea = new Path(harPath, "a");
Path harFileb = new Path(harPath, "b");
Path harFilec = new Path(harPath, "c");
FileSystem harFs = harFilea.getFileSystem(conf);
FSDataInputStream fin = harFs.open(harFilea);
byte[] b = new byte[4];
int readBytes = fin.read(b);
assertTrue("Empty read.", readBytes > 0);
fin.close();
assertTrue("strings are equal ", (b[0] == "a".getBytes()[0]));
fin = harFs.open(harFileb);
readBytes = fin.read(b);
assertTrue("Empty read.", readBytes > 0);
fin.close();
assertTrue("strings are equal ", (b[0] == "b".getBytes()[0]));
fin = harFs.open(harFilec);
readBytes = fin.read(b);
assertTrue("Empty read.", readBytes > 0);
fin.close();
assertTrue("strings are equal ", (b[0] == "c".getBytes()[0]));
// ok all files match
// run a map reduce job
Path outdir = new Path(fs.getHomeDirectory(), "mapout");
JobConf jobconf = mapred.createJobConf();
FileInputFormat.addInputPath(jobconf, harPath);
jobconf.setInputFormat(TextInputFormat.class);
jobconf.setOutputFormat(TextOutputFormat.class);
FileOutputFormat.setOutputPath(jobconf, outdir);
jobconf.setMapperClass(TextMapperReducer.class);
jobconf.setMapOutputKeyClass(Text.class);
jobconf.setMapOutputValueClass(Text.class);
jobconf.setReducerClass(TextMapperReducer.class);
jobconf.setNumReduceTasks(1);
JobClient.runJob(jobconf);
args[1] = outdir.toString();
ret = ToolRunner.run(shell, args);
FileStatus[] status = fs.globStatus(new Path(outdir, "part*"));
Path reduceFile = status[0].getPath();
FSDataInputStream reduceIn = fs.open(reduceFile);
b = new byte[6];
readBytes = reduceIn.read(b);
assertTrue("Should read 6 bytes.", readBytes == 6);
//assuming all the 6 bytes were read.
Text readTxt = new Text(b);
assertTrue("a\nb\nc\n".equals(readTxt.toString()));
assertTrue("number of bytes left should be -1", reduceIn.read(b) == -1);
reduceIn.close();
}
}


@ -0,0 +1,964 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.IOException;
import java.util.Date;
import java.io.DataInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.File;
import java.io.BufferedReader;
import java.util.StringTokenizer;
import java.net.InetAddress;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reducer;
/**
* This program executes a specified operation that applies load to
* the NameNode.
*
* When run simultaneously on multiple nodes, this program functions
* as a stress-test and benchmark for namenode, especially when
* the number of bytes written to each file is small.
*
* Valid operations are:
* create_write
* open_read
* rename
* delete
*
* NOTE: The open_read, rename and delete operations assume that the files
* they operate on are already available. The create_write operation
* must be run before running the other operations.
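 *
 * A typical invocation (illustrative only; the jar and driver names depend
 * on how the test jar is built and invoked) might be:
 *
 *   hadoop jar hadoop-test.jar nnbench -operation create_write -maps 12
 *       -reduces 6 -blockSize 1 -bytesToWrite 0 -numberOfFiles 1000
 *       -replicationFactorPerFile 3 -readFileAfterOpen false
 *       -baseDir /benchmarks/NNBench
 *   (all options given on a single command line)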
*/
public class NNBench {
private static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.hdfs.NNBench");
protected static String CONTROL_DIR_NAME = "control";
protected static String OUTPUT_DIR_NAME = "output";
protected static String DATA_DIR_NAME = "data";
protected static final String DEFAULT_RES_FILE_NAME = "NNBench_results.log";
protected static final String NNBENCH_VERSION = "NameNode Benchmark 0.4";
public static String operation = "none";
public static long numberOfMaps = 1l; // default is 1
public static long numberOfReduces = 1l; // default is 1
public static long startTime =
System.currentTimeMillis() + (120 * 1000); // default is 'now' + 2min
public static long blockSize = 1l; // default is 1
public static int bytesToWrite = 0; // default is 0
public static long bytesPerChecksum = 1l; // default is 1
public static long numberOfFiles = 1l; // default is 1
public static short replicationFactorPerFile = 1; // default is 1
public static String baseDir = "/benchmarks/NNBench"; // default
public static boolean readFileAfterOpen = false; // default is to not read
// Supported operations
private static final String OP_CREATE_WRITE = "create_write";
private static final String OP_OPEN_READ = "open_read";
private static final String OP_RENAME = "rename";
private static final String OP_DELETE = "delete";
// To display in the format that matches the NN and DN log format
// Example: 2007-10-26 00:01:19,853
static SimpleDateFormat sdf =
new SimpleDateFormat("yyyy-MM-dd' 'HH:mm:ss','S");
private static Configuration config = new Configuration();
/**
* Clean up the files before a test run
*
* @throws IOException on error
*/
private static void cleanupBeforeTestrun() throws IOException {
FileSystem tempFS = FileSystem.get(config);
// Delete the data directory only if it is the create/write operation
if (operation.equals(OP_CREATE_WRITE)) {
LOG.info("Deleting data directory");
tempFS.delete(new Path(baseDir, DATA_DIR_NAME), true);
}
tempFS.delete(new Path(baseDir, CONTROL_DIR_NAME), true);
tempFS.delete(new Path(baseDir, OUTPUT_DIR_NAME), true);
}
/**
* Create control files before a test run.
* Number of files created is equal to the number of maps specified
*
* @throws IOException on error
*/
private static void createControlFiles() throws IOException {
FileSystem tempFS = FileSystem.get(config);
LOG.info("Creating " + numberOfMaps + " control files");
for (int i = 0; i < numberOfMaps; i++) {
String strFileName = "NNBench_Controlfile_" + i;
Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME),
strFileName);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(tempFS, config, filePath, Text.class,
LongWritable.class, CompressionType.NONE);
writer.append(new Text(strFileName), new LongWritable(0l));
} catch(Exception e) {
throw new IOException(e.getLocalizedMessage());
} finally {
if (writer != null) {
writer.close();
}
writer = null;
}
}
}
/**
* Display version
*/
private static void displayVersion() {
System.out.println(NNBENCH_VERSION);
}
/**
* Display usage
*/
private static void displayUsage() {
String usage =
"Usage: nnbench <options>\n" +
"Options:\n" +
"\t-operation <Available operations are " + OP_CREATE_WRITE + " " +
OP_OPEN_READ + " " + OP_RENAME + " " + OP_DELETE + ". " +
"This option is mandatory>\n" +
"\t * NOTE: The open_read, rename and delete operations assume " +
"that the files they operate on, are already available. " +
"The create_write operation must be run before running the " +
"other operations.\n" +
"\t-maps <number of maps. default is 1. This is not mandatory>\n" +
"\t-reduces <number of reduces. default is 1. This is not mandatory>\n" +
"\t-startTime <time to start, given in seconds from the epoch. " +
"Make sure this is far enough into the future, so all maps " +
"(operations) will start at the same time>. " +
"default is launch time + 2 mins. This is not mandatory \n" +
"\t-blockSize <Block size in bytes. default is 1. " +
"This is not mandatory>\n" +
"\t-bytesToWrite <Bytes to write. default is 0. " +
"This is not mandatory>\n" +
"\t-bytesPerChecksum <Bytes per checksum for the files. default is 1. " +
"This is not mandatory>\n" +
"\t-numberOfFiles <number of files to create. default is 1. " +
"This is not mandatory>\n" +
"\t-replicationFactorPerFile <Replication factor for the files." +
" default is 1. This is not mandatory>\n" +
"\t-baseDir <base DFS path. default is /becnhmarks/NNBench. " +
"This is not mandatory>\n" +
"\t-readFileAfterOpen <true or false. if true, it reads the file and " +
"reports the average time to read. This is valid with the open_read " +
"operation. default is false. This is not mandatory>\n" +
"\t-help: Display the help statement\n";
System.out.println(usage);
}
/**
* check for arguments and fail if the values are not specified
*/
public static void checkArgs(final int index, final int length) {
if (index == length) {
displayUsage();
System.exit(-1);
}
}
/**
* Parse input arguments
*
* @params args Command line inputs
*/
public static void parseInputs(final String[] args) {
// If there are no command line arguments, exit
if (args.length == 0) {
displayUsage();
System.exit(-1);
}
// Parse command line args
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-operation")) {
operation = args[++i];
} else if (args[i].equals("-maps")) {
checkArgs(i + 1, args.length);
numberOfMaps = Long.parseLong(args[++i]);
} else if (args[i].equals("-reduces")) {
checkArgs(i + 1, args.length);
numberOfReduces = Long.parseLong(args[++i]);
} else if (args[i].equals("-startTime")) {
checkArgs(i + 1, args.length);
startTime = Long.parseLong(args[++i]) * 1000;
} else if (args[i].equals("-blockSize")) {
checkArgs(i + 1, args.length);
blockSize = Long.parseLong(args[++i]);
} else if (args[i].equals("-bytesToWrite")) {
checkArgs(i + 1, args.length);
bytesToWrite = Integer.parseInt(args[++i]);
} else if (args[i].equals("-bytesPerChecksum")) {
checkArgs(i + 1, args.length);
bytesPerChecksum = Long.parseLong(args[++i]);
} else if (args[i].equals("-numberOfFiles")) {
checkArgs(i + 1, args.length);
numberOfFiles = Long.parseLong(args[++i]);
} else if (args[i].equals("-replicationFactorPerFile")) {
checkArgs(i + 1, args.length);
replicationFactorPerFile = Short.parseShort(args[++i]);
} else if (args[i].equals("-baseDir")) {
checkArgs(i + 1, args.length);
baseDir = args[++i];
} else if (args[i].equals("-readFileAfterOpen")) {
checkArgs(i + 1, args.length);
readFileAfterOpen = Boolean.parseBoolean(args[++i]);
} else if (args[i].equals("-help")) {
displayUsage();
System.exit(-1);
}
}
LOG.info("Test Inputs: ");
LOG.info(" Test Operation: " + operation);
LOG.info(" Start time: " + sdf.format(new Date(startTime)));
LOG.info(" Number of maps: " + numberOfMaps);
LOG.info(" Number of reduces: " + numberOfReduces);
LOG.info(" Block Size: " + blockSize);
LOG.info(" Bytes to write: " + bytesToWrite);
LOG.info(" Bytes per checksum: " + bytesPerChecksum);
LOG.info(" Number of files: " + numberOfFiles);
LOG.info(" Replication factor: " + replicationFactorPerFile);
LOG.info(" Base dir: " + baseDir);
LOG.info(" Read file after open: " + readFileAfterOpen);
// Set user-defined parameters, so the map method can access the values
config.set("test.nnbench.operation", operation);
config.setLong("test.nnbench.maps", numberOfMaps);
config.setLong("test.nnbench.reduces", numberOfReduces);
config.setLong("test.nnbench.starttime", startTime);
config.setLong("test.nnbench.blocksize", blockSize);
config.setInt("test.nnbench.bytestowrite", bytesToWrite);
config.setLong("test.nnbench.bytesperchecksum", bytesPerChecksum);
config.setLong("test.nnbench.numberoffiles", numberOfFiles);
config.setInt("test.nnbench.replicationfactor",
(int) replicationFactorPerFile);
config.set("test.nnbench.basedir", baseDir);
config.setBoolean("test.nnbench.readFileAfterOpen", readFileAfterOpen);
config.set("test.nnbench.datadir.name", DATA_DIR_NAME);
config.set("test.nnbench.outputdir.name", OUTPUT_DIR_NAME);
config.set("test.nnbench.controldir.name", CONTROL_DIR_NAME);
}
/**
* Analyze the results
*
* @throws IOException on error
*/
private static void analyzeResults() throws IOException {
final FileSystem fs = FileSystem.get(config);
Path reduceFile = new Path(new Path(baseDir, OUTPUT_DIR_NAME),
"part-00000");
DataInputStream in;
in = new DataInputStream(fs.open(reduceFile));
BufferedReader lines;
lines = new BufferedReader(new InputStreamReader(in));
long totalTimeAL1 = 0l;
long totalTimeAL2 = 0l;
long totalTimeTPmS = 0l;
long lateMaps = 0l;
long numOfExceptions = 0l;
long successfulFileOps = 0l;
long mapStartTimeTPmS = 0l;
long mapEndTimeTPmS = 0l;
String resultTPSLine1 = null;
String resultTPSLine2 = null;
String resultALLine1 = null;
String resultALLine2 = null;
String line;
while((line = lines.readLine()) != null) {
StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
String attr = tokens.nextToken();
if (attr.endsWith(":totalTimeAL1")) {
totalTimeAL1 = Long.parseLong(tokens.nextToken());
} else if (attr.endsWith(":totalTimeAL2")) {
totalTimeAL2 = Long.parseLong(tokens.nextToken());
} else if (attr.endsWith(":totalTimeTPmS")) {
totalTimeTPmS = Long.parseLong(tokens.nextToken());
} else if (attr.endsWith(":latemaps")) {
lateMaps = Long.parseLong(tokens.nextToken());
} else if (attr.endsWith(":numOfExceptions")) {
numOfExceptions = Long.parseLong(tokens.nextToken());
} else if (attr.endsWith(":successfulFileOps")) {
successfulFileOps = Long.parseLong(tokens.nextToken());
} else if (attr.endsWith(":mapStartTimeTPmS")) {
mapStartTimeTPmS = Long.parseLong(tokens.nextToken());
} else if (attr.endsWith(":mapEndTimeTPmS")) {
mapEndTimeTPmS = Long.parseLong(tokens.nextToken());
}
}
// Average latency is the average time to perform 'n' number of
// operations, n being the number of files
double avgLatency1 = (double) totalTimeAL1 / (double) successfulFileOps;
double avgLatency2 = (double) totalTimeAL2 / (double) successfulFileOps;
// The time it takes for the longest running map is measured. Using that,
// cluster transactions per second is calculated. It includes time to
// retry any of the failed operations
double longestMapTimeTPmS = (double) (mapEndTimeTPmS - mapStartTimeTPmS);
double totalTimeTPS = (longestMapTimeTPmS == 0) ?
(1000 * successfulFileOps) :
(double) (1000 * successfulFileOps) / (double) longestMapTimeTPmS;
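    // e.g. (illustrative numbers) 1000 successful operations with a longest
    // map time of 50,000 ms give 1000 * 1000 / 50000 = 20 transactions per
    // second; for create_write this figure is later doubled, since create
    // and close are reported as two separate transactions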
// The time it takes to perform 'n' operations is calculated (in ms),
// n being the number of files. Using that time, the average execution
// time is calculated. It includes time to retry any of the
// failed operations
double AverageExecutionTime = (totalTimeTPmS == 0) ?
(double) successfulFileOps :
(double) (totalTimeTPmS / successfulFileOps);
if (operation.equals(OP_CREATE_WRITE)) {
// For create/write/close, it is treated as two transactions,
// since a file create from a client perspective involves create and close
resultTPSLine1 = " TPS: Create/Write/Close: " +
(int) (totalTimeTPS * 2);
resultTPSLine2 = "Avg exec time (ms): Create/Write/Close: " +
(double) AverageExecutionTime;
resultALLine1 = " Avg Lat (ms): Create/Write: " + avgLatency1;
resultALLine2 = " Avg Lat (ms): Close: " + avgLatency2;
} else if (operation.equals(OP_OPEN_READ)) {
resultTPSLine1 = " TPS: Open/Read: " +
(int) totalTimeTPS;
resultTPSLine2 = " Avg Exec time (ms): Open/Read: " +
(double) AverageExecutionTime;
resultALLine1 = " Avg Lat (ms): Open: " + avgLatency1;
if (readFileAfterOpen) {
resultALLine2 = " Avg Lat (ms): Read: " + avgLatency2;
}
} else if (operation.equals(OP_RENAME)) {
resultTPSLine1 = " TPS: Rename: " +
(int) totalTimeTPS;
resultTPSLine2 = " Avg Exec time (ms): Rename: " +
(double) AverageExecutionTime;
resultALLine1 = " Avg Lat (ms): Rename: " + avgLatency1;
} else if (operation.equals(OP_DELETE)) {
resultTPSLine1 = " TPS: Delete: " +
(int) totalTimeTPS;
resultTPSLine2 = " Avg Exec time (ms): Delete: " +
(double) AverageExecutionTime;
resultALLine1 = " Avg Lat (ms): Delete: " + avgLatency1;
}
String resultLines[] = {
"-------------- NNBench -------------- : ",
" Version: " + NNBENCH_VERSION,
" Date & time: " + sdf.format(new Date(
System.currentTimeMillis())),
"",
" Test Operation: " + operation,
" Start time: " +
sdf.format(new Date(startTime)),
" Maps to run: " + numberOfMaps,
" Reduces to run: " + numberOfReduces,
" Block Size (bytes): " + blockSize,
" Bytes to write: " + bytesToWrite,
" Bytes per checksum: " + bytesPerChecksum,
" Number of files: " + numberOfFiles,
" Replication factor: " + replicationFactorPerFile,
" Successful file operations: " + successfulFileOps,
"",
" # maps that missed the barrier: " + lateMaps,
" # exceptions: " + numOfExceptions,
"",
resultTPSLine1,
resultTPSLine2,
resultALLine1,
resultALLine2,
"",
" RAW DATA: AL Total #1: " + totalTimeAL1,
" RAW DATA: AL Total #2: " + totalTimeAL2,
" RAW DATA: TPS Total (ms): " + totalTimeTPmS,
" RAW DATA: Longest Map Time (ms): " + longestMapTimeTPmS,
" RAW DATA: Late maps: " + lateMaps,
" RAW DATA: # of exceptions: " + numOfExceptions,
"" };
PrintStream res = new PrintStream(new FileOutputStream(
new File(DEFAULT_RES_FILE_NAME), true));
// Write to a file and also dump to log
for(int i = 0; i < resultLines.length; i++) {
LOG.info(resultLines[i]);
res.println(resultLines[i]);
}
}
/**
* Run the test
*
* @throws IOException on error
*/
public static void runTests() throws IOException {
config.setLong("io.bytes.per.checksum", bytesPerChecksum);
JobConf job = new JobConf(config, NNBench.class);
job.setJobName("NNBench-" + operation);
FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
job.setInputFormat(SequenceFileInputFormat.class);
// Explicitly set number of max map attempts to 1.
job.setMaxMapAttempts(1);
// Explicitly turn off speculative execution
job.setSpeculativeExecution(false);
job.setMapperClass(NNBenchMapper.class);
job.setReducerClass(NNBenchReducer.class);
FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks((int) numberOfReduces);
JobClient.runJob(job);
}
/**
* Validate the inputs
*/
public static void validateInputs() {
// If it is not one of the four operations, then fail
if (!operation.equals(OP_CREATE_WRITE) &&
!operation.equals(OP_OPEN_READ) &&
!operation.equals(OP_RENAME) &&
!operation.equals(OP_DELETE)) {
System.err.println("Error: Unknown operation: " + operation);
displayUsage();
System.exit(-1);
}
// If number of maps is a negative number, then fail
// Hadoop allows the number of maps to be 0
if (numberOfMaps < 0) {
System.err.println("Error: Number of maps must be a positive number");
displayUsage();
System.exit(-1);
}
// If number of reduces is a negative number or 0, then fail
if (numberOfReduces <= 0) {
System.err.println("Error: Number of reduces must be a positive number");
displayUsage();
System.exit(-1);
}
// If blocksize is a negative number or 0, then fail
if (blockSize <= 0) {
System.err.println("Error: Block size must be a positive number");
displayUsage();
System.exit(-1);
}
// If bytes to write is a negative number, then fail
if (bytesToWrite < 0) {
System.err.println("Error: Bytes to write must be a positive number");
displayUsage();
System.exit(-1);
}
// If bytes per checksum is a negative number, then fail
if (bytesPerChecksum < 0) {
System.err.println("Error: Bytes per checksum must be a positive number");
displayUsage();
System.exit(-1);
}
// If number of files is a negative number, then fail
if (numberOfFiles < 0) {
System.err.println("Error: Number of files must be a positive number");
displayUsage();
System.exit(-1);
}
// If replication factor is a negative number, then fail
if (replicationFactorPerFile < 0) {
System.err.println("Error: Replication factor must be a positive number");
displayUsage();
System.exit(-1);
}
// If block size is not a multiple of bytesperchecksum, fail
if (blockSize % bytesPerChecksum != 0) {
System.err.println("Error: Block Size in bytes must be a multiple of " +
"bytes per checksum: ");
displayUsage();
System.exit(-1);
}
}
/**
* Main method for running the NNBench benchmarks
*
* @throws IOException indicates a problem with test startup
*/
public static void main(String[] args) throws IOException {
// Display the application version string
displayVersion();
// Parse the inputs
parseInputs(args);
// Validate inputs
validateInputs();
// Clean up files before the test run
cleanupBeforeTestrun();
// Create control files before test run
createControlFiles();
// Run the tests as a map reduce job
runTests();
// Analyze results
analyzeResults();
}
/**
* Mapper class
*/
static class NNBenchMapper extends Configured
implements Mapper<Text, LongWritable, Text, Text> {
FileSystem filesystem = null;
private String hostName = null;
long numberOfFiles = 1l;
long blkSize = 1l;
short replFactor = 1;
int bytesToWrite = 0;
String baseDir = null;
String dataDirName = null;
String op = null;
boolean readFile = false;
final int MAX_OPERATION_EXCEPTIONS = 1000;
// Data to collect from the operation
int numOfExceptions = 0;
long startTimeAL = 0l;
long totalTimeAL1 = 0l;
long totalTimeAL2 = 0l;
long successfulFileOps = 0l;
/**
* Constructor
*/
public NNBenchMapper() {
}
/**
* Mapper base implementation
*/
public void configure(JobConf conf) {
setConf(conf);
try {
filesystem = FileSystem.get(conf);
} catch(Exception e) {
throw new RuntimeException("Cannot get file system.", e);
}
try {
hostName = InetAddress.getLocalHost().getHostName();
} catch(Exception e) {
throw new RuntimeException("Error getting hostname", e);
}
}
/**
* Mapper base implementation
*/
public void close() throws IOException {
}
/**
* Returns when the current number of seconds from the epoch equals
* the command line argument given by <code>-startTime</code>.
* This allows multiple instances of this program, running on clock
* synchronized nodes, to start at roughly the same time.
*/
private boolean barrier() {
long startTime = getConf().getLong("test.nnbench.starttime", 0l);
long currentTime = System.currentTimeMillis();
long sleepTime = startTime - currentTime;
boolean retVal = false;
// If the sleep time is greater than 0, then sleep and return
if (sleepTime > 0) {
LOG.info("Waiting in barrier for: " + sleepTime + " ms");
try {
Thread.sleep(sleepTime);
retVal = true;
} catch (Exception e) {
retVal = false;
}
}
return retVal;
}
/**
* Map method
*/
public void map(Text key,
LongWritable value,
OutputCollector<Text, Text> output,
Reporter reporter) throws IOException {
Configuration conf = filesystem.getConf();
numberOfFiles = conf.getLong("test.nnbench.numberoffiles", 1l);
blkSize = conf.getLong("test.nnbench.blocksize", 1l);
replFactor = (short) (conf.getInt("test.nnbench.replicationfactor", 1));
bytesToWrite = conf.getInt("test.nnbench.bytestowrite", 0);
baseDir = conf.get("test.nnbench.basedir");
dataDirName = conf.get("test.nnbench.datadir.name");
op = conf.get("test.nnbench.operation");
readFile = conf.getBoolean("test.nnbench.readFileAfterOpen", false);
long totalTimeTPmS = 0l;
long startTimeTPmS = 0l;
long endTimeTPms = 0l;
numOfExceptions = 0;
startTimeAL = 0l;
totalTimeAL1 = 0l;
totalTimeAL2 = 0l;
successfulFileOps = 0l;
if (barrier()) {
if (op.equals(OP_CREATE_WRITE)) {
startTimeTPmS = System.currentTimeMillis();
doCreateWriteOp("file_" + hostName + "_", output, reporter);
} else if (op.equals(OP_OPEN_READ)) {
startTimeTPmS = System.currentTimeMillis();
doOpenReadOp("file_" + hostName + "_", output, reporter);
} else if (op.equals(OP_RENAME)) {
startTimeTPmS = System.currentTimeMillis();
doRenameOp("file_" + hostName + "_", output, reporter);
} else if (op.equals(OP_DELETE)) {
startTimeTPmS = System.currentTimeMillis();
doDeleteOp("file_" + hostName + "_", output, reporter);
}
endTimeTPms = System.currentTimeMillis();
totalTimeTPmS = endTimeTPms - startTimeTPmS;
} else {
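        // barrier() did not wait successfully (the start time has already
        // passed or the sleep was interrupted), so record a late map
        // instead of running an operation.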
output.collect(new Text("l:latemaps"), new Text("1"));
}
// collect after the map end time is measured
output.collect(new Text("l:totalTimeAL1"),
new Text(String.valueOf(totalTimeAL1)));
output.collect(new Text("l:totalTimeAL2"),
new Text(String.valueOf(totalTimeAL2)));
output.collect(new Text("l:numOfExceptions"),
new Text(String.valueOf(numOfExceptions)));
output.collect(new Text("l:successfulFileOps"),
new Text(String.valueOf(successfulFileOps)));
output.collect(new Text("l:totalTimeTPmS"),
new Text(String.valueOf(totalTimeTPmS)));
output.collect(new Text("min:mapStartTimeTPmS"),
new Text(String.valueOf(startTimeTPmS)));
output.collect(new Text("max:mapEndTimeTPmS"),
new Text(String.valueOf(endTimeTPms)));
}
/**
* Create and Write operation.
*/
private void doCreateWriteOp(String name,
OutputCollector<Text, Text> output,
Reporter reporter) {
FSDataOutputStream out = null;
byte[] buffer = new byte[bytesToWrite];
for (long l = 0l; l < numberOfFiles; l++) {
Path filePath = new Path(new Path(baseDir, dataDirName),
name + "_" + l);
boolean successfulOp = false;
while (! successfulOp && numOfExceptions < MAX_OPERATION_EXCEPTIONS) {
try {
// Set up timer for measuring AL (transaction #1)
startTimeAL = System.currentTimeMillis();
// Create the file
// Use a buffer size of 512
out = filesystem.create(filePath,
true,
512,
replFactor,
blkSize);
out.write(buffer);
totalTimeAL1 += (System.currentTimeMillis() - startTimeAL);
// Close the file / file output stream
// Set up timers for measuring AL (transaction #2)
startTimeAL = System.currentTimeMillis();
out.close();
totalTimeAL2 += (System.currentTimeMillis() - startTimeAL);
successfulOp = true;
successfulFileOps ++;
reporter.setStatus("Finish "+ l + " files");
} catch (IOException e) {
LOG.info("Exception recorded in op: " +
"Create/Write/Close");
numOfExceptions++;
}
}
}
}
/**
* Open operation
*/
private void doOpenReadOp(String name,
OutputCollector<Text, Text> output,
Reporter reporter) {
FSDataInputStream input = null;
byte[] buffer = new byte[bytesToWrite];
for (long l = 0l; l < numberOfFiles; l++) {
Path filePath = new Path(new Path(baseDir, dataDirName),
name + "_" + l);
boolean successfulOp = false;
while (! successfulOp && numOfExceptions < MAX_OPERATION_EXCEPTIONS) {
try {
// Set up timer for measuring AL
startTimeAL = System.currentTimeMillis();
input = filesystem.open(filePath);
totalTimeAL1 += (System.currentTimeMillis() - startTimeAL);
// If the file needs to be read (specified at command line)
if (readFile) {
startTimeAL = System.currentTimeMillis();
input.readFully(buffer);
totalTimeAL2 += (System.currentTimeMillis() - startTimeAL);
}
input.close();
successfulOp = true;
successfulFileOps ++;
reporter.setStatus("Finish "+ l + " files");
} catch (IOException e) {
LOG.info("Exception recorded in op: OpenRead " + e);
numOfExceptions++;
}
}
}
}
/**
* Rename operation
*/
private void doRenameOp(String name,
OutputCollector<Text, Text> output,
Reporter reporter) {
for (long l = 0l; l < numberOfFiles; l++) {
Path filePath = new Path(new Path(baseDir, dataDirName),
name + "_" + l);
Path filePathR = new Path(new Path(baseDir, dataDirName),
name + "_r_" + l);
boolean successfulOp = false;
while (! successfulOp && numOfExceptions < MAX_OPERATION_EXCEPTIONS) {
try {
// Set up timer for measuring AL
startTimeAL = System.currentTimeMillis();
filesystem.rename(filePath, filePathR);
totalTimeAL1 += (System.currentTimeMillis() - startTimeAL);
successfulOp = true;
successfulFileOps ++;
reporter.setStatus("Finish "+ l + " files");
} catch (IOException e) {
LOG.info("Exception recorded in op: Rename");
numOfExceptions++;
}
}
}
}
/**
* Delete operation
*/
private void doDeleteOp(String name,
OutputCollector<Text, Text> output,
Reporter reporter) {
for (long l = 0l; l < numberOfFiles; l++) {
Path filePath = new Path(new Path(baseDir, dataDirName),
name + "_" + l);
boolean successfulOp = false;
while (! successfulOp && numOfExceptions < MAX_OPERATION_EXCEPTIONS) {
try {
// Set up timer for measuring AL
startTimeAL = System.currentTimeMillis();
filesystem.delete(filePath, true);
totalTimeAL1 += (System.currentTimeMillis() - startTimeAL);
successfulOp = true;
successfulFileOps ++;
reporter.setStatus("Finish "+ l + " files");
} catch (IOException e) {
LOG.info("Exception in recorded op: Delete");
numOfExceptions++;
}
}
}
}
}
/**
* Reducer class
*/
static class NNBenchReducer extends MapReduceBase
implements Reducer<Text, Text, Text, Text> {
protected String hostName;
public NNBenchReducer () {
LOG.info("Starting NNBenchReducer !!!");
try {
hostName = java.net.InetAddress.getLocalHost().getHostName();
} catch(Exception e) {
hostName = "localhost";
}
LOG.info("Starting NNBenchReducer on " + hostName);
}
/**
* Reduce method
*/
public void reduce(Text key,
Iterator<Text> values,
OutputCollector<Text, Text> output,
Reporter reporter
) throws IOException {
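      // The key prefix chosen by the mapper selects the aggregation:
      // "l:" values are summed, "min:" keeps the smallest (non-zero) value,
      // and "max:" keeps the largest value.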
String field = key.toString();
reporter.setStatus("starting " + field + " ::host = " + hostName);
// sum long values
if (field.startsWith("l:")) {
long lSum = 0;
while (values.hasNext()) {
lSum += Long.parseLong(values.next().toString());
}
output.collect(key, new Text(String.valueOf(lSum)));
}
if (field.startsWith("min:")) {
long minVal = -1;
while (values.hasNext()) {
long value = Long.parseLong(values.next().toString());
if (minVal == -1) {
minVal = value;
} else {
if (value != 0 && value < minVal) {
minVal = value;
}
}
}
output.collect(key, new Text(String.valueOf(minVal)));
}
if (field.startsWith("max:")) {
long maxVal = -1;
while (values.hasNext()) {
long value = Long.parseLong(values.next().toString());
if (maxVal == -1) {
maxVal = value;
} else {
if (value > maxVal) {
maxVal = value;
}
}
}
output.collect(key, new Text(String.valueOf(maxVal)));
}
reporter.setStatus("finished " + field + " ::host = " + hostName);
}
}
}

View File

@ -0,0 +1,344 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.IOException;
import java.util.Date;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.StringUtils;
/**
* This program executes a specified operation that applies load to
 * the NameNode. Possible operations include creating/writing files,
* opening/reading files, renaming files, and deleting files.
*
* When run simultaneously on multiple nodes, this program functions
 * as a stress test and benchmark for the NameNode, especially when
* the number of bytes written to each file is small.
*
* This version does not use the map reduce framework
*
*/
public class NNBenchWithoutMR {
private static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.hdfs.NNBench");
  // variables initialized from command line arguments
private static long startTime = 0;
private static int numFiles = 0;
private static long bytesPerBlock = 1;
private static long blocksPerFile = 0;
private static long bytesPerFile = 1;
private static Path baseDir = null;
// variables initialized in main()
private static FileSystem fileSys = null;
private static Path taskDir = null;
private static String uniqueId = null;
private static byte[] buffer;
private static long maxExceptionsPerFile = 200;
/**
* Returns when the current number of seconds from the epoch equals
* the command line argument given by <code>-startTime</code>.
* This allows multiple instances of this program, running on clock
* synchronized nodes, to start at roughly the same time.
*/
static void barrier() {
long sleepTime;
while ((sleepTime = startTime - System.currentTimeMillis()) > 0) {
try {
Thread.sleep(sleepTime);
} catch (InterruptedException ex) {
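        // Ignore the interrupt; the loop re-checks the remaining sleep time.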
}
}
}
static private void handleException(String operation, Throwable e,
int singleFileExceptions) {
LOG.warn("Exception while " + operation + ": " +
StringUtils.stringifyException(e));
if (singleFileExceptions >= maxExceptionsPerFile) {
throw new RuntimeException(singleFileExceptions +
" exceptions for a single file exceeds threshold. Aborting");
}
}
/**
* Create and write to a given number of files. Repeat each remote
   * operation until it succeeds (does not throw an exception).
*
* @return the number of exceptions caught
*/
static int createWrite() {
int totalExceptions = 0;
FSDataOutputStream out = null;
boolean success = false;
for (int index = 0; index < numFiles; index++) {
int singleFileExceptions = 0;
      do { // create file until it succeeds or max exceptions reached
try {
out = fileSys.create(
new Path(taskDir, "" + index), false, 512, (short)1, bytesPerBlock);
success = true;
} catch (IOException ioe) {
success=false;
totalExceptions++;
handleException("creating file #" + index, ioe, ++singleFileExceptions);
}
} while (!success);
long toBeWritten = bytesPerFile;
while (toBeWritten > 0) {
int nbytes = (int) Math.min(buffer.length, toBeWritten);
toBeWritten -= nbytes;
try { // only try once
out.write(buffer, 0, nbytes);
} catch (IOException ioe) {
totalExceptions++;
handleException("writing to file #" + index, ioe, ++singleFileExceptions);
}
}
      do { // close file until it succeeds
try {
out.close();
success = true;
} catch (IOException ioe) {
success=false;
totalExceptions++;
handleException("closing file #" + index, ioe, ++singleFileExceptions);
}
} while (!success);
}
return totalExceptions;
}
/**
* Open and read a given number of files.
*
* @return the number of exceptions caught
*/
static int openRead() {
int totalExceptions = 0;
FSDataInputStream in = null;
for (int index = 0; index < numFiles; index++) {
int singleFileExceptions = 0;
try {
in = fileSys.open(new Path(taskDir, "" + index), 512);
long toBeRead = bytesPerFile;
while (toBeRead > 0) {
int nbytes = (int) Math.min(buffer.length, toBeRead);
toBeRead -= nbytes;
try { // only try once
in.read(buffer, 0, nbytes);
} catch (IOException ioe) {
totalExceptions++;
handleException("reading from file #" + index, ioe, ++singleFileExceptions);
}
}
in.close();
} catch (IOException ioe) {
totalExceptions++;
handleException("opening file #" + index, ioe, ++singleFileExceptions);
}
}
return totalExceptions;
}
/**
* Rename a given number of files. Repeat each remote
   * operation until it succeeds (does not throw an exception).
*
* @return the number of exceptions caught
*/
static int rename() {
int totalExceptions = 0;
boolean success = false;
for (int index = 0; index < numFiles; index++) {
int singleFileExceptions = 0;
      do { // rename file until it succeeds
try {
boolean result = fileSys.rename(
new Path(taskDir, "" + index), new Path(taskDir, "A" + index));
success = true;
} catch (IOException ioe) {
success=false;
totalExceptions++;
handleException("creating file #" + index, ioe, ++singleFileExceptions);
}
} while (!success);
}
return totalExceptions;
}
/**
* Delete a given number of files. Repeat each remote
   * operation until it succeeds (does not throw an exception).
*
* @return the number of exceptions caught
*/
static int delete() {
int totalExceptions = 0;
boolean success = false;
for (int index = 0; index < numFiles; index++) {
int singleFileExceptions = 0;
      do { // delete file until it succeeds
try {
boolean result = fileSys.delete(new Path(taskDir, "A" + index), true);
success = true;
} catch (IOException ioe) {
success=false;
totalExceptions++;
handleException("creating file #" + index, ioe, ++singleFileExceptions);
}
} while (!success);
}
return totalExceptions;
}
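  // Illustrative invocation (the paths and start time are examples only, and
  // the test classes are assumed to be on the classpath). The same files are
  // reused across operations, so createWrite is typically run first:
  //
  //   hadoop org.apache.hadoop.hdfs.NNBenchWithoutMR -operation createWrite \
  //       -baseDir /benchmarks/NNBenchWithoutMR -startTime 1234567890 \
  //       -numFiles 100 -blocksPerFile 1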
/**
* This launches a given namenode operation (<code>-operation</code>),
* starting at a given time (<code>-startTime</code>). The files used
* by the openRead, rename, and delete operations are the same files
* created by the createWrite operation. Typically, the program
* would be run four times, once for each operation in this order:
* createWrite, openRead, rename, delete.
*
* <pre>
* Usage: nnbench
* -operation <one of createWrite, openRead, rename, or delete>
* -baseDir <base output/input DFS path>
* -startTime <time to start, given in seconds from the epoch>
* -numFiles <number of files to create, read, rename, or delete>
* -blocksPerFile <number of blocks to create per file>
* [-bytesPerBlock <number of bytes to write to each block, default is 1>]
* [-bytesPerChecksum <value for io.bytes.per.checksum>]
* </pre>
*
* @throws IOException indicates a problem with test startup
*/
public static void main(String[] args) throws IOException {
String version = "NameNodeBenchmark.0.3";
System.out.println(version);
int bytesPerChecksum = -1;
String usage =
"Usage: nnbench " +
" -operation <one of createWrite, openRead, rename, or delete> " +
" -baseDir <base output/input DFS path> " +
" -startTime <time to start, given in seconds from the epoch> " +
" -numFiles <number of files to create> " +
" -blocksPerFile <number of blocks to create per file> " +
" [-bytesPerBlock <number of bytes to write to each block, default is 1>] " +
" [-bytesPerChecksum <value for io.bytes.per.checksum>]" +
"Note: bytesPerBlock MUST be a multiple of bytesPerChecksum";
String operation = null;
for (int i = 0; i < args.length; i++) { // parse command line
if (args[i].equals("-baseDir")) {
baseDir = new Path(args[++i]);
} else if (args[i].equals("-numFiles")) {
numFiles = Integer.parseInt(args[++i]);
} else if (args[i].equals("-blocksPerFile")) {
blocksPerFile = Integer.parseInt(args[++i]);
} else if (args[i].equals("-bytesPerBlock")) {
bytesPerBlock = Long.parseLong(args[++i]);
} else if (args[i].equals("-bytesPerChecksum")) {
bytesPerChecksum = Integer.parseInt(args[++i]);
} else if (args[i].equals("-startTime")) {
startTime = Long.parseLong(args[++i]) * 1000;
} else if (args[i].equals("-operation")) {
operation = args[++i];
} else {
System.out.println(usage);
System.exit(-1);
}
}
bytesPerFile = bytesPerBlock * blocksPerFile;
JobConf jobConf = new JobConf(new Configuration(), NNBench.class);
if ( bytesPerChecksum < 0 ) { // if it is not set in cmdline
bytesPerChecksum = jobConf.getInt("io.bytes.per.checksum", 512);
}
jobConf.set("io.bytes.per.checksum", Integer.toString(bytesPerChecksum));
System.out.println("Inputs: ");
System.out.println(" operation: " + operation);
System.out.println(" baseDir: " + baseDir);
System.out.println(" startTime: " + startTime);
System.out.println(" numFiles: " + numFiles);
System.out.println(" blocksPerFile: " + blocksPerFile);
System.out.println(" bytesPerBlock: " + bytesPerBlock);
System.out.println(" bytesPerChecksum: " + bytesPerChecksum);
if (operation == null || // verify args
baseDir == null ||
numFiles < 1 ||
blocksPerFile < 1 ||
bytesPerBlock < 0 ||
bytesPerBlock % bytesPerChecksum != 0)
{
System.err.println(usage);
System.exit(-1);
}
fileSys = FileSystem.get(jobConf);
uniqueId = java.net.InetAddress.getLocalHost().getHostName();
taskDir = new Path(baseDir, uniqueId);
// initialize buffer used for writing/reading file
buffer = new byte[(int) Math.min(bytesPerFile, 32768L)];
Date execTime;
Date endTime;
long duration;
int exceptions = 0;
barrier(); // wait for coordinated start time
execTime = new Date();
System.out.println("Job started: " + startTime);
if (operation.equals("createWrite")) {
if (!fileSys.mkdirs(taskDir)) {
throw new IOException("Mkdirs failed to create " + taskDir.toString());
}
exceptions = createWrite();
} else if (operation.equals("openRead")) {
exceptions = openRead();
} else if (operation.equals("rename")) {
exceptions = rename();
} else if (operation.equals("delete")) {
exceptions = delete();
} else {
System.err.println(usage);
System.exit(-1);
}
endTime = new Date();
System.out.println("Job ended: " + endTime);
duration = (endTime.getTime() - execTime.getTime()) /1000;
System.out.println("The " + operation + " job took " + duration + " seconds.");
System.out.println("The job recorded " + exceptions + " exceptions.");
}
}

View File

@ -0,0 +1,603 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class FileBench extends Configured implements Tool {
static int printUsage() {
ToolRunner.printGenericCommandUsage(System.out);
System.out.println(
"Usage: Task list: -[no]r -[no]w\n" +
" Format: -[no]seq -[no]txt\n" +
" CompressionCodec: -[no]zip -[no]pln\n" +
" CompressionType: -[no]blk -[no]rec\n" +
" Required: -dir <working dir>\n" +
"All valid combinations are implicitly enabled, unless an option is enabled\n" +
"explicitly. For example, specifying \"-zip\", excludes -pln,\n" +
"unless they are also explicitly included, as in \"-pln -zip\"\n" +
"Note that CompressionType params only apply to SequenceFiles\n\n" +
"Useful options to set:\n" +
"-D fs.default.name=\"file:///\" \\\n" +
"-D fs.file.impl=org.apache.hadoop.fs.RawLocalFileSystem \\\n" +
"-D filebench.file.bytes=$((10*1024*1024*1024)) \\\n" +
"-D filebench.key.words=5 \\\n" +
"-D filebench.val.words=20\n");
return -1;
}
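  // Illustrative invocation (the jar name and directory are examples only,
  // assuming the benchmark is launched through the bundled test driver);
  // "-nor -notxt" restricts the run to sequence-file write benchmarks:
  //
  //   hadoop jar hadoop-test.jar filebench -nor -notxt -dir /tmp/filebench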
static String[] keys;
static String[] values;
static StringBuilder sentence = new StringBuilder();
private static String generateSentence(Random r, int noWords) {
sentence.setLength(0);
for (int i=0; i < noWords; ++i) {
sentence.append(words[r.nextInt(words.length)]);
sentence.append(" ");
}
return sentence.toString();
}
// fill keys, values with ~1.5 blocks for block-compressed seq fill
private static void fillBlocks(JobConf conf) {
Random r = new Random();
long seed = conf.getLong("filebench.seed", -1);
if (seed > 0) {
r.setSeed(seed);
}
int keylen = conf.getInt("filebench.key.words", 5);
int vallen = conf.getInt("filebench.val.words", 20);
int acc = (3 * conf.getInt("io.seqfile.compress.blocksize", 1000000)) >> 1;
ArrayList<String> k = new ArrayList<String>();
ArrayList<String> v = new ArrayList<String>();
for (int i = 0; acc > 0; ++i) {
String s = generateSentence(r, keylen);
acc -= s.length();
k.add(s);
s = generateSentence(r, vallen);
acc -= s.length();
v.add(s);
}
keys = k.toArray(new String[0]);
values = v.toArray(new String[0]);
}
@SuppressWarnings("unchecked") // OutputFormat instantiation
static long writeBench(JobConf conf) throws IOException {
long filelen = conf.getLong("filebench.file.bytes", 5 * 1024 * 1024 * 1024);
Text key = new Text();
Text val = new Text();
final String fn = conf.get("test.filebench.name", "");
final Path outd = FileOutputFormat.getOutputPath(conf);
conf.set("mapred.work.output.dir", outd.toString());
OutputFormat outf = conf.getOutputFormat();
RecordWriter<Text,Text> rw =
outf.getRecordWriter(outd.getFileSystem(conf), conf, fn,
Reporter.NULL);
try {
long acc = 0L;
Date start = new Date();
for (int i = 0; acc < filelen; ++i) {
i %= keys.length;
key.set(keys[i]);
val.set(values[i]);
rw.write(key, val);
acc += keys[i].length();
acc += values[i].length();
}
Date end = new Date();
return end.getTime() - start.getTime();
} finally {
rw.close(Reporter.NULL);
}
}
@SuppressWarnings("unchecked") // InputFormat instantiation
static long readBench(JobConf conf) throws IOException {
InputFormat inf = conf.getInputFormat();
final String fn = conf.get("test.filebench.name", "");
Path pin = new Path(FileInputFormat.getInputPaths(conf)[0], fn);
FileStatus in = pin.getFileSystem(conf).getFileStatus(pin);
RecordReader rr = inf.getRecordReader(new FileSplit(pin, 0, in.getLen(),
(String[])null), conf, Reporter.NULL);
try {
Object key = rr.createKey();
Object val = rr.createValue();
Date start = new Date();
while (rr.next(key, val));
Date end = new Date();
return end.getTime() - start.getTime();
} finally {
rr.close();
}
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new FileBench(), args);
System.exit(res);
}
/**
* Process params from command line and run set of benchmarks specified.
*/
public int run(String[] argv) throws IOException {
JobConf job = new JobConf(getConf());
EnumSet<CCodec> cc = null;
EnumSet<CType> ct = null;
EnumSet<Format> f = null;
EnumSet<RW> rw = null;
Path root = null;
FileSystem fs = FileSystem.get(job);
for(int i = 0; i < argv.length; ++i) {
try {
if ("-dir".equals(argv[i])) {
root = new Path(argv[++i]).makeQualified(fs);
System.out.println("DIR: " + root.toString());
} else if ("-seed".equals(argv[i])) {
job.setLong("filebench.seed", Long.valueOf(argv[++i]));
} else if (argv[i].startsWith("-no")) {
String arg = argv[i].substring(3);
cc = rem(CCodec.class, cc, arg);
ct = rem(CType.class, ct, arg);
f = rem(Format.class, f, arg);
rw = rem(RW.class, rw, arg);
} else {
String arg = argv[i].substring(1);
cc = add(CCodec.class, cc, arg);
ct = add(CType.class, ct, arg);
f = add(Format.class, f, arg);
rw = add(RW.class, rw, arg);
}
} catch (Exception e) {
throw (IOException)new IOException().initCause(e);
}
}
if (null == root) {
System.out.println("Missing -dir param");
printUsage();
return -1;
}
fillBlocks(job);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job, root);
FileOutputFormat.setOutputPath(job, root);
if (null == cc) cc = EnumSet.allOf(CCodec.class);
if (null == ct) ct = EnumSet.allOf(CType.class);
if (null == f) f = EnumSet.allOf(Format.class);
if (null == rw) rw = EnumSet.allOf(RW.class);
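    // Run every selected read/write x format x codec combination; the
    // CompressionType dimension only varies for compressed SequenceFiles,
    // so text-format and plain-codec runs skip that inner loop.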
for (RW rwop : rw) {
for (Format fmt : f) {
fmt.configure(job);
for (CCodec cod : cc) {
cod.configure(job);
if (!(fmt == Format.txt || cod == CCodec.pln)) {
for (CType typ : ct) {
String fn =
fmt.name().toUpperCase() + "_" +
cod.name().toUpperCase() + "_" +
typ.name().toUpperCase();
typ.configure(job);
System.out.print(rwop.name().toUpperCase() + " " + fn + ": ");
System.out.println(rwop.exec(fn, job) / 1000 +
" seconds");
}
} else {
String fn =
fmt.name().toUpperCase() + "_" +
cod.name().toUpperCase();
Path p = new Path(root, fn);
if (rwop == RW.r && !fs.exists(p)) {
fn += cod.getExt();
}
System.out.print(rwop.name().toUpperCase() + " " + fn + ": ");
System.out.println(rwop.exec(fn, job) / 1000 +
" seconds");
}
}
}
}
return 0;
}
// overwrought argument processing and wordlist follow
enum CCodec {
zip(GzipCodec.class, ".gz"), pln(null, "");
Class<? extends CompressionCodec> inf;
String ext;
CCodec(Class<? extends CompressionCodec> inf, String ext) {
this.inf = inf;
this.ext = ext;
}
public void configure(JobConf job) {
if (inf != null) {
job.setBoolean("mapred.output.compress", true);
job.setClass("mapred.output.compression.codec", inf,
CompressionCodec.class);
} else {
job.setBoolean("mapred.output.compress", false);
}
}
public String getExt() { return ext; }
}
enum CType {
blk("BLOCK"),
rec("RECORD");
String typ;
CType(String typ) { this.typ = typ; }
public void configure(JobConf job) {
job.set("mapred.map.output.compression.type", typ);
job.set("mapred.output.compression.type", typ);
}
}
enum Format {
seq(SequenceFileInputFormat.class, SequenceFileOutputFormat.class),
txt(TextInputFormat.class, TextOutputFormat.class);
Class<? extends InputFormat> inf;
Class<? extends OutputFormat> of;
Format(Class<? extends InputFormat> inf, Class<? extends OutputFormat> of) {
this.inf = inf;
this.of = of;
}
public void configure(JobConf job) {
if (null != inf) job.setInputFormat(inf);
if (null != of) job.setOutputFormat(of);
}
}
enum RW {
w() {
public long exec(String fn, JobConf job) throws IOException {
job.set("test.filebench.name", fn);
return writeBench(job);
}
},
r() {
public long exec(String fn, JobConf job) throws IOException {
job.set("test.filebench.name", fn);
return readBench(job);
}
};
public abstract long exec(String fn, JobConf job) throws IOException;
}
static Map<Class<? extends Enum>, Map<String,? extends Enum>> fullmap
= new HashMap<Class<? extends Enum>, Map<String,? extends Enum>>();
static {
    // Enum.valueOf would throw for unknown option names; build lookup maps
    // that return null instead
Map<String,CCodec> m1 = new HashMap<String,CCodec>();
for (CCodec v : CCodec.values()) m1.put(v.name(), v);
fullmap.put(CCodec.class, m1);
Map<String,CType> m2 = new HashMap<String,CType>();
for (CType v : CType.values()) m2.put(v.name(), v);
fullmap.put(CType.class, m2);
Map<String,Format> m3 = new HashMap<String,Format>();
for (Format v : Format.values()) m3.put(v.name(), v);
fullmap.put(Format.class, m3);
Map<String,RW> m4 = new HashMap<String,RW>();
for (RW v : RW.values()) m4.put(v.name(), v);
fullmap.put(RW.class, m4);
}
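  // rem()/add() translate "-no<name>"/"-<name>" flags into EnumSet edits:
  // removal from an initially full set, or addition to an initially empty one.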
public static <T extends Enum<T>> EnumSet<T> rem(Class<T> c,
EnumSet<T> set, String s) {
if (null != fullmap.get(c) && fullmap.get(c).get(s) != null) {
if (null == set) {
set = EnumSet.allOf(c);
}
set.remove(fullmap.get(c).get(s));
}
return set;
}
@SuppressWarnings("unchecked")
public static <T extends Enum<T>> EnumSet<T> add(Class<T> c,
EnumSet<T> set, String s) {
if (null != fullmap.get(c) && fullmap.get(c).get(s) != null) {
if (null == set) {
set = EnumSet.noneOf(c);
}
set.add((T)fullmap.get(c).get(s));
}
return set;
}
/**
* A random list of 1000 words from /usr/share/dict/words
*/
private static final String[] words = {
"diurnalness", "Homoiousian", "spiranthic", "tetragynian",
"silverhead", "ungreat", "lithograph", "exploiter",
"physiologian", "by", "hellbender", "Filipendula",
"undeterring", "antiscolic", "pentagamist", "hypoid",
"cacuminal", "sertularian", "schoolmasterism", "nonuple",
"gallybeggar", "phytonic", "swearingly", "nebular",
"Confervales", "thermochemically", "characinoid", "cocksuredom",
"fallacious", "feasibleness", "debromination", "playfellowship",
"tramplike", "testa", "participatingly", "unaccessible",
"bromate", "experientialist", "roughcast", "docimastical",
"choralcelo", "blightbird", "peptonate", "sombreroed",
"unschematized", "antiabolitionist", "besagne", "mastication",
"bromic", "sviatonosite", "cattimandoo", "metaphrastical",
"endotheliomyoma", "hysterolysis", "unfulminated", "Hester",
"oblongly", "blurredness", "authorling", "chasmy",
"Scorpaenidae", "toxihaemia", "Dictograph", "Quakerishly",
"deaf", "timbermonger", "strammel", "Thraupidae",
"seditious", "plerome", "Arneb", "eristically",
"serpentinic", "glaumrie", "socioromantic", "apocalypst",
"tartrous", "Bassaris", "angiolymphoma", "horsefly",
"kenno", "astronomize", "euphemious", "arsenide",
"untongued", "parabolicness", "uvanite", "helpless",
"gemmeous", "stormy", "templar", "erythrodextrin",
"comism", "interfraternal", "preparative", "parastas",
"frontoorbital", "Ophiosaurus", "diopside", "serosanguineous",
"ununiformly", "karyological", "collegian", "allotropic",
"depravity", "amylogenesis", "reformatory", "epidymides",
"pleurotropous", "trillium", "dastardliness", "coadvice",
"embryotic", "benthonic", "pomiferous", "figureheadship",
"Megaluridae", "Harpa", "frenal", "commotion",
"abthainry", "cobeliever", "manilla", "spiciferous",
"nativeness", "obispo", "monilioid", "biopsic",
"valvula", "enterostomy", "planosubulate", "pterostigma",
"lifter", "triradiated", "venialness", "tum",
"archistome", "tautness", "unswanlike", "antivenin",
"Lentibulariaceae", "Triphora", "angiopathy", "anta",
"Dawsonia", "becomma", "Yannigan", "winterproof",
"antalgol", "harr", "underogating", "ineunt",
"cornberry", "flippantness", "scyphostoma", "approbation",
"Ghent", "Macraucheniidae", "scabbiness", "unanatomized",
"photoelasticity", "eurythermal", "enation", "prepavement",
"flushgate", "subsequentially", "Edo", "antihero",
"Isokontae", "unforkedness", "porriginous", "daytime",
"nonexecutive", "trisilicic", "morphiomania", "paranephros",
"botchedly", "impugnation", "Dodecatheon", "obolus",
"unburnt", "provedore", "Aktistetae", "superindifference",
"Alethea", "Joachimite", "cyanophilous", "chorograph",
"brooky", "figured", "periclitation", "quintette",
"hondo", "ornithodelphous", "unefficient", "pondside",
"bogydom", "laurinoxylon", "Shiah", "unharmed",
"cartful", "noncrystallized", "abusiveness", "cromlech",
"japanned", "rizzomed", "underskin", "adscendent",
"allectory", "gelatinousness", "volcano", "uncompromisingly",
"cubit", "idiotize", "unfurbelowed", "undinted",
"magnetooptics", "Savitar", "diwata", "ramosopalmate",
"Pishquow", "tomorn", "apopenptic", "Haversian",
"Hysterocarpus", "ten", "outhue", "Bertat",
"mechanist", "asparaginic", "velaric", "tonsure",
"bubble", "Pyrales", "regardful", "glyphography",
"calabazilla", "shellworker", "stradametrical", "havoc",
"theologicopolitical", "sawdust", "diatomaceous", "jajman",
"temporomastoid", "Serrifera", "Ochnaceae", "aspersor",
"trailmaking", "Bishareen", "digitule", "octogynous",
"epididymitis", "smokefarthings", "bacillite", "overcrown",
"mangonism", "sirrah", "undecorated", "psychofugal",
"bismuthiferous", "rechar", "Lemuridae", "frameable",
"thiodiazole", "Scanic", "sportswomanship", "interruptedness",
"admissory", "osteopaedion", "tingly", "tomorrowness",
"ethnocracy", "trabecular", "vitally", "fossilism",
"adz", "metopon", "prefatorial", "expiscate",
"diathermacy", "chronist", "nigh", "generalizable",
"hysterogen", "aurothiosulphuric", "whitlowwort", "downthrust",
"Protestantize", "monander", "Itea", "chronographic",
"silicize", "Dunlop", "eer", "componental",
"spot", "pamphlet", "antineuritic", "paradisean",
"interruptor", "debellator", "overcultured", "Florissant",
"hyocholic", "pneumatotherapy", "tailoress", "rave",
"unpeople", "Sebastian", "thermanesthesia", "Coniferae",
"swacking", "posterishness", "ethmopalatal", "whittle",
"analgize", "scabbardless", "naught", "symbiogenetically",
"trip", "parodist", "columniform", "trunnel",
"yawler", "goodwill", "pseudohalogen", "swangy",
"cervisial", "mediateness", "genii", "imprescribable",
"pony", "consumptional", "carposporangial", "poleax",
"bestill", "subfebrile", "sapphiric", "arrowworm",
"qualminess", "ultraobscure", "thorite", "Fouquieria",
"Bermudian", "prescriber", "elemicin", "warlike",
"semiangle", "rotular", "misthread", "returnability",
"seraphism", "precostal", "quarried", "Babylonism",
"sangaree", "seelful", "placatory", "pachydermous",
"bozal", "galbulus", "spermaphyte", "cumbrousness",
"pope", "signifier", "Endomycetaceae", "shallowish",
"sequacity", "periarthritis", "bathysphere", "pentosuria",
"Dadaism", "spookdom", "Consolamentum", "afterpressure",
"mutter", "louse", "ovoviviparous", "corbel",
"metastoma", "biventer", "Hydrangea", "hogmace",
"seizing", "nonsuppressed", "oratorize", "uncarefully",
"benzothiofuran", "penult", "balanocele", "macropterous",
"dishpan", "marten", "absvolt", "jirble",
"parmelioid", "airfreighter", "acocotl", "archesporial",
"hypoplastral", "preoral", "quailberry", "cinque",
"terrestrially", "stroking", "limpet", "moodishness",
"canicule", "archididascalian", "pompiloid", "overstaid",
"introducer", "Italical", "Christianopaganism", "prescriptible",
"subofficer", "danseuse", "cloy", "saguran",
"frictionlessly", "deindividualization", "Bulanda", "ventricous",
"subfoliar", "basto", "scapuloradial", "suspend",
"stiffish", "Sphenodontidae", "eternal", "verbid",
"mammonish", "upcushion", "barkometer", "concretion",
"preagitate", "incomprehensible", "tristich", "visceral",
"hemimelus", "patroller", "stentorophonic", "pinulus",
"kerykeion", "brutism", "monstership", "merciful",
"overinstruct", "defensibly", "bettermost", "splenauxe",
"Mormyrus", "unreprimanded", "taver", "ell",
"proacquittal", "infestation", "overwoven", "Lincolnlike",
"chacona", "Tamil", "classificational", "lebensraum",
"reeveland", "intuition", "Whilkut", "focaloid",
"Eleusinian", "micromembrane", "byroad", "nonrepetition",
"bacterioblast", "brag", "ribaldrous", "phytoma",
"counteralliance", "pelvimetry", "pelf", "relaster",
"thermoresistant", "aneurism", "molossic", "euphonym",
"upswell", "ladhood", "phallaceous", "inertly",
"gunshop", "stereotypography", "laryngic", "refasten",
"twinling", "oflete", "hepatorrhaphy", "electrotechnics",
"cockal", "guitarist", "topsail", "Cimmerianism",
"larklike", "Llandovery", "pyrocatechol", "immatchable",
"chooser", "metrocratic", "craglike", "quadrennial",
"nonpoisonous", "undercolored", "knob", "ultratense",
"balladmonger", "slait", "sialadenitis", "bucketer",
"magnificently", "unstipulated", "unscourged", "unsupercilious",
"packsack", "pansophism", "soorkee", "percent",
"subirrigate", "champer", "metapolitics", "spherulitic",
"involatile", "metaphonical", "stachyuraceous", "speckedness",
"bespin", "proboscidiform", "gul", "squit",
"yeelaman", "peristeropode", "opacousness", "shibuichi",
"retinize", "yote", "misexposition", "devilwise",
"pumpkinification", "vinny", "bonze", "glossing",
"decardinalize", "transcortical", "serphoid", "deepmost",
"guanajuatite", "wemless", "arval", "lammy",
"Effie", "Saponaria", "tetrahedral", "prolificy",
"excerpt", "dunkadoo", "Spencerism", "insatiately",
"Gilaki", "oratorship", "arduousness", "unbashfulness",
"Pithecolobium", "unisexuality", "veterinarian", "detractive",
"liquidity", "acidophile", "proauction", "sural",
"totaquina", "Vichyite", "uninhabitedness", "allegedly",
"Gothish", "manny", "Inger", "flutist",
"ticktick", "Ludgatian", "homotransplant", "orthopedical",
"diminutively", "monogoneutic", "Kenipsim", "sarcologist",
"drome", "stronghearted", "Fameuse", "Swaziland",
"alen", "chilblain", "beatable", "agglomeratic",
"constitutor", "tendomucoid", "porencephalous", "arteriasis",
"boser", "tantivy", "rede", "lineamental",
"uncontradictableness", "homeotypical", "masa", "folious",
"dosseret", "neurodegenerative", "subtransverse", "Chiasmodontidae",
"palaeotheriodont", "unstressedly", "chalcites", "piquantness",
"lampyrine", "Aplacentalia", "projecting", "elastivity",
"isopelletierin", "bladderwort", "strander", "almud",
"iniquitously", "theologal", "bugre", "chargeably",
"imperceptivity", "meriquinoidal", "mesophyte", "divinator",
"perfunctory", "counterappellant", "synovial", "charioteer",
"crystallographical", "comprovincial", "infrastapedial", "pleasurehood",
"inventurous", "ultrasystematic", "subangulated", "supraoesophageal",
"Vaishnavism", "transude", "chrysochrous", "ungrave",
"reconciliable", "uninterpleaded", "erlking", "wherefrom",
"aprosopia", "antiadiaphorist", "metoxazine", "incalculable",
"umbellic", "predebit", "foursquare", "unimmortal",
"nonmanufacture", "slangy", "predisputant", "familist",
"preaffiliate", "friarhood", "corelysis", "zoonitic",
"halloo", "paunchy", "neuromimesis", "aconitine",
"hackneyed", "unfeeble", "cubby", "autoschediastical",
"naprapath", "lyrebird", "inexistency", "leucophoenicite",
"ferrogoslarite", "reperuse", "uncombable", "tambo",
"propodiale", "diplomatize", "Russifier", "clanned",
"corona", "michigan", "nonutilitarian", "transcorporeal",
"bought", "Cercosporella", "stapedius", "glandularly",
"pictorially", "weism", "disilane", "rainproof",
"Caphtor", "scrubbed", "oinomancy", "pseudoxanthine",
"nonlustrous", "redesertion", "Oryzorictinae", "gala",
"Mycogone", "reappreciate", "cyanoguanidine", "seeingness",
"breadwinner", "noreast", "furacious", "epauliere",
"omniscribent", "Passiflorales", "uninductive", "inductivity",
"Orbitolina", "Semecarpus", "migrainoid", "steprelationship",
"phlogisticate", "mesymnion", "sloped", "edificator",
"beneficent", "culm", "paleornithology", "unurban",
"throbless", "amplexifoliate", "sesquiquintile", "sapience",
"astucious", "dithery", "boor", "ambitus",
"scotching", "uloid", "uncompromisingness", "hoove",
"waird", "marshiness", "Jerusalem", "mericarp",
"unevoked", "benzoperoxide", "outguess", "pyxie",
"hymnic", "euphemize", "mendacity", "erythremia",
"rosaniline", "unchatteled", "lienteria", "Bushongo",
"dialoguer", "unrepealably", "rivethead", "antideflation",
"vinegarish", "manganosiderite", "doubtingness", "ovopyriform",
"Cephalodiscus", "Muscicapa", "Animalivora", "angina",
"planispheric", "ipomoein", "cuproiodargyrite", "sandbox",
"scrat", "Munnopsidae", "shola", "pentafid",
"overstudiousness", "times", "nonprofession", "appetible",
"valvulotomy", "goladar", "uniarticular", "oxyterpene",
"unlapsing", "omega", "trophonema", "seminonflammable",
"circumzenithal", "starer", "depthwise", "liberatress",
"unleavened", "unrevolting", "groundneedle", "topline",
"wandoo", "umangite", "ordinant", "unachievable",
"oversand", "snare", "avengeful", "unexplicit",
"mustafina", "sonable", "rehabilitative", "eulogization",
"papery", "technopsychology", "impressor", "cresylite",
"entame", "transudatory", "scotale", "pachydermatoid",
"imaginary", "yeat", "slipped", "stewardship",
"adatom", "cockstone", "skyshine", "heavenful",
"comparability", "exprobratory", "dermorhynchous", "parquet",
"cretaceous", "vesperal", "raphis", "undangered",
"Glecoma", "engrain", "counteractively", "Zuludom",
"orchiocatabasis", "Auriculariales", "warriorwise", "extraorganismal",
"overbuilt", "alveolite", "tetchy", "terrificness",
"widdle", "unpremonished", "rebilling", "sequestrum",
"equiconvex", "heliocentricism", "catabaptist", "okonite",
"propheticism", "helminthagogic", "calycular", "giantly",
"wingable", "golem", "unprovided", "commandingness",
"greave", "haply", "doina", "depressingly",
"subdentate", "impairment", "decidable", "neurotrophic",
"unpredict", "bicorporeal", "pendulant", "flatman",
"intrabred", "toplike", "Prosobranchiata", "farrantly",
"toxoplasmosis", "gorilloid", "dipsomaniacal", "aquiline",
"atlantite", "ascitic", "perculsive", "prospectiveness",
"saponaceous", "centrifugalization", "dinical", "infravaginal",
"beadroll", "affaite", "Helvidian", "tickleproof",
"abstractionism", "enhedge", "outwealth", "overcontribute",
"coldfinch", "gymnastic", "Pincian", "Munychian",
"codisjunct", "quad", "coracomandibular", "phoenicochroite",
"amender", "selectivity", "putative", "semantician",
"lophotrichic", "Spatangoidea", "saccharogenic", "inferent",
"Triconodonta", "arrendation", "sheepskin", "taurocolla",
"bunghole", "Machiavel", "triakistetrahedral", "dehairer",
"prezygapophysial", "cylindric", "pneumonalgia", "sleigher",
"emir", "Socraticism", "licitness", "massedly",
"instructiveness", "sturdied", "redecrease", "starosta",
"evictor", "orgiastic", "squdge", "meloplasty",
"Tsonecan", "repealableness", "swoony", "myesthesia",
"molecule", "autobiographist", "reciprocation", "refective",
"unobservantness", "tricae", "ungouged", "floatability",
"Mesua", "fetlocked", "chordacentrum", "sedentariness",
"various", "laubanite", "nectopod", "zenick",
"sequentially", "analgic", "biodynamics", "posttraumatic",
"nummi", "pyroacetic", "bot", "redescend",
"dispermy", "undiffusive", "circular", "trillion",
"Uraniidae", "ploration", "discipular", "potentness",
"sud", "Hu", "Eryon", "plugger",
"subdrainage", "jharal", "abscission", "supermarket",
"countergabion", "glacierist", "lithotresis", "minniebush",
"zanyism", "eucalypteol", "sterilely", "unrealize",
"unpatched", "hypochondriacism", "critically", "cheesecutter",
};
}

View File

@ -0,0 +1,98 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Sorter.RawKeyValueIterator;
import org.apache.hadoop.io.SequenceFile.Sorter.SegmentDescriptor;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapred.*;
import junit.framework.TestCase;
import org.apache.commons.logging.*;
public class TestSequenceFileMergeProgress extends TestCase {
private static final Log LOG = FileInputFormat.LOG;
private static final int RECORDS = 10000;
public void testMergeProgressWithNoCompression() throws IOException {
runTest(SequenceFile.CompressionType.NONE);
}
public void testMergeProgressWithRecordCompression() throws IOException {
runTest(SequenceFile.CompressionType.RECORD);
}
public void testMergeProgressWithBlockCompression() throws IOException {
runTest(SequenceFile.CompressionType.BLOCK);
}
public void runTest(CompressionType compressionType) throws IOException {
JobConf job = new JobConf();
FileSystem fs = FileSystem.getLocal(job);
Path dir = new Path(System.getProperty("test.build.data",".") + "/mapred");
Path file = new Path(dir, "test.seq");
Path tempDir = new Path(dir, "tmp");
fs.delete(dir, true);
FileInputFormat.setInputPaths(job, dir);
fs.mkdirs(tempDir);
LongWritable tkey = new LongWritable();
Text tval = new Text();
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class,
compressionType, new DefaultCodec());
try {
for (int i = 0; i < RECORDS; ++i) {
tkey.set(1234);
tval.set("valuevaluevaluevaluevaluevaluevaluevaluevaluevaluevalue");
writer.append(tkey, tval);
}
} finally {
writer.close();
}
long fileLength = fs.getFileStatus(file).getLen();
LOG.info("With compression = " + compressionType + ": "
+ "compressed length = " + fileLength);
SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs,
job.getOutputKeyComparator(), job.getMapOutputKeyClass(),
job.getMapOutputValueClass(), job);
Path[] paths = new Path[] {file};
RawKeyValueIterator rIter = sorter.merge(paths, tempDir, false);
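    // Drain the merged stream; once every record has been consumed, the
    // reported merge progress should have reached 1.0.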
int count = 0;
while (rIter.next()) {
count++;
}
assertEquals(RECORDS, count);
assertEquals(1.0f, rIter.getProgress().get());
}
}

View File

@ -0,0 +1,197 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ipc;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketAddress;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.net.StandardSocketFactory;
/**
* This class checks that RPCs can use specialized socket factories.
*/
public class TestSocketFactory extends TestCase {
/**
* Check that we can reach a NameNode or a JobTracker using a specific
* socket factory
*/
public void testSocketFactory() throws IOException {
// Create a standard mini-cluster
Configuration sconf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster(sconf, 1, true, null);
final int nameNodePort = cluster.getNameNodePort();
// Get a reference to its DFS directly
FileSystem fs = cluster.getFileSystem();
assertTrue(fs instanceof DistributedFileSystem);
DistributedFileSystem directDfs = (DistributedFileSystem) fs;
// Get another reference via network using a specific socket factory
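    // The client URI points 10 ports above the real NameNode port; the
    // DummySocketFactory configured below subtracts 10 when connecting, so
    // the cluster is only reachable if the custom factory is actually used.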
Configuration cconf = new Configuration();
FileSystem.setDefaultUri(cconf, String.format("hdfs://localhost:%s/",
nameNodePort + 10));
cconf.set("hadoop.rpc.socket.factory.class.default",
"org.apache.hadoop.ipc.DummySocketFactory");
cconf.set("hadoop.rpc.socket.factory.class.ClientProtocol",
"org.apache.hadoop.ipc.DummySocketFactory");
cconf.set("hadoop.rpc.socket.factory.class.JobSubmissionProtocol",
"org.apache.hadoop.ipc.DummySocketFactory");
fs = FileSystem.get(cconf);
assertTrue(fs instanceof DistributedFileSystem);
DistributedFileSystem dfs = (DistributedFileSystem) fs;
JobClient client = null;
MiniMRCluster mr = null;
try {
// This will test RPC to the NameNode only.
// could we test Client-DataNode connections?
Path filePath = new Path("/dir");
assertFalse(directDfs.exists(filePath));
assertFalse(dfs.exists(filePath));
directDfs.mkdirs(filePath);
assertTrue(directDfs.exists(filePath));
assertTrue(dfs.exists(filePath));
      // This will test RPC to a JobTracker
fs = FileSystem.get(sconf);
mr = new MiniMRCluster(1, fs.getUri().toString(), 1);
final int jobTrackerPort = mr.getJobTrackerPort();
JobConf jconf = new JobConf(cconf);
jconf.set("mapred.job.tracker", String.format("localhost:%d",
jobTrackerPort + 10));
client = new JobClient(jconf);
JobStatus[] jobs = client.jobsToComplete();
assertTrue(jobs.length == 0);
} finally {
try {
if (client != null)
client.close();
} catch (Exception ignored) {
// nothing we can do
ignored.printStackTrace();
}
try {
if (dfs != null)
dfs.close();
} catch (Exception ignored) {
// nothing we can do
ignored.printStackTrace();
}
try {
if (directDfs != null)
directDfs.close();
} catch (Exception ignored) {
// nothing we can do
ignored.printStackTrace();
}
try {
if (cluster != null)
cluster.shutdown();
} catch (Exception ignored) {
// nothing we can do
ignored.printStackTrace();
}
if (mr != null) {
try {
mr.shutdown();
} catch (Exception ignored) {
ignored.printStackTrace();
}
}
}
}
}
/**
 * Dummy socket factory which shifts RPC ports by subtracting 10 when
 * establishing a connection.
*/
class DummySocketFactory extends StandardSocketFactory {
/**
* Default empty constructor (for use with the reflection API).
*/
public DummySocketFactory() {
}
/* @inheritDoc */
@Override
public Socket createSocket() throws IOException {
return new Socket() {
@Override
public void connect(SocketAddress addr, int timeout)
throws IOException {
assert (addr instanceof InetSocketAddress);
InetSocketAddress iaddr = (InetSocketAddress) addr;
SocketAddress newAddr = null;
if (iaddr.isUnresolved())
newAddr =
new InetSocketAddress(iaddr.getHostName(),
iaddr.getPort() - 10);
else
newAddr =
new InetSocketAddress(iaddr.getAddress(), iaddr.getPort() - 10);
System.out.printf("Test socket: rerouting %s to %s\n", iaddr,
newAddr);
super.connect(newAddr, timeout);
}
};
}
/* @inheritDoc */
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (!(obj instanceof DummySocketFactory))
return false;
return true;
}
/* @inheritDoc */
@Override
public int hashCode() {
// Dummy hash code (to make find bugs happy)
return 53;
}
}

View File

@ -0,0 +1,152 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.security.authorize;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.HDFSPolicyProvider;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.TestMiniMRWithDFS;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import junit.framework.TestCase;
public class TestServiceLevelAuthorization extends TestCase {
public void testServiceLevelAuthorization() throws Exception {
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
try {
final int slaves = 4;
// Turn on service-level authorization
Configuration conf = new Configuration();
conf.setClass(PolicyProvider.POLICY_PROVIDER_CONFIG,
HadoopPolicyProvider.class, PolicyProvider.class);
conf.setBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG,
true);
// Start the mini clusters
dfs = new MiniDFSCluster(conf, slaves, true, null);
fileSys = dfs.getFileSystem();
JobConf mrConf = new JobConf(conf);
mr = new MiniMRCluster(slaves, fileSys.getUri().toString(), 1,
null, null, mrConf);
// Run examples
TestMiniMRWithDFS.runPI(mr, mr.createJobConf(mrConf));
TestMiniMRWithDFS.runWordCount(mr, mr.createJobConf(mrConf));
} finally {
if (dfs != null) { dfs.shutdown(); }
if (mr != null) { mr.shutdown();
}
}
}
private static final String DUMMY_ACL = "nouser nogroup";
private static final String UNKNOWN_USER = "dev,null";
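  // DUMMY_ACL restricts the refresh-policy protocol to a user/group that
  // does not exist, and UNKNOWN_USER deliberately does not match it, so a
  // refresh attempted as that user must be rejected.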
private void rewriteHadoopPolicyFile(File policyFile) throws IOException {
FileWriter fos = new FileWriter(policyFile);
PolicyProvider policyProvider = new HDFSPolicyProvider();
fos.write("<configuration>\n");
for (Service service : policyProvider.getServices()) {
String key = service.getServiceKey();
String value ="*";
if (key.equals("security.refresh.policy.protocol.acl")) {
value = DUMMY_ACL;
}
fos.write("<property><name>"+ key + "</name><value>" + value +
"</value></property>\n");
System.err.println("<property><name>"+ key + "</name><value>" + value +
"</value></property>\n");
}
fos.write("</configuration>\n");
fos.close();
}
private void refreshPolicy(Configuration conf) throws IOException {
DFSAdmin dfsAdmin = new DFSAdmin(conf);
dfsAdmin.refreshServiceAcl();
}
public void testRefresh() throws Exception {
MiniDFSCluster dfs = null;
try {
final int slaves = 4;
// Turn on service-level authorization
Configuration conf = new Configuration();
conf.setClass(PolicyProvider.POLICY_PROVIDER_CONFIG,
HDFSPolicyProvider.class, PolicyProvider.class);
conf.setBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG,
true);
// Start the mini dfs cluster
dfs = new MiniDFSCluster(conf, slaves, true, null);
// Refresh the service level authorization policy
refreshPolicy(conf);
// Simulate an 'edit' of hadoop-policy.xml
String confDir = System.getProperty("test.build.extraconf",
"build/test/extraconf");
File policyFile = new File(confDir, ConfiguredPolicy.HADOOP_POLICY_FILE);
String policyFileCopy = ConfiguredPolicy.HADOOP_POLICY_FILE + ".orig";
FileUtil.copy(policyFile, FileSystem.getLocal(conf), // first save original
new Path(confDir, policyFileCopy), false, conf);
rewriteHadoopPolicyFile( // rewrite the file
new File(confDir, ConfiguredPolicy.HADOOP_POLICY_FILE));
// Refresh the service level authorization policy
refreshPolicy(conf);
// Refresh the service level authorization policy once again,
// this time it should fail!
try {
// Note: hadoop-policy.xml for tests has
// security.refresh.policy.protocol.acl = ${user.name}
conf.set(UnixUserGroupInformation.UGI_PROPERTY_NAME, UNKNOWN_USER);
refreshPolicy(conf);
fail("Refresh of NameNode's policy file cannot be successful!");
} catch (RemoteException re) {
System.out.println("Good, refresh worked... refresh failed with: " +
StringUtils.stringifyException(re.unwrapRemoteException()));
} finally {
// Reset to original hadoop-policy.xml
FileUtil.fullyDelete(new File(confDir,
ConfiguredPolicy.HADOOP_POLICY_FILE));
FileUtil.replaceFile(new File(confDir, policyFileCopy), new File(confDir, ConfiguredPolicy.HADOOP_POLICY_FILE));
}
} finally {
if (dfs != null) { dfs.shutdown(); }
}
}
}

View File

@ -0,0 +1,46 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.test;
import org.apache.hadoop.util.ProgramDriver;
@Deprecated
//Class to be removed after the project split
public class AllTestDriver {
/**
* A description of the test program for running all the tests using jar file
*/
public static void main(String argv[]){
ProgramDriver pd = new ProgramDriver();
new CoreTestDriver(pd);
new HdfsTestDriver(pd);
new HdfsWithMRTestDriver(pd);
new MapredTestDriver(pd);
try {
pd.driver(argv);
} catch (Throwable e) {
e.printStackTrace();
}
}
}
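In practice a driver like this is reached through the test jar, along the lines of: bin/hadoop jar hadoop-<version>-test.jar <test-name> <test-args> (the jar name here is illustrative); the first argument selects the registered test program and the remaining arguments are passed through to it.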

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.test;
import org.apache.hadoop.fs.DFSCIOTest;
import org.apache.hadoop.fs.DistributedFSCheck;
import org.apache.hadoop.fs.TestDFSIO;
import org.apache.hadoop.fs.TestFileSystem;
import org.apache.hadoop.hdfs.NNBench;
import org.apache.hadoop.io.FileBench;
import org.apache.hadoop.util.ProgramDriver;
/**
 * Driver for HDFS tests that require MapReduce to run.
 */
public class HdfsWithMRTestDriver {
private ProgramDriver pgd;
public HdfsWithMRTestDriver() {
this(new ProgramDriver());
}
public HdfsWithMRTestDriver(ProgramDriver pgd) {
this.pgd = pgd;
try {
pgd.addClass("nnbench", NNBench.class,
"A benchmark that stresses the namenode.");
pgd.addClass("testfilesystem", TestFileSystem.class,
"A test for FileSystem read/write.");
pgd.addClass("TestDFSIO", TestDFSIO.class,
"Distributed i/o benchmark.");
pgd.addClass("DFSCIOTest", DFSCIOTest.class, "" +
"Distributed i/o benchmark of libhdfs.");
pgd.addClass("DistributedFSCheck", DistributedFSCheck.class,
"Distributed checkup of the file system consistency.");
pgd.addClass("filebench", FileBench.class,
"Benchmark SequenceFile(Input|Output)Format " +
"(block,record compressed and uncompressed), " +
"Text(Input|Output)Format (compressed and uncompressed)");
} catch(Throwable e) {
e.printStackTrace();
}
}
public void run(String argv[]) {
try {
pgd.driver(argv);
} catch(Throwable e) {
e.printStackTrace();
}
}
public static void main(String argv[]){
new HdfsWithMRTestDriver().run(argv);
}
}
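As a usage sketch (not part of this commit): the driver dispatches on the benchmark name in the first argument and forwards the remaining strings to that program. The TestDFSIO flags below are illustrative, and the sketch assumes the default configuration points at a running cluster.

import org.apache.hadoop.test.HdfsWithMRTestDriver;

public class RunHdfsWithMRBenchmark {
  public static void main(String[] args) {
    // "TestDFSIO" selects the registered benchmark; the remaining strings
    // are forwarded untouched to TestDFSIO's own argument parsing.
    // Assumes the default configuration files point at a running cluster.
    new HdfsWithMRTestDriver().run(new String[] {
        "TestDFSIO", "-write", "-nrFiles", "4", "-fileSize", "10"});
  }
}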

View File

@ -0,0 +1,221 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.TaskTracker;
import org.apache.log4j.Level;
public class TestDistCh extends junit.framework.TestCase {
{
((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.StateChange")
).getLogger().setLevel(Level.OFF);
((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.OFF);
((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.OFF);
((Log4JLogger)TaskTracker.LOG).getLogger().setLevel(Level.OFF);
}
static final Long RANDOM_NUMBER_GENERATOR_SEED = null;
private static final Random RANDOM = new Random();
static {
final long seed = RANDOM_NUMBER_GENERATOR_SEED == null?
RANDOM.nextLong(): RANDOM_NUMBER_GENERATOR_SEED;
System.out.println("seed=" + seed);
RANDOM.setSeed(seed);
}
static final String TEST_ROOT_DIR =
new Path(System.getProperty("test.build.data","/tmp")
).toString().replace(' ', '+');
static final int NUN_SUBS = 5;
static class FileTree {
private final FileSystem fs;
private final String root;
private final Path rootdir;
private int fcount = 0;
Path createSmallFile(Path dir) throws IOException {
final Path f = new Path(dir, "f" + ++fcount);
assertTrue(!fs.exists(f));
final DataOutputStream out = fs.create(f);
try {
out.writeBytes("createSmallFile: f=" + f);
} finally {
out.close();
}
assertTrue(fs.exists(f));
return f;
}
Path mkdir(Path dir) throws IOException {
assertTrue(fs.mkdirs(dir));
assertTrue(fs.getFileStatus(dir).isDir());
return dir;
}
FileTree(FileSystem fs, String name) throws IOException {
this.fs = fs;
this.root = "/test/" + name;
this.rootdir = mkdir(new Path(root));
for(int i = 0; i < 3; i++) {
createSmallFile(rootdir);
}
for(int i = 0; i < NUN_SUBS; i++) {
final Path sub = mkdir(new Path(root, "sub" + i));
int num_files = RANDOM.nextInt(3);
for(int j = 0; j < num_files; j++) {
createSmallFile(sub);
}
}
System.out.println("rootdir = " + rootdir);
}
}
static class ChPermissionStatus extends PermissionStatus {
ChPermissionStatus(FileStatus filestatus) {
this(filestatus, "", "", "");
}
ChPermissionStatus(FileStatus filestatus, String owner, String group, String permission) {
super("".equals(owner)? filestatus.getOwner(): owner,
"".equals(group)? filestatus.getGroup(): group,
"".equals(permission)? filestatus.getPermission(): new FsPermission(Short.parseShort(permission, 8)));
}
}
public void testDistCh() throws Exception {
final Configuration conf = new Configuration();
final MiniDFSCluster cluster = new MiniDFSCluster(conf, 2, true, null);
final FileSystem fs = cluster.getFileSystem();
final MiniMRCluster mr = new MiniMRCluster(2, fs.getUri().toString(), 1);
final FsShell shell = new FsShell(conf);
try {
final FileTree tree = new FileTree(fs, "testDistCh");
final FileStatus rootstatus = fs.getFileStatus(tree.rootdir);
runLsr(shell, tree.root, 0);
//generate random arguments
final String[] args = new String[RANDOM.nextInt(NUN_SUBS-1) + 1];
final PermissionStatus[] newstatus = new PermissionStatus[NUN_SUBS];
final List<Integer> indices = new LinkedList<Integer>();
for(int i = 0; i < NUN_SUBS; i++) {
indices.add(i);
}
for(int i = 0; i < args.length; i++) {
final int index = indices.remove(RANDOM.nextInt(indices.size()));
final String sub = "sub" + index;
final boolean changeOwner = RANDOM.nextBoolean();
final boolean changeGroup = RANDOM.nextBoolean();
final boolean changeMode = !changeOwner && !changeGroup? true: RANDOM.nextBoolean();
final String owner = changeOwner? sub: "";
final String group = changeGroup? sub: "";
final String permission = changeMode? RANDOM.nextInt(8) + "" + RANDOM.nextInt(8) + "" + RANDOM.nextInt(8): "";
args[i] = tree.root + "/" + sub + ":" + owner + ":" + group + ":" + permission;
newstatus[index] = new ChPermissionStatus(rootstatus, owner, group, permission);
}
for(int i = 0; i < NUN_SUBS; i++) {
if (newstatus[i] == null) {
newstatus[i] = new ChPermissionStatus(rootstatus);
}
}
System.out.println("args=" + Arrays.asList(args).toString().replace(",", ",\n "));
System.out.println("newstatus=" + Arrays.asList(newstatus).toString().replace(",", ",\n "));
//run DistCh
new DistCh(mr.createJobConf()).run(args);
runLsr(shell, tree.root, 0);
//check results
for(int i = 0; i < NUN_SUBS; i++) {
Path sub = new Path(tree.root + "/sub" + i);
checkFileStatus(newstatus[i], fs.getFileStatus(sub));
for(FileStatus status : fs.listStatus(sub)) {
checkFileStatus(newstatus[i], status);
}
}
} finally {
cluster.shutdown();
}
}
static final FsPermission UMASK = FsPermission.createImmutable((short)0111);
static void checkFileStatus(PermissionStatus expected, FileStatus actual) {
assertEquals(expected.getUserName(), actual.getOwner());
assertEquals(expected.getGroupName(), actual.getGroup());
FsPermission perm = expected.getPermission();
if (!actual.isDir()) {
perm = perm.applyUMask(UMASK);
}
assertEquals(perm, actual.getPermission());
}
  private static String runLsr(final FsShell shell, String root,
      int returnvalue) throws Exception {
System.out.println("root=" + root + ", returnvalue=" + returnvalue);
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final PrintStream out = new PrintStream(bytes);
final PrintStream oldOut = System.out;
final PrintStream oldErr = System.err;
System.setOut(out);
System.setErr(out);
final String results;
try {
assertEquals(returnvalue, shell.run(new String[]{"-lsr", root}));
results = bytes.toString();
} finally {
IOUtils.closeStream(out);
System.setOut(oldOut);
System.setErr(oldErr);
}
System.out.println("results:\n" + results);
return results;
}
}
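For context, the test above assembles DistCh arguments of the form path:owner:group:permission, where an empty field leaves that attribute unchanged. Below is a minimal sketch of a direct invocation mirroring the test; the paths, owner, and group names are hypothetical placeholders, not values from this commit.

package org.apache.hadoop.tools;

import org.apache.hadoop.mapred.JobConf;

public class DistChExample {
  public static void main(String[] args) throws Exception {
    // Mirrors new DistCh(mr.createJobConf()).run(args) from the test above;
    // assumes the JobConf resolves to a running cluster. Placed in the same
    // package as the test so the same constructor is accessible.
    JobConf conf = new JobConf();
    String[] changes = {
        "/test/testDistCh/sub0:alice:users:755", // set owner, group and mode
        "/test/testDistCh/sub1::hadoop:"         // change only the group
    };
    new DistCh(conf).run(changes);
  }
}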

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,7 @@
# log4j configuration used during build and unit tests
log4j.rootLogger=info,stdout
log4j.threshold=ALL
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
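For reference, this ConversionPattern prints the timestamp, padded level, the last two components of the logger name, and the call site (file:method(line)) before the message, producing lines of the form below (an illustrative line, not taken from an actual run):
2009-05-19 04:56:52,123 INFO  hdfs.MiniDFSCluster (MiniDFSCluster.java:waitActive(312)) - Cluster is up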

18
src/test/mapred-site.xml Normal file
View File

@ -0,0 +1,18 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>io.sort.mb</name>
<value>10</value>
</property>
<property>
<name>mapred.hosts.exclude</name>
<value>hosts.exclude</value>
  <description>Names a file containing the list of hosts to be excluded by the JobTracker.</description>
</property>
</configuration>