merge trunk into HDFS-4949 branch

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-4949@1513658 13f79535-47bb-0310-9956-ffa450edef68
Colin McCabe 2013-08-13 21:19:53 +00:00
commit 70527c8754
119 changed files with 6500 additions and 2787 deletions

.gitattributes
View File

@ -14,5 +14,6 @@
*.sh text eol=lf
*.bat text eol=crlf
*.cmd text eol=crlf
*.csproj text merge=union eol=crlf
*.sln text merge=union eol=crlf

View File

@ -106,6 +106,8 @@ Trunk (Unreleased)
HADOOP-9186. test-patch.sh should report build failure to JIRA.
(Binglin Chang via Colin Patrick McCabe)
HADOOP-9833 move slf4j to version 1.7.5 (Kousuke Saruta via stevel)
BUG FIXES
HADOOP-9451. Fault single-layer config if node group topology is enabled.
@ -269,9 +271,6 @@ Trunk (Unreleased)
HADOOP-9433 TestLocalFileSystem#testHasFileDescriptor leaks file handle
(Chris Nauroth via sanjay)
HADOOP-9806 PortmapInterface should check if the procedure is out-of-range
(brandonli)
OPTIMIZATIONS
HADOOP-7761. Improve the performance of raw comparisons. (todd)
@ -286,6 +285,8 @@ Release 2.3.0 - UNRELEASED
IMPROVEMENTS
HADOOP-9319. Update bundled LZ4 source to r99. (Binglin Chang via llu)
HADOOP-9241. DU refresh interval is not configurable (harsh)
HADOOP-9417. Support for symlink resolution in LocalFileSystem /
@ -312,6 +313,12 @@ Release 2.3.0 - UNRELEASED
HADOOP-9582. Non-existent file to "hadoop fs -conf" doesn't throw error
(Ashwin Shankar via jlowe)
HADOOP-9761. ViewFileSystem#rename fails when using DistributedFileSystem.
(Andrew Wang via Colin Patrick McCabe)
HADOOP-9817. FileSystem#globStatus and FileContext#globStatus need to work
with symlinks. (Colin Patrick McCabe via Andrew Wang)
Release 2.1.1-beta - UNRELEASED
INCOMPATIBLE CHANGES
@ -326,6 +333,13 @@ Release 2.1.1-beta - UNRELEASED
HADOOP-9803. Add a generic type parameter to RetryInvocationHandler.
(szetszwo)
HADOOP-9821. ClientId should have getMsb/getLsb methods.
(Tsuyoshi OZAWA via jing9)
HADOOP-9672. Upgrade Avro dependency to 1.7.4. (sandy via kihwal)
HADOOP-9789. Support server advertised kerberos principals (daryn)
OPTIMIZATIONS
BUG FIXES
@ -336,6 +350,17 @@ Release 2.1.1-beta - UNRELEASED
HADOOP-9801. Configuration#writeXml uses platform defaulting encoding, which
may mishandle multi-byte characters. (cnauroth)
HADOOP-9806 PortmapInterface should check if the procedure is out-of-range
(brandonli)
HADOOP-9527. Add symlink support to LocalFileSystem on Windows.
(Arpit Agarwal via cnauroth)
HADOOP-9831. Make checknative shell command accessible on Windows. (cnauroth)
HADOOP-9675. use svn:eol-style native for html to prevent line ending
issues (Colin Patrick McCabe)
Release 2.1.0-beta - 2013-08-06
INCOMPATIBLE CHANGES
@ -365,6 +390,10 @@ Release 2.1.0-beta - 2013-08-06
HADOOP-9698. [RPC v9] Client must honor server's SASL negotiate response (daryn)
HADOOP-9832. [RPC v9] Add RPC header to client ping (daryn)
HADOOP-9820. [RPC v9] Wire protocol is insufficient to support multiplexing. (daryn via jitendra)
NEW FEATURES
HADOOP-9283. Add support for running the Hadoop client on AIX. (atm)
@ -681,6 +710,10 @@ Release 2.1.0-beta - 2013-08-06
HADOOP-9507. LocalFileSystem rename() is broken in some cases when
destination exists. (cnauroth)
HADOOP-9816. RPC Sasl QOP is broken (daryn)
HADOOP-9850. RPC kerberos errors don't trigger relogin. (daryn via kihwal)
BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS
HADOOP-8924. Hadoop Common creating package-info.java must not depend on

View File

@ -252,12 +252,14 @@ in src/main/native/src/org/apache/hadoop/util:
* BSD-style license that can be found in the LICENSE file.
*/
For src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c:
For src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,
lz4_encoder.h,lz4hc.h,lz4hc.c,lz4hc_encoder.h},
/*
LZ4 - Fast LZ compression algorithm
Copyright (C) 2011, Yann Collet.
BSD License
Header File
Copyright (C) 2011-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@ -281,4 +283,8 @@ in src/main/native/src/org/apache/hadoop/util:
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/
*/

View File

@ -456,7 +456,12 @@
<exclude>src/test/empty-file</exclude>
<exclude>src/test/all-tests</exclude>
<exclude>src/test/resources/kdc/ldif/users.ldif</exclude>
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h</exclude>
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c</exclude>
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4_encoder.h</exclude>
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h</exclude>
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c</exclude>
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc_encoder.h</exclude>
<exclude>src/test/java/org/apache/hadoop/fs/test-untar.tgz</exclude>
</excludes>
</configuration>

View File

@ -170,6 +170,7 @@ add_dual_library(hadoop
${D}/io/compress/lz4/Lz4Compressor.c
${D}/io/compress/lz4/Lz4Decompressor.c
${D}/io/compress/lz4/lz4.c
${D}/io/compress/lz4/lz4hc.c
${SNAPPY_SOURCE_FILES}
${D}/io/compress/zlib/ZlibCompressor.c
${D}/io/compress/zlib/ZlibDecompressor.c

View File

@ -57,7 +57,7 @@ case $COMMAND in
;;
#hdfs commands
namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups)
namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
echo "Instead use the hdfs command for it." 1>&2
echo "" 1>&2

View File

@ -119,7 +119,7 @@ call :updatepath %HADOOP_BIN_PATH%
goto :eof
)
set corecommands=fs version jar distcp daemonlog archive
set corecommands=fs version jar checknative distcp daemonlog archive
for %%i in ( %corecommands% ) do (
if %hadoop-command% == %%i set corecommand=true
)
@ -157,6 +157,10 @@ call :updatepath %HADOOP_BIN_PATH%
set CLASS=org.apache.hadoop.util.RunJar
goto :eof
:checknative
set CLASS=org.apache.hadoop.util.NativeLibraryChecker
goto :eof
:distcp
set CLASS=org.apache.hadoop.tools.DistCp
set CLASSPATH=%CLASSPATH%;%TOOL_PATH%
@ -222,6 +226,7 @@ call :updatepath %HADOOP_BIN_PATH%
@echo fs run a generic filesystem user client
@echo version print the version
@echo jar ^<jar^> run a jar file
@echo checknative [-a^|-h] check native hadoop and compression libraries availability
@echo distcp ^<srcurl^> ^<desturl^> copy file or directories recursively
@echo archive -archiveName NAME -p ^<parent path^> ^<src^>* ^<dest^> create a hadoop archive
@echo classpath prints the class path needed to get the

View File

@ -96,7 +96,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
public static final int IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT =
256 * 1024;
/** Internal buffer size for Snappy compressor/decompressors */
/** Internal buffer size for Lz4 compressor/decompressors */
public static final String IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY =
"io.compression.codec.lz4.buffersize";
@ -104,6 +104,14 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
public static final int IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT =
256 * 1024;
/** Use lz4hc(slow but with high compression ratio) for lz4 compression */
public static final String IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY =
"io.compression.codec.lz4.use.lz4hc";
/** Default value for IO_COMPRESSION_CODEC_USELZ4HC_KEY */
public static final boolean IO_COMPRESSION_CODEC_LZ4_USELZ4HC_DEFAULT =
false;
/**
* Service Authorization
*/

View File

@ -258,7 +258,7 @@ private FileContext(final AbstractFileSystem defFs,
* Hence this method is not called makeAbsolute() and
* has been deliberately declared private.
*/
private Path fixRelativePart(Path p) {
Path fixRelativePart(Path p) {
if (p.isUriPathAbsolute()) {
return p;
} else {
@ -1905,7 +1905,7 @@ public LocatedFileStatus next() throws IOException {
public FileStatus[] globStatus(Path pathPattern)
throws AccessControlException, UnsupportedFileSystemException,
IOException {
return globStatus(pathPattern, DEFAULT_FILTER);
return new Globber(FileContext.this, pathPattern, DEFAULT_FILTER).glob();
}
/**
@ -1934,154 +1934,7 @@ public FileStatus[] globStatus(Path pathPattern)
public FileStatus[] globStatus(final Path pathPattern,
final PathFilter filter) throws AccessControlException,
UnsupportedFileSystemException, IOException {
URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri();
String filename = pathPattern.toUri().getPath();
List<String> filePatterns = GlobExpander.expand(filename);
if (filePatterns.size() == 1) {
Path absPathPattern = fixRelativePart(pathPattern);
return globStatusInternal(uri, new Path(absPathPattern.toUri()
.getPath()), filter);
} else {
List<FileStatus> results = new ArrayList<FileStatus>();
for (String iFilePattern : filePatterns) {
Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
for (FileStatus file : files) {
results.add(file);
}
}
return results.toArray(new FileStatus[results.size()]);
}
}
/**
*
* @param uri for all the inPathPattern
* @param inPathPattern - without the scheme & authority (take from uri)
* @param filter
*
* @return an array of FileStatus objects
*
* @throws AccessControlException If access is denied
* @throws IOException If an I/O error occurred
*/
private FileStatus[] globStatusInternal(final URI uri,
final Path inPathPattern, final PathFilter filter)
throws AccessControlException, IOException
{
Path[] parents = new Path[1];
int level = 0;
assert(inPathPattern.toUri().getScheme() == null &&
inPathPattern.toUri().getAuthority() == null &&
inPathPattern.isUriPathAbsolute());
String filename = inPathPattern.toUri().getPath();
// path has only zero component
if (filename.isEmpty() || Path.SEPARATOR.equals(filename)) {
Path p = inPathPattern.makeQualified(uri, null);
return getFileStatus(new Path[]{p});
}
// path has at least one component
String[] components = filename.split(Path.SEPARATOR);
// Path is absolute, first component is "/" hence first component
// is the uri root
parents[0] = new Path(new Path(uri), new Path("/"));
level = 1;
// glob the paths that match the parent path, ie. [0, components.length-1]
boolean[] hasGlob = new boolean[]{false};
Path[] relParentPaths =
globPathsLevel(parents, components, level, hasGlob);
FileStatus[] results;
if (relParentPaths == null || relParentPaths.length == 0) {
results = null;
} else {
// fix the pathes to be abs
Path[] parentPaths = new Path [relParentPaths.length];
for(int i=0; i<relParentPaths.length; i++) {
parentPaths[i] = relParentPaths[i].makeQualified(uri, null);
}
// Now work on the last component of the path
GlobFilter fp =
new GlobFilter(components[components.length - 1], filter);
if (fp.hasPattern()) { // last component has a pattern
// list parent directories and then glob the results
try {
results = listStatus(parentPaths, fp);
} catch (FileNotFoundException e) {
results = null;
}
hasGlob[0] = true;
} else { // last component does not have a pattern
// get all the path names
ArrayList<Path> filteredPaths =
new ArrayList<Path>(parentPaths.length);
for (int i = 0; i < parentPaths.length; i++) {
parentPaths[i] = new Path(parentPaths[i],
components[components.length - 1]);
if (fp.accept(parentPaths[i])) {
filteredPaths.add(parentPaths[i]);
}
}
// get all their statuses
results = getFileStatus(
filteredPaths.toArray(new Path[filteredPaths.size()]));
}
}
// Decide if the pathPattern contains a glob or not
if (results == null) {
if (hasGlob[0]) {
results = new FileStatus[0];
}
} else {
if (results.length == 0) {
if (!hasGlob[0]) {
results = null;
}
} else {
Arrays.sort(results);
}
}
return results;
}
/*
* For a path of N components, return a list of paths that match the
* components [<code>level</code>, <code>N-1</code>].
*/
private Path[] globPathsLevel(Path[] parents, String[] filePattern,
int level, boolean[] hasGlob) throws AccessControlException,
FileNotFoundException, IOException {
if (level == filePattern.length - 1) {
return parents;
}
if (parents == null || parents.length == 0) {
return null;
}
GlobFilter fp = new GlobFilter(filePattern[level]);
if (fp.hasPattern()) {
try {
parents = FileUtil.stat2Paths(listStatus(parents, fp));
} catch (FileNotFoundException e) {
parents = null;
}
hasGlob[0] = true;
} else {
for (int i = 0; i < parents.length; i++) {
parents[i] = new Path(parents[i], filePattern[level]);
}
}
return globPathsLevel(parents, filePattern, level + 1, hasGlob);
return new Globber(FileContext.this, pathPattern, filter).glob();
}
/**

View File

@ -1619,7 +1619,7 @@ public FileStatus[] listStatus(Path[] files, PathFilter filter)
* @throws IOException
*/
public FileStatus[] globStatus(Path pathPattern) throws IOException {
return globStatus(pathPattern, DEFAULT_FILTER);
return new Globber(this, pathPattern, DEFAULT_FILTER).glob();
}
/**
@ -1637,126 +1637,7 @@ public FileStatus[] globStatus(Path pathPattern) throws IOException {
*/
public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException {
String filename = pathPattern.toUri().getPath();
List<FileStatus> allMatches = null;
List<String> filePatterns = GlobExpander.expand(filename);
for (String filePattern : filePatterns) {
Path path = new Path(filePattern.isEmpty() ? Path.CUR_DIR : filePattern);
List<FileStatus> matches = globStatusInternal(path, filter);
if (matches != null) {
if (allMatches == null) {
allMatches = matches;
} else {
allMatches.addAll(matches);
}
}
}
FileStatus[] results = null;
if (allMatches != null) {
results = allMatches.toArray(new FileStatus[allMatches.size()]);
} else if (filePatterns.size() > 1) {
// no matches with multiple expansions is a non-matching glob
results = new FileStatus[0];
}
return results;
}
// sort gripes because FileStatus Comparable isn't parameterized...
@SuppressWarnings("unchecked")
private List<FileStatus> globStatusInternal(Path pathPattern,
PathFilter filter) throws IOException {
boolean patternHasGlob = false; // pathPattern has any globs
List<FileStatus> matches = new ArrayList<FileStatus>();
// determine starting point
int level = 0;
String baseDir = Path.CUR_DIR;
if (pathPattern.isAbsolute()) {
level = 1; // need to skip empty item at beginning of split list
baseDir = Path.SEPARATOR;
}
// parse components and determine if it's a glob
String[] components = null;
GlobFilter[] filters = null;
String filename = pathPattern.toUri().getPath();
if (!filename.isEmpty() && !Path.SEPARATOR.equals(filename)) {
components = filename.split(Path.SEPARATOR);
filters = new GlobFilter[components.length];
for (int i=level; i < components.length; i++) {
filters[i] = new GlobFilter(components[i]);
patternHasGlob |= filters[i].hasPattern();
}
if (!patternHasGlob) {
baseDir = unquotePathComponent(filename);
components = null; // short through to filter check
}
}
// seed the parent directory path, return if it doesn't exist
try {
matches.add(getFileStatus(new Path(baseDir)));
} catch (FileNotFoundException e) {
return patternHasGlob ? matches : null;
}
// skip if there are no components other than the basedir
if (components != null) {
// iterate through each path component
for (int i=level; (i < components.length) && !matches.isEmpty(); i++) {
List<FileStatus> children = new ArrayList<FileStatus>();
for (FileStatus match : matches) {
// don't look for children in a file matched by a glob
if (!match.isDirectory()) {
continue;
}
try {
if (filters[i].hasPattern()) {
// get all children matching the filter
FileStatus[] statuses = listStatus(match.getPath(), filters[i]);
children.addAll(Arrays.asList(statuses));
} else {
// the component does not have a pattern
String component = unquotePathComponent(components[i]);
Path child = new Path(match.getPath(), component);
children.add(getFileStatus(child));
}
} catch (FileNotFoundException e) {
// don't care
}
}
matches = children;
}
}
// remove anything that didn't match the filter
if (!matches.isEmpty()) {
Iterator<FileStatus> iter = matches.iterator();
while (iter.hasNext()) {
if (!filter.accept(iter.next().getPath())) {
iter.remove();
}
}
}
// no final paths, if there were any globs return empty list
if (matches.isEmpty()) {
return patternHasGlob ? matches : null;
}
Collections.sort(matches);
return matches;
}
/**
* The glob filter builds a regexp per path component. If the component
* does not contain a shell metachar, then it falls back to appending the
* raw string to the list of built up paths. This raw path needs to have
* the quoting removed. Ie. convert all occurances of "\X" to "X"
* @param name of the path component
* @return the unquoted path component
*/
private String unquotePathComponent(String name) {
return name.replaceAll("\\\\(.)", "$1");
return new Globber(this, pathPattern, filter).glob();
}
/**

View File

@ -73,7 +73,9 @@ public T resolve(final FileSystem filesys, final Path path)
int count = 0;
T in = null;
Path p = path;
FileSystem fs = FileSystem.getFSofPath(p, filesys.getConf());
// Assumes path belongs to this FileSystem.
// Callers validate this by passing paths through FileSystem#checkPath
FileSystem fs = filesys;
for (boolean isLink = true; isLink;) {
try {
in = doCall(p);

View File

@ -143,6 +143,27 @@ public static boolean fullyDelete(final File dir, boolean tryGrantPermissions) {
return deleteImpl(dir, true);
}
/**
* Returns the target of the given symlink. Returns the empty string if
* the given path does not refer to a symlink or there is an error
* accessing the symlink.
* @param f File representing the symbolic link.
* @return The target of the symbolic link, empty string on error or if not
* a symlink.
*/
public static String readLink(File f) {
/* NB: Use readSymbolicLink in java.nio.file.Path once available. Could
* use getCanonicalPath in File to get the target of the symlink but that
* does not indicate if the given path refers to a symlink.
*/
try {
return Shell.execCommand(
Shell.getReadlinkCommand(f.toString())).trim();
} catch (IOException x) {
return "";
}
}
/*
* Pure-Java implementation of "chmod +rwx f".
*/
@ -737,15 +758,18 @@ public static class HardLink extends org.apache.hadoop.fs.HardLink {
* On Windows, when symlink creation fails due to security
* setting, we will log a warning. The return code in this
* case is 2.
*
* @param target the target for symlink
* @param linkname the symlink
* @return value returned by the command
* @return 0 on success
*/
public static int symLink(String target, String linkname) throws IOException{
// Run the input paths through Java's File so that they are converted to the
// native OS form
File targetFile = new File(target);
File linkFile = new File(linkname);
File targetFile = new File(
Path.getPathWithoutSchemeAndAuthority(new Path(target)).toString());
File linkFile = new File(
Path.getPathWithoutSchemeAndAuthority(new Path(linkname)).toString());
// If not on Java7+, copy a file instead of creating a symlink since
// Java6 has close to no support for symlinks on Windows. Specifically
@ -757,9 +781,16 @@ public static int symLink(String target, String linkname) throws IOException{
// is symlinked under userlogs and userlogs are generated afterwards).
if (Shell.WINDOWS && !Shell.isJava7OrAbove() && targetFile.isFile()) {
try {
LOG.info("FileUtil#symlink: On Java6, copying file instead "
+ linkname + " -> " + target);
org.apache.commons.io.FileUtils.copyFile(targetFile, linkFile);
LOG.warn("FileUtil#symlink: On Windows+Java6, copying file instead " +
"of creating a symlink. Copying " + target + " -> " + linkname);
if (!linkFile.getParentFile().exists()) {
LOG.warn("Parent directory " + linkFile.getParent() +
" does not exist.");
return 1;
} else {
org.apache.commons.io.FileUtils.copyFile(targetFile, linkFile);
}
} catch (IOException ex) {
LOG.warn("FileUtil#symlink failed to copy the file with error: "
+ ex.getMessage());
@ -769,10 +800,23 @@ public static int symLink(String target, String linkname) throws IOException{
return 0;
}
String[] cmd = Shell.getSymlinkCommand(targetFile.getPath(),
linkFile.getPath());
ShellCommandExecutor shExec = new ShellCommandExecutor(cmd);
String[] cmd = Shell.getSymlinkCommand(
targetFile.toString(),
linkFile.toString());
ShellCommandExecutor shExec;
try {
if (Shell.WINDOWS &&
linkFile.getParentFile() != null &&
!new Path(target).isAbsolute()) {
// Relative links on Windows must be resolvable at the time of
// creation. To ensure this we run the shell command in the directory
// of the link.
//
shExec = new ShellCommandExecutor(cmd, linkFile.getParentFile());
} else {
shExec = new ShellCommandExecutor(cmd);
}
shExec.execute();
} catch (Shell.ExitCodeException ec) {
int returnVal = ec.getExitCode();

View File

@ -0,0 +1,215 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@InterfaceAudience.Private
@InterfaceStability.Unstable
class Globber {
public static final Log LOG = LogFactory.getLog(Globber.class.getName());
private final FileSystem fs;
private final FileContext fc;
private final Path pathPattern;
private final PathFilter filter;
public Globber(FileSystem fs, Path pathPattern, PathFilter filter) {
this.fs = fs;
this.fc = null;
this.pathPattern = pathPattern;
this.filter = filter;
}
public Globber(FileContext fc, Path pathPattern, PathFilter filter) {
this.fs = null;
this.fc = fc;
this.pathPattern = pathPattern;
this.filter = filter;
}
private FileStatus getFileStatus(Path path) {
try {
if (fs != null) {
return fs.getFileStatus(path);
} else {
return fc.getFileStatus(path);
}
} catch (IOException e) {
return null;
}
}
private FileStatus[] listStatus(Path path) {
try {
if (fs != null) {
return fs.listStatus(path);
} else {
return fc.util().listStatus(path);
}
} catch (IOException e) {
return new FileStatus[0];
}
}
private Path fixRelativePart(Path path) {
if (fs != null) {
return fs.fixRelativePart(path);
} else {
return fc.fixRelativePart(path);
}
}
/**
* Translate an absolute path into a list of path components.
* We merge double slashes into a single slash here.
* The first path component (i.e. root) does not get an entry in the list.
*/
private static List<String> getPathComponents(String path)
throws IOException {
ArrayList<String> ret = new ArrayList<String>();
for (String component : path.split(Path.SEPARATOR)) {
if (!component.isEmpty()) {
ret.add(component);
}
}
return ret;
}
private String schemeFromPath(Path path) throws IOException {
String scheme = pathPattern.toUri().getScheme();
if (scheme == null) {
if (fs != null) {
scheme = fs.getUri().getScheme();
} else {
scheme = fc.getFSofPath(path).getUri().getScheme();
}
}
return scheme;
}
private String authorityFromPath(Path path) throws IOException {
String authority = pathPattern.toUri().getAuthority();
if (authority == null) {
if (fs != null) {
authority = fs.getUri().getAuthority();
} else {
authority = fc.getFSofPath(path).getUri().getAuthority();
}
}
return authority ;
}
public FileStatus[] glob() throws IOException {
// First we get the scheme and authority of the pattern that was passed
// in.
String scheme = schemeFromPath(pathPattern);
String authority = authorityFromPath(pathPattern);
// Next we strip off everything except the pathname itself, and expand all
// globs. Expansion is a process which turns "grouping" clauses,
// expressed as brackets, into separate path patterns.
String pathPatternString = pathPattern.toUri().getPath();
List<String> flattenedPatterns = GlobExpander.expand(pathPatternString);
// Now loop over all flattened patterns. In every case, we'll be trying to
// match them to entries in the filesystem.
ArrayList<FileStatus> results =
new ArrayList<FileStatus>(flattenedPatterns.size());
boolean sawWildcard = false;
for (String flatPattern : flattenedPatterns) {
// Get the absolute path for this flattened pattern. We couldn't do
// this prior to flattening because of patterns like {/,a}, where which
// path you go down influences how the path must be made absolute.
Path absPattern =
fixRelativePart(new Path(flatPattern.isEmpty() ? "." : flatPattern));
// Now we break the flattened, absolute pattern into path components.
// For example, /a/*/c would be broken into the list [a, *, c]
List<String> components =
getPathComponents(absPattern.toUri().getPath());
// Starting out at the root of the filesystem, we try to match
// filesystem entries against pattern components.
ArrayList<FileStatus> candidates = new ArrayList<FileStatus>(1);
candidates.add(new FileStatus(0, true, 0, 0, 0,
new Path(scheme, authority, "/")));
for (String component : components) {
ArrayList<FileStatus> newCandidates =
new ArrayList<FileStatus>(candidates.size());
GlobFilter globFilter = new GlobFilter(component);
if (globFilter.hasPattern()) {
sawWildcard = true;
}
if (candidates.isEmpty() && sawWildcard) {
break;
}
for (FileStatus candidate : candidates) {
FileStatus resolvedCandidate = candidate;
if (candidate.isSymlink()) {
// We have to resolve symlinks, because otherwise we don't know
// whether they are directories.
resolvedCandidate = getFileStatus(candidate.getPath());
}
if (resolvedCandidate == null ||
resolvedCandidate.isDirectory() == false) {
continue;
}
FileStatus[] children = listStatus(candidate.getPath());
for (FileStatus child : children) {
// Set the child path based on the parent path.
// This keeps the symlinks in our path.
child.setPath(new Path(candidate.getPath(),
child.getPath().getName()));
if (globFilter.accept(child.getPath())) {
newCandidates.add(child);
}
}
}
candidates = newCandidates;
}
for (FileStatus status : candidates) {
// HADOOP-3497 semantics: the user-defined filter is applied at the
// end, once the full path is built up.
if (filter.accept(status.getPath())) {
results.add(status);
}
}
}
/*
* When the input pattern "looks" like just a simple filename, and we
* can't find it, we return null rather than an empty array.
* This is a special case which the shell relies on.
*
* To be more precise: if there were no results, AND there were no
* groupings (aka brackets), and no wildcards in the input (aka stars),
* we return null.
*/
if ((!sawWildcard) && results.isEmpty() &&
(flattenedPatterns.size() <= 1)) {
return null;
}
return results.toArray(new FileStatus[0]);
}
}
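
A minimal usage sketch (illustrative local paths, not part of this change): callers never use Globber directly, since it is package-private; they reach it through FileSystem#globStatus or FileContext#globStatus, which now delegate to it as shown above.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    // Brace "grouping" clauses and * wildcards are both handled by Globber.
    FileStatus[] matches = fs.globStatus(new Path("/tmp/{logs,data}/*.txt"));
    // null is returned for a plain, non-wildcard name that does not exist;
    // an unmatched wildcard or grouping pattern yields an empty array instead.
    if (matches != null) {
      for (FileStatus st : matches) {
        System.out.println(st.getPath());
      }
    }
  }
}
```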

View File

@ -682,31 +682,13 @@ public void createSymlink(Path target, Path link, boolean createParent)
if (createParent) {
mkdirs(link.getParent());
}
// NB: Use createSymbolicLink in java.nio.file.Path once available
try {
Shell.execCommand(Shell.getSymlinkCommand(
Path.getPathWithoutSchemeAndAuthority(target).toString(),
Path.getPathWithoutSchemeAndAuthority(makeAbsolute(link)).toString()));
} catch (IOException x) {
throw new IOException("Unable to create symlink: "+x.getMessage());
}
}
/**
* Returns the target of the given symlink. Returns the empty string if
* the given path does not refer to a symlink or there is an error
* accessing the symlink.
*/
private String readLink(Path p) {
/* NB: Use readSymbolicLink in java.nio.file.Path once available. Could
* use getCanonicalPath in File to get the target of the symlink but that
* does not indicate if the given path refers to a symlink.
*/
try {
final String path = p.toUri().getPath();
return Shell.execCommand(Shell.READ_LINK_COMMAND, path).trim();
} catch (IOException x) {
return "";
// NB: Use createSymbolicLink in java.nio.file.Path once available
int result = FileUtil.symLink(target.toString(),
makeAbsolute(link).toString());
if (result != 0) {
throw new IOException("Error " + result + " creating symlink " +
link + " to " + target);
}
}
@ -729,7 +711,7 @@ public FileStatus getFileLinkStatus(final Path f) throws IOException {
}
private FileStatus getFileLinkStatusInternal(final Path f) throws IOException {
String target = readLink(f);
String target = FileUtil.readLink(new File(f.toString()));
try {
FileStatus fs = getFileStatus(f);

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.fs.local;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
@ -28,12 +29,12 @@
import org.apache.hadoop.fs.AbstractFileSystem;
import org.apache.hadoop.fs.DelegateToFileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.FsConstants;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Shell;
/**
* The RawLocalFs implementation of AbstractFileSystem.
@ -83,36 +84,18 @@ public void createSymlink(Path target, Path link, boolean createParent)
final String targetScheme = target.toUri().getScheme();
if (targetScheme != null && !"file".equals(targetScheme)) {
throw new IOException("Unable to create symlink to non-local file "+
"system: "+target.toString());
"system: "+target.toString());
}
if (createParent) {
mkdir(link.getParent(), FsPermission.getDirDefault(), true);
}
// NB: Use createSymbolicLink in java.nio.file.Path once available
try {
Shell.execCommand(Shell.getSymlinkCommand(
Path.getPathWithoutSchemeAndAuthority(target).toString(),
Path.getPathWithoutSchemeAndAuthority(link).toString()));
} catch (IOException x) {
throw new IOException("Unable to create symlink: "+x.getMessage());
}
}
/**
* Returns the target of the given symlink. Returns the empty string if
* the given path does not refer to a symlink or there is an error
* acessing the symlink.
*/
private String readLink(Path p) {
/* NB: Use readSymbolicLink in java.nio.file.Path once available. Could
* use getCanonicalPath in File to get the target of the symlink but that
* does not indicate if the given path refers to a symlink.
*/
try {
final String path = p.toUri().getPath();
return Shell.execCommand(Shell.READ_LINK_COMMAND, path).trim();
} catch (IOException x) {
return "";
// NB: Use createSymbolicLink in java.nio.file.Path once available
int result = FileUtil.symLink(target.toString(), link.toString());
if (result != 0) {
throw new IOException("Error " + result + " creating symlink " +
link + " to " + target);
}
}
@ -123,7 +106,7 @@ private String readLink(Path p) {
*/
@Override
public FileStatus getFileLinkStatus(final Path f) throws IOException {
String target = readLink(f);
String target = FileUtil.readLink(new File(f.toString()));
try {
FileStatus fs = getFileStatus(f);
// If f refers to a regular file or directory

View File

@ -18,18 +18,16 @@
package org.apache.hadoop.fs.shell;
import java.io.File;
import java.io.IOException;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.PathIsDirectoryException;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
/** Various commands for copy files */
@ -44,6 +42,7 @@ public static void registerCommands(CommandFactory factory) {
factory.addClass(CopyToLocal.class, "-copyToLocal");
factory.addClass(Get.class, "-get");
factory.addClass(Put.class, "-put");
factory.addClass(AppendToFile.class, "-appendToFile");
}
/** merge multiple files together */
@ -235,4 +234,93 @@ public static class CopyToLocal extends Get {
public static final String USAGE = Get.USAGE;
public static final String DESCRIPTION = "Identical to the -get command.";
}
/**
* Append the contents of one or more local files to a remote
* file.
*/
public static class AppendToFile extends CommandWithDestination {
public static final String NAME = "appendToFile";
public static final String USAGE = "<localsrc> ... <dst>";
public static final String DESCRIPTION =
"Appends the contents of all the given local files to the\n" +
"given dst file. The dst file will be created if it does\n" +
"not exist. If <localSrc> is -, then the input is read\n" +
"from stdin.";
private static final int DEFAULT_IO_LENGTH = 1024 * 1024;
boolean readStdin = false;
// commands operating on local paths have no need for glob expansion
@Override
protected List<PathData> expandArgument(String arg) throws IOException {
List<PathData> items = new LinkedList<PathData>();
if (arg.equals("-")) {
readStdin = true;
} else {
try {
items.add(new PathData(new URI(arg), getConf()));
} catch (URISyntaxException e) {
if (Path.WINDOWS) {
// Unlike URI, PathData knows how to parse Windows drive-letter paths.
items.add(new PathData(arg, getConf()));
} else {
throw new IOException("Unexpected URISyntaxException: " + e.toString());
}
}
}
return items;
}
@Override
protected void processOptions(LinkedList<String> args)
throws IOException {
if (args.size() < 2) {
throw new IOException("missing destination argument");
}
getRemoteDestination(args);
super.processOptions(args);
}
@Override
protected void processArguments(LinkedList<PathData> args)
throws IOException {
if (!dst.exists) {
dst.fs.create(dst.path, false).close();
}
InputStream is = null;
FSDataOutputStream fos = dst.fs.append(dst.path);
try {
if (readStdin) {
if (args.size() == 0) {
IOUtils.copyBytes(System.in, fos, DEFAULT_IO_LENGTH);
} else {
throw new IOException(
"stdin (-) must be the sole input argument when present");
}
}
// Read in each input file and write to the target.
for (PathData source : args) {
is = new FileInputStream(source.toFile());
IOUtils.copyBytes(is, fos, DEFAULT_IO_LENGTH);
IOUtils.closeStream(is);
is = null;
}
} finally {
if (is != null) {
IOUtils.closeStream(is);
}
if (fos != null) {
IOUtils.closeStream(fos);
}
}
}
}
}
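
A minimal sketch of driving the new command programmatically (the paths are hypothetical): -appendToFile can be invoked through FsShell exactly as from the command line, e.g. `hadoop fs -appendToFile local1.txt local2.txt /user/alice/target.txt`. Passing "-" as the sole local source reads from stdin, and the destination filesystem must support append (e.g. HDFS).

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.util.ToolRunner;

public class AppendToFileExample {
  public static void main(String[] args) throws Exception {
    // Equivalent to: hadoop fs -appendToFile local1.txt local2.txt /user/alice/target.txt
    int rc = ToolRunner.run(new Configuration(), new FsShell(),
        new String[] { "-appendToFile", "local1.txt", "local2.txt",
                       "/user/alice/target.txt" });
    System.exit(rc);
  }
}
```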

View File

@ -107,7 +107,7 @@ public CompressionOutputStream createOutputStream(OutputStream out,
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY,
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT);
int compressionOverhead = Math.max((int)(bufferSize * 0.01), 10);
int compressionOverhead = bufferSize/255 + 16;
return new BlockCompressorStream(out, compressor, bufferSize,
compressionOverhead);
@ -140,7 +140,10 @@ public Compressor createCompressor() {
int bufferSize = conf.getInt(
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY,
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT);
return new Lz4Compressor(bufferSize);
boolean useLz4HC = conf.getBoolean(
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY,
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_DEFAULT);
return new Lz4Compressor(bufferSize, useLz4HC);
}
/**

View File

@ -52,6 +52,7 @@ public class Lz4Compressor implements Compressor {
private long bytesRead = 0L;
private long bytesWritten = 0L;
private final boolean useLz4HC;
static {
if (NativeCodeLoader.isNativeCodeLoaded()) {
@ -72,8 +73,11 @@ public class Lz4Compressor implements Compressor {
* Creates a new compressor.
*
* @param directBufferSize size of the direct buffer to be used.
* @param useLz4HC use high compression ratio version of lz4,
* which trades CPU for compression ratio.
*/
public Lz4Compressor(int directBufferSize) {
public Lz4Compressor(int directBufferSize, boolean useLz4HC) {
this.useLz4HC = useLz4HC;
this.directBufferSize = directBufferSize;
uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
@ -81,6 +85,15 @@ public Lz4Compressor(int directBufferSize) {
compressedDirectBuf.position(directBufferSize);
}
/**
* Creates a new compressor.
*
* @param directBufferSize size of the direct buffer to be used.
*/
public Lz4Compressor(int directBufferSize) {
this(directBufferSize, false);
}
/**
* Creates a new compressor with the default buffer size.
*/
@ -227,7 +240,7 @@ public synchronized int compress(byte[] b, int off, int len)
}
// Compress data
n = compressBytesDirect();
n = useLz4HC ? compressBytesDirectHC() : compressBytesDirect();
compressedDirectBuf.limit(n);
uncompressedDirectBuf.clear(); // lz4 consumes all buffer input
@ -297,5 +310,7 @@ public synchronized void end() {
private native int compressBytesDirect();
private native int compressBytesDirectHC();
public native static String getLibraryName();
}
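
A minimal sketch of enabling the high-compression path (requires the native hadoop library with lz4 support): setting the new io.compression.codec.lz4.use.lz4hc key makes Lz4Codec hand out compressors that take the lz4hc branch added above; decompression is unchanged.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Lz4Codec;

public class Lz4HcExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setBoolean(
        CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY, true);
    Lz4Codec codec = new Lz4Codec();
    codec.setConf(conf);
    // With the key set, the codec constructs new Lz4Compressor(bufferSize, true),
    // so compress() calls compressBytesDirectHC() instead of compressBytesDirect().
    Compressor compressor = codec.createCompressor();
    System.out.println(compressor.getClass().getName());
  }
}
```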

View File

@ -18,10 +18,11 @@
package org.apache.hadoop.ipc;
import static org.apache.hadoop.ipc.RpcConstants.CONNECTION_CONTEXT_CALL_ID;
import static org.apache.hadoop.ipc.RpcConstants.*;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FilterInputStream;
@ -52,6 +53,7 @@
import java.util.concurrent.atomic.AtomicLong;
import javax.net.SocketFactory;
import javax.security.sasl.Sasl;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -87,6 +89,7 @@
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.protobuf.CodedOutputStream;
@ -380,6 +383,7 @@ private class Connection extends Thread {
private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
private boolean doPing; //do we need to send ping message
private int pingInterval; // how often sends ping to the server in msecs
private ByteArrayOutputStream pingRequest; // ping message
// currently active calls
private Hashtable<Integer, Call> calls = new Hashtable<Integer, Call>();
@ -405,6 +409,15 @@ public Connection(ConnectionId remoteId, int serviceClass) throws IOException {
this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts();
this.tcpNoDelay = remoteId.getTcpNoDelay();
this.doPing = remoteId.getDoPing();
if (doPing) {
// construct a RPC header with the callId as the ping callId
pingRequest = new ByteArrayOutputStream();
RpcRequestHeaderProto pingHeader = ProtoUtil
.makeRpcRequestHeader(RpcKind.RPC_PROTOCOL_BUFFER,
OperationProto.RPC_FINAL_PACKET, PING_CALL_ID,
RpcConstants.INVALID_RETRY_COUNT, clientId);
pingHeader.writeDelimitedTo(pingRequest);
}
this.pingInterval = remoteId.getPingInterval();
this.serviceClass = serviceClass;
if (LOG.isDebugEnabled()) {
@ -700,6 +713,7 @@ public AuthMethod run()
}
});
} catch (Exception ex) {
authMethod = saslRpcClient.getAuthMethod();
if (rand == null) {
rand = new Random();
}
@ -711,6 +725,9 @@ public AuthMethod run()
// Sasl connect is successful. Let's set up Sasl i/o streams.
inStream = saslRpcClient.getInputStream(inStream);
outStream = saslRpcClient.getOutputStream(outStream);
// for testing
remoteId.saslQop =
(String)saslRpcClient.getNegotiatedProperty(Sasl.QOP);
} else if (UserGroupInformation.isSecurityEnabled() &&
!fallbackAllowed) {
throw new IOException("Server asks us to fall back to SIMPLE " +
@ -720,12 +737,16 @@ public AuthMethod run()
}
if (doPing) {
this.in = new DataInputStream(new BufferedInputStream(
new PingInputStream(inStream)));
} else {
this.in = new DataInputStream(new BufferedInputStream(inStream));
inStream = new PingInputStream(inStream);
}
this.out = new DataOutputStream(new BufferedOutputStream(outStream));
this.in = new DataInputStream(new BufferedInputStream(inStream));
// SASL may have already buffered the stream
if (!(outStream instanceof BufferedOutputStream)) {
outStream = new BufferedOutputStream(outStream);
}
this.out = new DataOutputStream(outStream);
writeConnectionContext(remoteId, authMethod);
// update last activity time
@ -905,7 +926,8 @@ private synchronized void sendPing() throws IOException {
if ( curTime - lastActivity.get() >= pingInterval) {
lastActivity.set(curTime);
synchronized (out) {
out.writeInt(RpcConstants.PING_CALL_ID);
out.writeInt(pingRequest.size());
pingRequest.writeTo(out);
out.flush();
}
}
@ -1455,6 +1477,7 @@ public static class ConnectionId {
private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm
private final boolean doPing; //do we need to send ping message
private final int pingInterval; // how often sends ping to the server in msecs
private String saslQop; // here for testing
ConnectionId(InetSocketAddress address, Class<?> protocol,
UserGroupInformation ticket, int rpcTimeout, int maxIdleTime,
@ -1509,6 +1532,11 @@ int getPingInterval() {
return pingInterval;
}
@VisibleForTesting
String getSaslQop() {
return saslQop;
}
static ConnectionId getConnectionId(InetSocketAddress addr,
Class<?> protocol, UserGroupInformation ticket, int rpcTimeout,
Configuration conf) throws IOException {

View File

@ -33,6 +33,7 @@ public class ClientId {
/** The byte array of a UUID should be 16 */
public static final int BYTE_LENGTH = 16;
private static final int shiftWidth = 8;
/**
* Return clientId as byte[]
@ -53,17 +54,27 @@ public static String toString(byte[] clientId) {
}
// otherwise should be 16 bytes
Preconditions.checkArgument(clientId.length == BYTE_LENGTH);
long msb = 0;
long lsb = 0;
for (int i = 0; i < 8; i++) {
msb = (msb << 8) | (clientId[i] & 0xff);
}
for (int i = 8; i < 16; i++) {
lsb = (lsb << 8) | (clientId[i] & 0xff);
}
long msb = getMsb(clientId);
long lsb = getLsb(clientId);
return (new UUID(msb, lsb)).toString();
}
public static long getMsb(byte[] clientId) {
long msb = 0;
for (int i = 0; i < BYTE_LENGTH/2; i++) {
msb = (msb << shiftWidth) | (clientId[i] & 0xff);
}
return msb;
}
public static long getLsb(byte[] clientId) {
long lsb = 0;
for (int i = BYTE_LENGTH/2; i < BYTE_LENGTH; i++) {
lsb = (lsb << shiftWidth) | (clientId[i] & 0xff);
}
return lsb;
}
/** Convert from clientId string byte[] representation of clientId */
public static byte[] toBytes(String id) {
if (id == null || "".equals(id)) {

View File

@ -70,16 +70,8 @@ public static class CacheEntry implements LightWeightCache.Entry {
"Invalid clientId - length is " + clientId.length
+ " expected length " + ClientId.BYTE_LENGTH);
// Convert UUID bytes to two longs
long tmp = 0;
for (int i=0; i<8; i++) {
tmp = (tmp << 8) | (clientId[i] & 0xff);
}
clientIdMsb = tmp;
tmp = 0;
for (int i=8; i<16; i++) {
tmp = (tmp << 8) | (clientId[i] & 0xff);
}
clientIdLsb = tmp;
clientIdMsb = ClientId.getMsb(clientId);
clientIdLsb = ClientId.getLsb(clientId);
this.callId = callId;
this.expirationTime = expirationTime;
}
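
A minimal sketch of the new helpers (ClientId.getClientId() is assumed to return a random 16-byte UUID, as the RPC client uses it): getMsb/getLsb split those bytes into the two longs that RetryCache now stores, and recombining them through java.util.UUID reproduces ClientId.toString().

```java
import java.util.UUID;
import org.apache.hadoop.ipc.ClientId;

public class ClientIdExample {
  public static void main(String[] args) {
    byte[] clientId = ClientId.getClientId(); // assumed: 16 random UUID bytes
    long msb = ClientId.getMsb(clientId);     // first 8 bytes
    long lsb = ClientId.getLsb(clientId);     // last 8 bytes
    // Should print the same string as ClientId.toString(clientId)
    System.out.println(new UUID(msb, lsb));
  }
}
```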

View File

@ -27,13 +27,13 @@ private RpcConstants() {
// Hidden Constructor
}
public static final int PING_CALL_ID = -1;
public static final int AUTHORIZATION_FAILED_CALL_ID = -1;
public static final int INVALID_CALL_ID = -2;
public static final int CONNECTION_CONTEXT_CALL_ID = -3;
public static final int PING_CALL_ID = -4;
public static final byte[] DUMMY_CLIENT_ID = new byte[0];
public static final int INVALID_CALL_ID = -2;
public static final int CONNECTION_CONTEXT_CALL_ID = -3;
public static final int INVALID_RETRY_COUNT = -1;

View File

@ -72,8 +72,9 @@
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import static org.apache.hadoop.ipc.RpcConstants.CURRENT_VERSION;
import static org.apache.hadoop.ipc.RpcConstants.CONNECTION_CONTEXT_CALL_ID;
import static org.apache.hadoop.ipc.RpcConstants.*;
import org.apache.hadoop.ipc.ProtobufRpcEngine.RpcResponseMessageWrapper;
import org.apache.hadoop.ipc.ProtobufRpcEngine.RpcResponseWrapper;
import org.apache.hadoop.ipc.RPC.RpcInvoker;
import org.apache.hadoop.ipc.RPC.VersionMismatch;
@ -1177,9 +1178,7 @@ public class Connection {
public UserGroupInformation attemptingUser = null; // user name before auth
// Fake 'call' for failed authorization response
private static final int AUTHORIZATION_FAILED_CALLID = -1;
private final Call authFailedCall = new Call(AUTHORIZATION_FAILED_CALLID,
private final Call authFailedCall = new Call(AUTHORIZATION_FAILED_CALL_ID,
RpcConstants.INVALID_RETRY_COUNT, null, this);
private ByteArrayOutputStream authFailedResponse = new ByteArrayOutputStream();
@ -1276,8 +1275,28 @@ private UserGroupInformation getAuthorizedUgi(String authorizedId)
}
}
private RpcSaslProto saslReadAndProcess(DataInputStream dis) throws
WrappedRpcServerException, InterruptedException {
private void saslReadAndProcess(DataInputStream dis) throws
WrappedRpcServerException, IOException, InterruptedException {
final RpcSaslProto saslMessage =
decodeProtobufFromStream(RpcSaslProto.newBuilder(), dis);
switch (saslMessage.getState()) {
case WRAP: {
if (!saslContextEstablished || !useWrap) {
throw new WrappedRpcServerException(
RpcErrorCodeProto.FATAL_INVALID_RPC_HEADER,
new SaslException("Server is not wrapping data"));
}
// loops over decoded data and calls processOneRpc
unwrapPacketAndProcessRpcs(saslMessage.getToken().toByteArray());
break;
}
default:
saslProcess(saslMessage);
}
}
private void saslProcess(RpcSaslProto saslMessage)
throws WrappedRpcServerException, IOException, InterruptedException {
if (saslContextEstablished) {
throw new WrappedRpcServerException(
RpcErrorCodeProto.FATAL_INVALID_RPC_HEADER,
@ -1286,7 +1305,7 @@ private RpcSaslProto saslReadAndProcess(DataInputStream dis) throws
RpcSaslProto saslResponse = null;
try {
try {
saslResponse = processSaslMessage(dis);
saslResponse = processSaslMessage(saslMessage);
} catch (IOException e) {
IOException sendToClient = e;
Throwable cause = e;
@ -1310,8 +1329,6 @@ private RpcSaslProto saslReadAndProcess(DataInputStream dis) throws
LOG.debug("SASL server context established. Negotiated QoP is "
+ saslServer.getNegotiatedProperty(Sasl.QOP));
}
String qop = (String) saslServer.getNegotiatedProperty(Sasl.QOP);
useWrap = qop != null && !"auth".equalsIgnoreCase(qop);
user = getAuthorizedUgi(saslServer.getAuthorizationID());
if (LOG.isDebugEnabled()) {
LOG.debug("SASL server successfully authenticated client: " + user);
@ -1326,13 +1343,21 @@ private RpcSaslProto saslReadAndProcess(DataInputStream dis) throws
throw new WrappedRpcServerException(
RpcErrorCodeProto.FATAL_UNAUTHORIZED, ioe);
}
return saslResponse;
// send back response if any, may throw IOException
if (saslResponse != null) {
doSaslReply(saslResponse);
}
// do NOT enable wrapping until the last auth response is sent
if (saslContextEstablished) {
String qop = (String) saslServer.getNegotiatedProperty(Sasl.QOP);
// SASL wrapping is only used if the connection has a QOP, and
// the value is not auth. ex. auth-int & auth-priv
useWrap = (qop != null && !"auth".equalsIgnoreCase(qop));
}
}
private RpcSaslProto processSaslMessage(DataInputStream dis)
private RpcSaslProto processSaslMessage(RpcSaslProto saslMessage)
throws IOException, InterruptedException {
final RpcSaslProto saslMessage =
decodeProtobufFromStream(RpcSaslProto.newBuilder(), dis);
RpcSaslProto saslResponse = null;
final SaslState state = saslMessage.getState(); // required
switch (state) {
@ -1517,11 +1542,6 @@ public int readAndProcess()
if (data == null) {
dataLengthBuffer.flip();
dataLength = dataLengthBuffer.getInt();
if ((dataLength == RpcConstants.PING_CALL_ID) && (!useWrap)) {
// covers the !useSasl too
dataLengthBuffer.clear();
return 0; // ping message
}
checkDataLength(dataLength);
data = ByteBuffer.allocate(dataLength);
}
@ -1532,7 +1552,7 @@ public int readAndProcess()
dataLengthBuffer.clear();
data.flip();
boolean isHeaderRead = connectionContextRead;
processRpcRequestPacket(data.array());
processOneRpc(data.array());
data = null;
if (!isHeaderRead) {
continue;
@ -1695,29 +1715,19 @@ private void processConnectionContext(DataInputStream dis)
}
/**
* Process a RPC Request - if SASL wrapping is enabled, unwrap the
* requests and process each one, else directly process the request
* @param buf - single request or SASL wrapped requests
* @throws IOException - connection failed to authenticate or authorize,
* or the request could not be decoded into a Call
* Process a wrapped RPC Request - unwrap the SASL packet and process
* each embedded RPC request
* @param buf - SASL wrapped request of one or more RPCs
* @throws IOException - SASL packet cannot be unwrapped
* @throws InterruptedException
*/
private void processRpcRequestPacket(byte[] buf)
throws WrappedRpcServerException, IOException, InterruptedException {
if (saslContextEstablished && useWrap) {
if (LOG.isDebugEnabled())
LOG.debug("Have read input token of size " + buf.length
+ " for processing by saslServer.unwrap()");
final byte[] plaintextData = saslServer.unwrap(buf, 0, buf.length);
// loops over decoded data and calls processOneRpc
unwrapPacketAndProcessRpcs(plaintextData);
} else {
processOneRpc(buf);
}
}
private void unwrapPacketAndProcessRpcs(byte[] inBuf)
throws WrappedRpcServerException, IOException, InterruptedException {
if (LOG.isDebugEnabled()) {
LOG.debug("Have read input token of size " + inBuf.length
+ " for processing by saslServer.unwrap()");
}
inBuf = saslServer.unwrap(inBuf, 0, inBuf.length);
ReadableByteChannel ch = Channels.newChannel(new ByteArrayInputStream(
inBuf));
// Read all RPCs contained in the inBuf, even partial ones
@ -1732,13 +1742,6 @@ private void unwrapPacketAndProcessRpcs(byte[] inBuf)
if (unwrappedData == null) {
unwrappedDataLengthBuffer.flip();
int unwrappedDataLength = unwrappedDataLengthBuffer.getInt();
if (unwrappedDataLength == RpcConstants.PING_CALL_ID) {
if (LOG.isDebugEnabled())
LOG.debug("Received ping message");
unwrappedDataLengthBuffer.clear();
continue; // ping message
}
unwrappedData = ByteBuffer.allocate(unwrappedDataLength);
}
@ -1906,11 +1909,9 @@ private void processRpcOutOfBandRequest(RpcRequestHeaderProto header,
RpcErrorCodeProto.FATAL_INVALID_RPC_HEADER,
"SASL protocol not requested by client");
}
RpcSaslProto response = saslReadAndProcess(dis);
// send back response if any, may throw IOException
if (response != null) {
doSaslReply(response);
}
saslReadAndProcess(dis);
} else if (callId == PING_CALL_ID) {
LOG.debug("Received ping message");
} else {
throw new WrappedRpcServerException(
RpcErrorCodeProto.FATAL_INVALID_RPC_HEADER,
@ -1924,7 +1925,7 @@ private void processRpcOutOfBandRequest(RpcRequestHeaderProto header,
*/
private void authorizeConnection() throws WrappedRpcServerException {
try {
// If auth method is DIGEST, the token was obtained by the
// If auth method is TOKEN, the token was obtained by the
// real user for the effective user, therefore not required to
// authorize real user. doAs is allowed only for simple or kerberos
// authentication
@ -2389,9 +2390,21 @@ private void wrapWithSasl(ByteArrayOutputStream response, Call call)
LOG.debug("Adding saslServer wrapped token of size " + token.length
+ " as call response.");
response.reset();
DataOutputStream saslOut = new DataOutputStream(response);
saslOut.writeInt(token.length);
saslOut.write(token, 0, token.length);
// rebuild with sasl header and payload
RpcResponseHeaderProto saslHeader = RpcResponseHeaderProto.newBuilder()
.setCallId(AuthProtocol.SASL.callId)
.setStatus(RpcStatusProto.SUCCESS)
.build();
RpcSaslProto saslMessage = RpcSaslProto.newBuilder()
.setState(SaslState.WRAP)
.setToken(ByteString.copyFrom(token, 0, token.length))
.build();
RpcResponseMessageWrapper saslResponse =
new RpcResponseMessageWrapper(saslHeader, saslMessage);
DataOutputStream out = new DataOutputStream(response);
out.writeInt(saslResponse.getLength());
saslResponse.write(out);
}
}

View File

@ -381,7 +381,7 @@ synchronized MetricsBuffer sampleMetrics() {
private void snapshotMetrics(MetricsSourceAdapter sa,
MetricsBufferBuilder bufferBuilder) {
long startTime = Time.now();
bufferBuilder.add(sa.name(), sa.getMetrics(collector, false));
bufferBuilder.add(sa.name(), sa.getMetrics(collector, true));
collector.clear();
snapshotStat.add(Time.now() - startTime);
LOG.debug("Snapshotted source "+ sa.name());

View File

@ -20,15 +20,20 @@
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FilterInputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.security.auth.callback.Callback;
import javax.security.auth.callback.CallbackHandler;
@ -47,6 +52,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.GlobPattern;
import org.apache.hadoop.ipc.ProtobufRpcEngine.RpcRequestMessageWrapper;
import org.apache.hadoop.ipc.ProtobufRpcEngine.RpcResponseMessageWrapper;
import org.apache.hadoop.ipc.RPC.RpcKind;
@ -67,6 +73,7 @@
import org.apache.hadoop.security.token.TokenSelector;
import org.apache.hadoop.util.ProtoUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString;
/**
* A utility class that encapsulates SASL logic for RPC client
@ -82,6 +89,7 @@ public class SaslRpcClient {
private final Configuration conf;
private SaslClient saslClient;
private AuthMethod authMethod;
private static final RpcRequestHeaderProto saslHeader = ProtoUtil
.makeRpcRequestHeader(RpcKind.RPC_PROTOCOL_BUFFER,
@ -106,6 +114,24 @@ public SaslRpcClient(UserGroupInformation ugi, Class<?> protocol,
this.conf = conf;
}
@VisibleForTesting
@InterfaceAudience.Private
public Object getNegotiatedProperty(String key) {
return (saslClient != null) ? saslClient.getNegotiatedProperty(key) : null;
}
// the RPC Client has an inelegant way of handling expiration of TGTs
// acquired via a keytab. any connection failure causes a relogin, so
// the Client needs to know what authMethod was being attempted if an
// exception occurs. the SASL prep for a kerberos connection should
// ideally relogin if necessary instead of exposing this detail to the
// Client
@InterfaceAudience.Private
public AuthMethod getAuthMethod() {
return authMethod;
}
/**
* Instantiate a sasl client for the first supported auth type in the
* given list. The auth type must be defined, enabled, and the user
@ -256,9 +282,8 @@ private Token<?> getServerToken(SaslAuth authType) throws IOException {
* @return String of the server's principal
* @throws IOException - error determining configured principal
*/
// try to get the configured principal for the remote server
private String getServerPrincipal(SaslAuth authType) throws IOException {
@VisibleForTesting
String getServerPrincipal(SaslAuth authType) throws IOException {
KerberosInfo krbInfo = SecurityUtil.getKerberosInfo(protocol, conf);
LOG.debug("Get kerberos info proto:"+protocol+" info:"+krbInfo);
if (krbInfo == null) { // protocol has no support for kerberos
@ -270,28 +295,37 @@ private String getServerPrincipal(SaslAuth authType) throws IOException {
"Can't obtain server Kerberos config key from protocol="
+ protocol.getCanonicalName());
}
// construct the expected principal from the config
String confPrincipal = SecurityUtil.getServerPrincipal(
conf.get(serverKey), serverAddr.getAddress());
if (confPrincipal == null || confPrincipal.isEmpty()) {
throw new IllegalArgumentException(
"Failed to specify server's Kerberos principal name");
// construct server advertised principal for comparison
String serverPrincipal = new KerberosPrincipal(
authType.getProtocol() + "/" + authType.getServerId()).getName();
boolean isPrincipalValid = false;
// use the pattern if defined
String serverKeyPattern = conf.get(serverKey + ".pattern");
if (serverKeyPattern != null && !serverKeyPattern.isEmpty()) {
Pattern pattern = GlobPattern.compile(serverKeyPattern);
isPrincipalValid = pattern.matcher(serverPrincipal).matches();
} else {
// check that the server advertised principal matches our conf
String confPrincipal = SecurityUtil.getServerPrincipal(
conf.get(serverKey), serverAddr.getAddress());
if (confPrincipal == null || confPrincipal.isEmpty()) {
throw new IllegalArgumentException(
"Failed to specify server's Kerberos principal name");
}
KerberosName name = new KerberosName(confPrincipal);
if (name.getHostName() == null) {
throw new IllegalArgumentException(
"Kerberos principal name does NOT have the expected hostname part: "
+ confPrincipal);
}
isPrincipalValid = serverPrincipal.equals(confPrincipal);
}
// ensure it looks like a host-based service principal
KerberosName name = new KerberosName(confPrincipal);
if (name.getHostName() == null) {
throw new IllegalArgumentException(
"Kerberos principal name does NOT have the expected hostname part: "
+ confPrincipal);
}
// check that the server advertised principal matches our conf
KerberosPrincipal serverPrincipal = new KerberosPrincipal(
authType.getProtocol() + "/" + authType.getServerId());
if (!serverPrincipal.getName().equals(confPrincipal)) {
if (!isPrincipalValid) {
throw new IllegalArgumentException(
"Server has invalid Kerberos principal: " + serverPrincipal);
}
return confPrincipal;
return serverPrincipal;
}
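For reference, the glob-based check introduced above can be exercised on its own. The snippet below is a minimal sketch: only GlobPattern and the "<serverKey>.pattern" convention come from this change; the principal and pattern values are hypothetical.

import java.util.regex.Pattern;
import org.apache.hadoop.fs.GlobPattern;

public class PrincipalPatternCheck {
  public static void main(String[] args) {
    // Principal as the server would advertise it, in "protocol/serverId"
    // form with the realm appended (both values here are made up).
    String advertised = "nn/nn1.example.com@EXAMPLE.COM";
    // Glob an operator could configure under "<serverKey>.pattern".
    Pattern pattern = GlobPattern.compile("nn/*@EXAMPLE.COM");
    // Prints "true": the advertised principal is accepted even though no
    // exact principal name is configured on the client.
    System.out.println(pattern.matcher(advertised).matches());
  }
}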
@ -312,8 +346,9 @@ public AuthMethod saslConnect(InputStream inS, OutputStream outS)
DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(
outS));
// redefined if/when a SASL negotiation completes
AuthMethod authMethod = AuthMethod.SIMPLE;
// redefined if/when a SASL negotiation starts, can be queried if the
// negotiation fails
authMethod = AuthMethod.SIMPLE;
sendSaslMessage(outStream, negotiateRequest);
@ -350,6 +385,7 @@ public AuthMethod saslConnect(InputStream inS, OutputStream outS)
case NEGOTIATE: {
// create a compatible SASL client, throws if no supported auths
SaslAuth saslAuthType = selectSaslClient(saslMessage.getAuthsList());
// define auth being attempted, caller can query if connect fails
authMethod = AuthMethod.valueOf(saslAuthType.getMethod());
byte[] responseToken = null;
@ -463,36 +499,139 @@ private RpcSaslProto.Builder createSaslReply(SaslState state,
return response;
}
/**
* Get a SASL wrapped InputStream. Can be called only after saslConnect() has
* been called.
*
* @param in
* the InputStream to wrap
* @return a SASL wrapped InputStream
* @throws IOException
*/
public InputStream getInputStream(InputStream in) throws IOException {
if (!saslClient.isComplete()) {
throw new IOException("Sasl authentication exchange hasn't completed yet");
}
return new SaslInputStream(in, saslClient);
private boolean useWrap() {
// getNegotiatedProperty throws if client isn't complete
String qop = (String) saslClient.getNegotiatedProperty(Sasl.QOP);
// SASL wrapping is only used if the connection has a QOP, and
// the value is not plain auth, e.g. auth-int or auth-conf
return qop != null && !"auth".equalsIgnoreCase(qop);
}
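As a quick sanity check of that rule, here is a tiny standalone sketch; the QOP strings are the standard SASL tokens (auth, auth-int, auth-conf) rather than anything specific to this class.

public class QopWrapCheck {
  // Mirrors useWrap(): wrapping is enabled only when a QOP other than
  // plain "auth" was negotiated.
  static boolean wouldWrap(String negotiatedQop) {
    return negotiatedQop != null && !"auth".equalsIgnoreCase(negotiatedQop);
  }

  public static void main(String[] args) {
    System.out.println(wouldWrap(null));        // false - nothing negotiated
    System.out.println(wouldWrap("auth"));      // false - authentication only
    System.out.println(wouldWrap("auth-int"));  // true  - integrity protection
    System.out.println(wouldWrap("auth-conf")); // true  - confidentiality
  }
}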
/**
* Get a SASL wrapped OutputStream. Can be called only after saslConnect() has
* been called.
* Get SASL wrapped InputStream if SASL QoP requires unwrapping,
* otherwise return original stream. Can be called only after
* saslConnect() has been called.
*
* @param out
* the OutputStream to wrap
* @return a SASL wrapped OutputStream
* @param in - InputStream used to make the connection
* @return InputStream that may be using SASL unwrap
* @throws IOException
*/
public InputStream getInputStream(InputStream in) throws IOException {
if (useWrap()) {
in = new WrappedInputStream(in);
}
return in;
}
/**
* Get SASL wrapped OutputStream if SASL QoP requires wrapping,
* otherwise return original stream. Can be called only after
* saslConnect() has been called.
*
@param out - OutputStream used to make the connection
@return OutputStream that may be using SASL wrap
* @throws IOException
*/
public OutputStream getOutputStream(OutputStream out) throws IOException {
if (!saslClient.isComplete()) {
throw new IOException("Sasl authentication exchange hasn't completed yet");
if (useWrap()) {
// the client and server negotiate a maximum buffer size that can be
// wrapped
String maxBuf = (String)saslClient.getNegotiatedProperty(Sasl.RAW_SEND_SIZE);
out = new BufferedOutputStream(new WrappedOutputStream(out),
Integer.parseInt(maxBuf));
}
return new SaslOutputStream(out, saslClient);
return out;
}
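Putting the pieces together, a caller is expected to negotiate first and then ask for the (possibly) wrapped streams. The sketch below shows that sequence; it assumes the raw socket streams, UGI, protocol class and Configuration are already available, and is only an illustration of the call order.

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SaslRpcClient;
import org.apache.hadoop.security.UserGroupInformation;

public class SaslStreamSetup {
  static void setupSaslStreams(UserGroupInformation ugi, Class<?> protocol,
      InetSocketAddress addr, Configuration conf,
      InputStream rawIn, OutputStream rawOut) throws IOException {
    SaslRpcClient sasl = new SaslRpcClient(ugi, protocol, addr, conf);
    // Run the SASL exchange; on failure the caller can consult
    // getAuthMethod() to decide whether a kerberos relogin is worthwhile.
    sasl.saslConnect(rawIn, rawOut);
    // The raw streams come back untouched unless the negotiated QOP is
    // auth-int or auth-conf, in which case every RPC is wrapped/unwrapped.
    InputStream in = sasl.getInputStream(rawIn);
    OutputStream out = sasl.getOutputStream(rawOut);
    // ... hand 'in' and 'out' to the RPC connection as usual ...
  }
}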
// ideally this should be folded into the RPC decoding loop but it's
// currently split across Client and SaslRpcClient...
class WrappedInputStream extends FilterInputStream {
private ByteBuffer unwrappedRpcBuffer = ByteBuffer.allocate(0);
public WrappedInputStream(InputStream in) throws IOException {
super(in);
}
@Override
public int read() throws IOException {
byte[] b = new byte[1];
int n = read(b, 0, 1);
return (n != -1) ? b[0] : -1;
}
@Override
public int read(byte b[]) throws IOException {
return read(b, 0, b.length);
}
@Override
public int read(byte[] buf, int off, int len) throws IOException {
synchronized(unwrappedRpcBuffer) {
// fill the buffer with the next RPC message
if (unwrappedRpcBuffer.remaining() == 0) {
readNextRpcPacket();
}
// satisfy as much of the request as possible
int readLen = Math.min(len, unwrappedRpcBuffer.remaining());
unwrappedRpcBuffer.get(buf, off, readLen);
return readLen;
}
}
// all messages must be RPC SASL wrapped, else an exception is thrown
private void readNextRpcPacket() throws IOException {
LOG.debug("reading next wrapped RPC packet");
DataInputStream dis = new DataInputStream(in);
int rpcLen = dis.readInt();
byte[] rpcBuf = new byte[rpcLen];
dis.readFully(rpcBuf);
// decode the RPC header
ByteArrayInputStream bis = new ByteArrayInputStream(rpcBuf);
RpcResponseHeaderProto.Builder headerBuilder =
RpcResponseHeaderProto.newBuilder();
headerBuilder.mergeDelimitedFrom(bis);
boolean isWrapped = false;
// Must be SASL wrapped, verify and decode.
if (headerBuilder.getCallId() == AuthProtocol.SASL.callId) {
RpcSaslProto.Builder saslMessage = RpcSaslProto.newBuilder();
saslMessage.mergeDelimitedFrom(bis);
if (saslMessage.getState() == SaslState.WRAP) {
isWrapped = true;
byte[] token = saslMessage.getToken().toByteArray();
if (LOG.isDebugEnabled()) {
LOG.debug("unwrapping token of length:" + token.length);
}
token = saslClient.unwrap(token, 0, token.length);
unwrappedRpcBuffer = ByteBuffer.wrap(token);
}
}
if (!isWrapped) {
throw new SaslException("Server sent non-wrapped response");
}
}
}
class WrappedOutputStream extends FilterOutputStream {
public WrappedOutputStream(OutputStream out) throws IOException {
super(out);
}
@Override
public void write(byte[] buf, int off, int len) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("wrapping token of length:" + len);
}
buf = saslClient.wrap(buf, off, len);
RpcSaslProto saslMessage = RpcSaslProto.newBuilder()
.setState(SaslState.WRAP)
.setToken(ByteString.copyFrom(buf, 0, buf.length))
.build();
RpcRequestMessageWrapper request =
new RpcRequestMessageWrapper(saslHeader, saslMessage);
DataOutputStream dob = new DataOutputStream(out);
dob.writeInt(request.getLength());
request.write(dob);
}
}
/** Release resources used by wrapped saslClient */

View File

@ -47,7 +47,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.Server.Connection;
import org.apache.hadoop.security.authentication.util.KerberosName;
import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
@ -104,12 +103,12 @@ public SaslRpcServer(AuthMethod authMethod) throws IOException {
String fullName = UserGroupInformation.getCurrentUser().getUserName();
if (LOG.isDebugEnabled())
LOG.debug("Kerberos principal name is " + fullName);
KerberosName krbName = new KerberosName(fullName);
serverId = krbName.getHostName();
if (serverId == null) {
serverId = "";
}
protocol = krbName.getServiceName();
// don't use KerberosName because we don't want auth_to_local
String[] parts = fullName.split("[/@]", 2);
protocol = parts[0];
// should verify service host is present here rather than in create()
// but lazy tests are using a UGI that isn't a SPN...
serverId = (parts.length < 2) ? "" : parts[1];
break;
}
default:

View File

@ -123,6 +123,12 @@ public static String[] getSymlinkCommand(String target, String link) {
: new String[] { "ln", "-s", target, link };
}
/** Return a command to read the target of a symbolic link. */
public static String[] getReadlinkCommand(String link) {
return WINDOWS ? new String[] { WINUTILS, "readlink", link }
: new String[] { "readlink", link };
}
/** Return a command for determining if process with specified pid is alive. */
public static String[] getCheckProcessIsAliveCommand(String pid) {
return Shell.WINDOWS ?

View File

@ -72,6 +72,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4.c" />
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4hc.c" />
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\Lz4Compressor.c" />
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\Lz4Decompressor.c" />
<ClCompile Include="src\org\apache\hadoop\io\nativeio\file_descriptor.c" />

View File

@ -51,6 +51,9 @@
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4hc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\Lz4Compressor.c">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -23,21 +23,9 @@
#ifdef UNIX
#include "config.h"
#endif // UNIX
#include "lz4.h"
#include "lz4hc.h"
//****************************
// Simple Functions
//****************************
extern int LZ4_compress (const char* source, char* dest, int isize);
/*
LZ4_compress() :
return : the number of bytes in compressed buffer dest
note : destination buffer must be already allocated.
To avoid any problem, size it to handle worst cases situations (input data not compressible)
Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.
*/
static jfieldID Lz4Compressor_clazz;
static jfieldID Lz4Compressor_uncompressedDirectBuf;
@ -107,5 +95,45 @@ JNIEXPORT jstring JNICALL
Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_getLibraryName(
JNIEnv *env, jclass class
) {
return (*env)->NewStringUTF(env, "revision:43");
return (*env)->NewStringUTF(env, "revision:99");
}
JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_compressBytesDirectHC
(JNIEnv *env, jobject thisj){
const char* uncompressed_bytes = NULL;
char* compressed_bytes = NULL;
// Get members of Lz4Compressor
jobject clazz = (*env)->GetStaticObjectField(env, thisj, Lz4Compressor_clazz);
jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_uncompressedDirectBuf);
jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen);
jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_compressedDirectBuf);
jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_directBufferSize);
// Get the input direct buffer
LOCK_CLASS(env, clazz, "Lz4Compressor");
uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf);
UNLOCK_CLASS(env, clazz, "Lz4Compressor");
if (uncompressed_bytes == 0) {
return (jint)0;
}
// Get the output direct buffer
LOCK_CLASS(env, clazz, "Lz4Compressor");
compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf);
UNLOCK_CLASS(env, clazz, "Lz4Compressor");
if (compressed_bytes == 0) {
return (jint)0;
}
compressed_direct_buf_len = LZ4_compressHC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len);
if (compressed_direct_buf_len < 0){
THROW(env, "java/lang/InternalError", "LZ4_compressHC failed");
}
(*env)->SetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen, 0);
return (jint)compressed_direct_buf_len;
}
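At the Java layer this new HC entry point is reached through the LZ4 codec. The sketch below shows a round trip through Lz4Codec, assuming the native hadoop library with LZ4 support is loaded; the exact configuration key used to switch on the HC compressor is an assumption here, not something stated in this diff.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Lz4Codec;

public class Lz4RoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed key name: asks the codec for the LZ4_compressHC path
    // added above instead of the default fast compressor.
    conf.setBoolean("io.compression.codec.lz4.use.lz4hc", true);

    Lz4Codec codec = new Lz4Codec();
    codec.setConf(conf);

    byte[] input = "a fairly repetitive payload a fairly repetitive payload".getBytes("UTF-8");

    // Compress into an in-memory buffer.
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    CompressionOutputStream out = codec.createOutputStream(compressed);
    out.write(input);
    out.close();

    // Decompress and print the original text back.
    CompressionInputStream in =
        codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    IOUtils.copyBytes(in, restored, 4096, true);
    System.out.println(new String(restored.toByteArray(), "UTF-8"));
  }
}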

View File

@ -22,18 +22,7 @@
#ifdef UNIX
#include "config.h"
#endif // UNIX
int LZ4_uncompress_unknownOutputSize(const char* source, char* dest, int isize, int maxOutputSize);
/*
LZ4_uncompress_unknownOutputSize() :
isize : is the input size, therefore the compressed size
maxOutputSize : is the size of the destination buffer (which must be already allocated)
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
This version never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
note : This version is a bit slower than LZ4_uncompress
*/
#include "lz4.h"
static jfieldID Lz4Decompressor_clazz;
@ -89,7 +78,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Decompressor_de
return (jint)0;
}
uncompressed_direct_buf_len = LZ4_uncompress_unknownOutputSize(compressed_bytes, uncompressed_bytes, compressed_direct_buf_len, uncompressed_direct_buf_len);
uncompressed_direct_buf_len = LZ4_decompress_safe(compressed_bytes, uncompressed_bytes, compressed_direct_buf_len, uncompressed_direct_buf_len);
if (uncompressed_direct_buf_len < 0) {
THROW(env, "java/lang/InternalError", "LZ4_decompress_safe failed.");
}

View File

@ -0,0 +1,179 @@
/*
LZ4 - Fast LZ compression algorithm
Header File
Copyright (C) 2011-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/
*/
/*
Copied from:
URL: http://lz4.googlecode.com/svn/trunk
Repository Root: http://lz4.googlecode.com/svn
Repository UUID: 650e7d94-2a16-8b24-b05c-7c0b3f6821cd
Revision: 99
Node Kind: directory
Schedule: normal
Last Changed Author: yann.collet.73@gmail.com
Last Changed Rev: 99
Last Changed Date: 2013-07-27 19:19:31 +0800 (Sat, 27 Jul 2013)
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
//**************************************
// Compiler Options
//**************************************
#if defined(_MSC_VER) && !defined(__cplusplus) // Visual Studio
# define inline __inline // Visual C is not C99, but supports some kind of inline
#endif
//****************************
// Simple Functions
//****************************
int LZ4_compress (const char* source, char* dest, int inputSize);
int LZ4_decompress_safe (const char* source, char* dest, int inputSize, int maxOutputSize);
/*
LZ4_compress() :
Compresses 'inputSize' bytes from 'source' into 'dest'.
Destination buffer must be already allocated,
and must be sized to handle worst cases situations (input data not compressible)
Worst case size evaluation is provided by function LZ4_compressBound()
inputSize : Max supported value is ~1.9GB
return : the number of bytes written in buffer dest
or 0 if the compression fails
LZ4_decompress_safe() :
maxOutputSize : is the size of the destination buffer (which must be already allocated)
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
If the source stream is detected malformed, the function will stop decoding and return a negative result.
This function is protected against buffer overflow exploits (never writes outside of output buffer, and never reads outside of input buffer). Therefore, it is protected against malicious data packets
*/
//****************************
// Advanced Functions
//****************************
static inline int LZ4_compressBound(int isize) { return ((isize) + ((isize)/255) + 16); }
#define LZ4_COMPRESSBOUND( isize) ((isize) + ((isize)/255) + 16)
/*
LZ4_compressBound() :
Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible)
primarily useful for memory allocation of output buffer.
inline function is recommended for the general case,
macro is also provided when result needs to be evaluated at compilation (such as table size allocation).
isize : is the input size. Max supported value is ~1.9GB
return : maximum output size in a "worst case" scenario
note : this function is limited by "int" range (2^31-1)
*/
int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
/*
LZ4_compress_limitedOutput() :
Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
If it cannot achieve it, compression will stop, and result of the function will be zero.
This function never writes outside of provided output buffer.
inputSize : Max supported value is ~1.9GB
maxOutputSize : is the size of the destination buffer (which must be already allocated)
return : the number of bytes written in buffer 'dest'
or 0 if the compression fails
*/
int LZ4_decompress_fast (const char* source, char* dest, int outputSize);
/*
LZ4_decompress_fast() :
outputSize : is the original (uncompressed) size
return : the number of bytes read from the source buffer (in other words, the compressed size)
If the source stream is malformed, the function will stop decoding and return a negative result.
note : This function is a bit faster than LZ4_decompress_safe()
This function never writes outside of output buffers, and never read before input buffer, but may read beyond input buffer (since it doesn't know its size) in case of malicious data packet.
Use this function preferably into a trusted environment (data to decode comes from a trusted source).
Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes.
*/
int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize);
/*
LZ4_decompress_safe_partial() :
This function decompress a compressed block of size 'inputSize' at position 'source'
into output buffer 'dest' of size 'maxOutputSize'.
The function stops decompressing operation as soon as 'targetOutputSize' has been reached,
reducing decompression time.
return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
Always control how many bytes were decoded.
If the source stream is malformed, the function will stop decoding and return a negative result.
This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
*/
int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize);
int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize);
/*
*_withPrefix64k() :
These decoding functions work the same as their "normal name" versions,
but will potentially use up to 64KB of data in front of 'char* dest'.
These functions are used for decoding inter-dependant blocks.
*/
//****************************
// Obsolete Functions
//****************************
static inline int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
static inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
/*
These functions are deprecated and should no longer be used.
They are provided here for compatibility with existing user programs.
*/
#if defined (__cplusplus)
}
#endif

View File

@ -0,0 +1,258 @@
/*
LZ4 Encoder - Part of LZ4 compression algorithm
Copyright (C) 2011-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/
*/
/* lz4_encoder.h must be included into lz4.c
The objective of this file is to create a single LZ4 compression function source
which will be instantiated multiple times with minor variations
depending on a set of #define.
*/
//****************************
// Check required defines
//****************************
#ifndef FUNCTION_NAME
# error "FUNCTION_NAME is not defined"
#endif
//****************************
// Local definitions
//****************************
#ifdef COMPRESS_64K
# define HASHLOG (MEMORY_USAGE-1)
# define CURRENT_H_TYPE U16
# define CURRENTBASE(base) const BYTE* const base = ip
#else
# define HASHLOG (MEMORY_USAGE-2)
# define CURRENT_H_TYPE HTYPE
# define CURRENTBASE(base) INITBASE(base)
#endif
#define HASHTABLE_NBCELLS (1U<<HASHLOG)
#define LZ4_HASH(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG))
#define LZ4_HASHVALUE(p) LZ4_HASH(A32(p))
//****************************
// Function code
//****************************
int FUNCTION_NAME(
#ifdef USE_HEAPMEMORY
void* ctx,
#endif
const char* source,
char* dest,
int inputSize
#ifdef LIMITED_OUTPUT
,int maxOutputSize
#endif
)
{
#ifdef USE_HEAPMEMORY
CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx;
#else
CURRENT_H_TYPE HashTable[HASHTABLE_NBCELLS] = {0};
#endif
const BYTE* ip = (BYTE*) source;
CURRENTBASE(base);
const BYTE* anchor = ip;
const BYTE* const iend = ip + inputSize;
const BYTE* const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)
BYTE* op = (BYTE*) dest;
#ifdef LIMITED_OUTPUT
BYTE* const oend = op + maxOutputSize;
#endif
int length;
const int skipStrength = SKIPSTRENGTH;
U32 forwardH;
// Init
if (inputSize<MINLENGTH) goto _last_literals;
#ifdef COMPRESS_64K
if (inputSize>=LZ4_64KLIMIT) return 0; // Size too large (not within 64K limit)
#endif
#ifdef USE_HEAPMEMORY
memset((void*)HashTable, 0, HASHTABLESIZE);
#endif
// First Byte
HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
ip++; forwardH = LZ4_HASHVALUE(ip);
// Main Loop
for ( ; ; )
{
int findMatchAttempts = (1U << skipStrength) + 3;
const BYTE* forwardIp = ip;
const BYTE* ref;
BYTE* token;
// Find a match
do {
U32 h = forwardH;
int step = findMatchAttempts++ >> skipStrength;
ip = forwardIp;
forwardIp = ip + step;
if unlikely(forwardIp > mflimit) { goto _last_literals; }
forwardH = LZ4_HASHVALUE(forwardIp);
ref = base + HashTable[h];
HashTable[h] = (CURRENT_H_TYPE)(ip - base);
} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
// Catch up
while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
// Encode Literal length
length = (int)(ip - anchor);
token = op++;
#ifdef LIMITED_OUTPUT
if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit
#endif
if (length>=(int)RUN_MASK)
{
int len = length-RUN_MASK;
*token=(RUN_MASK<<ML_BITS);
for(; len >= 255 ; len-=255) *op++ = 255;
*op++ = (BYTE)len;
}
else *token = (BYTE)(length<<ML_BITS);
// Copy Literals
LZ4_BLINDCOPY(anchor, op, length);
_next_match:
// Encode Offset
LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
// Start Counting
ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified
anchor = ip;
while likely(ip<matchlimit-(STEPSIZE-1))
{
size_t diff = AARCH(ref) ^ AARCH(ip);
if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
ip += LZ4_NbCommonBytes(diff);
goto _endCount;
}
if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
if ((ip<matchlimit) && (*ref == *ip)) ip++;
_endCount:
// Encode MatchLength
length = (int)(ip - anchor);
#ifdef LIMITED_OUTPUT
if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit
#endif
if (length>=(int)ML_MASK)
{
*token += ML_MASK;
length -= ML_MASK;
for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; }
if (length >= 255) { length-=255; *op++ = 255; }
*op++ = (BYTE)length;
}
else *token += (BYTE)(length);
// Test end of chunk
if (ip > mflimit) { anchor = ip; break; }
// Fill table
HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base);
// Test next position
ref = base + HashTable[LZ4_HASHVALUE(ip)];
HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base);
if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
// Prepare next loop
anchor = ip++;
forwardH = LZ4_HASHVALUE(ip);
}
_last_literals:
// Encode Last Literals
{
int lastRun = (int)(iend - anchor);
#ifdef LIMITED_OUTPUT
if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit
#endif
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
else *op++ = (BYTE)(lastRun<<ML_BITS);
memcpy(op, anchor, iend - anchor);
op += iend-anchor;
}
// End
return (int) (((char*)op)-dest);
}
//****************************
// Clean defines
//****************************
// Required defines
#undef FUNCTION_NAME
// Locally Generated
#undef HASHLOG
#undef HASHTABLE_NBCELLS
#undef LZ4_HASH
#undef LZ4_HASHVALUE
#undef CURRENT_H_TYPE
#undef CURRENTBASE
// Optional defines
#ifdef LIMITED_OUTPUT
#undef LIMITED_OUTPUT
#endif
#ifdef USE_HEAPMEMORY
#undef USE_HEAPMEMORY
#endif

View File

@ -0,0 +1,584 @@
/*
LZ4 HC - High Compression Mode of LZ4
Copyright (C) 2011-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/
*/
/*
Note : this source file requires "lz4hc_encoder.h"
*/
//**************************************
// Memory routines
//**************************************
#include <stdlib.h> // calloc, free
#define ALLOCATOR(s) calloc(1,s)
#define FREEMEM free
#include <string.h> // memset, memcpy
#define MEM_INIT memset
//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
|| defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
|| defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \
|| defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode
# define LZ4_ARCH64 1
#else
# define LZ4_ARCH64 0
#endif
// Little Endian or Big Endian ?
// Overwrite the #define below if you know your architecture endianess
#if defined (__GLIBC__)
# include <endian.h>
# if (__BYTE_ORDER == __BIG_ENDIAN)
# define LZ4_BIG_ENDIAN 1
# endif
#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
# define LZ4_BIG_ENDIAN 1
#elif defined(__sparc) || defined(__sparc__) \
|| defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
|| defined(__hpux) || defined(__hppa) \
|| defined(_MIPSEB) || defined(__s390__)
# define LZ4_BIG_ENDIAN 1
#else
// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
#endif
// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
#if defined(__ARM_FEATURE_UNALIGNED)
# define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif
// Define this parameter if your target system or compiler does not support hardware bit count
#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count
# define LZ4_FORCE_SW_BITCOUNT
#endif
//**************************************
// Compiler Options
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
/* "restrict" is a known keyword */
#else
# define restrict // Disable restrict
#endif
#ifdef _MSC_VER // Visual Studio
# define forceinline static __forceinline
# include <intrin.h> // For Visual 2005
# if LZ4_ARCH64 // 64-bits
# pragma intrinsic(_BitScanForward64) // For Visual 2005
# pragma intrinsic(_BitScanReverse64) // For Visual 2005
# else // 32-bits
# pragma intrinsic(_BitScanForward) // For Visual 2005
# pragma intrinsic(_BitScanReverse) // For Visual 2005
# endif
# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
# pragma warning(disable : 4701) // disable: C4701: potentially uninitialized local variable used
#else
# ifdef __GNUC__
# define forceinline static inline __attribute__((always_inline))
# else
# define forceinline static inline
# endif
#endif
#ifdef _MSC_VER // Visual Studio
# define lz4_bswap16(x) _byteswap_ushort(x)
#else
# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif
//**************************************
// Includes
//**************************************
#include "lz4hc.h"
#include "lz4.h"
//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
# define _PACKED __attribute__ ((packed))
#else
# define _PACKED
#endif
#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# ifdef __IBMC__
# pragma pack(1)
# else
# pragma pack(push, 1)
# endif
#endif
typedef struct _U16_S { U16 v; } _PACKED U16_S;
typedef struct _U32_S { U32 v; } _PACKED U32_S;
typedef struct _U64_S { U64 v; } _PACKED U64_S;
#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# pragma pack(pop)
#endif
#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)
//**************************************
// Constants
//**************************************
#define MINMATCH 4
#define DICTIONARY_LOGSIZE 16
#define MAXD (1<<DICTIONARY_LOGSIZE)
#define MAXD_MASK ((U32)(MAXD - 1))
#define MAX_DISTANCE (MAXD - 1)
#define HASH_LOG (DICTIONARY_LOGSIZE-1)
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)
#define MAX_NB_ATTEMPTS 256
#define ML_BITS 4
#define ML_MASK (size_t)((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
#define MINLENGTH (MFLIMIT+1)
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
#define KB *(1U<<10)
#define MB *(1U<<20)
#define GB *(1U<<30)
//**************************************
// Architecture-specific macros
//**************************************
#if LZ4_ARCH64 // 64-bit
# define STEPSIZE 8
# define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
# define UARCH U64
# define AARCH A64
# define HTYPE U32
# define INITBASE(b,s) const BYTE* const b = s
#else // 32-bit
# define STEPSIZE 4
# define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
# define UARCH U32
# define AARCH A32
//# define HTYPE const BYTE*
//# define INITBASE(b,s) const int b = 0
# define HTYPE U32
# define INITBASE(b,s) const BYTE* const b = s
#endif
#if defined(LZ4_BIG_ENDIAN)
# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
# define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
#else // Little Endian
# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
#endif
//************************************************************
// Local Types
//************************************************************
typedef struct
{
const BYTE* inputBuffer;
const BYTE* base;
const BYTE* end;
HTYPE hashTable[HASHTABLESIZE];
U16 chainTable[MAXD];
const BYTE* nextToUpdate;
} LZ4HC_Data_Structure;
//**************************************
// Macros
//**************************************
#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
#define HASH_VALUE(p) HASH_FUNCTION(A32(p))
#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base)
#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK]
#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p))
//**************************************
// Private functions
//**************************************
#if LZ4_ARCH64
forceinline int LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
unsigned long r = 0;
_BitScanReverse64( &r, val );
return (int)(r>>3);
# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_clzll(val) >> 3);
# else
int r;
if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
# endif
#else
# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
unsigned long r = 0;
_BitScanForward64( &r, val );
return (int)(r>>3);
# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctzll(val) >> 3);
# else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
# endif
#endif
}
#else
forceinline int LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
unsigned long r;
_BitScanReverse( &r, val );
return (int)(r>>3);
# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_clz(val) >> 3);
# else
int r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
#else
# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
unsigned long r;
_BitScanForward( &r, val );
return (int)(r>>3);
# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
return (__builtin_ctz(val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
#endif
}
#endif
forceinline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
{
MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
hc4->nextToUpdate = base + 1;
hc4->base = base;
hc4->inputBuffer = base;
hc4->end = base;
return 1;
}
void* LZ4_createHC (const char* slidingInputBuffer)
{
void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
LZ4_InitHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)slidingInputBuffer);
return hc4;
}
int LZ4_freeHC (void* LZ4HC_Data)
{
FREEMEM(LZ4HC_Data);
return (0);
}
// Update chains up to ip (excluded)
forceinline void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
{
U16* chainTable = hc4->chainTable;
HTYPE* HashTable = hc4->hashTable;
INITBASE(base,hc4->base);
while(hc4->nextToUpdate < ip)
{
const BYTE* const p = hc4->nextToUpdate;
size_t delta = (p) - HASH_POINTER(p);
if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
DELTANEXT(p) = (U16)delta;
HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base);
hc4->nextToUpdate++;
}
}
char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
{
LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB;
distance = (distance >> 16) << 16; // Must be a multiple of 64 KB
LZ4HC_Insert(hc4, hc4->end - MINMATCH);
memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB);
hc4->nextToUpdate -= distance;
hc4->base -= distance;
if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) // Avoid overflow
{
int i;
hc4->base += 1 GB;
for (i=0; i<HASHTABLESIZE; i++) hc4->hashTable[i] -= 1 GB;
}
hc4->end -= distance;
return (char*)(hc4->end);
}
forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
{
const BYTE* p1t = p1;
while (p1t<matchlimit-(STEPSIZE-1))
{
UARCH diff = AARCH(p2) ^ AARCH(p1t);
if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
p1t += LZ4_NbCommonBytes(diff);
return (p1t - p1);
}
if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
return (p1t - p1);
}
forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
{
U16* const chainTable = hc4->chainTable;
HTYPE* const HashTable = hc4->hashTable;
const BYTE* ref;
INITBASE(base,hc4->base);
int nbAttempts=MAX_NB_ATTEMPTS;
size_t repl=0, ml=0;
U16 delta=0; // useless assignment, to remove an uninitialization warning
// HC4 match finder
LZ4HC_Insert(hc4, ip);
ref = HASH_POINTER(ip);
#define REPEAT_OPTIMIZATION
#ifdef REPEAT_OPTIMIZATION
// Detect repetitive sequences of length <= 4
if ((U32)(ip-ref) <= 4) // potential repetition
{
if (A32(ref) == A32(ip)) // confirmed
{
delta = (U16)(ip-ref);
repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
*matchpos = ref;
}
ref = GETNEXT(ref);
}
#endif
while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
{
nbAttempts--;
if (*(ref+ml) == *(ip+ml))
if (A32(ref) == A32(ip))
{
size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
if (mlt > ml) { ml = mlt; *matchpos = ref; }
}
ref = GETNEXT(ref);
}
#ifdef REPEAT_OPTIMIZATION
// Complete table
if (repl)
{
const BYTE* ptr = ip;
const BYTE* end;
end = ip + repl - (MINMATCH-1);
while(ptr < end-delta)
{
DELTANEXT(ptr) = delta; // Pre-Load
ptr++;
}
do
{
DELTANEXT(ptr) = delta;
HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); // Head of chain
ptr++;
} while(ptr < end);
hc4->nextToUpdate = end;
}
#endif
return (int)ml;
}
forceinline int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
{
U16* const chainTable = hc4->chainTable;
HTYPE* const HashTable = hc4->hashTable;
INITBASE(base,hc4->base);
const BYTE* ref;
int nbAttempts = MAX_NB_ATTEMPTS;
int delta = (int)(ip-startLimit);
// First Match
LZ4HC_Insert(hc4, ip);
ref = HASH_POINTER(ip);
while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
{
nbAttempts--;
if (*(startLimit + longest) == *(ref - delta + longest))
if (A32(ref) == A32(ip))
{
#if 1
const BYTE* reft = ref+MINMATCH;
const BYTE* ipt = ip+MINMATCH;
const BYTE* startt = ip;
while (ipt<matchlimit-(STEPSIZE-1))
{
UARCH diff = AARCH(reft) ^ AARCH(ipt);
if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
ipt += LZ4_NbCommonBytes(diff);
goto _endCount;
}
if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
_endCount:
reft = ref;
#else
// Easier for code maintenance, but unfortunately slower too
const BYTE* startt = ip;
const BYTE* reft = ref;
const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
#endif
while ((startt>startLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;}
if ((ipt-startt) > longest)
{
longest = (int)(ipt-startt);
*matchpos = reft;
*startpos = startt;
}
}
ref = GETNEXT(ref);
}
return longest;
}
//**************************************
// Compression functions
//**************************************
/*
int LZ4_compressHC(
const char* source,
char* dest,
int inputSize)
Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
return : the number of bytes written in buffer 'dest'
*/
#define FUNCTION_NAME LZ4_compressHC
#include "lz4hc_encoder.h"
/*
int LZ4_compressHC_limitedOutput(
const char* source,
char* dest,
int inputSize,
int maxOutputSize)
Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
If it cannot achieve it, compression will stop, and result of the function will be zero.
return : the number of bytes written in buffer 'dest', or 0 if the compression fails
*/
#define FUNCTION_NAME LZ4_compressHC_limitedOutput
#define LIMITED_OUTPUT
#include "lz4hc_encoder.h"

View File

@ -0,0 +1,111 @@
/*
LZ4 HC - High Compression Mode of LZ4
Header File
Copyright (C) 2011-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
int LZ4_compressHC (const char* source, char* dest, int inputSize);
/*
LZ4_compressHC :
return : the number of bytes in compressed buffer dest
or 0 if compression fails.
note : destination buffer must be already allocated.
To avoid any problem, size it to handle worst cases situations (input data not compressible)
Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
*/
int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
/*
LZ4_compress_limitedOutput() :
Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
If it cannot achieve it, compression will stop, and result of the function will be zero.
This function never writes outside of provided output buffer.
inputSize : Max supported value is 1 GB
maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated)
return : the number of output bytes written in buffer 'dest'
or 0 if compression fails.
*/
/* Note :
Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
*/
/* Advanced Functions */
void* LZ4_createHC (const char* slidingInputBuffer);
int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize);
int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize);
char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
int LZ4_freeHC (void* LZ4HC_Data);
/*
These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks.
In order to achieve this, it is necessary to start creating the LZ4HC Data Structure, thanks to the function :
void* LZ4_createHC (const char* slidingInputBuffer);
The result of the function is the (void*) pointer on the LZ4HC Data Structure.
This pointer will be needed in all other functions.
If the pointer returned is NULL, then the allocation has failed, and compression must be aborted.
The only parameter 'const char* slidingInputBuffer' must, obviously, point at the beginning of input buffer.
The input buffer must be already allocated, and sized at least 192KB.
'slidingInputBuffer' will also be the 'const char* source' of the first block.
All blocks are expected to lie next to each other within the input buffer, starting from 'slidingInputBuffer'.
To compress each block, use either LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue().
Their behavior is identical to LZ4_compressHC() or LZ4_compressHC_limitedOutput(),
but require the LZ4HC Data Structure as their first argument, and check that each block starts right after the previous one.
If next block does not begin immediately after the previous one, the compression will fail (return 0).
When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to :
char* LZ4_slideInputBufferHC(void* LZ4HC_Data);
must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer.
Note that, for this function to work properly, minimum size of an input buffer must be 192KB.
==> The memory position where the next input data block must start is provided as the result of the function.
Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual.
When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure.
*/
#if defined (__cplusplus)
}
#endif

View File

@ -0,0 +1,349 @@
/*
LZ4 HC Encoder - Part of LZ4 HC algorithm
Copyright (C) 2011-2013, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- LZ4 source repository : http://code.google.com/p/lz4/
*/
/* lz4hc_encoder.h must be included into lz4hc.c
The objective of this file is to create a single LZ4 compression function source
which will be instantiated multiple times with minor variations
depending on a set of #define.
*/
//****************************
// Check required defines
//****************************
#ifndef FUNCTION_NAME
# error "FUNCTION_NAME is not defined"
#endif
//****************************
// Local definitions
//****************************
#define COMBINED_NAME_RAW(n1,n2) n1 ## n2
#define COMBINED_NAME(n1,n2) COMBINED_NAME_RAW(n1,n2)
#define ENCODE_SEQUENCE_NAME COMBINED_NAME(FUNCTION_NAME,_encodeSequence)
#ifdef LIMITED_OUTPUT
# define ENCODE_SEQUENCE(i,o,a,m,r,d) if (ENCODE_SEQUENCE_NAME(i,o,a,m,r,d)) return 0;
#else
# define ENCODE_SEQUENCE(i,o,a,m,r,d) ENCODE_SEQUENCE_NAME(i,o,a,m,r)
#endif
//****************************
// Function code
//****************************
forceinline int ENCODE_SEQUENCE_NAME (
const BYTE** ip,
BYTE** op,
const BYTE** anchor,
int matchLength,
const BYTE* ref
#ifdef LIMITED_OUTPUT
,BYTE* oend
#endif
)
{
int length, len;
BYTE* token;
// Encode Literal length
length = (int)(*ip - *anchor);
token = (*op)++;
#ifdef LIMITED_OUTPUT
if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; // Check output limit
#endif
if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
else *token = (BYTE)(length<<ML_BITS);
// Copy Literals
LZ4_BLINDCOPY(*anchor, *op, length);
// Encode Offset
LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
// Encode MatchLength
length = (int)(matchLength-MINMATCH);
#ifdef LIMITED_OUTPUT
if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; // Check output limit
#endif
if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
else *token += (BYTE)(length);
// Prepare next loop
*ip += matchLength;
*anchor = *ip;
return 0;
}
int COMBINED_NAME(FUNCTION_NAME,_continue) (
void* ctxvoid,
const char* source,
char* dest,
int inputSize
#ifdef LIMITED_OUTPUT
,int maxOutputSize
#endif
)
{
LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
const BYTE* ip = (const BYTE*) source;
const BYTE* anchor = ip;
const BYTE* const iend = ip + inputSize;
const BYTE* const mflimit = iend - MFLIMIT;
const BYTE* const matchlimit = (iend - LASTLITERALS);
BYTE* op = (BYTE*) dest;
#ifdef LIMITED_OUTPUT
BYTE* const oend = op + maxOutputSize;
#endif
int ml, ml2, ml3, ml0;
const BYTE* ref=NULL;
const BYTE* start2=NULL;
const BYTE* ref2=NULL;
const BYTE* start3=NULL;
const BYTE* ref3=NULL;
const BYTE* start0;
const BYTE* ref0;
// Ensure blocks follow each other
if (ip != ctx->end) return 0;
ctx->end += inputSize;
ip++;
// Main Loop
while (ip < mflimit)
{
ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
if (!ml) { ip++; continue; }
// saved, in case we would skip too much
start0 = ip;
ref0 = ref;
ml0 = ml;
_Search2:
if (ip+ml < mflimit)
ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
else ml2 = ml;
if (ml2 == ml) // No better match
{
ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
continue;
}
if (start0 < ip)
{
if (start2 < ip + ml0) // empirical
{
ip = start0;
ref = ref0;
ml = ml0;
}
}
// Here, start0==ip
if ((start2 - ip) < 3) // First Match too small : removed
{
ml = ml2;
ip = start2;
ref =ref2;
goto _Search2;
}
_Search3:
// Currently we have :
// ml2 > ml1, and
// ip1+3 <= ip2 (usually < ip1+ml1)
if ((start2 - ip) < OPTIMAL_ML)
{
int correction;
int new_ml = ml;
if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = new_ml - (int)(start2 - ip);
if (correction > 0)
{
start2 += correction;
ref2 += correction;
ml2 -= correction;
}
}
// Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
if (start2 + ml2 < mflimit)
ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
else ml3 = ml2;
if (ml3 == ml2) // No better match : 2 sequences to encode
{
// ip & ref are known; Now for ml
if (start2 < ip+ml) ml = (int)(start2 - ip);
// Now, encode 2 sequences
ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
ip = start2;
ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend);
continue;
}
if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
{
if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
{
if (start2 < ip+ml)
{
int correction = (int)(ip+ml - start2);
start2 += correction;
ref2 += correction;
ml2 -= correction;
if (ml2 < MINMATCH)
{
start2 = start3;
ref2 = ref3;
ml2 = ml3;
}
}
ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
ip = start3;
ref = ref3;
ml = ml3;
start0 = start2;
ref0 = ref2;
ml0 = ml2;
goto _Search2;
}
start2 = start3;
ref2 = ref3;
ml2 = ml3;
goto _Search3;
}
// OK, now we have 3 ascending matches; let's write at least the first one
// ip & ref are known; Now for ml
if (start2 < ip+ml)
{
if ((start2 - ip) < (int)ML_MASK)
{
int correction;
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
correction = ml - (int)(start2 - ip);
if (correction > 0)
{
start2 += correction;
ref2 += correction;
ml2 -= correction;
}
}
else
{
ml = (int)(start2 - ip);
}
}
ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
ip = start2;
ref = ref2;
ml = ml2;
start2 = start3;
ref2 = ref3;
ml2 = ml3;
goto _Search3;
}
// Encode Last Literals
{
int lastRun = (int)(iend - anchor);
#ifdef LIMITED_OUTPUT
if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit
#endif
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
else *op++ = (BYTE)(lastRun<<ML_BITS);
memcpy(op, anchor, iend - anchor);
op += iend-anchor;
}
// End
return (int) (((char*)op)-dest);
}
int FUNCTION_NAME (const char* source,
char* dest,
int inputSize
#ifdef LIMITED_OUTPUT
,int maxOutputSize
#endif
)
{
void* ctx = LZ4_createHC(source);
int result;
if (ctx==NULL) return 0;
#ifdef LIMITED_OUTPUT
result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize, maxOutputSize);
#else
result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize);
#endif
LZ4_freeHC(ctx);
return result;
}
//****************************
// Clean defines
//****************************
// Required defines
#undef FUNCTION_NAME
// Locally Generated
#undef ENCODE_SEQUENCE
#undef ENCODE_SEQUENCE_NAME
// Optional defines
#ifdef LIMITED_OUTPUT
#undef LIMITED_OUTPUT
#endif

View File

@ -141,6 +141,7 @@ message RpcSaslProto {
INITIATE = 2;
CHALLENGE = 3;
RESPONSE = 4;
WRAP = 5;
}
message SaslAuth {

View File

@ -46,7 +46,6 @@ div#dfsnodetable a#title {
}
div#dfsnodetable td, th {
border-bottom-style : none;
padding-bottom : 4px;
padding-top : 4px;
}

View File

@ -45,6 +45,27 @@ bin/hadoop fs <args>
Differences are described with each of the commands. Error information is
sent to stderr and the output is sent to stdout.
appendToFile
Usage: <<<hdfs dfs -appendToFile <localsrc> ... <dst> >>>
Append a single src, or multiple srcs, from the local file system to the
destination file system. Also reads input from stdin and appends it to the
destination file system.
* <<<hdfs dfs -appendToFile localfile /user/hadoop/hadoopfile>>>
* <<<hdfs dfs -appendToFile localfile1 localfile2 /user/hadoop/hadoopfile>>>
* <<<hdfs dfs -appendToFile localfile hdfs://nn.example.com/hadoop/hadoopfile>>>
* <<<hdfs dfs -appendToFile - hdfs://nn.example.com/hadoop/hadoopfile>>>
Reads the input from stdin.
Exit Code:
Returns 0 on success and 1 on error.
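For reference, the shell command is a thin wrapper over the FileSystem append API. Below is a minimal, hedged Java sketch of the same operation; the namenode URI, local file names, and target path are illustrative assumptions, not taken from this document.

import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class AppendToFileSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed cluster URI; replace with the real fs.defaultFS.
    FileSystem fs = FileSystem.get(URI.create("hdfs://nn.example.com"), conf);
    Path dst = new Path("/user/hadoop/hadoopfile");
    if (!fs.exists(dst)) {
      fs.create(dst).close();            // -appendToFile creates the target if missing
    }
    FSDataOutputStream out = fs.append(dst);
    try {
      for (String local : new String[] { "localfile1", "localfile2" }) {
        InputStream in = new FileInputStream(local);
        try {
          IOUtils.copyBytes(in, out, 4096, false);   // append each local file in order
        } finally {
          in.close();
        }
      }
    } finally {
      out.close();
    }
  }
}

Run twice, this doubles the target file's length, matching the behaviour exercised by the new TestDFSShell#testAppendToFile further down in this patch.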
cat
Usage: <<<hdfs dfs -cat URI [URI ...]>>>

View File

@ -71,7 +71,8 @@ public Path getTestRootPath(String pathString) {
public String getAbsoluteTestRootDir() throws IOException {
if (absTestRootDir == null) {
if (testRootDir.startsWith("/")) {
Path testRootPath = new Path(testRootDir);
if (testRootPath.isAbsolute()) {
absTestRootDir = testRootDir;
} else {
absTestRootDir = getWorkingDirectory().toString() + "/"

View File

@ -109,4 +109,7 @@ abstract public RemoteIterator<FileStatus> listStatusIterator(final Path f)
abstract public FileStatus[] listStatus(final Path f)
throws AccessControlException, FileNotFoundException,
UnsupportedFileSystemException, IOException;
abstract public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException;
}

View File

@ -332,4 +332,10 @@ public FileStatus[] listStatus(Path f) throws AccessControlException,
FileNotFoundException, UnsupportedFileSystemException, IOException {
return fc.util().listStatus(f);
}
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException {
return fc.util().globStatus(pathPattern, filter);
}
}

View File

@ -397,4 +397,10 @@ public FileStatus[] listStatus(Path f) throws AccessControlException,
FileNotFoundException, UnsupportedFileSystemException, IOException {
return fs.listStatus(f);
}
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException {
return fs.globStatus(pathPattern, filter);
}
}

View File

@ -20,13 +20,10 @@
import java.io.*;
import java.net.URI;
import java.util.EnumSet;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.CreateOpts;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.test.GenericTestUtils;
import static org.junit.Assert.*;
@ -51,6 +48,13 @@ public abstract class SymlinkBaseTest {
abstract protected String testBaseDir2() throws IOException;
abstract protected URI testURI();
// Returns true if the filesystem is emulating symlink support. Certain
// checks will be bypassed if that is the case.
//
protected boolean emulatingSymlinksOnWindows() {
return false;
}
protected IOException unwrapException(IOException e) {
return e;
}
@ -156,8 +160,11 @@ public void testCreateLinkCanCreateParent() throws IOException {
@Test(timeout=10000)
/** Try to create a directory given a path that refers to a symlink */
public void testMkdirExistingLink() throws IOException {
Path file = new Path(testBaseDir1() + "/targetFile");
createAndWriteFile(file);
Path dir = new Path(testBaseDir1()+"/link");
wrapper.createSymlink(new Path("/doesNotExist"), dir, false);
wrapper.createSymlink(file, dir, false);
try {
wrapper.mkdir(dir, FileContext.DEFAULT_PERM, false);
fail("Created a dir where a symlink exists");
@ -224,6 +231,7 @@ public void testOpenResolvesLinks() throws IOException {
@Test(timeout=10000)
/** Stat a link to a file */
public void testStatLinkToFile() throws IOException {
assumeTrue(!emulatingSymlinksOnWindows());
Path file = new Path(testBaseDir1()+"/file");
Path linkToFile = new Path(testBaseDir1()+"/linkToFile");
createAndWriteFile(file);
@ -232,8 +240,7 @@ public void testStatLinkToFile() throws IOException {
assertTrue(wrapper.isSymlink(linkToFile));
assertTrue(wrapper.isFile(linkToFile));
assertFalse(wrapper.isDir(linkToFile));
assertEquals(file.toUri().getPath(),
wrapper.getLinkTarget(linkToFile).toString());
assertEquals(file, wrapper.getLinkTarget(linkToFile));
// The local file system does not fully resolve the link
// when obtaining the file status
if (!"file".equals(getScheme())) {
@ -277,8 +284,7 @@ public void testStatLinkToDir() throws IOException {
assertFalse(wrapper.isFile(linkToDir));
assertTrue(wrapper.isDir(linkToDir));
assertEquals(dir.toUri().getPath(),
wrapper.getLinkTarget(linkToDir).toString());
assertEquals(dir, wrapper.getLinkTarget(linkToDir));
}
@Test(timeout=10000)
@ -351,6 +357,12 @@ public void testRecursiveLinks() throws IOException {
/* Assert that the given link to a file behaves as expected. */
private void checkLink(Path linkAbs, Path expectedTarget, Path targetQual)
throws IOException {
// If we are emulating symlinks then many of these checks will fail
// so we skip them.
//
assumeTrue(!emulatingSymlinksOnWindows());
Path dir = new Path(testBaseDir1());
// isFile/Directory
assertTrue(wrapper.isFile(linkAbs));
@ -400,7 +412,7 @@ else if (wrapper instanceof FileSystemTestWrapper) {
failureExpected = false;
}
try {
readFile(new Path(getScheme()+"://"+testBaseDir1()+"/linkToFile"));
readFile(new Path(getScheme()+":///"+testBaseDir1()+"/linkToFile"));
assertFalse(failureExpected);
} catch (Exception e) {
if (!failureExpected) {
@ -646,6 +658,7 @@ public void testCreateDirViaSymlink() throws IOException {
@Test(timeout=10000)
/** Create symlink through a symlink */
public void testCreateLinkViaLink() throws IOException {
assumeTrue(!emulatingSymlinksOnWindows());
Path dir1 = new Path(testBaseDir1());
Path file = new Path(testBaseDir1(), "file");
Path linkToDir = new Path(testBaseDir2(), "linkToDir");
@ -688,6 +701,7 @@ public void testListStatusUsingLink() throws IOException {
@Test(timeout=10000)
/** Test create symlink using the same path */
public void testCreateLinkTwice() throws IOException {
assumeTrue(!emulatingSymlinksOnWindows());
Path file = new Path(testBaseDir1(), "file");
Path link = new Path(testBaseDir1(), "linkToFile");
createAndWriteFile(file);
@ -783,7 +797,7 @@ public void testCreateLinkToDotDot() throws IOException {
Path linkToDir = new Path(testBaseDir2(), "linkToDir");
Path fileViaLink = new Path(linkToDir, "test/file");
// Symlink to .. is not a problem since the .. is squashed early
assertEquals(testBaseDir1(), dotDot.toString());
assertEquals(new Path(testBaseDir1()), dotDot);
createAndWriteFile(file);
wrapper.createSymlink(dotDot, linkToDir, false);
readFile(fileViaLink);
@ -876,7 +890,8 @@ public void testRenameSymlinkViaSymlink() throws IOException {
assertFalse(wrapper.exists(linkViaLink));
// Check that we didn't rename the link target
assertTrue(wrapper.exists(file));
assertTrue(wrapper.getFileLinkStatus(linkNewViaLink).isSymlink());
assertTrue(wrapper.getFileLinkStatus(linkNewViaLink).isSymlink() ||
emulatingSymlinksOnWindows());
readFile(linkNewViaLink);
}
@ -1014,7 +1029,8 @@ public void testRenameSymlinkNonExistantDest() throws IOException {
createAndWriteFile(file);
wrapper.createSymlink(file, link1, false);
wrapper.rename(link1, link2);
assertTrue(wrapper.getFileLinkStatus(link2).isSymlink());
assertTrue(wrapper.getFileLinkStatus(link2).isSymlink() ||
emulatingSymlinksOnWindows());
readFile(link2);
readFile(file);
assertFalse(wrapper.exists(link1));
@ -1038,8 +1054,11 @@ public void testRenameSymlinkToExistingFile() throws IOException {
}
wrapper.rename(link, file1, Rename.OVERWRITE);
assertFalse(wrapper.exists(link));
assertTrue(wrapper.getFileLinkStatus(file1).isSymlink());
assertEquals(file2, wrapper.getLinkTarget(file1));
if (!emulatingSymlinksOnWindows()) {
assertTrue(wrapper.getFileLinkStatus(file1).isSymlink());
assertEquals(file2, wrapper.getLinkTarget(file1));
}
}
@Test(timeout=10000)
@ -1078,16 +1097,21 @@ public void testRenameSymlinkToExistingDir() throws IOException {
@Test(timeout=10000)
/** Rename a symlink to itself */
public void testRenameSymlinkToItself() throws IOException {
Path file = new Path(testBaseDir1(), "file");
createAndWriteFile(file);
Path link = new Path(testBaseDir1(), "linkToFile1");
wrapper.createSymlink(new Path("/doestNotExist"), link, false);
wrapper.createSymlink(file, link, false);
try {
wrapper.rename(link, link);
fail("Failed to get expected IOException");
} catch (IOException e) {
assertTrue(unwrapException(e) instanceof FileAlreadyExistsException);
}
// Fails with overwrite as well
try {
wrapper.rename(link, link, Rename.OVERWRITE);
fail("Failed to get expected IOException");
} catch (IOException e) {
assertTrue(unwrapException(e) instanceof FileAlreadyExistsException);
}
@ -1096,6 +1120,7 @@ public void testRenameSymlinkToItself() throws IOException {
@Test(timeout=10000)
/** Rename a symlink */
public void testRenameSymlink() throws IOException {
assumeTrue(!emulatingSymlinksOnWindows());
Path file = new Path(testBaseDir1(), "file");
Path link1 = new Path(testBaseDir1(), "linkToFile1");
Path link2 = new Path(testBaseDir1(), "linkToFile2");
@ -1193,6 +1218,7 @@ public void testRenameSymlinkToDirItLinksTo() throws IOException {
@Test(timeout=10000)
/** Test rename the symlink's target */
public void testRenameLinkTarget() throws IOException {
assumeTrue(!emulatingSymlinksOnWindows());
Path file = new Path(testBaseDir1(), "file");
Path fileNew = new Path(testBaseDir1(), "fileNew");
Path link = new Path(testBaseDir1(), "linkToFile");

View File

@ -793,6 +793,8 @@ public void testCreateJarWithClassPath() throws Exception {
}
}
List<String> actualClassPaths = Arrays.asList(classPathAttr.split(" "));
Collections.sort(expectedClassPaths);
Collections.sort(actualClassPaths);
Assert.assertEquals(expectedClassPaths, actualClassPaths);
} finally {
if (jarFile != null) {

View File

@ -28,11 +28,38 @@
import org.apache.hadoop.io.AvroTestUtil;
import org.apache.hadoop.util.Shell;
import com.google.common.base.Joiner;
import junit.framework.TestCase;
import static org.junit.Assert.fail;
public class TestPath extends TestCase {
/**
* Merge a bunch of Path objects into a sorted semicolon-separated
* path string.
*/
public static String mergeStatuses(Path paths[]) {
String pathStrings[] = new String[paths.length];
int i = 0;
for (Path path : paths) {
pathStrings[i++] = path.toUri().getPath();
}
Arrays.sort(pathStrings);
return Joiner.on(";").join(pathStrings);
}
/**
* Merge a bunch of FileStatus objects into a sorted semicolon-separated
* path string.
*/
public static String mergeStatuses(FileStatus statuses[]) {
Path paths[] = new Path[statuses.length];
int i = 0;
for (FileStatus status : statuses) {
paths[i++] = status.getPath();
}
return mergeStatuses(paths);
}
@Test (timeout = 30000)
public void testToString() {
toStringTest("/");
@ -352,10 +379,11 @@ public void testGlobEscapeStatus() throws Exception {
// ensure globStatus with "*" finds all dir contents
stats = lfs.globStatus(new Path(testRoot, "*"));
Arrays.sort(stats);
assertEquals(paths.length, stats.length);
for (int i=0; i < paths.length; i++) {
assertEquals(paths[i].getParent(), stats[i].getPath());
Path parentPaths[] = new Path[paths.length];
for (int i = 0; i < paths.length; i++) {
parentPaths[i] = paths[i].getParent();
}
assertEquals(mergeStatuses(parentPaths), mergeStatuses(stats));
// ensure that globStatus with an escaped "\*" only finds "*"
stats = lfs.globStatus(new Path(testRoot, "\\*"));
@ -365,9 +393,7 @@ public void testGlobEscapeStatus() throws Exception {
// try to glob the inner file for all dirs
stats = lfs.globStatus(new Path(testRoot, "*/f"));
assertEquals(paths.length, stats.length);
for (int i=0; i < paths.length; i++) {
assertEquals(paths[i], stats[i].getPath());
}
assertEquals(mergeStatuses(paths), mergeStatuses(stats));
// try to get the inner file for only the "*" dir
stats = lfs.globStatus(new Path(testRoot, "\\*/f"));

View File

@ -30,6 +30,7 @@
import java.net.URISyntaxException;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Shell;
import org.junit.Test;
/**
@ -61,6 +62,16 @@ protected URI testURI() {
}
}
@Override
protected boolean emulatingSymlinksOnWindows() {
// Java 6 on Windows has very poor symlink support. Specifically,
// File#length and File#renameTo do not work as expected.
// (see HADOOP-9061 for additional details)
// Hence some symlink tests will be skipped.
//
return (Shell.WINDOWS && !Shell.isJava7OrAbove());
}
@Override
public void testCreateDanglingLink() throws IOException {
// Dangling symlinks are not supported on Windows local file system.
@ -171,6 +182,7 @@ public void testDanglingLink() throws IOException {
* file scheme (eg file://host/tmp/test).
*/
public void testGetLinkStatusPartQualTarget() throws IOException {
assumeTrue(!emulatingSymlinksOnWindows());
Path fileAbs = new Path(testBaseDir1()+"/file");
Path fileQual = new Path(testURI().toString(), fileAbs);
Path dir = new Path(testBaseDir1());
@ -205,4 +217,14 @@ public void testGetLinkStatusPartQualTarget() throws IOException {
// Expected.
}
}
/** Test create symlink to . */
@Override
public void testCreateLinkToDot() throws IOException {
try {
super.testCreateLinkToDot();
} catch (IllegalArgumentException iae) {
// Expected.
}
}
}

View File

@ -17,8 +17,13 @@
*/
package org.apache.hadoop.fs;
import org.apache.hadoop.util.Shell;
import org.junit.BeforeClass;
import java.io.IOException;
import static org.junit.Assume.assumeTrue;
public class TestSymlinkLocalFSFileContext extends TestSymlinkLocalFS {
@BeforeClass
@ -27,4 +32,9 @@ public static void testSetup() throws Exception {
wrapper = new FileContextTestWrapper(context);
}
@Override
public void testRenameFileWithDestParentSymlink() throws IOException {
assumeTrue(!Shell.WINDOWS);
super.testRenameFileWithDestParentSymlink();
}
}

View File

@ -17,13 +17,20 @@
*/
package org.apache.hadoop.fs;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.util.Shell;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeTrue;
public class TestSymlinkLocalFSFileSystem extends TestSymlinkLocalFS {
@BeforeClass
@ -54,4 +61,36 @@ public void testCreateFileDirExistingLink() throws IOException {}
@Override
@Test(timeout=1000)
public void testAccessFileViaInterSymlinkAbsTarget() throws IOException {}
@Override
public void testRenameFileWithDestParentSymlink() throws IOException {
assumeTrue(!Shell.WINDOWS);
super.testRenameFileWithDestParentSymlink();
}
@Override
@Test(timeout=10000)
/** Rename a symlink to itself */
public void testRenameSymlinkToItself() throws IOException {
Path file = new Path(testBaseDir1(), "file");
createAndWriteFile(file);
Path link = new Path(testBaseDir1(), "linkToFile1");
wrapper.createSymlink(file, link, false);
try {
wrapper.rename(link, link);
fail("Failed to get expected IOException");
} catch (IOException e) {
assertTrue(unwrapException(e) instanceof FileAlreadyExistsException);
}
// Fails with overwrite as well
try {
wrapper.rename(link, link, Rename.OVERWRITE);
fail("Failed to get expected IOException");
} catch (IOException e) {
// Todo: Fix this test when HADOOP-9819 is fixed.
assertTrue(unwrapException(e) instanceof FileAlreadyExistsException ||
unwrapException(e) instanceof FileNotFoundException);
}
}
}

View File

@ -134,6 +134,14 @@ public void testSnappyCodec() throws IOException {
public void testLz4Codec() throws IOException {
if (NativeCodeLoader.isNativeCodeLoaded()) {
if (Lz4Codec.isNativeCodeLoaded()) {
conf.setBoolean(
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY,
false);
codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec");
codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec");
conf.setBoolean(
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY,
true);
codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec");
codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec");
} else {

View File

@ -100,6 +100,7 @@ public interface TestProtocol extends VersionedProtocol {
void ping() throws IOException;
void slowPing(boolean shouldSlow) throws IOException;
void sleep(long delay) throws IOException, InterruptedException;
String echo(String value) throws IOException;
String[] echo(String[] value) throws IOException;
Writable echo(Writable value) throws IOException;
@ -145,6 +146,11 @@ public synchronized void slowPing(boolean shouldSlow) {
}
}
@Override
public void sleep(long delay) throws InterruptedException {
Thread.sleep(delay);
}
@Override
public String echo(String value) throws IOException { return value; }
@ -932,6 +938,28 @@ public void run() {
}
}
@Test
public void testConnectionPing() throws Exception {
Configuration conf = new Configuration();
int pingInterval = 50;
conf.setBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, true);
conf.setInt(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY, pingInterval);
final Server server = new RPC.Builder(conf)
.setProtocol(TestProtocol.class).setInstance(new TestImpl())
.setBindAddress(ADDRESS).setPort(0).setNumHandlers(5).setVerbose(true)
.build();
server.start();
final TestProtocol proxy = RPC.getProxy(TestProtocol.class,
TestProtocol.versionID, server.getListenerAddress(), conf);
try {
// this call will throw an exception if the server couldn't decode the ping
proxy.sleep(pingInterval*4);
} finally {
if (proxy != null) RPC.stopProxy(proxy);
}
}
public static void main(String[] args) throws IOException {
new TestRPC().testCallsInternal(conf);

View File

@ -29,6 +29,7 @@
import java.net.InetSocketAddress;
import java.security.PrivilegedExceptionAction;
import java.security.Security;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Set;
import java.util.regex.Pattern;
@ -44,8 +45,6 @@
import javax.security.sasl.SaslException;
import javax.security.sasl.SaslServer;
import junit.framework.Assert;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -62,11 +61,11 @@
import org.apache.hadoop.security.SaslRpcClient;
import org.apache.hadoop.security.SaslRpcServer;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.SaslRpcServer.QualityOfProtection;
import org.apache.hadoop.security.SecurityInfo;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.TestUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
@ -77,9 +76,28 @@
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/** Unit tests for using Sasl over RPC. */
@RunWith(Parameterized.class)
public class TestSaslRPC {
@Parameters
public static Collection<Object[]> data() {
Collection<Object[]> params = new ArrayList<Object[]>();
for (QualityOfProtection qop : QualityOfProtection.values()) {
params.add(new Object[]{ qop });
}
return params;
}
QualityOfProtection expectedQop;
public TestSaslRPC(QualityOfProtection qop) {
expectedQop = qop;
}
private static final String ADDRESS = "0.0.0.0";
public static final Log LOG =
@ -115,8 +133,12 @@ public static void setupKerb() {
@Before
public void setup() {
LOG.info("---------------------------------");
LOG.info("Testing QOP:"+expectedQop);
LOG.info("---------------------------------");
conf = new Configuration();
conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.toString());
conf.set("hadoop.rpc.protection", expectedQop.name().toLowerCase());
UserGroupInformation.setConfiguration(conf);
enableSecretManager = null;
forceSecretManager = null;
@ -226,15 +248,16 @@ public Token<TestTokenIdentifier> selectToken(Text service,
serverPrincipal = SERVER_PRINCIPAL_KEY)
@TokenInfo(TestTokenSelector.class)
public interface TestSaslProtocol extends TestRPC.TestProtocol {
public AuthenticationMethod getAuthMethod() throws IOException;
public AuthMethod getAuthMethod() throws IOException;
public String getAuthUser() throws IOException;
}
public static class TestSaslImpl extends TestRPC.TestImpl implements
TestSaslProtocol {
@Override
public AuthenticationMethod getAuthMethod() throws IOException {
return UserGroupInformation.getCurrentUser().getAuthenticationMethod();
public AuthMethod getAuthMethod() throws IOException {
return UserGroupInformation.getCurrentUser()
.getAuthenticationMethod().getAuthMethod();
}
@Override
public String getAuthUser() throws IOException {
@ -341,8 +364,11 @@ private void doDigestRpc(Server server, TestTokenSecretManager sm
try {
proxy = RPC.getProxy(TestSaslProtocol.class,
TestSaslProtocol.versionID, addr, conf);
AuthMethod authMethod = proxy.getAuthMethod();
assertEquals(TOKEN, authMethod);
//QOP must be auth
Assert.assertEquals(SaslRpcServer.SASL_PROPS.get(Sasl.QOP), "auth");
assertEquals(expectedQop.saslQop,
RPC.getConnectionIdForProxy(proxy).getSaslQop());
proxy.ping();
} finally {
server.stop();
@ -393,6 +419,7 @@ public void testPerConnectionConf() throws Exception {
newConf.set(CommonConfigurationKeysPublic.
HADOOP_RPC_SOCKET_FACTORY_CLASS_DEFAULT_KEY, "");
Client client = null;
TestSaslProtocol proxy1 = null;
TestSaslProtocol proxy2 = null;
TestSaslProtocol proxy3 = null;
@ -402,7 +429,7 @@ public void testPerConnectionConf() throws Exception {
proxy1 = RPC.getProxy(TestSaslProtocol.class,
TestSaslProtocol.versionID, addr, newConf);
proxy1.getAuthMethod();
Client client = WritableRpcEngine.getClient(conf);
client = WritableRpcEngine.getClient(newConf);
Set<ConnectionId> conns = client.getConnectionIds();
assertEquals("number of connections in cache is wrong", 1, conns.size());
// same conf, connection should be re-used
@ -428,9 +455,13 @@ public void testPerConnectionConf() throws Exception {
assertNotSame(connsArray[2].getMaxIdleTime(), timeouts[1]);
} finally {
server.stop();
RPC.stopProxy(proxy1);
RPC.stopProxy(proxy2);
RPC.stopProxy(proxy3);
// this is dirty, but clear out connection cache for next run
if (client != null) {
client.getConnectionIds().clear();
}
if (proxy1 != null) RPC.stopProxy(proxy1);
if (proxy2 != null) RPC.stopProxy(proxy2);
if (proxy3 != null) RPC.stopProxy(proxy3);
}
}
@ -793,14 +824,13 @@ private String internalGetAuthMethod(
final AuthMethod serverAuth,
final UseToken tokenType) throws Exception {
String currentUser = UserGroupInformation.getCurrentUser().getUserName();
final Configuration serverConf = new Configuration(conf);
serverConf.set(HADOOP_SECURITY_AUTHENTICATION, serverAuth.toString());
UserGroupInformation.setConfiguration(serverConf);
final UserGroupInformation serverUgi =
UserGroupInformation.createRemoteUser(currentUser + "-SERVER/localhost@NONE");
final UserGroupInformation serverUgi = (serverAuth == KERBEROS)
? UserGroupInformation.createRemoteUser("server/localhost@NONE")
: UserGroupInformation.createRemoteUser("server");
serverUgi.setAuthenticationMethod(serverAuth);
final TestTokenSecretManager sm = new TestTokenSecretManager();
@ -835,7 +865,7 @@ public Server run() throws IOException {
UserGroupInformation.setConfiguration(clientConf);
final UserGroupInformation clientUgi =
UserGroupInformation.createRemoteUser(currentUser + "-CLIENT");
UserGroupInformation.createRemoteUser("client");
clientUgi.setAuthenticationMethod(clientAuth);
final InetSocketAddress addr = NetUtils.getConnectAddress(server);
@ -873,14 +903,13 @@ public String run() throws IOException {
TestSaslProtocol.versionID, addr, clientConf);
proxy.ping();
// verify sasl completed
if (serverAuth != SIMPLE) {
assertEquals(SaslRpcServer.SASL_PROPS.get(Sasl.QOP), "auth");
}
// make sure the other side thinks we are who we said we are!!!
assertEquals(clientUgi.getUserName(), proxy.getAuthUser());
return proxy.getAuthMethod().toString();
AuthMethod authMethod = proxy.getAuthMethod();
// verify sasl completed with correct QOP
assertEquals((authMethod != SIMPLE) ? expectedQop.saslQop : null,
RPC.getConnectionIdForProxy(proxy).getSaslQop());
return authMethod.toString();
} finally {
if (proxy != null) {
RPC.stopProxy(proxy);

View File

@ -19,9 +19,9 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project-dist</artifactId>
<artifactId>hadoop-project</artifactId>
<version>3.0.0-SNAPSHOT</version>
<relativePath>../../hadoop-project-dist</relativePath>
<relativePath>../../hadoop-project</relativePath>
</parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-nfs</artifactId>

View File

@ -10,12 +10,6 @@ Trunk (Unreleased)
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
HDFS-4659 Support setting execution bit for regular files (Brandon Li via sanjay)
HDFS-4762 Provide HDFS based NFSv3 and Mountd implementation (brandonli)
HDFS-4962 Use enum for nfs constants (Nicholas SZE via jing9)
IMPROVEMENTS
HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common.
@ -241,10 +235,8 @@ Trunk (Unreleased)
HDFS-3934. duplicative dfs_hosts entries handled wrong. (Colin Patrick
McCabe)
HDFS-4948. mvn site for hadoop-hdfs-nfs fails. (brandonli)
HDFS-5043. For HdfsFileStatus, set default value of childrenNum to -1
instead of 0 to avoid confusing applications. (brandonli)
HDFS-4366. Block Replication Policy Implementation May Skip Higher-Priority
Blocks for Lower-Priority Blocks (Derek Dagit via kihwal)
Release 2.3.0 - UNRELEASED
@ -279,15 +271,39 @@ Release 2.1.1-beta - UNRELEASED
NEW FEATURES
HDFS-4962 Use enum for nfs constants (Nicholas SZE via jing9)
HDFS-5071 Change hdfs-nfs parent project to hadoop-project (brandonli)
HDFS-4763 Add script changes/utility for starting NFS gateway (brandonli)
IMPROVEMENTS
HDFS-4513. Clarify in the WebHDFS REST API that all JSON responses may
contain additional properties. (szetszwo)
HDFS-5061. Make FSNameSystem#auditLoggers an unmodifiable list.
(Arpit Agarwal via suresh)
HDFS-4905. Add appendToFile command to "hdfs dfs". (Arpit Agarwal via
cnauroth)
HDFS-4926. Namenode webserver's page has a tooltip that is inconsistent
with the datanode HTML link. (Vivek Ganesan via jing9)
HDFS-5047. Suppress logging of full stack trace of quota and lease
exceptions. (Robert Parker via kihwal)
OPTIMIZATIONS
BUG FIXES
HDFS-5028. LeaseRenewer throws ConcurrentModificationException when timeout.
(zhaoyunjiong via szetszwo)
HDFS-5043. For HdfsFileStatus, set default value of childrenNum to -1
instead of 0 to avoid confusing applications. (brandonli)
Release 2.1.0-beta - 2013-08-06
INCOMPATIBLE CHANGES
@ -339,6 +355,10 @@ Release 2.1.0-beta - 2013-08-06
HDFS-3495. Update Balancer to support new NetworkTopology with NodeGroup.
(Junping Du via szetszwo)
HDFS-4659 Support setting execution bit for regular files (Brandon Li via sanjay)
HDFS-4762 Provide HDFS based NFSv3 and Mountd implementation (brandonli)
HDFS-4372. Track NameNode startup progress. (cnauroth)
HDFS-4373. Add HTTP API for querying NameNode startup progress. (cnauroth)
@ -719,6 +739,8 @@ Release 2.1.0-beta - 2013-08-06
HDFS-4943. WebHdfsFileSystem does not work when original file path has
encoded chars. (Jerry He via szetszwo)
HDFS-4948. mvn site for hadoop-hdfs-nfs fails. (brandonli)
HDFS-4887. TestNNThroughputBenchmark exits abruptly. (kihwal)
HDFS-4980. Incorrect logging.properties file for hadoop-httpfs.

View File

@ -57,6 +57,8 @@ function print_usage(){
echo " current directory contents with a snapshot"
echo " lsSnapshottableDir list all snapshottable dirs owned by the current user"
echo " Use -help to see options"
echo " portmap run a portmap service"
echo " nfs3 run an NFS version 3 gateway"
echo ""
echo "Most commands print help when invoked w/o parameters."
}
@ -149,6 +151,10 @@ elif [ "$COMMAND" = "snapshotDiff" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
elif [ "$COMMAND" = "portmap" ] ; then
CLASS=org.apache.hadoop.portmap.Portmap
elif [ "$COMMAND" = "nfs3" ] ; then
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
else
CLASS="$COMMAND"
fi

View File

@ -170,12 +170,11 @@ public Path getHomeDirectory() {
}
/**
* Checks that the passed URI belongs to this filesystem, resolves the path
* component against the current working directory if relative, and finally
* returns the absolute path component.
* Checks that the passed URI belongs to this filesystem and returns
* just the path component. Expects a URI with an absolute path.
*
* @param file URI to check and resolve
* @return resolved absolute path component of {file}
* @param file URI with absolute path
* @return path component of {file}
* @throws IllegalArgumentException if URI does not belong to this DFS
*/
private String getPathName(Path file) {
@ -514,15 +513,10 @@ public void concat(Path trg, Path [] psrcs) throws IOException {
@Override
public boolean rename(Path src, Path dst) throws IOException {
statistics.incrementWriteOps(1);
// Both Paths have to belong to this DFS
final Path absSrc = fixRelativePart(src);
final Path absDst = fixRelativePart(dst);
FileSystem srcFS = getFSofPath(absSrc, getConf());
FileSystem dstFS = getFSofPath(absDst, getConf());
if (!srcFS.getUri().equals(getUri()) ||
!dstFS.getUri().equals(getUri())) {
throw new IOException("Renames across FileSystems not supported");
}
// Try the rename without resolving first
try {
return dfs.rename(getPathName(absSrc), getPathName(absDst));
@ -539,7 +533,8 @@ public Boolean doCall(final Path p)
@Override
public Boolean next(final FileSystem fs, final Path p)
throws IOException {
return fs.rename(source, p);
// Should just throw an error in FileSystem#checkPath
return doCall(p);
}
}.resolve(this, absDst);
}
@ -553,15 +548,8 @@ public Boolean next(final FileSystem fs, final Path p)
public void rename(Path src, Path dst, final Options.Rename... options)
throws IOException {
statistics.incrementWriteOps(1);
// Both Paths have to belong to this DFS
final Path absSrc = fixRelativePart(src);
final Path absDst = fixRelativePart(dst);
FileSystem srcFS = getFSofPath(absSrc, getConf());
FileSystem dstFS = getFSofPath(absDst, getConf());
if (!srcFS.getUri().equals(getUri()) ||
!dstFS.getUri().equals(getUri())) {
throw new IOException("Renames across FileSystems not supported");
}
// Try the rename without resolving first
try {
dfs.rename(getPathName(absSrc), getPathName(absDst), options);
@ -579,7 +567,7 @@ public Void doCall(final Path p)
@Override
public Void next(final FileSystem fs, final Path p)
throws IOException {
// Since we know it's this DFS for both, can just call doCall again
// Should just throw an error in FileSystem#checkPath
return doCall(p);
}
}.resolve(this, absDst);

View File

@ -449,8 +449,8 @@ private void run(final int id) throws InterruptedException {
LOG.warn("Failed to renew lease for " + clientsString() + " for "
+ (elapsed/1000) + " seconds. Aborting ...", ie);
synchronized (this) {
for(DFSClient c : dfsclients) {
c.abort();
while (!dfsclients.isEmpty()) {
dfsclients.get(0).abort();
}
}
break;

View File

@ -1208,7 +1208,6 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
// abandoned block or block reopened for append
if(bc == null || bc instanceof MutableBlockCollection) {
neededReplications.remove(block, priority); // remove from neededReplications
neededReplications.decrementReplicationIndex(priority);
continue;
}
@ -1235,7 +1234,6 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
if ( (pendingReplications.getNumReplicas(block) > 0) ||
(blockHasEnoughRacks(block)) ) {
neededReplications.remove(block, priority); // remove from neededReplications
neededReplications.decrementReplicationIndex(priority);
blockLog.info("BLOCK* Removing " + block
+ " from neededReplications as it has enough replicas");
continue;
@ -1295,7 +1293,6 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
if(bc == null || bc instanceof MutableBlockCollection) {
neededReplications.remove(block, priority); // remove from neededReplications
rw.targets = null;
neededReplications.decrementReplicationIndex(priority);
continue;
}
requiredReplication = bc.getBlockReplication();
@ -1309,7 +1306,6 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
if ( (pendingReplications.getNumReplicas(block) > 0) ||
(blockHasEnoughRacks(block)) ) {
neededReplications.remove(block, priority); // remove from neededReplications
neededReplications.decrementReplicationIndex(priority);
rw.targets = null;
blockLog.info("BLOCK* Removing " + block
+ " from neededReplications as it has enough replicas");
@ -1346,7 +1342,6 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
// remove from neededReplications
if(numEffectiveReplicas + targets.length >= requiredReplication) {
neededReplications.remove(block, priority); // remove from neededReplications
neededReplications.decrementReplicationIndex(priority);
}
}
}

View File

@ -18,11 +18,8 @@
package org.apache.hadoop.hdfs.server.blockmanagement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
@ -82,16 +79,12 @@ class UnderReplicatedBlocks implements Iterable<Block> {
static final int QUEUE_WITH_CORRUPT_BLOCKS = 4;
/** the queues themselves */
private List<LightWeightLinkedSet<Block>> priorityQueues
= new ArrayList<LightWeightLinkedSet<Block>>();
/** Stores the replication index for each priority */
private Map<Integer, Integer> priorityToReplIdx = new HashMap<Integer, Integer>(LEVEL);
= new ArrayList<LightWeightLinkedSet<Block>>(LEVEL);
/** Create an object. */
UnderReplicatedBlocks() {
for (int i = 0; i < LEVEL; i++) {
priorityQueues.add(new LightWeightLinkedSet<Block>());
priorityToReplIdx.put(i, 0);
}
}
@ -310,13 +303,15 @@ synchronized void update(Block block, int curReplicas,
/**
* Get a list of block lists to be replicated. The index of block lists
* represents its replication priority. Replication index will be tracked for
* each priority list separately in priorityToReplIdx map. Iterates through
* all priority lists and find the elements after replication index. Once the
* last priority lists reaches to end, all replication indexes will be set to
* 0 and start from 1st priority list to fulfill the blockToProces count.
* represents its replication priority. Iterates each block list in priority
* order beginning with the highest priority list. Iterators use a bookmark to
* resume where the previous iteration stopped. Returns when the block count
* is met or iteration reaches the end of the lowest priority list, in which
* case bookmarks for each block list are reset to the heads of their
* respective lists.
*
* @param blocksToProcess - number of blocks to fetch from underReplicated blocks.
* @param blocksToProcess - number of blocks to fetch from underReplicated
* blocks.
* @return Return a list of block lists to be replicated. The block list index
* represents its replication priority.
*/
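To make the bookmark-resume behaviour described in the javadoc above concrete, here is a small, self-contained sketch; the class names are hypothetical simplifications, not the real UnderReplicatedBlocks/BlockManager types.

import java.util.ArrayList;
import java.util.List;

/** Hypothetical stand-in for one priority queue with a resumable bookmark. */
class BookmarkedList<T> {
  private final List<T> elems = new ArrayList<T>();
  private int bookmark = 0;                    // index of the next unvisited element
  void add(T t) { elems.add(t); }
  boolean hasNext() { return bookmark < elems.size(); }
  T next() { return elems.get(bookmark++); }   // consuming an element advances the bookmark
  void resetBookmark() { bookmark = 0; }
}

/** Chooses up to 'count' elements across priority lists, resuming at each bookmark. */
class PriorityScanner<T> {
  List<List<T>> choose(List<BookmarkedList<T>> queues, int count) {
    List<List<T>> out = new ArrayList<List<T>>();
    int taken = 0;
    for (int priority = 0; priority < queues.size(); priority++) {
      out.add(new ArrayList<T>());
      BookmarkedList<T> q = queues.get(priority);
      while (taken < count && q.hasNext()) {
        out.get(priority).add(q.next());
        taken++;
      }
      if (!q.hasNext() && priority == queues.size() - 1) {
        // End of the lowest-priority list reached: reset every bookmark so the
        // next call starts again from the heads, as the javadoc describes.
        for (BookmarkedList<T> each : queues) {
          each.resetBookmark();
        }
      }
      if (taken >= count) {
        break;
      }
    }
    return out;
  }
}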
@ -336,12 +331,8 @@ public synchronized List<List<Block>> chooseUnderReplicatedBlocks(
for (int priority = 0; priority < LEVEL; priority++) {
// Go through all blocks that need replications with current priority.
BlockIterator neededReplicationsIterator = iterator(priority);
Integer replIndex = priorityToReplIdx.get(priority);
// skip to the first unprocessed block, which is at replIndex
for (int i = 0; i < replIndex && neededReplicationsIterator.hasNext(); i++) {
neededReplicationsIterator.next();
}
// Set the iterator to the first unprocessed block at this priority level.
neededReplicationsIterator.setToBookmark();
blocksToProcess = Math.min(blocksToProcess, size());
@ -354,20 +345,18 @@ public synchronized List<List<Block>> chooseUnderReplicatedBlocks(
&& neededReplicationsIterator.hasNext()) {
Block block = neededReplicationsIterator.next();
blocksToReplicate.get(priority).add(block);
replIndex++;
blockCount++;
}
if (!neededReplicationsIterator.hasNext()
&& neededReplicationsIterator.getPriority() == LEVEL - 1) {
// reset all priorities replication index to 0 because there is no
// recently added blocks in any list.
// Reset all priorities' bookmarks to the beginning because there were
// no recently added blocks in any list.
for (int i = 0; i < LEVEL; i++) {
priorityToReplIdx.put(i, 0);
this.priorityQueues.get(i).resetBookmark();
}
break;
}
priorityToReplIdx.put(priority, replIndex);
}
return blocksToReplicate;
}
@ -450,15 +439,19 @@ public void remove() {
int getPriority() {
return level;
}
}
/**
* This method is to decrement the replication index for the given priority
*
* @param priority - int priority level
*/
public void decrementReplicationIndex(int priority) {
Integer replIdx = priorityToReplIdx.get(priority);
priorityToReplIdx.put(priority, --replIdx);
/**
* Sets iterator(s) to bookmarked elements.
*/
private synchronized void setToBookmark() {
if (this.isIteratorForLevel) {
this.iterators.set(0, priorityQueues.get(this.level)
.getBookmark());
} else {
for(int i=0; i<LEVEL; i++) {
this.iterators.set(i, priorityQueues.get(i).getBookmark());
}
}
}
}
}

View File

@ -774,7 +774,7 @@ private List<AuditLogger> initAuditLoggers(Configuration conf) {
if (auditLoggers.isEmpty()) {
auditLoggers.add(new DefaultAuditLogger());
}
return auditLoggers;
return Collections.unmodifiableList(auditLoggers);
}
void loadFSImage(StartupOption startOpt, FSImage fsImage, boolean haEnabled)

View File

@ -70,6 +70,8 @@
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
@ -307,7 +309,10 @@ public NameNodeRpcServer(Configuration conf, NameNode nn)
QuotaExceededException.class,
RecoveryInProgressException.class,
AccessControlException.class,
InvalidToken.class);
InvalidToken.class,
LeaseExpiredException.class,
NSQuotaExceededException.class,
DSQuotaExceededException.class);
}
/**

View File

@ -709,10 +709,10 @@ private void generateNodeDataHeader(JspWriter out, DatanodeDescriptor d,
int idx = (suffix != null && name.endsWith(suffix)) ? name
.indexOf(suffix) : -1;
out.print(rowTxt() + "<td class=\"name\"><a title=\"" + d.getXferAddr()
out.print(rowTxt() + "<td class=\"name\"> <a title=\"" + url
+ "\" href=\"" + url + "\">"
+ ((idx > 0) ? name.substring(0, idx) : name) + "</a>"
+ ((alive) ? "" : "\n"));
+ ((alive) ? "" : "\n") + "<td class=\"address\">" + d.getXferAddr());
}
void generateDecommissioningNodeData(JspWriter out, DatanodeDescriptor d,
@ -747,9 +747,9 @@ void generateNodeData(JspWriter out, DatanodeDescriptor d, String suffix,
* Say the datanode is dn1.hadoop.apache.org with ip 192.168.0.5 we use:
* 1) d.getHostName():d.getPort() to display. Domain and port are stripped
* if they are common across the nodes. i.e. "dn1"
* 2) d.getHost():d.Port() for "title". i.e. "192.168.0.5:50010"
* 3) d.getHostName():d.getInfoPort() for url.
* 2) d.getHostName():d.getInfoPort() for url and title.
* i.e. "http://dn1.hadoop.apache.org:50075/..."
* 3) d.getXferAddr() for "Transferring Address". i.e. "192.168.0.5:50010"
* Note that "d.getHost():d.getPort()" is what DFS clients use to
* interact with datanodes.
*/
@ -880,7 +880,9 @@ void generateNodesList(ServletContext context, JspWriter out,
}
out.print("<tr class=\"headerRow\"> <th " + nodeHeaderStr("name")
+ "> Node <th " + nodeHeaderStr("lastcontact")
+ "> Node <th " + nodeHeaderStr("address")
+ "> Transferring<br>Address <th "
+ nodeHeaderStr("lastcontact")
+ "> Last <br>Contact <th " + nodeHeaderStr("adminstate")
+ "> Admin State <th " + nodeHeaderStr("capacity")
+ "> Configured <br>Capacity (" + diskByteStr + ") <th "
@ -896,8 +898,8 @@ void generateNodesList(ServletContext context, JspWriter out,
+ nodeHeaderStr("bpused") + "> Block Pool<br>Used ("
+ diskByteStr + ") <th "
+ nodeHeaderStr("pcbpused")
+ "> Block Pool<br>Used (%)"
+ "> Blocks <th " + nodeHeaderStr("volfails")
+ "> Block Pool<br>Used (%)" + " <th "
+ nodeHeaderStr("volfails")
+"> Failed Volumes\n");
JspHelper.sortNodeList(live, sorterField, sorterOrder);
@ -915,7 +917,9 @@ void generateNodesList(ServletContext context, JspWriter out,
if (dead.size() > 0) {
out.print("<table border=1 cellspacing=0> <tr id=\"row1\"> "
+ "<th " + nodeHeaderStr("node")
+ "> Node <th " + nodeHeaderStr("decommissioned")
+ "> Node <th " + nodeHeaderStr("address")
+ "> Transferring<br>Address <th "
+ nodeHeaderStr("decommissioned")
+ "> Decommissioned\n");
JspHelper.sortNodeList(dead, sorterField, sorterOrder);
@ -935,7 +939,9 @@ void generateNodesList(ServletContext context, JspWriter out,
if (decommissioning.size() > 0) {
out.print("<table border=1 cellspacing=0> <tr class=\"headRow\"> "
+ "<th " + nodeHeaderStr("name")
+ "> Node <th " + nodeHeaderStr("lastcontact")
+ "> Node <th " + nodeHeaderStr("address")
+ "> Transferring<br>Address <th "
+ nodeHeaderStr("lastcontact")
+ "> Last <br>Contact <th "
+ nodeHeaderStr("underreplicatedblocks")
+ "> Under Replicated Blocks <th "

View File

@ -56,6 +56,8 @@ public String toString() {
private DoubleLinkedElement<T> head;
private DoubleLinkedElement<T> tail;
private LinkedSetIterator bookmark;
/**
* @param initCapacity
* Recommended size of the internal array.
@ -69,6 +71,7 @@ public LightWeightLinkedSet(int initCapacity, float maxLoadFactor,
super(initCapacity, maxLoadFactor, minLoadFactor);
head = null;
tail = null;
bookmark = new LinkedSetIterator();
}
public LightWeightLinkedSet() {
@ -111,6 +114,12 @@ protected boolean addElem(final T element) {
tail = le;
if (head == null) {
head = le;
bookmark.next = head;
}
// Update bookmark, if necessary.
if (bookmark.next == null) {
bookmark.next = le;
}
return true;
}
@ -141,6 +150,11 @@ protected DoubleLinkedElement<T> removeElem(final T key) {
if (tail == found) {
tail = tail.before;
}
// Update bookmark, if necessary.
if (found == this.bookmark.next) {
this.bookmark.next = found.after;
}
return found;
}
@ -262,5 +276,25 @@ public void clear() {
super.clear();
this.head = null;
this.tail = null;
this.resetBookmark();
}
/**
* Returns a new iterator starting at the bookmarked element.
*
* @return the iterator to the bookmarked element.
*/
public Iterator<T> getBookmark() {
LinkedSetIterator toRet = new LinkedSetIterator();
toRet.next = this.bookmark.next;
this.bookmark = toRet;
return toRet;
}
/**
* Resets the bookmark to the beginning of the list.
*/
public void resetBookmark() {
this.bookmark.next = this.head;
}
}
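A short, hedged usage sketch of the new bookmark API, assuming the getBookmark()/resetBookmark() semantics added in this hunk; the values and driver class below are illustrative only.

import java.util.Iterator;
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;

public class BookmarkDemo {
  public static void main(String[] args) {
    LightWeightLinkedSet<Integer> set = new LightWeightLinkedSet<Integer>();
    for (int i = 0; i < 6; i++) {
      set.add(i);
    }
    Iterator<Integer> it = set.getBookmark();       // starts at the head initially
    System.out.println(it.next() + "," + it.next() + "," + it.next());   // 0,1,2
    // A later call resumes where the previous bookmarked iterator stopped.
    it = set.getBookmark();
    System.out.println(it.next());                  // 3
    set.resetBookmark();                            // next pass starts from the head
    System.out.println(set.getBookmark().next());   // 0
  }
}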

View File

@ -47,7 +47,6 @@ div#dfsnodetable a#title {
}
div#dfsnodetable td, th {
border-bottom-style : none;
padding-bottom : 4px;
padding-top : 4px;
}
@ -103,6 +102,7 @@ table.nodes td {
div#dfsnodetable td, div#dfsnodetable th, div.dfstable td {
padding-left : 10px;
padding-right : 10px;
border:1px solid black;
}
td.perc_filled {

View File

@ -20,14 +20,18 @@
import static org.junit.Assert.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.*;
import com.google.common.base.Joiner;
public class TestGlobPaths {
static class RegexPathFilter implements PathFilter {
@ -784,4 +788,265 @@ public void cleanupDFS() throws IOException {
fs.delete(new Path(USER_DIR), true);
}
/**
* A glob test that can be run on either FileContext or FileSystem.
*/
private static interface FSTestWrapperGlobTest {
void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception;
}
/**
* Run a glob test on FileSystem.
*/
private static void testOnFileSystem(FSTestWrapperGlobTest test) throws Exception {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
FileSystem fs = FileSystem.get(conf);
test.run(new FileSystemTestWrapper(fs), fs, null);
} finally {
cluster.shutdown();
}
}
/**
* Run a glob test on FileContext.
*/
private static void testOnFileContext(FSTestWrapperGlobTest test) throws Exception {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
FileContext fc = FileContext.getFileContext(conf);
test.run(new FileContextTestWrapper(fc), null, fc);
} finally {
cluster.shutdown();
}
}
/**
* Accept all paths.
*/
private static class AcceptAllPathFilter implements PathFilter {
@Override
public boolean accept(Path path) {
return true;
}
}
/**
* Accept only paths ending in Z.
*/
private static class AcceptPathsEndingInZ implements PathFilter {
@Override
public boolean accept(Path path) {
String stringPath = path.toUri().getPath();
return stringPath.endsWith("z");
}
}
/**
* Test globbing through symlinks.
*/
private static class TestGlobWithSymlinks implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Test that globbing through a symlink to a directory yields a path
// containing that symlink.
wrap.mkdir(new Path("/alpha"),
FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLink"), false);
wrap.mkdir(new Path("/alphaLink/beta"),
FsPermission.getDirDefault(), false);
// Test simple glob
FileStatus[] statuses =
wrap.globStatus(new Path("/alpha/*"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alpha/beta",
statuses[0].getPath().toUri().getPath());
// Test glob through symlink
statuses =
wrap.globStatus(new Path("/alphaLink/*"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLink/beta",
statuses[0].getPath().toUri().getPath());
// If the terminal path component in a globbed path is a symlink,
// we don't dereference that link.
wrap.createSymlink(new Path("beta"), new Path("/alphaLink/betaLink"),
false);
statuses = wrap.globStatus(new Path("/alpha/betaLi*"),
new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alpha/betaLink",
statuses[0].getPath().toUri().getPath());
// todo: test symlink-to-symlink-to-dir, etc.
}
}
@Test
public void testGlobWithSymlinksOnFS() throws Exception {
testOnFileSystem(new TestGlobWithSymlinks());
}
@Test
public void testGlobWithSymlinksOnFC() throws Exception {
testOnFileContext(new TestGlobWithSymlinks());
}
/**
* Test globbing symlinks to symlinks.
*
* Also test globbing dangling symlinks. It should NOT throw any exceptions!
*/
private static class TestGlobWithSymlinksToSymlinks
implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Test that globbing through a symlink to a symlink to a directory
// fully resolves
wrap.mkdir(new Path("/alpha"), FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLink"), false);
wrap.createSymlink(new Path("/alphaLink"),
new Path("/alphaLinkLink"), false);
wrap.mkdir(new Path("/alpha/beta"), FsPermission.getDirDefault(), false);
// Test glob through symlink to a symlink to a directory
FileStatus statuses[] =
wrap.globStatus(new Path("/alphaLinkLink"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLinkLink",
statuses[0].getPath().toUri().getPath());
statuses =
wrap.globStatus(new Path("/alphaLinkLink/*"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLinkLink/beta",
statuses[0].getPath().toUri().getPath());
// Test glob of dangling symlink (theta does not actually exist)
wrap.createSymlink(new Path("theta"), new Path("/alpha/kappa"), false);
statuses = wrap.globStatus(new Path("/alpha/kappa/kappa"),
new AcceptAllPathFilter());
Assert.assertNull(statuses);
// Test glob of symlinks
wrap.createFile("/alpha/beta/gamma");
wrap.createSymlink(new Path("gamma"),
new Path("/alpha/beta/gammaLink"), false);
wrap.createSymlink(new Path("gammaLink"),
new Path("/alpha/beta/gammaLinkLink"), false);
wrap.createSymlink(new Path("gammaLinkLink"),
new Path("/alpha/beta/gammaLinkLinkLink"), false);
statuses = wrap.globStatus(new Path("/alpha/*/gammaLinkLinkLink"),
new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alpha/beta/gammaLinkLinkLink",
statuses[0].getPath().toUri().getPath());
statuses = wrap.globStatus(new Path("/alpha/beta/*"),
new AcceptAllPathFilter());
Assert.assertEquals("/alpha/beta/gamma;/alpha/beta/gammaLink;" +
"/alpha/beta/gammaLinkLink;/alpha/beta/gammaLinkLinkLink",
TestPath.mergeStatuses(statuses));
// Let's create two symlinks that point to each other, and glob on them.
wrap.createSymlink(new Path("tweedledee"),
new Path("/tweedledum"), false);
wrap.createSymlink(new Path("tweedledum"),
new Path("/tweedledee"), false);
statuses = wrap.globStatus(new Path("/tweedledee/unobtainium"),
new AcceptAllPathFilter());
Assert.assertNull(statuses);
}
}
@Test
public void testGlobWithSymlinksToSymlinksOnFS() throws Exception {
testOnFileSystem(new TestGlobWithSymlinksToSymlinks());
}
@Test
public void testGlobWithSymlinksToSymlinksOnFC() throws Exception {
testOnFileContext(new TestGlobWithSymlinksToSymlinks());
}
/**
* Test globbing symlinks with a custom PathFilter
*/
private static class TestGlobSymlinksWithCustomPathFilter
implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Build a small tree reachable both directly and through a symlink, then
// verify that globbing with a custom PathFilter only returns accepted paths
wrap.mkdir(new Path("/alpha"), FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLinkz"), false);
wrap.mkdir(new Path("/alpha/beta"), FsPermission.getDirDefault(), false);
wrap.mkdir(new Path("/alpha/betaz"), FsPermission.getDirDefault(), false);
// Test glob through symlink to a symlink to a directory, with a PathFilter
FileStatus statuses[] =
wrap.globStatus(new Path("/alpha/beta"), new AcceptPathsEndingInZ());
Assert.assertNull(statuses);
statuses =
wrap.globStatus(new Path("/alphaLinkz/betaz"), new AcceptPathsEndingInZ());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLinkz/betaz",
statuses[0].getPath().toUri().getPath());
statuses =
wrap.globStatus(new Path("/*/*"), new AcceptPathsEndingInZ());
Assert.assertEquals("/alpha/betaz;/alphaLinkz/betaz",
TestPath.mergeStatuses(statuses));
statuses =
wrap.globStatus(new Path("/*/*"), new AcceptAllPathFilter());
Assert.assertEquals("/alpha/beta;/alpha/betaz;" +
"/alphaLinkz/beta;/alphaLinkz/betaz",
TestPath.mergeStatuses(statuses));
}
}
@Test
public void testGlobSymlinksWithCustomPathFilterOnFS() throws Exception {
testOnFileSystem(new TestGlobSymlinksWithCustomPathFilter());
}
@Test
public void testGlobSymlinksWithCustomPathFilterOnFC() throws Exception {
testOnFileContext(new TestGlobSymlinksWithCustomPathFilter());
}
/**
* Test that globStatus fills in the scheme even when it is not provided.
*/
private static class TestGlobFillsInScheme
implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Verify that the default scheme is hdfs, when we don't supply one.
wrap.mkdir(new Path("/alpha"), FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLink"), false);
FileStatus statuses[] =
wrap.globStatus(new Path("/alphaLink"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Path path = statuses[0].getPath();
Assert.assertEquals("/alphaLink", path.toUri().getPath());
Assert.assertEquals("hdfs", path.toUri().getScheme());
if (fc != null) {
// If we're using FileContext, then we can list a file:/// URI.
// Since everyone should have the root directory, we list that.
statuses =
wrap.globStatus(new Path("file:///"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Path filePath = statuses[0].getPath();
Assert.assertEquals("file", filePath.toUri().getScheme());
Assert.assertEquals("/", filePath.toUri().getPath());
} else {
// The FileSystem we passed in should have scheme 'hdfs'
Assert.assertEquals("hdfs", fs.getScheme());
}
}
}
@Test
public void testGlobFillsInSchemeOnFS() throws Exception {
testOnFileSystem(new TestGlobFillsInScheme());
}
@Test
public void testGlobFillsInSchemeOnFC() throws Exception {
testOnFileContext(new TestGlobFillsInScheme());
}
}

View File

@ -24,8 +24,10 @@
import javax.security.auth.login.LoginException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystemTestHelper;
import org.apache.hadoop.fs.FsConstants;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
@ -69,6 +71,10 @@ public static void clusterSetupAtBegining() throws IOException,
fHdfs = cluster.getFileSystem(0);
fHdfs2 = cluster.getFileSystem(1);
fHdfs.getConf().set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY,
FsConstants.VIEWFS_URI.toString());
fHdfs2.getConf().set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY,
FsConstants.VIEWFS_URI.toString());
defaultWorkingDirectory = fHdfs.makeQualified( new Path("/user/" +
UserGroupInformation.getCurrentUser().getShortUserName()));

View File

@ -17,17 +17,7 @@
*/
package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.*;
import java.security.Permission;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
@ -42,10 +32,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
@ -63,6 +50,9 @@
import org.junit.Test;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.Assert.*;
/**
* This class tests commands from DFSShell.
@ -101,6 +91,18 @@ static File createLocalFile(File f) throws IOException {
return f;
}
static File createLocalFileWithRandomData(int fileLength, File f)
throws IOException {
assertTrue(!f.exists());
f.createNewFile();
FileOutputStream out = new FileOutputStream(f.toString());
byte[] buffer = new byte[fileLength];
out.write(buffer);
out.flush();
out.close();
return f;
}
static void show(String s) {
System.out.println(Thread.currentThread().getStackTrace()[2] + " " + s);
}
@ -1748,6 +1750,85 @@ private void deleteFileUsingTrash(
}
}
@Test (timeout = 300000)
public void testAppendToFile() throws Exception {
final int inputFileLength = 1024 * 1024;
File testRoot = new File(TEST_ROOT_DIR, "testAppendtoFileDir");
testRoot.mkdirs();
File file1 = new File(testRoot, "file1");
File file2 = new File(testRoot, "file2");
createLocalFileWithRandomData(inputFileLength, file1);
createLocalFileWithRandomData(inputFileLength, file2);
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
try {
FileSystem dfs = cluster.getFileSystem();
assertTrue("Not a HDFS: " + dfs.getUri(),
dfs instanceof DistributedFileSystem);
// Run appendToFile once, make sure that the target file is
// created and is of the right size.
Path remoteFile = new Path("/remoteFile");
FsShell shell = new FsShell();
shell.setConf(conf);
String[] argv = new String[] {
"-appendToFile", file1.toString(), file2.toString(), remoteFile.toString() };
int res = ToolRunner.run(shell, argv);
assertThat(res, is(0));
assertThat(dfs.getFileStatus(remoteFile).getLen(), is((long) inputFileLength * 2));
// Run the command once again and make sure that the target file
// size has been doubled.
res = ToolRunner.run(shell, argv);
assertThat(res, is(0));
assertThat(dfs.getFileStatus(remoteFile).getLen(), is((long) inputFileLength * 4));
} finally {
cluster.shutdown();
}
}
@Test (timeout = 300000)
public void testAppendToFileBadArgs() throws Exception {
final int inputFileLength = 1024 * 1024;
File testRoot = new File(TEST_ROOT_DIR, "testAppendToFileBadArgsDir");
testRoot.mkdirs();
File file1 = new File(testRoot, "file1");
createLocalFileWithRandomData(inputFileLength, file1);
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
try {
FileSystem dfs = cluster.getFileSystem();
assertTrue("Not a HDFS: " + dfs.getUri(),
dfs instanceof DistributedFileSystem);
// Run appendToFile with insufficient arguments.
FsShell shell = new FsShell();
shell.setConf(conf);
String[] argv = new String[] {
"-appendToFile", file1.toString() };
int res = ToolRunner.run(shell, argv);
assertThat(res, not(0));
// Mix stdin with other input files. Must fail.
Path remoteFile = new Path("/remoteFile");
argv = new String[] {
"-appendToFile", file1.toString(), "-", remoteFile.toString() };
res = ToolRunner.run(shell, argv);
assertThat(res, not(0));
} finally {
cluster.shutdown();
}
}
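As a rough idea of what the new -appendToFile shell command does under the hood -- open (or create) the target and stream local bytes onto its end -- here is a hedged sketch using the public FileSystem API. The local path is made up, and it assumes the target FileSystem supports append:

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class AppendSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path remote = new Path("/remoteFile");
    // Create the target if it does not exist yet, otherwise append to it.
    OutputStream out = fs.exists(remote) ? fs.append(remote) : fs.create(remote);
    InputStream in = new FileInputStream("/tmp/file1");
    IOUtils.copyBytes(in, out, 4096, true);   // closes both streams when done
  }
}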
/**
* Test that the server trash configuration is respected when
* the client configuration is not set.

View File

@ -21,8 +21,11 @@
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@ -31,6 +34,7 @@
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
@ -40,9 +44,13 @@
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSClusterStats;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.Namesystem;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.util.Time;
@ -1004,4 +1012,185 @@ public void testGetReplWorkMultiplier() {
exception.expect(IllegalArgumentException.class);
blocksReplWorkMultiplier = DFSUtil.getReplWorkMultiplier(conf);
}
@Test(timeout = 60000)
public void testUpdateDoesNotCauseSkippedReplication() {
UnderReplicatedBlocks underReplicatedBlocks = new UnderReplicatedBlocks();
Block block1 = new Block(random.nextLong());
Block block2 = new Block(random.nextLong());
Block block3 = new Block(random.nextLong());
// Adding QUEUE_VERY_UNDER_REPLICATED block
final int block1CurReplicas = 2;
final int block1ExpectedReplicas = 7;
underReplicatedBlocks.add(block1, block1CurReplicas, 0,
block1ExpectedReplicas);
// Adding QUEUE_VERY_UNDER_REPLICATED block
underReplicatedBlocks.add(block2, 2, 0, 7);
// Adding QUEUE_UNDER_REPLICATED block
underReplicatedBlocks.add(block3, 2, 0, 6);
List<List<Block>> chosenBlocks;
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 0, 1, 0, 0, 0);
// Increasing the replications will move the block down a
// priority. This simulates a replica being completed in between checks.
underReplicatedBlocks.update(block1, block1CurReplicas+1, 0,
block1ExpectedReplicas, 1, 0);
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
// This block was moved up a priority and should not be skipped over.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 0, 1, 0, 0, 0);
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_UNDER_REPLICATED.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 0, 0, 1, 0, 0);
}
@Test(timeout = 60000)
public void testAddStoredBlockDoesNotCauseSkippedReplication()
throws IOException {
Namesystem mockNS = mock(Namesystem.class);
when(mockNS.isPopulatingReplQueues()).thenReturn(true);
when(mockNS.hasWriteLock()).thenReturn(true);
FSClusterStats mockStats = mock(FSClusterStats.class);
BlockManager bm =
new BlockManager(mockNS, mockStats, new HdfsConfiguration());
UnderReplicatedBlocks underReplicatedBlocks = bm.neededReplications;
Block block1 = new Block(random.nextLong());
Block block2 = new Block(random.nextLong());
// Adding QUEUE_UNDER_REPLICATED block
underReplicatedBlocks.add(block1, 0, 1, 1);
// Adding QUEUE_UNDER_REPLICATED block
underReplicatedBlocks.add(block2, 0, 1, 1);
List<List<Block>> chosenBlocks;
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 1, 0, 0, 0, 0);
// Adding this block collection to the BlockManager, so that when we add the
// block under construction, the BlockManager will realize the expected
// replication has been achieved and remove it from the under-replicated
// queue.
BlockInfoUnderConstruction info = new BlockInfoUnderConstruction(block1, 1);
BlockCollection bc = mock(BlockCollection.class);
when(bc.getBlockReplication()).thenReturn((short)1);
bm.addBlockCollection(info, bc);
// Adding this block will increase its current replication, and that will
// remove it from the queue.
bm.addStoredBlockUnderConstruction(info,
TestReplicationPolicy.dataNodes[0], ReplicaState.FINALIZED);
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
// This block remains and should not be skipped over.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 1, 0, 0, 0, 0);
}
@Test(timeout = 60000)
public void
testConvertLastBlockToUnderConstructionDoesNotCauseSkippedReplication()
throws IOException {
Namesystem mockNS = mock(Namesystem.class);
when(mockNS.isPopulatingReplQueues()).thenReturn(true);
FSClusterStats mockStats = mock(FSClusterStats.class);
BlockManager bm =
new BlockManager(mockNS, mockStats, new HdfsConfiguration());
UnderReplicatedBlocks underReplicatedBlocks = bm.neededReplications;
Block block1 = new Block(random.nextLong());
Block block2 = new Block(random.nextLong());
// Adding QUEUE_UNDER_REPLICATED block
underReplicatedBlocks.add(block1, 0, 1, 1);
// Adding QUEUE_UNDER_REPLICATED block
underReplicatedBlocks.add(block2, 0, 1, 1);
List<List<Block>> chosenBlocks;
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 1, 0, 0, 0, 0);
final BlockInfo info = new BlockInfo(block1, 1);
final MutableBlockCollection mbc = mock(MutableBlockCollection.class);
when(mbc.getLastBlock()).thenReturn(info);
when(mbc.getPreferredBlockSize()).thenReturn(block1.getNumBytes() + 1);
when(mbc.getBlockReplication()).thenReturn((short)1);
ContentSummary cs = mock(ContentSummary.class);
when(cs.getLength()).thenReturn((long)1);
when(mbc.computeContentSummary()).thenReturn(cs);
info.setBlockCollection(mbc);
bm.addBlockCollection(info, mbc);
DatanodeDescriptor[] dnAry = {dataNodes[0]};
final BlockInfoUnderConstruction ucBlock =
info.convertToBlockUnderConstruction(BlockUCState.UNDER_CONSTRUCTION,
dnAry);
when(mbc.setLastBlock((BlockInfo) any(), (DatanodeDescriptor[]) any()))
.thenReturn(ucBlock);
bm.convertLastBlockToUnderConstruction(mbc);
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
// This block remains and should not be skipped over.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 1, 0, 0, 0, 0);
}
@Test(timeout = 60000)
public void testupdateNeededReplicationsDoesNotCauseSkippedReplication()
throws IOException {
Namesystem mockNS = mock(Namesystem.class);
when(mockNS.isPopulatingReplQueues()).thenReturn(true);
FSClusterStats mockStats = mock(FSClusterStats.class);
BlockManager bm =
new BlockManager(mockNS, mockStats, new HdfsConfiguration());
UnderReplicatedBlocks underReplicatedBlocks = bm.neededReplications;
Block block1 = new Block(random.nextLong());
Block block2 = new Block(random.nextLong());
// Adding QUEUE_UNDER_REPLICATED block
underReplicatedBlocks.add(block1, 0, 1, 1);
// Adding QUEUE_UNDER_REPLICATED block
underReplicatedBlocks.add(block2, 0, 1, 1);
List<List<Block>> chosenBlocks;
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 1, 0, 0, 0, 0);
bm.setReplication((short)0, (short)1, "", block1);
// Choose 1 block from UnderReplicatedBlocks. Then it should pick 1 block
// from QUEUE_VERY_UNDER_REPLICATED.
// This block remains and should not be skipped over.
chosenBlocks = underReplicatedBlocks.chooseUnderReplicatedBlocks(1);
assertTheChosenBlocks(chosenBlocks, 1, 0, 0, 0, 0);
}
}
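Taken together, the assertions above imply a priority rule roughly like the sketch below. This is only an inference from the test data, not the authoritative UnderReplicatedBlocks logic:

// Inference only -- not part of the patch.
final class InferredPriority {
  // Queue indices as used by assertTheChosenBlocks(...): 0 = highest priority,
  // 1 = QUEUE_VERY_UNDER_REPLICATED, 2 = QUEUE_UNDER_REPLICATED.
  static int of(int curReplicas, int decommissioned, int expectedReplicas) {
    if (curReplicas == 0 && decommissioned > 0) {
      return 0;                                          // add(block, 0, 1, 1) above
    }
    return (curReplicas * 3 < expectedReplicas) ? 1 : 2; // 2 of 7 -> 1, 2 of 6 -> 2
  }
}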

View File

@ -325,10 +325,19 @@ public void testClear() {
assertEquals(NUM, set.size());
assertFalse(set.isEmpty());
// Advance the bookmark.
Iterator<Integer> bkmrkIt = set.getBookmark();
for (int i=0; i<set.size()/2+1; i++) {
bkmrkIt.next();
}
assertTrue(bkmrkIt.hasNext());
// clear the set
set.clear();
assertEquals(0, set.size());
assertTrue(set.isEmpty());
bkmrkIt = set.getBookmark();
assertFalse(bkmrkIt.hasNext());
// poll should return an empty list
assertEquals(0, set.pollAll().size());
@ -363,4 +372,64 @@ public void testOther() {
LOG.info("Test capacity - DONE");
}
@Test(timeout=60000)
public void testGetBookmarkReturnsBookmarkIterator() {
LOG.info("Test getBookmark returns proper iterator");
assertTrue(set.addAll(list));
Iterator<Integer> bookmark = set.getBookmark();
assertEquals(bookmark.next(), list.get(0));
final int numAdvance = list.size()/2;
for(int i=1; i<numAdvance; i++) {
bookmark.next();
}
Iterator<Integer> bookmark2 = set.getBookmark();
assertEquals(bookmark2.next(), list.get(numAdvance));
}
@Test(timeout=60000)
public void testBookmarkAdvancesOnRemoveOfSameElement() {
LOG.info("Test that the bookmark advances if we remove its element.");
assertTrue(set.add(list.get(0)));
assertTrue(set.add(list.get(1)));
assertTrue(set.add(list.get(2)));
Iterator<Integer> it = set.getBookmark();
assertEquals(it.next(), list.get(0));
set.remove(list.get(1));
it = set.getBookmark();
assertEquals(it.next(), list.get(2));
}
@Test(timeout=60000)
public void testBookmarkSetToHeadOnAddToEmpty() {
LOG.info("Test bookmark is set after adding to previously empty set.");
Iterator<Integer> it = set.getBookmark();
assertFalse(it.hasNext());
set.add(list.get(0));
set.add(list.get(1));
it = set.getBookmark();
assertTrue(it.hasNext());
assertEquals(it.next(), list.get(0));
assertEquals(it.next(), list.get(1));
assertFalse(it.hasNext());
}
@Test(timeout=60000)
public void testResetBookmarkPlacesBookmarkAtHead() {
set.addAll(list);
Iterator<Integer> it = set.getBookmark();
final int numAdvance = set.size()/2;
for (int i=0; i<numAdvance; i++) {
it.next();
}
assertEquals(it.next(), list.get(numAdvance));
set.resetBookmark();
it = set.getBookmark();
assertEquals(it.next(), list.get(0));
}
}
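A minimal usage sketch of the bookmark API exercised above, with method names taken from the tests (LightWeightLinkedSet is an HDFS-internal utility class):

import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;

public class BookmarkSketch {
  public static void main(String[] args) {
    LightWeightLinkedSet<Integer> pending = new LightWeightLinkedSet<Integer>();
    pending.addAll(Arrays.asList(1, 2, 3, 4));

    // Advancing the bookmark iterator moves the shared bookmark forward.
    Iterator<Integer> first = pending.getBookmark();
    first.next(); // consume 1
    first.next(); // consume 2

    Iterator<Integer> second = pending.getBookmark();
    System.out.println(second.next()); // prints 3, not 1

    pending.resetBookmark();
    System.out.println(pending.getBookmark().next()); // prints 1 again
  }
}

The semantics shown let a caller resume iteration across calls instead of always restarting from the head of the set.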

View File

@ -157,6 +157,9 @@ Release 2.3.0 - UNRELEASED
IMPROVEMENTS
MAPREDUCE-434. LocalJobRunner limited to single reducer (Sandy Ryza and
Aaron Kimball via Sandy Ryza)
OPTIMIZATIONS
MAPREDUCE-1981. Improve getSplits performance by using listLocatedStatus
@ -167,24 +170,12 @@ Release 2.3.0 - UNRELEASED
MAPREDUCE-5316. job -list-attempt-ids command does not handle illegal
task-state (Ashwin Shankar via jlowe)
MAPREDUCE-3193. FileInputFormat doesn't read files recursively in the
input path dir (Devaraj K via jlowe)
MAPREDUCE-5358. MRAppMaster throws invalid transitions for JobImpl
(Devaraj K via jlowe)
MAPREDUCE-5380. Invalid mapred command should return non-zero exit code
(Stephen Chu via jlowe)
MAPREDUCE-5404. HSAdminServer does not use ephemeral ports in minicluster
mode (Ted Yu via jlowe)
MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
jlowe)
MAPREDUCE-5251. Reducer should not implicate map attempt if it has
insufficient space to fetch map output (Ashwin Shankar via jlowe)
Release 2.1.1-beta - UNRELEASED
INCOMPATIBLE CHANGES
@ -198,11 +189,37 @@ Release 2.1.1-beta - UNRELEASED
MAPREDUCE-5352. Optimize node local splits generated by
CombineFileInputFormat. (sseth)
MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race
conditions (jlowe via kihwal)
BUG FIXES
MAPREDUCE-5385. Fixed a bug with JobContext getCacheFiles API. (Omkar Vinit
Joshi via vinodkv)
MAPREDUCE-5428. HistoryFileManager doesn't stop threads when service is
stopped (Karthik Kambatla via jlowe)
MAPREDUCE-5251. Reducer should not implicate map attempt if it has
insufficient space to fetch map output (Ashwin Shankar via jlowe)
MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
jlowe)
MAPREDUCE-5358. MRAppMaster throws invalid transitions for JobImpl
(Devaraj K via jlowe)
MAPREDUCE-3193. FileInputFormat doesn't read files recursively in the
input path dir (Devaraj K via jlowe)
MAPREDUCE-5440. TestCopyCommitter Fails on JDK7 (Robert Parker via jlowe)
MAPREDUCE-5367. Local jobs all use same local working directory
(Sandy Ryza)
MAPREDUCE-5425. Junit in TestJobHistoryServer failing in jdk 7 (Robert
Parker via jlowe)
Release 2.1.0-beta - 2013-08-06
INCOMPATIBLE CHANGES
@ -636,8 +653,8 @@ Release 2.1.0-beta - 2013-08-06
MAPREDUCE-5419. TestSlive is getting FileNotFound Exception (Robert Parker
via jlowe)
MAPREDUCE-5428. HistoryFileManager doesn't stop threads when service is
stopped (Karthik Kambatla via jlowe)
MAPREDUCE-5399. Unnecessary Configuration instantiation in IFileInputStream
slows down merge. (Stanislav Barton via Sandy Ryza)
BREAKDOWN OF HADOOP-8562 SUBTASKS
@ -1273,6 +1290,8 @@ Release 0.23.10 - UNRELEASED
MAPREDUCE-5419. TestSlive is getting FileNotFound Exception (Robert Parker
via jlowe)
MAPREDUCE-5440. TestCopyCommitter Fails on JDK7 (Robert Parker via jlowe)
Release 0.23.9 - 2013-07-08
INCOMPATIBLE CHANGES

View File

@ -357,15 +357,20 @@ public void waitForState(Job job, JobState finalState) throws Exception {
}
public void waitForState(Service.STATE finalState) throws Exception {
int timeoutSecs = 0;
while (!finalState.equals(getServiceState()) && timeoutSecs++ < 20) {
System.out.println("MRApp State is : " + getServiceState()
+ " Waiting for state : " + finalState);
Thread.sleep(500);
if (finalState == Service.STATE.STOPPED) {
Assert.assertTrue("Timeout while waiting for MRApp to stop",
waitForServiceToStop(20 * 1000));
} else {
int timeoutSecs = 0;
while (!finalState.equals(getServiceState()) && timeoutSecs++ < 20) {
System.out.println("MRApp State is : " + getServiceState()
+ " Waiting for state : " + finalState);
Thread.sleep(500);
}
System.out.println("MRApp State is : " + getServiceState());
Assert.assertEquals("MRApp state is not correct (timedout)", finalState,
getServiceState());
}
System.out.println("MRApp State is : " + getServiceState());
Assert.assertEquals("MRApp state is not correct (timedout)", finalState,
getServiceState());
}
public void verifyCompleted() {

View File

@ -79,11 +79,15 @@ public class LocalJobRunner implements ClientProtocol {
public static final String LOCAL_MAX_MAPS =
"mapreduce.local.map.tasks.maximum";
/** The maximum number of reduce tasks to run in parallel in LocalJobRunner */
public static final String LOCAL_MAX_REDUCES =
"mapreduce.local.reduce.tasks.maximum";
private FileSystem fs;
private HashMap<JobID, Job> jobs = new HashMap<JobID, Job>();
private JobConf conf;
private AtomicInteger map_tasks = new AtomicInteger(0);
private int reduce_tasks = 0;
private AtomicInteger reduce_tasks = new AtomicInteger(0);
final Random rand = new Random();
private LocalJobRunnerMetrics myMetrics = null;
@ -115,9 +119,11 @@ private class Job extends Thread implements TaskUmbilicalProtocol {
private JobConf job;
private int numMapTasks;
private int numReduceTasks;
private float [] partialMapProgress;
private float [] partialReduceProgress;
private Counters [] mapCounters;
private Counters reduceCounters;
private Counters [] reduceCounters;
private JobStatus status;
private List<TaskAttemptID> mapIds = Collections.synchronizedList(
@ -146,7 +152,9 @@ public Job(JobID jobid, String jobSubmitDir) throws IOException {
this.id = jobid;
JobConf conf = new JobConf(systemJobFile);
this.localFs = FileSystem.getLocal(conf);
this.localJobDir = localFs.makeQualified(conf.getLocalPath(jobDir));
String user = UserGroupInformation.getCurrentUser().getShortUserName();
this.localJobDir = localFs.makeQualified(new Path(
new Path(conf.getLocalPath(jobDir), user), jobid.toString()));
this.localJobFile = new Path(this.localJobDir, id + ".xml");
// Manage the distributed cache. If there are files to be copied,
@ -182,10 +190,14 @@ public Job(JobID jobid, String jobSubmitDir) throws IOException {
this.start();
}
protected abstract class RunnableWithThrowable implements Runnable {
public volatile Throwable storedException;
}
/**
* A Runnable instance that handles a map task to be run by an executor.
*/
protected class MapTaskRunnable implements Runnable {
protected class MapTaskRunnable extends RunnableWithThrowable {
private final int taskId;
private final TaskSplitMetaInfo info;
private final JobID jobId;
@ -196,8 +208,6 @@ protected class MapTaskRunnable implements Runnable {
// where to fetch mapper outputs.
private final Map<TaskAttemptID, MapOutputFile> mapOutputFiles;
public volatile Throwable storedException;
public MapTaskRunnable(TaskSplitMetaInfo info, int taskId, JobID jobId,
Map<TaskAttemptID, MapOutputFile> mapOutputFiles) {
this.info = info;
@ -217,7 +227,7 @@ public void run() {
info.getSplitIndex(), 1);
map.setUser(UserGroupInformation.getCurrentUser().
getShortUserName());
setupChildMapredLocalDirs(map, localConf);
setupChildMapredLocalDirs(localJobDir, map, localConf);
MapOutputFile mapOutput = new MROutputFiles();
mapOutput.setConf(localConf);
@ -251,12 +261,13 @@ public void run() {
* @param mapOutputFiles a mapping from task attempts to output files
* @return a List of Runnables, one per map task.
*/
protected List<MapTaskRunnable> getMapTaskRunnables(
protected List<RunnableWithThrowable> getMapTaskRunnables(
TaskSplitMetaInfo [] taskInfo, JobID jobId,
Map<TaskAttemptID, MapOutputFile> mapOutputFiles) {
int numTasks = 0;
ArrayList<MapTaskRunnable> list = new ArrayList<MapTaskRunnable>();
ArrayList<RunnableWithThrowable> list =
new ArrayList<RunnableWithThrowable>();
for (TaskSplitMetaInfo task : taskInfo) {
list.add(new MapTaskRunnable(task, numTasks++, jobId,
mapOutputFiles));
@ -265,12 +276,89 @@ protected List<MapTaskRunnable> getMapTaskRunnables(
return list;
}
protected class ReduceTaskRunnable extends RunnableWithThrowable {
private final int taskId;
private final JobID jobId;
private final JobConf localConf;
// This is a reference to a shared object passed in by the
// external context; this delivers state to the reducers regarding
// where to fetch mapper outputs.
private final Map<TaskAttemptID, MapOutputFile> mapOutputFiles;
public ReduceTaskRunnable(int taskId, JobID jobId,
Map<TaskAttemptID, MapOutputFile> mapOutputFiles) {
this.taskId = taskId;
this.jobId = jobId;
this.mapOutputFiles = mapOutputFiles;
this.localConf = new JobConf(job);
this.localConf.set("mapreduce.jobtracker.address", "local");
}
public void run() {
try {
TaskAttemptID reduceId = new TaskAttemptID(new TaskID(
jobId, TaskType.REDUCE, taskId), 0);
LOG.info("Starting task: " + reduceId);
ReduceTask reduce = new ReduceTask(systemJobFile.toString(),
reduceId, taskId, mapIds.size(), 1);
reduce.setUser(UserGroupInformation.getCurrentUser().
getShortUserName());
setupChildMapredLocalDirs(localJobDir, reduce, localConf);
reduce.setLocalMapFiles(mapOutputFiles);
if (!Job.this.isInterrupted()) {
reduce.setJobFile(localJobFile.toString());
localConf.setUser(reduce.getUser());
reduce.localizeConfiguration(localConf);
reduce.setConf(localConf);
try {
reduce_tasks.getAndIncrement();
myMetrics.launchReduce(reduce.getTaskID());
reduce.run(localConf, Job.this);
myMetrics.completeReduce(reduce.getTaskID());
} finally {
reduce_tasks.getAndDecrement();
}
LOG.info("Finishing task: " + reduceId);
} else {
throw new InterruptedException();
}
} catch (Throwable t) {
// store this to be rethrown in the initial thread context.
this.storedException = t;
}
}
}
/**
* Create Runnables to encapsulate reduce tasks for use by the executor
* service.
* @param jobId the job id
* @param mapOutputFiles a mapping from task attempts to output files
* @return a List of Runnables, one per reduce task.
*/
protected List<RunnableWithThrowable> getReduceTaskRunnables(
JobID jobId, Map<TaskAttemptID, MapOutputFile> mapOutputFiles) {
int taskId = 0;
ArrayList<RunnableWithThrowable> list =
new ArrayList<RunnableWithThrowable>();
for (int i = 0; i < this.numReduceTasks; i++) {
list.add(new ReduceTaskRunnable(taskId++, jobId, mapOutputFiles));
}
return list;
}
/**
* Initialize the counters that will hold partial-progress from
* the various task attempts.
* @param numMaps the number of map tasks in this job.
*/
private synchronized void initCounters(int numMaps) {
private synchronized void initCounters(int numMaps, int numReduces) {
// Initialize state trackers for all map tasks.
this.partialMapProgress = new float[numMaps];
this.mapCounters = new Counters[numMaps];
@ -278,16 +366,22 @@ private synchronized void initCounters(int numMaps) {
this.mapCounters[i] = new Counters();
}
this.reduceCounters = new Counters();
this.partialReduceProgress = new float[numReduces];
this.reduceCounters = new Counters[numReduces];
for (int i = 0; i < numReduces; i++) {
this.reduceCounters[i] = new Counters();
}
this.numMapTasks = numMaps;
this.numReduceTasks = numReduces;
}
/**
* Creates the executor service used to run map tasks.
*
* @param numMapTasks the total number of map tasks to be run
* @return an ExecutorService instance that handles map tasks
*/
protected ExecutorService createMapExecutor(int numMapTasks) {
protected synchronized ExecutorService createMapExecutor() {
// Determine the size of the thread pool to use
int maxMapThreads = job.getInt(LOCAL_MAX_MAPS, 1);
@ -295,13 +389,10 @@ protected ExecutorService createMapExecutor(int numMapTasks) {
throw new IllegalArgumentException(
"Configured " + LOCAL_MAX_MAPS + " must be >= 1");
}
this.numMapTasks = numMapTasks;
maxMapThreads = Math.min(maxMapThreads, this.numMapTasks);
maxMapThreads = Math.max(maxMapThreads, 1); // In case of no tasks.
initCounters(this.numMapTasks);
LOG.debug("Starting thread pool executor.");
LOG.debug("Starting mapper thread pool executor.");
LOG.debug("Max local threads: " + maxMapThreads);
LOG.debug("Map tasks to process: " + this.numMapTasks);
@ -314,6 +405,65 @@ protected ExecutorService createMapExecutor(int numMapTasks) {
return executor;
}
/**
* Creates the executor service used to run reduce tasks.
*
* @return an ExecutorService instance that handles reduce tasks
*/
protected synchronized ExecutorService createReduceExecutor() {
// Determine the size of the thread pool to use
int maxReduceThreads = job.getInt(LOCAL_MAX_REDUCES, 1);
if (maxReduceThreads < 1) {
throw new IllegalArgumentException(
"Configured " + LOCAL_MAX_REDUCES + " must be >= 1");
}
maxReduceThreads = Math.min(maxReduceThreads, this.numReduceTasks);
maxReduceThreads = Math.max(maxReduceThreads, 1); // In case of no tasks.
LOG.debug("Starting reduce thread pool executor.");
LOG.debug("Max local threads: " + maxReduceThreads);
LOG.debug("Reduce tasks to process: " + this.numReduceTasks);
// Create a new executor service to drain the work queue.
ExecutorService executor = Executors.newFixedThreadPool(maxReduceThreads);
return executor;
}
/** Run a set of tasks and waits for them to complete. */
private void runTasks(List<RunnableWithThrowable> runnables,
ExecutorService service, String taskType) throws Exception {
// Start populating the executor with work units.
// They may begin running immediately (in other threads).
for (Runnable r : runnables) {
service.submit(r);
}
try {
service.shutdown(); // Instructs queue to drain.
// Wait for tasks to finish; do not use a time-based timeout.
// (See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6179024)
LOG.info("Waiting for " + taskType + " tasks");
service.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
} catch (InterruptedException ie) {
// Cancel all threads.
service.shutdownNow();
throw ie;
}
LOG.info(taskType + " task executor complete.");
// After waiting for the tasks to complete, if any of these
// have thrown an exception, rethrow it now in the main thread context.
for (RunnableWithThrowable r : runnables) {
if (r.storedException != null) {
throw new Exception(r.storedException);
}
}
}
private org.apache.hadoop.mapreduce.OutputCommitter
createOutputCommitter(boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
org.apache.hadoop.mapreduce.OutputCommitter committer = null;
@ -358,94 +508,25 @@ public void run() {
SplitMetaInfoReader.readSplitMetaInfo(jobId, localFs, conf, systemJobDir);
int numReduceTasks = job.getNumReduceTasks();
if (numReduceTasks > 1 || numReduceTasks < 0) {
// we only allow 0 or 1 reducer in local mode
numReduceTasks = 1;
job.setNumReduceTasks(1);
}
outputCommitter.setupJob(jContext);
status.setSetupProgress(1.0f);
Map<TaskAttemptID, MapOutputFile> mapOutputFiles =
Collections.synchronizedMap(new HashMap<TaskAttemptID, MapOutputFile>());
List<MapTaskRunnable> taskRunnables = getMapTaskRunnables(taskSplitMetaInfos,
jobId, mapOutputFiles);
ExecutorService mapService = createMapExecutor(taskRunnables.size());
List<RunnableWithThrowable> mapRunnables = getMapTaskRunnables(
taskSplitMetaInfos, jobId, mapOutputFiles);
// Start populating the executor with work units.
// They may begin running immediately (in other threads).
for (Runnable r : taskRunnables) {
mapService.submit(r);
}
initCounters(mapRunnables.size(), numReduceTasks);
ExecutorService mapService = createMapExecutor();
runTasks(mapRunnables, mapService, "map");
try {
mapService.shutdown(); // Instructs queue to drain.
// Wait for tasks to finish; do not use a time-based timeout.
// (See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6179024)
LOG.info("Waiting for map tasks");
mapService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
} catch (InterruptedException ie) {
// Cancel all threads.
mapService.shutdownNow();
throw ie;
}
LOG.info("Map task executor complete.");
// After waiting for the map tasks to complete, if any of these
// have thrown an exception, rethrow it now in the main thread context.
for (MapTaskRunnable r : taskRunnables) {
if (r.storedException != null) {
throw new Exception(r.storedException);
}
}
TaskAttemptID reduceId =
new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
try {
if (numReduceTasks > 0) {
ReduceTask reduce = new ReduceTask(systemJobFile.toString(),
reduceId, 0, mapIds.size(), 1);
reduce.setUser(UserGroupInformation.getCurrentUser().
getShortUserName());
JobConf localConf = new JobConf(job);
localConf.set("mapreduce.jobtracker.address", "local");
setupChildMapredLocalDirs(reduce, localConf);
// move map output to reduce input
for (int i = 0; i < mapIds.size(); i++) {
if (!this.isInterrupted()) {
TaskAttemptID mapId = mapIds.get(i);
Path mapOut = mapOutputFiles.get(mapId).getOutputFile();
MapOutputFile localOutputFile = new MROutputFiles();
localOutputFile.setConf(localConf);
Path reduceIn =
localOutputFile.getInputFileForWrite(mapId.getTaskID(),
localFs.getFileStatus(mapOut).getLen());
if (!localFs.mkdirs(reduceIn.getParent())) {
throw new IOException("Mkdirs failed to create "
+ reduceIn.getParent().toString());
}
if (!localFs.rename(mapOut, reduceIn))
throw new IOException("Couldn't rename " + mapOut);
} else {
throw new InterruptedException();
}
}
if (!this.isInterrupted()) {
reduce.setJobFile(localJobFile.toString());
localConf.setUser(reduce.getUser());
reduce.localizeConfiguration(localConf);
reduce.setConf(localConf);
reduce_tasks += 1;
myMetrics.launchReduce(reduce.getTaskID());
reduce.run(localConf, this);
myMetrics.completeReduce(reduce.getTaskID());
reduce_tasks -= 1;
} else {
throw new InterruptedException();
}
List<RunnableWithThrowable> reduceRunnables = getReduceTaskRunnables(
jobId, mapOutputFiles);
ExecutorService reduceService = createReduceExecutor();
runTasks(reduceRunnables, reduceService, "reduce");
}
} finally {
for (MapOutputFile output : mapOutputFiles.values()) {
@ -463,7 +544,6 @@ public void run() {
}
JobEndNotifier.localRunnerNotification(job, status);
} catch (Throwable t) {
try {
outputCommitter.abortJob(jContext,
@ -509,12 +589,13 @@ public synchronized boolean statusUpdate(TaskAttemptID taskId,
new ByteArrayInputStream(baos.toByteArray())));
LOG.info(taskStatus.getStateString());
int taskIndex = mapIds.indexOf(taskId);
if (taskIndex >= 0) { // mapping
int mapTaskIndex = mapIds.indexOf(taskId);
if (mapTaskIndex >= 0) {
// mapping
float numTasks = (float) this.numMapTasks;
partialMapProgress[taskIndex] = taskStatus.getProgress();
mapCounters[taskIndex] = taskStatus.getCounters();
partialMapProgress[mapTaskIndex] = taskStatus.getProgress();
mapCounters[mapTaskIndex] = taskStatus.getCounters();
float partialProgress = 0.0f;
for (float f : partialMapProgress) {
@ -522,8 +603,18 @@ public synchronized boolean statusUpdate(TaskAttemptID taskId,
}
status.setMapProgress(partialProgress / numTasks);
} else {
reduceCounters = taskStatus.getCounters();
status.setReduceProgress(taskStatus.getProgress());
// reducing
int reduceTaskIndex = taskId.getTaskID().getId();
float numTasks = (float) this.numReduceTasks;
partialReduceProgress[reduceTaskIndex] = taskStatus.getProgress();
reduceCounters[reduceTaskIndex] = taskStatus.getCounters();
float partialProgress = 0.0f;
for (float f : partialReduceProgress) {
partialProgress += f;
}
status.setReduceProgress(partialProgress / numTasks);
}
// ignore phase
@ -543,7 +634,13 @@ public synchronized Counters getCurrentCounters() {
for (Counters c : mapCounters) {
current = Counters.sum(current, c);
}
current = Counters.sum(current, reduceCounters);
if (null != reduceCounters && reduceCounters.length > 0) {
for (Counters c : reduceCounters) {
current = Counters.sum(current, c);
}
}
return current;
}
@ -682,8 +779,9 @@ public String getFilesystemName() throws IOException {
public ClusterMetrics getClusterMetrics() {
int numMapTasks = map_tasks.get();
return new ClusterMetrics(numMapTasks, reduce_tasks, numMapTasks,
reduce_tasks, 0, 0, 1, 1, jobs.size(), 1, 0, 0);
int numReduceTasks = reduce_tasks.get();
return new ClusterMetrics(numMapTasks, numReduceTasks, numMapTasks,
numReduceTasks, 0, 0, 1, 1, jobs.size(), 1, 0, 0);
}
public JobTrackerStatus getJobTrackerStatus() {
@ -814,6 +912,27 @@ public static int getLocalMaxRunningMaps(
return job.getConfiguration().getInt(LOCAL_MAX_MAPS, 1);
}
/**
* Set the max number of reduce tasks to run concurrently in the LocalJobRunner.
* @param job the job to configure
* @param maxReduces the maximum number of reduce tasks to allow.
*/
public static void setLocalMaxRunningReduces(
org.apache.hadoop.mapreduce.JobContext job,
int maxReduces) {
job.getConfiguration().setInt(LOCAL_MAX_REDUCES, maxReduces);
}
/**
* @return the max number of reduce tasks to run concurrently in the
* LocalJobRunner.
*/
public static int getLocalMaxRunningReduces(
org.apache.hadoop.mapreduce.JobContext job) {
return job.getConfiguration().getInt(LOCAL_MAX_REDUCES, 1);
}
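A minimal sketch of how a job might use the new knob (job name and reducer count are illustrative; assumes the local framework is selected):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRConfig;

public class LocalReducesSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME); // "local"
    // New knob from this patch: run up to 4 reduce tasks in parallel locally.
    conf.setInt("mapreduce.local.reduce.tasks.maximum", 4);
    Job job = Job.getInstance(conf, "local-multi-reduce");
    job.setNumReduceTasks(4);   // no longer clamped to a single reducer, per the change above
    // ... set mapper/reducer/input/output as usual, then job.waitForCompletion(true)
  }
}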
@Override
public void cancelDelegationToken(Token<DelegationTokenIdentifier> token
) throws IOException,
@ -839,31 +958,27 @@ public LogParams getLogFileParams(org.apache.hadoop.mapreduce.JobID jobID,
throw new UnsupportedOperationException("Not supported");
}
static void setupChildMapredLocalDirs(Task t, JobConf conf) {
static void setupChildMapredLocalDirs(Path localJobDir, Task t, JobConf conf) {
String[] localDirs = conf.getTrimmedStrings(MRConfig.LOCAL_DIR);
String jobId = t.getJobID().toString();
String taskId = t.getTaskID().toString();
boolean isCleanup = t.isTaskCleanupTask();
String user = t.getUser();
StringBuffer childMapredLocalDir =
new StringBuffer(localDirs[0] + Path.SEPARATOR
+ getLocalTaskDir(user, jobId, taskId, isCleanup));
+ getLocalTaskDir(localJobDir, taskId, isCleanup));
for (int i = 1; i < localDirs.length; i++) {
childMapredLocalDir.append("," + localDirs[i] + Path.SEPARATOR
+ getLocalTaskDir(user, jobId, taskId, isCleanup));
+ getLocalTaskDir(localJobDir, taskId, isCleanup));
}
LOG.debug(MRConfig.LOCAL_DIR + " for child : " + childMapredLocalDir);
conf.set(MRConfig.LOCAL_DIR, childMapredLocalDir.toString());
}
static final String TASK_CLEANUP_SUFFIX = ".cleanup";
static final String SUBDIR = jobDir;
static final String JOBCACHE = "jobcache";
static String getLocalTaskDir(String user, String jobid, String taskid,
static String getLocalTaskDir(Path localJobDir, String taskid,
boolean isCleanupAttempt) {
String taskDir = SUBDIR + Path.SEPARATOR + user + Path.SEPARATOR + JOBCACHE
+ Path.SEPARATOR + jobid + Path.SEPARATOR + taskid;
String taskDir = localJobDir.toString() + Path.SEPARATOR + taskid;
if (isCleanupAttempt) {
taskDir = taskDir + TASK_CLEANUP_SUFFIX;
}

View File

@ -82,6 +82,8 @@ public class BackupStore<K,V> {
private boolean clearMarkFlag = false;
private boolean lastSegmentEOF = false;
private Configuration conf;
public BackupStore(Configuration conf, TaskAttemptID taskid)
throws IOException {
@ -106,6 +108,8 @@ public BackupStore(Configuration conf, TaskAttemptID taskid)
fileCache = new FileCache(conf);
tid = taskid;
this.conf = conf;
LOG.info("Created a new BackupStore with a memory of " + maxSize);
}
@ -500,7 +504,7 @@ void createInMemorySegment () throws IOException {
Reader<K, V> reader =
new org.apache.hadoop.mapreduce.task.reduce.InMemoryReader<K, V>(null,
(org.apache.hadoop.mapred.TaskAttemptID) tid,
dataOut.getData(), 0, usedSize);
dataOut.getData(), 0, usedSize, conf);
Segment<K, V> segment = new Segment<K, V>(reader, false);
segmentList.add(segment);
LOG.debug("Added Memory Segment to List. List Size is " +

View File

@ -60,6 +60,7 @@
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter;
@ -1860,7 +1861,6 @@ private void mergeParts() throws IOException, InterruptedException,
}
{
sortPhase.addPhases(partitions); // Divide sort phase into sub-phases
Merger.considerFinalMergeForProgress();
IndexRecord rec = new IndexRecord();
final SpillRecord spillRec = new SpillRecord(partitions);
@ -1893,7 +1893,8 @@ private void mergeParts() throws IOException, InterruptedException,
segmentList, mergeFactor,
new Path(mapId.toString()),
job.getOutputKeyComparator(), reporter, sortSegments,
null, spilledRecordsCounter, sortPhase.phase());
null, spilledRecordsCounter, sortPhase.phase(),
TaskType.MAP);
//write merged output to disk
long segmentStart = finalOut.getPos();

View File

@ -39,6 +39,7 @@
import org.apache.hadoop.mapred.IFile.Reader;
import org.apache.hadoop.mapred.IFile.Writer;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.util.PriorityQueue;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.util.Progressable;
@ -69,7 +70,8 @@ RawKeyValueIterator merge(Configuration conf, FileSystem fs,
throws IOException {
return
new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator,
reporter, null).merge(keyClass, valueClass,
reporter, null,
TaskType.REDUCE).merge(keyClass, valueClass,
mergeFactor, tmpDir,
readsCounter, writesCounter,
mergePhase);
@ -90,7 +92,8 @@ RawKeyValueIterator merge(Configuration conf, FileSystem fs,
throws IOException {
return
new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator,
reporter, mergedMapOutputsCounter).merge(
reporter, mergedMapOutputsCounter,
TaskType.REDUCE).merge(
keyClass, valueClass,
mergeFactor, tmpDir,
readsCounter, writesCounter,
@ -124,7 +127,8 @@ RawKeyValueIterator merge(Configuration conf, FileSystem fs,
Progress mergePhase)
throws IOException {
return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
sortSegments).merge(keyClass, valueClass,
sortSegments,
TaskType.REDUCE).merge(keyClass, valueClass,
mergeFactor, tmpDir,
readsCounter, writesCounter,
mergePhase);
@ -140,10 +144,12 @@ RawKeyValueIterator merge(Configuration conf, FileSystem fs,
boolean sortSegments,
Counters.Counter readsCounter,
Counters.Counter writesCounter,
Progress mergePhase)
Progress mergePhase,
TaskType taskType)
throws IOException {
return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
sortSegments, codec).merge(keyClass, valueClass,
sortSegments, codec,
taskType).merge(keyClass, valueClass,
mergeFactor, tmpDir,
readsCounter, writesCounter,
mergePhase);
@ -161,7 +167,8 @@ RawKeyValueIterator merge(Configuration conf, FileSystem fs,
Progress mergePhase)
throws IOException {
return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
sortSegments).merge(keyClass, valueClass,
sortSegments,
TaskType.REDUCE).merge(keyClass, valueClass,
mergeFactor, inMemSegments,
tmpDir,
readsCounter, writesCounter,
@ -182,7 +189,8 @@ RawKeyValueIterator merge(Configuration conf, FileSystem fs,
Progress mergePhase)
throws IOException {
return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
sortSegments, codec).merge(keyClass, valueClass,
sortSegments, codec,
TaskType.REDUCE).merge(keyClass, valueClass,
mergeFactor, inMemSegments,
tmpDir,
readsCounter, writesCounter,
@ -367,19 +375,6 @@ void reinitReader(int offset) throws IOException {
}
}
// Boolean variable for including/considering final merge as part of sort
// phase or not. This is true in map task, false in reduce task. It is
// used in calculating mergeProgress.
static boolean includeFinalMerge = false;
/**
* Sets the boolean variable includeFinalMerge to true. Called from
* map task before calling merge() so that final merge of map task
* is also considered as part of sort phase.
*/
static void considerFinalMergeForProgress() {
includeFinalMerge = true;
}
private static class MergeQueue<K extends Object, V extends Object>
extends PriorityQueue<Segment<K, V>> implements RawKeyValueIterator {
@ -401,6 +396,21 @@ private static class MergeQueue<K extends Object, V extends Object>
final DataInputBuffer value = new DataInputBuffer();
final DataInputBuffer diskIFileValue = new DataInputBuffer();
// Boolean variable for including/considering final merge as part of sort
// phase or not. This is true in map task, false in reduce task. It is
// used in calculating mergeProgress.
private boolean includeFinalMerge = false;
/**
* Sets the boolean variable includeFinalMerge to true. Called from
* map task before calling merge() so that final merge of map task
* is also considered as part of sort phase.
*/
private void considerFinalMergeForProgress() {
includeFinalMerge = true;
}
Segment<K, V> minSegment;
Comparator<Segment<K, V>> segmentComparator =
new Comparator<Segment<K, V>>() {
@ -419,14 +429,16 @@ public MergeQueue(Configuration conf, FileSystem fs,
CompressionCodec codec, RawComparator<K> comparator,
Progressable reporter)
throws IOException {
this(conf, fs, inputs, deleteInputs, codec, comparator, reporter, null);
this(conf, fs, inputs, deleteInputs, codec, comparator, reporter, null,
TaskType.REDUCE);
}
public MergeQueue(Configuration conf, FileSystem fs,
Path[] inputs, boolean deleteInputs,
CompressionCodec codec, RawComparator<K> comparator,
Progressable reporter,
Counters.Counter mergedMapOutputsCounter)
Counters.Counter mergedMapOutputsCounter,
TaskType taskType)
throws IOException {
this.conf = conf;
this.fs = fs;
@ -434,6 +446,10 @@ public MergeQueue(Configuration conf, FileSystem fs,
this.comparator = comparator;
this.reporter = reporter;
if (taskType == TaskType.MAP) {
considerFinalMergeForProgress();
}
for (Path file : inputs) {
LOG.debug("MergeQ: adding: " + file);
segments.add(new Segment<K, V>(conf, fs, file, codec, !deleteInputs,
@ -449,17 +465,20 @@ public MergeQueue(Configuration conf, FileSystem fs,
public MergeQueue(Configuration conf, FileSystem fs,
List<Segment<K, V>> segments, RawComparator<K> comparator,
Progressable reporter) {
this(conf, fs, segments, comparator, reporter, false);
this(conf, fs, segments, comparator, reporter, false, TaskType.REDUCE);
}
public MergeQueue(Configuration conf, FileSystem fs,
List<Segment<K, V>> segments, RawComparator<K> comparator,
Progressable reporter, boolean sortSegments) {
Progressable reporter, boolean sortSegments, TaskType taskType) {
this.conf = conf;
this.fs = fs;
this.comparator = comparator;
this.segments = segments;
this.reporter = reporter;
if (taskType == TaskType.MAP) {
considerFinalMergeForProgress();
}
if (sortSegments) {
Collections.sort(segments, segmentComparator);
}
@ -467,8 +486,10 @@ public MergeQueue(Configuration conf, FileSystem fs,
public MergeQueue(Configuration conf, FileSystem fs,
List<Segment<K, V>> segments, RawComparator<K> comparator,
Progressable reporter, boolean sortSegments, CompressionCodec codec) {
this(conf, fs, segments, comparator, reporter, sortSegments);
Progressable reporter, boolean sortSegments, CompressionCodec codec,
TaskType taskType) {
this(conf, fs, segments, comparator, reporter, sortSegments,
taskType);
this.codec = codec;
}
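One way to read the change above from a static field to per-MergeQueue state: now that LocalJobRunner runs map and reduce tasks inside a single JVM, a JVM-wide flag set by a map-side merge would leak into every reduce-side merge and skew its progress calculation. A toy illustration only, not Hadoop code:

class StaticFlagLeak {
  static boolean includeFinalMerge = false;        // old style: shared by the whole JVM

  static void mapSideMerge()    { includeFinalMerge = true;  /* ...merge()... */ }
  static void reduceSideMerge() { /* still sees true here, so mergeProgress is
                                      computed as if a final merge were pending */ }
}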

View File

@ -24,6 +24,7 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
@ -74,6 +75,10 @@ public class ReduceTask extends Task {
private CompressionCodec codec;
// If this is a LocalJobRunner-based job, this will
// be a mapping from map task attempts to their output files.
// This will be null in other cases.
private Map<TaskAttemptID, MapOutputFile> localMapFiles;
{
getProgress().setStatus("reduce");
@ -105,23 +110,23 @@ public class ReduceTask extends Task {
// file paths, the first parameter is considered smaller than the second one.
// In case of files with same size and path are considered equal.
private Comparator<FileStatus> mapOutputFileComparator =
new Comparator<FileStatus>() {
public int compare(FileStatus a, FileStatus b) {
if (a.getLen() < b.getLen())
return -1;
else if (a.getLen() == b.getLen())
if (a.getPath().toString().equals(b.getPath().toString()))
return 0;
else
return -1;
new Comparator<FileStatus>() {
public int compare(FileStatus a, FileStatus b) {
if (a.getLen() < b.getLen())
return -1;
else if (a.getLen() == b.getLen())
if (a.getPath().toString().equals(b.getPath().toString()))
return 0;
else
return 1;
}
return -1;
else
return 1;
}
};
// A sorted set for keeping a set of map output files on disk
private final SortedSet<FileStatus> mapOutputFilesOnDisk =
new TreeSet<FileStatus>(mapOutputFileComparator);
new TreeSet<FileStatus>(mapOutputFileComparator);
public ReduceTask() {
super();
@ -133,6 +138,17 @@ public ReduceTask(String jobFile, TaskAttemptID taskId,
this.numMaps = numMaps;
}
/**
* Register the set of mapper outputs created by a LocalJobRunner-based
* job with this ReduceTask so it knows where to fetch from.
*
* This should not be called in normal (networked) execution.
*/
public void setLocalMapFiles(Map<TaskAttemptID, MapOutputFile> mapFiles) {
this.localMapFiles = mapFiles;
}
private CompressionCodec initCodec() {
// check if map-outputs are to be compressed
if (conf.getCompressMapOutput()) {
@ -174,20 +190,11 @@ public void readFields(DataInput in) throws IOException {
numMaps = in.readInt();
}
// Get the input files for the reducer.
private Path[] getMapFiles(FileSystem fs, boolean isLocal)
throws IOException {
// Get the input files for the reducer (for local jobs).
private Path[] getMapFiles(FileSystem fs) throws IOException {
List<Path> fileList = new ArrayList<Path>();
if (isLocal) {
// for local jobs
for(int i = 0; i < numMaps; ++i) {
fileList.add(mapOutputFile.getInputFile(i));
}
} else {
// for non local jobs
for (FileStatus filestatus : mapOutputFilesOnDisk) {
fileList.add(filestatus.getPath());
}
for(int i = 0; i < numMaps; ++i) {
fileList.add(mapOutputFile.getInputFile(i));
}
return fileList.toArray(new Path[0]);
}
@ -343,54 +350,31 @@ public void run(JobConf job, final TaskUmbilicalProtocol umbilical)
RawKeyValueIterator rIter = null;
ShuffleConsumerPlugin shuffleConsumerPlugin = null;
boolean isLocal = false;
// local if
// 1) framework == local or
// 2) framework == null and job tracker address == local
String framework = job.get(MRConfig.FRAMEWORK_NAME);
String masterAddr = job.get(MRConfig.MASTER_ADDRESS, "local");
if ((framework == null && masterAddr.equals("local"))
|| (framework != null && framework.equals(MRConfig.LOCAL_FRAMEWORK_NAME))) {
isLocal = true;
}
Class combinerClass = conf.getCombinerClass();
CombineOutputCollector combineCollector =
(null != combinerClass) ?
new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;
if (!isLocal) {
Class combinerClass = conf.getCombinerClass();
CombineOutputCollector combineCollector =
(null != combinerClass) ?
new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;
Class<? extends ShuffleConsumerPlugin> clazz =
job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
Class<? extends ShuffleConsumerPlugin> clazz =
job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job);
LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job);
LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
ShuffleConsumerPlugin.Context shuffleContext =
new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical,
super.lDirAlloc, reporter, codec,
combinerClass, combineCollector,
spilledRecordsCounter, reduceCombineInputCounter,
shuffledMapsCounter,
reduceShuffleBytes, failedShuffleCounter,
mergedMapOutputsCounter,
taskStatus, copyPhase, sortPhase, this,
mapOutputFile, localMapFiles);
shuffleConsumerPlugin.init(shuffleContext);
rIter = shuffleConsumerPlugin.run();
ShuffleConsumerPlugin.Context shuffleContext =
new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical,
super.lDirAlloc, reporter, codec,
combinerClass, combineCollector,
spilledRecordsCounter, reduceCombineInputCounter,
shuffledMapsCounter,
reduceShuffleBytes, failedShuffleCounter,
mergedMapOutputsCounter,
taskStatus, copyPhase, sortPhase, this,
mapOutputFile);
shuffleConsumerPlugin.init(shuffleContext);
rIter = shuffleConsumerPlugin.run();
} else {
// local job runner doesn't have a copy phase
copyPhase.complete();
final FileSystem rfs = FileSystem.getLocal(job).getRaw();
rIter = Merger.merge(job, rfs, job.getMapOutputKeyClass(),
job.getMapOutputValueClass(), codec,
getMapFiles(rfs, true),
!conf.getKeepFailedTaskFiles(),
job.getInt(JobContext.IO_SORT_FACTOR, 100),
new Path(getTaskID().toString()),
job.getOutputKeyComparator(),
reporter, spilledRecordsCounter, null, null);
}
// free up the data structures
mapOutputFilesOnDisk.clear();
@ -409,9 +393,7 @@ public void run(JobConf job, final TaskUmbilicalProtocol umbilical)
keyClass, valueClass);
}
if (shuffleConsumerPlugin != null) {
shuffleConsumerPlugin.close();
}
shuffleConsumerPlugin.close();
done(umbilical, reporter);
}

View File

@ -19,6 +19,8 @@
package org.apache.hadoop.mapred;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.mapred.Task.CombineOutputCollector;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
@ -65,6 +67,7 @@ public static class Context<K,V> {
private final Progress mergePhase;
private final Task reduceTask;
private final MapOutputFile mapOutputFile;
private final Map<TaskAttemptID, MapOutputFile> localMapFiles;
public Context(org.apache.hadoop.mapreduce.TaskAttemptID reduceId,
JobConf jobConf, FileSystem localFS,
@ -80,7 +83,8 @@ public Context(org.apache.hadoop.mapreduce.TaskAttemptID reduceId,
Counters.Counter failedShuffleCounter,
Counters.Counter mergedMapOutputsCounter,
TaskStatus status, Progress copyPhase, Progress mergePhase,
Task reduceTask, MapOutputFile mapOutputFile) {
Task reduceTask, MapOutputFile mapOutputFile,
Map<TaskAttemptID, MapOutputFile> localMapFiles) {
this.reduceId = reduceId;
this.jobConf = jobConf;
this.localFS = localFS;
@ -101,6 +105,7 @@ public Context(org.apache.hadoop.mapreduce.TaskAttemptID reduceId,
this.mergePhase = mergePhase;
this.reduceTask = reduceTask;
this.mapOutputFile = mapOutputFile;
this.localMapFiles = localMapFiles;
}
public org.apache.hadoop.mapreduce.TaskAttemptID getReduceId() {
@ -163,6 +168,9 @@ public Task getReduceTask() {
public MapOutputFile getMapOutputFile() {
return mapOutputFile;
}
public Map<TaskAttemptID, MapOutputFile> getLocalMapFiles() {
return localMapFiles;
}
} // end of public static class Context<K,V>
}

View File

@ -60,7 +60,7 @@ class Fetcher<K,V> extends Thread {
/* Default read timeout (in milliseconds) */
private final static int DEFAULT_READ_TIMEOUT = 3 * 60 * 1000;
private final Reporter reporter;
protected final Reporter reporter;
private static enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP,
CONNECTION, WRONG_REDUCE}
@ -71,13 +71,13 @@ private static enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP,
private final Counters.Counter badIdErrs;
private final Counters.Counter wrongMapErrs;
private final Counters.Counter wrongReduceErrs;
private final MergeManager<K,V> merger;
private final ShuffleSchedulerImpl<K,V> scheduler;
private final ShuffleClientMetrics metrics;
private final ExceptionReporter exceptionReporter;
private final int id;
protected final MergeManager<K,V> merger;
protected final ShuffleSchedulerImpl<K,V> scheduler;
protected final ShuffleClientMetrics metrics;
protected final ExceptionReporter exceptionReporter;
protected final int id;
private static int nextId = 0;
private final int reduce;
protected final int reduce;
private final int connectionTimeout;
private final int readTimeout;

View File

@ -24,6 +24,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.mapred.IFile.Reader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
@ -41,9 +42,9 @@ public class InMemoryReader<K, V> extends Reader<K, V> {
private int length;
public InMemoryReader(MergeManagerImpl<K,V> merger, TaskAttemptID taskAttemptId,
byte[] data, int start, int length)
byte[] data, int start, int length, Configuration conf)
throws IOException {
super(null, null, length - start, null, null);
super(conf, null, length - start, null, null);
this.merger = merger;
this.taskAttemptId = taskAttemptId;

View File

@ -0,0 +1,166 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.task.reduce;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import javax.crypto.SecretKey;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.IndexRecord;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapOutputFile;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SpillRecord;
import org.apache.hadoop.mapreduce.TaskAttemptID;
/**
* LocalFetcher is used by LocalJobRunner to perform a local filesystem
* fetch.
*/
class LocalFetcher<K,V> extends Fetcher<K, V> {
private static final Log LOG = LogFactory.getLog(LocalFetcher.class);
private static final MapHost LOCALHOST = new MapHost("local", "local");
private JobConf job;
private Map<TaskAttemptID, MapOutputFile> localMapFiles;
public LocalFetcher(JobConf job, TaskAttemptID reduceId,
ShuffleSchedulerImpl<K, V> scheduler,
MergeManager<K,V> merger,
Reporter reporter, ShuffleClientMetrics metrics,
ExceptionReporter exceptionReporter,
SecretKey shuffleKey,
Map<TaskAttemptID, MapOutputFile> localMapFiles) {
super(job, reduceId, scheduler, merger, reporter, metrics,
exceptionReporter, shuffleKey);
this.job = job;
this.localMapFiles = localMapFiles;
setName("localfetcher#" + id);
setDaemon(true);
}
public void run() {
// Create a worklist of task attempts to work over.
Set<TaskAttemptID> maps = new HashSet<TaskAttemptID>();
for (TaskAttemptID map : localMapFiles.keySet()) {
maps.add(map);
}
while (maps.size() > 0) {
try {
// If merge is on, block
merger.waitForResource();
metrics.threadBusy();
// Copy as much as is possible.
doCopy(maps);
metrics.threadFree();
} catch (InterruptedException ie) {
} catch (Throwable t) {
exceptionReporter.reportException(t);
}
}
}
/**
* The crux of the matter...
*/
private void doCopy(Set<TaskAttemptID> maps) throws IOException {
Iterator<TaskAttemptID> iter = maps.iterator();
while (iter.hasNext()) {
TaskAttemptID map = iter.next();
LOG.debug("LocalFetcher " + id + " going to fetch: " + map);
if (copyMapOutput(map)) {
// Successful copy. Remove this from our worklist.
iter.remove();
} else {
// We got back a WAIT command; go back to the outer loop
// and block for InMemoryMerge.
break;
}
}
}
/**
* Retrieve the map output of a single map task
* and send it to the merger.
*/
private boolean copyMapOutput(TaskAttemptID mapTaskId) throws IOException {
// Figure out where the map task stored its output.
Path mapOutputFileName = localMapFiles.get(mapTaskId).getOutputFile();
Path indexFileName = mapOutputFileName.suffix(".index");
// Read its index to determine the location of our split
// and its size.
SpillRecord sr = new SpillRecord(indexFileName, job);
IndexRecord ir = sr.getIndex(reduce);
long compressedLength = ir.partLength;
long decompressedLength = ir.rawLength;
// Get the location for the map output - either in-memory or on-disk
MapOutput<K, V> mapOutput = merger.reserve(mapTaskId, decompressedLength,
id);
// Check if we can shuffle *now* ...
if (mapOutput == null) {
LOG.info("fetcher#" + id + " - MergeManager returned Status.WAIT ...");
return false;
}
// Go!
LOG.info("localfetcher#" + id + " about to shuffle output of map " +
mapOutput.getMapId() + " decomp: " +
decompressedLength + " len: " + compressedLength + " to " +
mapOutput.getDescription());
// now read the file, seek to the appropriate section, and send it.
FileSystem localFs = FileSystem.getLocal(job).getRaw();
FSDataInputStream inStream = localFs.open(mapOutputFileName);
try {
inStream.seek(ir.startOffset);
mapOutput.shuffle(LOCALHOST, inStream, compressedLength, decompressedLength, metrics, reporter);
} finally {
try {
inStream.close();
} catch (IOException ioe) {
LOG.warn("IOException closing inputstream from map output: "
+ ioe.toString());
}
}
scheduler.copySucceeded(mapTaskId, LOCALHOST, compressedLength, 0,
mapOutput);
return true; // successful fetch.
}
}

View File

@ -613,7 +613,7 @@ private long createInMemorySegments(List<InMemoryMapOutput<K,V>> inMemoryMapOutp
fullSize -= size;
Reader<K,V> reader = new InMemoryReader<K,V>(MergeManagerImpl.this,
mo.getMapId(),
data, 0, (int)size);
data, 0, (int)size, jobConf);
inMemorySegments.add(new Segment<K,V>(reader, true,
(mo.isPrimaryMapOutput() ?
mergedMapOutputsCounter : null)));

View File

@ -18,10 +18,12 @@
package org.apache.hadoop.mapreduce.task.reduce;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapOutputFile;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.Task;
@ -56,6 +58,7 @@ public class Shuffle<K, V> implements ShuffleConsumerPlugin<K, V>, ExceptionRepo
private Progress copyPhase;
private TaskStatus taskStatus;
private Task reduceTask; //Used for status updates
private Map<TaskAttemptID, MapOutputFile> localMapFiles;
@Override
public void init(ShuffleConsumerPlugin.Context context) {
@ -69,6 +72,7 @@ public void init(ShuffleConsumerPlugin.Context context) {
this.copyPhase = context.getCopyPhase();
this.taskStatus = context.getStatus();
this.reduceTask = context.getReduceTask();
this.localMapFiles = context.getLocalMapFiles();
scheduler = new ShuffleSchedulerImpl<K, V>(jobConf, taskStatus, reduceId,
this, copyPhase, context.getShuffledMapsCounter(),
@ -103,13 +107,22 @@ public RawKeyValueIterator run() throws IOException, InterruptedException {
eventFetcher.start();
// Start the map-output fetcher threads
final int numFetchers = jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
boolean isLocal = localMapFiles != null;
final int numFetchers = isLocal ? 1 :
jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);
Fetcher<K,V>[] fetchers = new Fetcher[numFetchers];
for (int i=0; i < numFetchers; ++i) {
fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger,
reporter, metrics, this,
reduceTask.getShuffleSecret());
fetchers[i].start();
if (isLocal) {
fetchers[0] = new LocalFetcher<K, V>(jobConf, reduceId, scheduler,
merger, reporter, metrics, this, reduceTask.getShuffleSecret(),
localMapFiles);
fetchers[0].start();
} else {
for (int i=0; i < numFetchers; ++i) {
fetchers[i] = new Fetcher<K,V>(jobConf, reduceId, scheduler, merger,
reporter, metrics, this,
reduceTask.getShuffleSecret());
fetchers[i].start();
}
}
// Wait for shuffle to complete successfully
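A usage note on the fetcher-count change above: a local job now always runs exactly one LocalFetcher, while a distributed job keeps reading the parallel-copies setting. A minimal sketch of tuning that setting from job-submission code, using only MRJobConfig.SHUFFLE_PARALLEL_COPIES as already referenced in the patch (the class name below is purely illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class ShuffleTuningExample {
  public static void main(String[] args) throws Exception {
    // In distributed mode the reduce-side fetcher count comes from
    // MRJobConfig.SHUFFLE_PARALLEL_COPIES (default 5, as shown above);
    // in local mode the patch ignores it and always uses one LocalFetcher.
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 10);
    Job job = Job.getInstance(conf, "shuffle-tuning-example");
    // ... configure mapper, reducer, input and output as usual, then submit ...
  }
}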

View File

@ -155,7 +155,7 @@ public void testConsumerApi() {
mockCounter, mockCounter, mockCounter,
mockCounter, mockCounter, mockCounter,
mockTaskStatus, mockProgress, mockProgress,
mockTask, mockMapOutputFile);
mockTask, mockMapOutputFile, null);
shuffleConsumerPlugin.init(context);
shuffleConsumerPlugin.run();
shuffleConsumerPlugin.close();

View File

@ -32,6 +32,7 @@
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
@ -139,11 +140,13 @@ public HistoryClientService getClientService() {
return this.clientService;
}
public static void main(String[] args) {
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
static JobHistoryServer launchJobHistoryServer(String[] args) {
Thread.
setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
StringUtils.startupShutdownMessage(JobHistoryServer.class, args, LOG);
JobHistoryServer jobHistoryServer = null;
try {
JobHistoryServer jobHistoryServer = new JobHistoryServer();
jobHistoryServer = new JobHistoryServer();
ShutdownHookManager.get().addShutdownHook(
new CompositeServiceShutdownHook(jobHistoryServer),
SHUTDOWN_HOOK_PRIORITY);
@ -152,7 +155,12 @@ public static void main(String[] args) {
jobHistoryServer.start();
} catch (Throwable t) {
LOG.fatal("Error starting JobHistoryServer", t);
System.exit(-1);
ExitUtil.terminate(-1, "Error starting JobHistoryServer");
}
return jobHistoryServer;
}
public static void main(String[] args) {
launchJobHistoryServer(args);
}
}
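The point of extracting launchJobHistoryServer is that a caller gets the server instance back and a startup failure goes through ExitUtil.terminate rather than System.exit. A minimal sketch of driving the new entry point in-process, mirroring the test shown further below (illustrative only):

// ExitUtil.disableSystemExit() makes ExitUtil.terminate() throw ExitException
// instead of exiting the JVM, so the launch can be observed from a test.
ExitUtil.disableSystemExit();
try {
  JobHistoryServer server = JobHistoryServer.launchJobHistoryServer(new String[0]);
  // ... inspect the running server (service state, client bind address, ...) ...
  server.stop();
} catch (ExitUtil.ExitException e) {
  ExitUtil.resetFirstExitException();
}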

View File

@ -63,14 +63,11 @@ public class TestJobHistoryServer {
private static RecordFactory recordFactory = RecordFactoryProvider
.getRecordFactory(null);
JobHistoryServer historyServer=null;
// simple test init/start/stop JobHistoryServer. Status should change.
// simple test init/start/stop JobHistoryServer. Status should change.
@Test (timeout= 50000 )
public void testStartStopServer() throws Exception {
historyServer = new JobHistoryServer();
Configuration config = new Configuration();
historyServer.init(config);
@ -86,15 +83,9 @@ public void testStartStopServer() throws Exception {
historyServer.stop();
assertEquals(STATE.STOPPED, historyServer.getServiceState());
assertNotNull(historyService.getClientHandler().getConnectAddress());
}
//Test reports of JobHistoryServer. History server should get log files from MRApp and read them
@Test (timeout= 50000 )
public void testReports() throws Exception {
Configuration config = new Configuration();
@ -128,7 +119,6 @@ public void testReports() throws Exception {
assertEquals(1, jobs.size());
assertEquals("job_0_0000",jobs.keySet().iterator().next().toString());
Task task = job.getTasks().values().iterator().next();
TaskAttempt attempt = task.getAttempts().values().iterator().next();
@ -188,14 +178,14 @@ public void testReports() throws Exception {
assertEquals("", diagnosticResponse.getDiagnostics(0));
}
// test main method
// test launch method
@Test (timeout =60000)
public void testMainMethod() throws Exception {
public void testLaunch() throws Exception {
ExitUtil.disableSystemExit();
try {
JobHistoryServer.main(new String[0]);
historyServer = JobHistoryServer.launchJobHistoryServer(new String[0]);
} catch (ExitUtil.ExitException e) {
assertEquals(0,e.status);
ExitUtil.resetFirstExitException();

View File

@ -276,18 +276,16 @@ public void testOldCounterA() throws Exception {
// there are too few spills to combine (2 < 3)
// Each map spills 2^14 records, so maps spill 49152 records, combined.
// The reduce spill count is composed of the read from one segment and
// the intermediate merge of the other two. The intermediate merge
// The combiner has emitted 24576 records to the reducer; these are all
// fetched straight to memory from the map side. The intermediate merge
// adds 8192 records per segment read; again, there are too few spills to
// combine, so all 16834 are written to disk (total 32768 spilled records
// for the intermediate merge). The merge into the reduce includes only
// the unmerged segment, size 8192. Total spilled records in the reduce
// is 32768 from the merge + 8192 unmerged segment = 40960 records
// combine, so all Total spilled records in the reduce
// is 8192 records / map * 3 maps = 24576.
// Total: map + reduce = 49152 + 40960 = 90112
// Total: map + reduce = 49152 + 24576 = 73728
// 3 files, 5120 = 5 * 1024 rec/file = 15360 input records
// 4 records/line = 61440 output records
validateCounters(c1, 90112, 15360, 61440);
validateCounters(c1, 73728, 15360, 61440);
validateFileCounters(c1, inputSize, 0, 0, 0);
validateOldFileCounters(c1, inputSize, 61928, 0, 0);
}
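To make the updated totals above easy to verify, here is the arithmetic spelled out as a stand-alone computation (illustrative only, not part of the test):

// Map side: 3 maps, each spilling 2^14 = 16384 records.
int mapSpilled = 3 * (1 << 14);                  // 49152
// Reduce side after this change: one 8192-record segment fetched per map,
// merged straight from memory, so 8192 * 3 records are spilled.
int reduceSpilled = 3 * 8192;                    // 24576
int totalSpilled = mapSpilled + reduceSpilled;   // 73728
int inputRecords = 3 * 5 * 1024;                 // 15360 (3 files, 5120 records each)
int outputRecords = 4 * inputRecords;            // 61440 (4 records per line)
// matches validateCounters(c1, 73728, 15360, 61440)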
@ -316,12 +314,12 @@ public void testOldCounterB() throws Exception {
// 1st merge: read + write = 8192 * 4
// 2nd merge: read + write = 8192 * 4
// final merge: 0
// Total reduce: 65536
// Total reduce: 32768
// Total: map + reduce = 2^16 + 2^16 = 131072
// Total: map + reduce = 2^16 + 2^15 = 98304
// 4 files, 5120 = 5 * 1024 rec/file = 15360 input records
// 4 records/line = 81920 output records
validateCounters(c1, 131072, 20480, 81920);
validateCounters(c1, 98304, 20480, 81920);
validateFileCounters(c1, inputSize, 0, 0, 0);
}
@ -349,7 +347,7 @@ public void testOldCounterC() throws Exception {
// Total reduce: 45056
// 5 files, 5120 = 5 * 1024 rec/file = 15360 input records
// 4 records/line = 102400 output records
validateCounters(c1, 147456, 25600, 102400);
validateCounters(c1, 122880, 25600, 102400);
validateFileCounters(c1, inputSize, 0, 0, 0);
}
@ -394,7 +392,7 @@ public void testNewCounterA() throws Exception {
job, new Path(OUT_DIR, "outputN0"));
assertTrue(job.waitForCompletion(true));
final Counters c1 = Counters.downgrade(job.getCounters());
validateCounters(c1, 90112, 15360, 61440);
validateCounters(c1, 73728, 15360, 61440);
validateFileCounters(c1, inputSize, 0, 0, 0);
}
@ -416,7 +414,7 @@ public void testNewCounterB() throws Exception {
job, new Path(OUT_DIR, "outputN1"));
assertTrue(job.waitForCompletion(true));
final Counters c1 = Counters.downgrade(job.getCounters());
validateCounters(c1, 131072, 20480, 81920);
validateCounters(c1, 98304, 20480, 81920);
validateFileCounters(c1, inputSize, 0, 0, 0);
}
@ -439,7 +437,7 @@ public void testNewCounterC() throws Exception {
job, new Path(OUT_DIR, "outputN2"));
assertTrue(job.waitForCompletion(true));
final Counters c1 = Counters.downgrade(job.getCounters());
validateCounters(c1, 147456, 25600, 102400);
validateCounters(c1, 122880, 25600, 102400);
validateFileCounters(c1, inputSize, 0, 0, 0);
}

View File

@ -63,7 +63,7 @@ public void configure(String keySpec, int expect) throws Exception {
conf.setOutputValueClass(LongWritable.class);
conf.setNumMapTasks(1);
conf.setNumReduceTasks(2);
conf.setNumReduceTasks(1);
conf.setOutputFormat(TextOutputFormat.class);
conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
@ -101,9 +101,7 @@ public void configure(String keySpec, int expect) throws Exception {
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
String line = reader.readLine();
//make sure we get what we expect as the first line, and also
//that we have two lines (both the lines must end up in the same
//reducer since the partitioner takes the same key spec for all
//lines
//that we have two lines
if (expect == 1) {
assertTrue(line.startsWith(line1));
} else if (expect == 2) {

View File

@ -31,9 +31,9 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.mapred.LocalJobRunner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
@ -410,6 +410,7 @@ public boolean nextKeyValue() {
}
/** Test case for zero mappers */
@Test
public void testEmptyMaps() throws Exception {
Job job = Job.getInstance();
Path outputPath = getOutputPath();
@ -428,5 +429,145 @@ public void testEmptyMaps() throws Exception {
boolean success = job.waitForCompletion(true);
assertTrue("Empty job should work", success);
}
/** @return the directory where numberfiles are written (mapper inputs) */
private Path getNumberDirPath() {
return new Path(getInputPath(), "numberfiles");
}
/**
* Write out an input file containing an integer.
*
* @param fileNum the file number to write to.
* @param value the value to write to the file
* @return the path of the written file.
*/
private Path makeNumberFile(int fileNum, int value) throws IOException {
Path workDir = getNumberDirPath();
Path filePath = new Path(workDir, "file" + fileNum);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
OutputStream os = fs.create(filePath);
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
w.write("" + value);
w.close();
return filePath;
}
/**
* Each record received by this mapper is a number 'n'.
* Emit the values [0..n-1]
*/
public static class SequenceMapper
extends Mapper<LongWritable, Text, Text, NullWritable> {
public void map(LongWritable k, Text v, Context c)
throws IOException, InterruptedException {
int max = Integer.valueOf(v.toString());
for (int i = 0; i < max; i++) {
c.write(new Text("" + i), NullWritable.get());
}
}
}
private final static int NUMBER_FILE_VAL = 100;
/**
* Tally up the values and ensure that we got as much data
* out as we put in.
* Each mapper generated 'NUMBER_FILE_VAL' values (0..NUMBER_FILE_VAL-1).
* Verify that across all our reducers we got exactly this much
* data back.
*/
private void verifyNumberJob(int numMaps) throws Exception {
Path outputDir = getOutputPath();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
FileStatus [] stats = fs.listStatus(outputDir);
int valueSum = 0;
for (FileStatus f : stats) {
FSDataInputStream istream = fs.open(f.getPath());
BufferedReader r = new BufferedReader(new InputStreamReader(istream));
String line = null;
while ((line = r.readLine()) != null) {
valueSum += Integer.valueOf(line.trim());
}
r.close();
}
int maxVal = NUMBER_FILE_VAL - 1;
int expectedPerMapper = maxVal * (maxVal + 1) / 2;
int expectedSum = expectedPerMapper * numMaps;
LOG.info("expected sum: " + expectedSum + ", got " + valueSum);
assertEquals("Didn't get all our results back", expectedSum, valueSum);
}
/**
* Run a test which creates a SequenceMapper / IdentityReducer
* job over a set of generated number files.
*/
private void doMultiReducerTest(int numMaps, int numReduces,
int parallelMaps, int parallelReduces) throws Exception {
Path in = getNumberDirPath();
Path out = getOutputPath();
// Clear data from any previous tests.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
if (fs.exists(out)) {
fs.delete(out, true);
}
if (fs.exists(in)) {
fs.delete(in, true);
}
for (int i = 0; i < numMaps; i++) {
makeNumberFile(i, 100);
}
Job job = Job.getInstance();
job.setNumReduceTasks(numReduces);
job.setMapperClass(SequenceMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job, out);
LocalJobRunner.setLocalMaxRunningMaps(job, parallelMaps);
LocalJobRunner.setLocalMaxRunningReduces(job, parallelReduces);
boolean result = job.waitForCompletion(true);
assertTrue("Job failed!!", result);
verifyNumberJob(numMaps);
}
@Test
public void testOneMapMultiReduce() throws Exception {
doMultiReducerTest(1, 2, 1, 1);
}
@Test
public void testOneMapMultiParallelReduce() throws Exception {
doMultiReducerTest(1, 2, 1, 2);
}
@Test
public void testMultiMapOneReduce() throws Exception {
doMultiReducerTest(4, 1, 2, 1);
}
@Test
public void testMultiMapMultiReduce() throws Exception {
doMultiReducerTest(4, 4, 2, 2);
}
}
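For reference, the sum that verifyNumberJob expects follows from the Gauss formula used above; with NUMBER_FILE_VAL = 100 each mapper contributes 0 + 1 + ... + 99 (illustrative computation only):

int numberFileVal = 100;                      // NUMBER_FILE_VAL
int maxVal = numberFileVal - 1;               // 99
int perMapper = maxVal * (maxVal + 1) / 2;    // 4950
int expectedSum = perMapper * 4;              // 19800 for the four-map tests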

View File

@ -56,7 +56,7 @@ private void testComparator(String keySpec, int expect)
conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 2,
Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1,
line1 +"\n" + line2 + "\n");
job.setMapperClass(InverseMapper.class);
job.setReducerClass(Reducer.class);

View File

@ -548,12 +548,12 @@
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.6.1</version>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.1</version>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.eclipse.jdt</groupId>
@ -593,7 +593,7 @@
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.5.3</version>
<version>1.7.4</version>
</dependency>
<dependency>
<groupId>net.sf.kosmosfs</groupId>

View File

@ -169,6 +169,7 @@ public void testPreserveStatus() {
Assert.fail("Preserve status failure");
} finally {
TestDistCpUtils.delete(fs, "/tmp1");
conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
}
}

View File

@ -38,6 +38,8 @@ Release 2.1.1-beta - UNRELEASED
IMPROVEMENTS
YARN-589. Expose a REST API for monitoring the fair scheduler (Sandy Ryza).
OPTIMIZATIONS
BUG FIXES
@ -51,6 +53,18 @@ Release 2.1.1-beta - UNRELEASED
YARN-502. Fixed a state machine issue with RMNode inside ResourceManager
which was crashing scheduler. (Mayank Bansal via vinodkv)
YARN-573. Shared data structures in Public Localizer and Private Localizer
are not Thread safe. (Omkar Vinit Joshi via jlowe)
YARN-903. Changed ContainerManager to suppress unnecessary warnings when
stopping already stopped containers. (Omkar Vinit Joshi via vinodkv)
YARN-906. Fixed a bug in NodeManager where cancelling ContainerLaunch at
KILLING state caused the container to hang. (Zhijie Shen via vinodkv)
YARN-994. HeartBeat thread in AMRMClientAsync does not handle runtime
exception correctly (Xuan Gong via bikas)
Release 2.1.0-beta - 2013-08-06
INCOMPATIBLE CHANGES
@ -783,9 +797,6 @@ Release 2.1.0-beta - 2013-08-06
YARN-945. Removed setting of AMRMToken's service from ResourceManager
and changed client libraries do it all the time and correctly. (vinodkv)
YARN-573. Shared data structures in Public Localizer and Private Localizer
are not Thread safe. (Omkar Vinit Joshi via jlowe)
BREAKDOWN OF HADOOP-8562/YARN-191 SUBTASKS AND RELATED JIRAS
YARN-158. Yarn creating package-info.java must not depend on sh.
@ -851,6 +862,8 @@ Release 2.1.0-beta - 2013-08-06
YARN-909. Disable TestLinuxContainerExecutorWithMocks on Windows. (Chuan Liu
via cnauroth)
YARN-1043. Push all metrics consistently. (Jian He via acmurthy)
Release 2.0.5-alpha - 06/06/2013
INCOMPATIBLE CHANGES
@ -1149,6 +1162,9 @@ Release 0.23.10 - UNRELEASED
IMPROVEMENTS
YARN-985. Nodemanager should log where a resource was localized (Ravi
Prakash via jeagles)
OPTIMIZATIONS
BUG FIXES

View File

@ -65,7 +65,7 @@ public class AMRMClientAsyncImpl<T extends ContainerRequest>
private volatile boolean keepRunning;
private volatile float progress;
private volatile Exception savedException;
private volatile Throwable savedException;
public AMRMClientAsyncImpl(int intervalMs, CallbackHandler callbackHandler) {
this(new AMRMClientImpl<T>(), intervalMs, callbackHandler);
@ -222,18 +222,12 @@ public void run() {
try {
response = client.allocate(progress);
} catch (YarnException ex) {
LOG.error("Yarn exception on heartbeat", ex);
} catch (Throwable ex) {
LOG.error("Exception on heartbeat", ex);
savedException = ex;
// interrupt handler thread in case it is waiting on the queue
handlerThread.interrupt();
return;
} catch (IOException e) {
LOG.error("IO exception on heartbeat", e);
savedException = e;
// interrupt handler thread in case it is waiting on the queue
handlerThread.interrupt();
return;
}
}
if (response != null) {

View File

@ -277,6 +277,8 @@ protected void populateNMTokens(AllocateResponse allocateResponse) {
public void unregisterApplicationMaster(FinalApplicationStatus appStatus,
String appMessage, String appTrackingUrl) throws YarnException,
IOException {
Preconditions.checkArgument(appStatus != null,
"AppStatus should not be null.");
FinishApplicationMasterRequest request =
FinishApplicationMasterRequest.newInstance(appStatus, appMessage,
appTrackingUrl);

View File

@ -159,14 +159,26 @@ public Resource answer(InvocationOnMock invocation)
@Test(timeout=10000)
public void testAMRMClientAsyncException() throws Exception {
String exStr = "TestException";
YarnException mockException = mock(YarnException.class);
when(mockException.getMessage()).thenReturn(exStr);
runHeartBeatThrowOutException(mockException);
}
@Test(timeout=10000)
public void testAMRMClientAsyncRunTimeException() throws Exception {
String exStr = "TestRunTimeException";
RuntimeException mockRunTimeException = mock(RuntimeException.class);
when(mockRunTimeException.getMessage()).thenReturn(exStr);
runHeartBeatThrowOutException(mockRunTimeException);
}
private void runHeartBeatThrowOutException(Exception ex) throws Exception{
Configuration conf = new Configuration();
TestCallbackHandler callbackHandler = new TestCallbackHandler();
@SuppressWarnings("unchecked")
AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
String exStr = "TestException";
YarnException mockException = mock(YarnException.class);
when(mockException.getMessage()).thenReturn(exStr);
when(client.allocate(anyFloat())).thenThrow(mockException);
when(client.allocate(anyFloat())).thenThrow(ex);
AMRMClientAsync<ContainerRequest> asyncClient =
AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
@ -183,8 +195,8 @@ public void testAMRMClientAsyncException() throws Exception {
}
}
}
Assert.assertTrue(callbackHandler.savedException.getMessage().contains(exStr));
Assert.assertTrue(callbackHandler.savedException.getMessage().contains(
ex.getMessage()));
asyncClient.stop();
// stopping should have joined all threads and completed all callbacks

View File

@ -26,6 +26,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
@ -319,7 +320,7 @@ private void testContainerManagement(NMClientImpl nmClient,
if (++i < size) {
// NodeManager may still need some time to get the container started
testGetContainerStatus(container, i, ContainerState.RUNNING, "",
-1000);
Arrays.asList(new Integer[] {-1000}));
try {
nmClient.stopContainer(container.getId(), container.getNodeId());
@ -330,8 +331,21 @@ private void testContainerManagement(NMClientImpl nmClient,
}
// getContainerStatus can be called after stopContainer
testGetContainerStatus(container, i, ContainerState.COMPLETE,
"Container killed by the ApplicationMaster.", 143);
try {
// 0 is possible if CLEANUP_CONTAINER is executed too late
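// 143 = 128 + 15, i.e. the container process was terminated by SIGTERM.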
testGetContainerStatus(container, i, ContainerState.COMPLETE,
"Container killed by the ApplicationMaster.", Arrays.asList(
new Integer[] {143, 0}));
} catch (YarnException e) {
// The exception is possible because, after the container is stopped,
// it may be removed from NM's context.
if (!e.getMessage()
.contains("was recently stopped on node manager")) {
throw (AssertionError)
(new AssertionError("Exception is not expected: " + e).initCause(
e));
}
}
}
}
}
@ -345,7 +359,7 @@ private void sleep(int sleepTime) {
}
private void testGetContainerStatus(Container container, int index,
ContainerState state, String diagnostics, int exitStatus)
ContainerState state, String diagnostics, List<Integer> exitStatuses)
throws YarnException, IOException {
while (true) {
try {
@ -357,7 +371,7 @@ private void testGetContainerStatus(Container container, int index,
assertEquals(container.getId(), status.getContainerId());
assertTrue("" + index + ": " + status.getDiagnostics(),
status.getDiagnostics().contains(diagnostics));
assertEquals(exitStatus, status.getExitStatus());
assertTrue(exitStatuses.contains(status.getExitStatus()));
break;
}
Thread.sleep(100);

View File

@ -29,6 +29,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.security.SecurityUtil;
@ -456,4 +457,10 @@ public static void main(String[] args) {
Configuration conf = new YarnConfiguration();
nodeManager.initAndStartNodeManager(conf, false);
}
@VisibleForTesting
@Private
public NodeStatusUpdater getNodeStatusUpdater() {
return nodeStatusUpdater;
}
}

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
public interface NodeStatusUpdater extends Service {
@ -28,4 +29,8 @@ public interface NodeStatusUpdater extends Service {
NodeStatus getNodeStatusAndUpdateContainersInContext();
long getRMIdentifier();
public boolean isContainerRecentlyStopped(ContainerId containerId);
public void clearFinishedContainersFromCache();
}

View File

@ -24,6 +24,7 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@ -68,6 +69,9 @@
public class NodeStatusUpdaterImpl extends AbstractService implements
NodeStatusUpdater {
public static final String YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS =
YarnConfiguration.NM_PREFIX + "duration-to-track-stopped-containers";
private static final Log LOG = LogFactory.getLog(NodeStatusUpdaterImpl.class);
private final Object heartbeatMonitor = new Object();
@ -88,6 +92,10 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
private Map<ApplicationId, Long> appTokenKeepAliveMap =
new HashMap<ApplicationId, Long>();
private Random keepAliveDelayRandom = new Random();
// Tracks recently stopped containers on this NodeManager.
private final Map<ContainerId, Long> recentlyStoppedContainers;
// Duration for which to track a recently stopped container.
private long durationToTrackStoppedContainers;
private final NodeHealthCheckerService healthChecker;
private final NodeManagerMetrics metrics;
@ -103,6 +111,8 @@ public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher,
this.context = context;
this.dispatcher = dispatcher;
this.metrics = metrics;
this.recentlyStoppedContainers =
new LinkedHashMap<ContainerId, Long>();
}
@Override
@ -129,11 +139,27 @@ protected void serviceInit(Configuration conf) throws Exception {
conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
// The default duration to track stopped containers on the NodeManager is 10
// minutes. This should not be set to a very large value, because the cache
// remembers every container stopped within that window.
durationToTrackStoppedContainers =
conf.getLong(YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS,
600000);
if (durationToTrackStoppedContainers < 0) {
String message = "Invalid configuration for "
+ YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS + "; the default "
+ "value is 10 minutes (600000 ms).";
LOG.error(message);
throw new YarnException(message);
}
if (LOG.isDebugEnabled()) {
LOG.debug(YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS + " :"
+ durationToTrackStoppedContainers);
}
super.serviceInit(conf);
LOG.info("Initialized nodemanager for " + nodeId + ":" +
" physical-memory=" + memoryMb + " virtual-memory=" + virtualMemoryMb +
" virtual-cores=" + virtualCores);
super.serviceInit(conf);
}
@Override
@ -290,6 +316,10 @@ public NodeStatus getNodeStatusAndUpdateContainersInContext() {
if (containerStatus.getState() == ContainerState.COMPLETE) {
// Remove
i.remove();
// Add to the finished-containers cache. The cache keeps the entry around for
// at least #durationToTrackStoppedContainers ms; a subsequent call to stop
// the container removes it from the cache.
addStoppedContainersToCache(containerId);
LOG.info("Removed completed container " + containerId);
}
@ -340,6 +370,46 @@ public void sendOutofBandHeartBeat() {
}
}
public boolean isContainerRecentlyStopped(ContainerId containerId) {
synchronized (recentlyStoppedContainers) {
return recentlyStoppedContainers.containsKey(containerId);
}
}
@Private
@VisibleForTesting
public void addStoppedContainersToCache(ContainerId containerId) {
synchronized (recentlyStoppedContainers) {
removeVeryOldStoppedContainersFromCache();
recentlyStoppedContainers.put(containerId,
System.currentTimeMillis() + durationToTrackStoppedContainers);
}
}
@Override
public void clearFinishedContainersFromCache() {
synchronized (recentlyStoppedContainers) {
recentlyStoppedContainers.clear();
}
}
@Private
@VisibleForTesting
public void removeVeryOldStoppedContainersFromCache() {
synchronized (recentlyStoppedContainers) {
long currentTime = System.currentTimeMillis();
Iterator<ContainerId> i =
recentlyStoppedContainers.keySet().iterator();
while (i.hasNext()) {
if (recentlyStoppedContainers.get(i.next()) < currentTime) {
i.remove();
} else {
break;
}
}
}
}
@Override
public long getRMIdentifier() {
return this.rmIdentifier;
@ -455,4 +525,6 @@ private void updateMasterKeys(NodeHeartbeatResponse response) {
new Thread(statusUpdaterRunnable, "Node Status Updater");
statusUpdater.start();
}
}
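A design note on the tracking cache above: entries are added with monotonically non-decreasing expiry times, so keeping them in a LinkedHashMap (which iterates in insertion order) lets removeVeryOldStoppedContainersFromCache stop scanning at the first entry that has not yet expired. A stand-alone sketch of that pattern, with purely illustrative names:

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

// Sketch of the expiry pattern used above; not part of the patch.
class RecentlyStoppedCache<K> {
  private final Map<K, Long> entries = new LinkedHashMap<K, Long>();
  private final long ttlMillis;

  RecentlyStoppedCache(long ttlMillis) { this.ttlMillis = ttlMillis; }

  synchronized void add(K key) {
    evictExpired();
    entries.put(key, System.currentTimeMillis() + ttlMillis);
  }

  synchronized boolean contains(K key) {
    return entries.containsKey(key);
  }

  private void evictExpired() {
    long now = System.currentTimeMillis();
    Iterator<Map.Entry<K, Long>> it = entries.entrySet().iterator();
    while (it.hasNext()) {
      if (it.next().getValue() < now) {
        it.remove();
      } else {
        break;   // later entries were added later, so they expire later
      }
    }
  }
}

The tracking window itself comes from the new NodeManager property built from YarnConfiguration.NM_PREFIX plus "duration-to-track-stopped-containers"; assuming NM_PREFIX is "yarn.nodemanager.", the full key is yarn.nodemanager.duration-to-track-stopped-containers, expressed in milliseconds and defaulting to 600000.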

Some files were not shown because too many files have changed in this diff.