diff --git a/hadoop-client/pom.xml b/hadoop-client/pom.xml
index 5d8c7fad4d6..8dea4325899 100644
--- a/hadoop-client/pom.xml
+++ b/hadoop-client/pom.xml
@@ -143,10 +143,6 @@
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
</exclusion>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty-util</artifactId>
- </exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 6651d6d0640..3d1edc5eacb 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -617,6 +617,17 @@ Release 2.0.5-beta - UNRELEASED
HADOOP-9429. TestConfiguration fails with IBM JAVA. (Amir Sanjar via
suresh)
+ HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim
+ Bondarev via jlowe)
+
+ HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit
+ tests (Vadim Bondarev via jlowe)
+
+ HADOOP-9211. Set default max heap size in HADOOP_CLIENT_OPTS to 512m
+ in order to avoid OOME. (Plamen Jeliazkov via shv)
+
+ HADOOP-9473. Typo in FileUtil copy() method. (Glen Mazza via suresh)
+
Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES
@@ -643,6 +654,9 @@ Release 2.0.4-alpha - UNRELEASED
HADOOP-9444. Modify hadoop-policy.xml to replace unexpanded variables to a
default value of '*'. (Roman Shaposhnik via vinodkv)
+ HADOOP-9471. hadoop-client wrongfully excludes jetty-util JAR,
+ breaking webhdfs. (tucu)
+
Release 2.0.3-alpha - 2013-02-06
INCOMPATIBLE CHANGES
@@ -1606,6 +1620,24 @@ Release 2.0.0-alpha - 05-23-2012
HADOOP-8655. Fix TextInputFormat for large deliminators. (Gelesh via
bobby)
+Release 0.23.8 - UNRELEASED
+
+ INCOMPATIBLE CHANGES
+
+ NEW FEATURES
+
+ IMPROVEMENTS
+
+ OPTIMIZATIONS
+
+ BUG FIXES
+
+ HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim
+ Bondarev via jlowe)
+
+ HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit
+ tests (Vadim Bondarev via jlowe)
+
Release 0.23.7 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index 1dcf1de455a..5bb29375a04 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -522,6 +522,7 @@
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
<javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
+ <javahClassName>org.apache.hadoop.net.unix.DomainSocket</javahClassName>
</javahClassNames>
<javahOutputDirectory>${project.build.directory}/native/javah</javahOutputDirectory>
diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt
index 7449610b9ae..e008e789084 100644
--- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt
+++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt
@@ -163,10 +163,10 @@ add_executable(test_bulk_crc32
${D}/util/bulk_crc32.c
${T}/util/test_bulk_crc32.c
)
-set_property(SOURCE main.cpp PROPERTY INCLUDE_DIRECTORIES "\"-Werror\" \"-Wall\"")
SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
add_dual_library(hadoop
+ main/native/src/exception.c
${D}/io/compress/lz4/Lz4Compressor.c
${D}/io/compress/lz4/Lz4Decompressor.c
${D}/io/compress/lz4/lz4.c
@@ -177,6 +177,7 @@ add_dual_library(hadoop
${D}/io/nativeio/NativeIO.c
${D}/io/nativeio/errno_enum.c
${D}/io/nativeio/file_descriptor.c
+ ${D}/net/unix/DomainSocket.c
${D}/security/JniBasedUnixGroupsMapping.c
${D}/security/JniBasedUnixGroupsNetgroupMapping.c
${D}/security/getGroup.c
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
index 42a0d05aaad..41289a9acda 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
@@ -62,7 +62,7 @@ export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANOD
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
-export HADOOP_CLIENT_OPTS="-Xmx128m $HADOOP_CLIENT_OPTS"
+export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index 73faf968628..67befcc323c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -743,6 +743,21 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
return value.trim();
}
}
+
+ /**
+ * Get the value of the name property as a trimmed String,
+ * or defaultValue if no such property exists.
+ * See {@link Configuration#getTrimmed(String)} for more details.
+ *
+ * @param name the property name.
+ * @param defaultValue the property default value.
+ * @return the value of the name property, or defaultValue
+ * if it is not set.
+ */
+ public String getTrimmed(String name, String defaultValue) {
+ String ret = getTrimmed(name);
+ return ret == null ? defaultValue : ret;
+ }
/**
* Get the value of the name property, without doing
@@ -877,7 +892,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
}
return result;
}
-
+
/**
* Get the value of the name property as an int.
*
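For reference, a minimal usage sketch of the new getTrimmed(String, String) overload added above (not part of the patch; the key names and values are purely illustrative):

```java
// Minimal usage sketch of the new getTrimmed(String, String) overload.
// The configuration keys below are hypothetical and only for illustration.
import org.apache.hadoop.conf.Configuration;

public class GetTrimmedExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("example.socket.path", "  /var/run/example.sock  ");

    // Whitespace is trimmed from the stored value.
    String path = conf.getTrimmed("example.socket.path", "/tmp/default.sock");
    // Unset keys fall back to the supplied default instead of returning null.
    String missing = conf.getTrimmed("example.unset.key", "/tmp/default.sock");

    System.out.println(path);     // /var/run/example.sock
    System.out.println(missing);  // /tmp/default.sock
  }
}
```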
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
index 2cc834852e4..b61477e9d7b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
@@ -284,7 +284,7 @@ public class FileUtil {
// Check if dest is directory
if (!dstFS.exists(dst)) {
throw new IOException("`" + dst +"': specified destination directory " +
- "doest not exist");
+ "does not exist");
} else {
FileStatus sdst = dstFS.getFileStatus(dst);
if (!sdst.isDirectory())
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java
index 46039a5506e..cfa7b01e813 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.net;
import java.io.IOException;
+import org.apache.hadoop.classification.InterfaceAudience;
import java.io.InputStream;
import java.net.Socket;
import java.net.SocketTimeoutException;
@@ -37,7 +38,8 @@ import java.nio.channels.SelectionKey;
* IllegalBlockingModeException.
* Please use {@link SocketOutputStream} for writing.
*/
-class SocketInputStream extends InputStream
+@InterfaceAudience.LimitedPrivate("HDFS")
+public class SocketInputStream extends InputStream
implements ReadableByteChannel {
private Reader reader;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java
index 091c684059b..ead1d7b2b05 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java
@@ -260,4 +260,8 @@ public class SocketOutputStream extends OutputStream
throws IOException {
transferToFully(fileCh, position, count, null, null);
}
+
+ public void setTimeout(int timeoutMs) {
+ writer.setTimeout(timeoutMs);
+ }
}
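A small sketch of how the new SocketOutputStream#setTimeout might be used (not part of the patch; it assumes the pre-existing SocketOutputStream(Socket, long) constructor, and the address, port, and timeout values are illustrative):

```java
// Sketch: adjusting the write timeout on an existing SocketOutputStream.
// Assumes the existing SocketOutputStream(Socket, long) constructor; the
// address, port, and timeout values below are illustrative only.
import java.net.Socket;
import org.apache.hadoop.net.SocketOutputStream;

public class SetTimeoutExample {
  public static void main(String[] args) throws Exception {
    Socket socket = new Socket("localhost", 50010);                // example address
    SocketOutputStream out = new SocketOutputStream(socket, 60000); // 60s per write
    try {
      out.write(new byte[] {1, 2, 3});
      // Before a long transfer, relax the write timeout to 5 minutes.
      out.setTimeout(300000);
      out.write(new byte[1024]);
    } finally {
      out.close();
    }
  }
}
```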
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java
new file mode 100644
index 00000000000..4c6ae0592c2
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java
@@ -0,0 +1,704 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.net.unix;
+
+import java.io.Closeable;
+import org.apache.hadoop.classification.InterfaceAudience;
+import java.io.FileDescriptor;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.SocketException;
+import java.nio.channels.AsynchronousCloseException;
+import java.nio.channels.ClosedChannelException;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.ByteBuffer;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.lang.SystemUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.NativeCodeLoader;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * The implementation of UNIX domain sockets in Java.
+ *
+ * See {@link DomainSocket} for more information about UNIX domain sockets.
+ */
+@InterfaceAudience.LimitedPrivate("HDFS")
+public class DomainSocket implements Closeable {
+ static {
+ if (SystemUtils.IS_OS_WINDOWS) {
+ loadingFailureReason = "UNIX Domain sockets are not available on Windows.";
+ } else if (!NativeCodeLoader.isNativeCodeLoaded()) {
+ loadingFailureReason = "libhadoop cannot be loaded.";
+ } else {
+ String problem;
+ try {
+ anchorNative();
+ problem = null;
+ } catch (Throwable t) {
+ problem = "DomainSocket#anchorNative got error: " + t.getMessage();
+ }
+ loadingFailureReason = problem;
+ }
+ }
+
+ static Log LOG = LogFactory.getLog(DomainSocket.class);
+
+ /**
+ * True only if we should validate the paths used in {@link DomainSocket#bind()}
+ */
+ private static boolean validateBindPaths = true;
+
+ /**
+ * The reason why DomainSocket is not available, or null if it is available.
+ */
+ private final static String loadingFailureReason;
+
+ /**
+ * Initialize the native library code.
+ */
+ private static native void anchorNative();
+
+ /**
+ * This function is designed to validate that the path chosen for a UNIX
+ * domain socket is secure. A socket path is secure if it doesn't allow
+ * unprivileged users to perform a man-in-the-middle attack against it.
+ * For example, one way to perform a man-in-the-middle attack would be for
+ * a malicious user to move the server socket out of the way and create his
+ * own socket in the same place. Not good.
+ *
+ * Note that we only check the path once. It's possible that the
+ * permissions on the path could change, perhaps to something more relaxed,
+ * immediately after the path passes our validation test-- hence creating a
+ * security hole. However, the purpose of this check is to spot common
+ * misconfigurations. System administrators do not commonly change
+ * permissions on these paths while the server is running.
+ *
+ * @param path the path to validate
+ * @param skipComponents the number of starting path components to skip
+ * validation for (used only for testing)
+ */
+ @VisibleForTesting
+ native static void validateSocketPathSecurity0(String path,
+ int skipComponents) throws IOException;
+
+ /**
+ * Return the reason why UNIX domain sockets are not available, or null if they are available.
+ */
+ public static String getLoadingFailureReason() {
+ return loadingFailureReason;
+ }
+
+ /**
+ * Disable validation of the server bind paths.
+ */
+ @VisibleForTesting
+ public static void disableBindPathValidation() {
+ validateBindPaths = false;
+ }
+
+ /**
+ * Given a path and a port, compute the effective path by replacing
+ * occurrences of _PORT with the port. This is mainly to make it
+ * possible to run multiple DataNodes locally for testing purposes.
+ *
+ * @param path The source path
+ * @param port Port number to use
+ *
+ * @return The effective path
+ */
+ public static String getEffectivePath(String path, int port) {
+ return path.replace("_PORT", String.valueOf(port));
+ }
+
+ /**
+ * Tracks the reference count of the file descriptor, and also whether it is
+ * open or closed.
+ */
+ private static class Status {
+ /**
+ * Bit mask representing a closed domain socket.
+ */
+ private static final int STATUS_CLOSED_MASK = 1 << 30;
+
+ /**
+ * Status bits
+ *
+ * Bit 30: 0 = DomainSocket open, 1 = DomainSocket closed
+ * Bits 29 to 0: the reference count.
+ */
+ private final AtomicInteger bits = new AtomicInteger(0);
+
+ Status() { }
+
+ /**
+ * Increment the reference count of the underlying file descriptor.
+ *
+ * @throws ClosedChannelException If the file descriptor is closed.
+ */
+ void reference() throws ClosedChannelException {
+ int curBits = bits.incrementAndGet();
+ if ((curBits & STATUS_CLOSED_MASK) != 0) {
+ bits.decrementAndGet();
+ throw new ClosedChannelException();
+ }
+ }
+
+ /**
+ * Decrement the reference count of the underlying file descriptor.
+ *
+ * @param checkClosed Whether to throw an exception if the file
+ * descriptor is closed.
+ *
+ * @throws AsynchronousCloseException If the file descriptor is closed and
+ * checkClosed is set.
+ */
+ void unreference(boolean checkClosed) throws AsynchronousCloseException {
+ int newCount = bits.decrementAndGet();
+ assert (newCount & ~STATUS_CLOSED_MASK) >= 0;
+ if (checkClosed && ((newCount & STATUS_CLOSED_MASK) != 0)) {
+ throw new AsynchronousCloseException();
+ }
+ }
+
+ /**
+ * Return true if the file descriptor is currently open.
+ *
+ * @return True if the file descriptor is currently open.
+ */
+ boolean isOpen() {
+ return ((bits.get() & STATUS_CLOSED_MASK) == 0);
+ }
+
+ /**
+ * Mark the file descriptor as closed.
+ *
+ * Once the file descriptor is closed, it cannot be reopened.
+ *
+ * @return The current reference count.
+ * @throws ClosedChannelException If someone else closes the file
+ * descriptor before we do.
+ */
+ int setClosed() throws ClosedChannelException {
+ while (true) {
+ int curBits = bits.get();
+ if ((curBits & STATUS_CLOSED_MASK) != 0) {
+ throw new ClosedChannelException();
+ }
+ if (bits.compareAndSet(curBits, curBits | STATUS_CLOSED_MASK)) {
+ return curBits & (~STATUS_CLOSED_MASK);
+ }
+ }
+ }
+
+ /**
+ * Get the current reference count.
+ *
+ * @return The current reference count.
+ */
+ int getReferenceCount() {
+ return bits.get() & (~STATUS_CLOSED_MASK);
+ }
+ }
+
+ /**
+ * The socket status.
+ */
+ private final Status status;
+
+ /**
+ * The file descriptor associated with this UNIX domain socket.
+ */
+ private final int fd;
+
+ /**
+ * The path associated with this UNIX domain socket.
+ */
+ private final String path;
+
+ /**
+ * The InputStream associated with this socket.
+ */
+ private final DomainInputStream inputStream = new DomainInputStream();
+
+ /**
+ * The OutputStream associated with this socket.
+ */
+ private final DomainOutputStream outputStream = new DomainOutputStream();
+
+ /**
+ * The Channel associated with this socket.
+ */
+ private final DomainChannel channel = new DomainChannel();
+
+ private DomainSocket(String path, int fd) {
+ this.status = new Status();
+ this.fd = fd;
+ this.path = path;
+ }
+
+ private static native int bind0(String path) throws IOException;
+
+ /**
+ * Create a new DomainSocket listening on the given path.
+ *
+ * @param path The path to bind and listen on.
+ * @return The new DomainSocket.
+ */
+ public static DomainSocket bindAndListen(String path) throws IOException {
+ if (loadingFailureReason != null) {
+ throw new UnsupportedOperationException(loadingFailureReason);
+ }
+ if (validateBindPaths) {
+ validateSocketPathSecurity0(path, 0);
+ }
+ int fd = bind0(path);
+ return new DomainSocket(path, fd);
+ }
+
+ private static native int accept0(int fd) throws IOException;
+
+ /**
+ * Accept a new UNIX domain connection.
+ *
+ * This method can only be used on sockets that were bound with bind().
+ *
+ * @return The new connection.
+ * @throws IOException If there was an I/O error
+ * performing the accept-- such as the
+ * socket being closed from under us.
+ * @throws SocketTimeoutException If the accept timed out.
+ */
+ public DomainSocket accept() throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ DomainSocket ret = new DomainSocket(path, accept0(fd));
+ exc = false;
+ return ret;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ private static native int connect0(String path);
+
+ /**
+ * Create a new DomainSocket connected to the given path.
+ *
+ * @param path The path to connect to.
+ * @return The new DomainSocket.
+ */
+ public static DomainSocket connect(String path) throws IOException {
+ if (loadingFailureReason != null) {
+ throw new UnsupportedOperationException(loadingFailureReason);
+ }
+ int fd = connect0(path);
+ return new DomainSocket(path, fd);
+ }
+
+ /**
+ * Return true if the file descriptor is currently open.
+ *
+ * @return True if the file descriptor is currently open.
+ */
+ public boolean isOpen() {
+ return status.isOpen();
+ }
+
+ /**
+ * @return The socket path.
+ */
+ public String getPath() {
+ return path;
+ }
+
+ /**
+ * @return The socket InputStream
+ */
+ public DomainInputStream getInputStream() {
+ return inputStream;
+ }
+
+ /**
+ * @return The socket OutputStream
+ */
+ public DomainOutputStream getOutputStream() {
+ return outputStream;
+ }
+
+ /**
+ * @return The socket Channel
+ */
+ public DomainChannel getChannel() {
+ return channel;
+ }
+
+ public static final int SEND_BUFFER_SIZE = 1;
+ public static final int RECEIVE_BUFFER_SIZE = 2;
+ public static final int SEND_TIMEOUT = 3;
+ public static final int RECEIVE_TIMEOUT = 4;
+
+ private static native void setAttribute0(int fd, int type, int val)
+ throws IOException;
+
+ public void setAttribute(int type, int size) throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ setAttribute0(fd, type, size);
+ exc = false;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ private native int getAttribute0(int fd, int type) throws IOException;
+
+ public int getAttribute(int type) throws IOException {
+ status.reference();
+ int attribute;
+ boolean exc = true;
+ try {
+ attribute = getAttribute0(fd, type);
+ exc = false;
+ return attribute;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ private static native void close0(int fd) throws IOException;
+
+ private static native void closeFileDescriptor0(FileDescriptor fd)
+ throws IOException;
+
+ private static native void shutdown0(int fd) throws IOException;
+
+ /**
+ * Close the Socket.
+ */
+ @Override
+ public void close() throws IOException {
+ // Set the closed bit on this DomainSocket
+ int refCount;
+ try {
+ refCount = status.setClosed();
+ } catch (ClosedChannelException e) {
+ // Someone else already closed the DomainSocket.
+ return;
+ }
+ // Wait for all references to go away
+ boolean didShutdown = false;
+ boolean interrupted = false;
+ while (refCount > 0) {
+ if (!didShutdown) {
+ try {
+ // Calling shutdown on the socket will interrupt blocking system
+ // calls like accept, write, and read that are going on in a
+ // different thread.
+ shutdown0(fd);
+ } catch (IOException e) {
+ LOG.error("shutdown error: ", e);
+ }
+ didShutdown = true;
+ }
+ try {
+ Thread.sleep(10);
+ } catch (InterruptedException e) {
+ interrupted = true;
+ }
+ refCount = status.getReferenceCount();
+ }
+
+ // At this point, nobody has a reference to the file descriptor,
+ // and nobody will be able to get one in the future either.
+ // We now call close(2) on the file descriptor.
+ // After this point, the file descriptor number will be reused by
+ // something else. Although this DomainSocket object continues to hold
+ // the old file descriptor number (it's a final field), we never use it
+ // again because this DomainSocket is closed.
+ close0(fd);
+ if (interrupted) {
+ Thread.currentThread().interrupt();
+ }
+ }
+
+ private native static void sendFileDescriptors0(int fd,
+ FileDescriptor descriptors[],
+ byte jbuf[], int offset, int length) throws IOException;
+
+ /**
+ * Send some FileDescriptor objects to the process on the other side of this
+ * socket.
+ *
+ * @param descriptors The file descriptors to send.
+ * @param jbuf Some bytes to send. You must send at least
+ * one byte.
+ * @param offset The offset in the jbuf array to start at.
+ * @param length Length of the jbuf array to use.
+ */
+ public void sendFileDescriptors(FileDescriptor descriptors[],
+ byte jbuf[], int offset, int length) throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ sendFileDescriptors0(fd, descriptors, jbuf, offset, length);
+ exc = false;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ private static native int receiveFileDescriptors0(int fd,
+ FileDescriptor[] descriptors,
+ byte jbuf[], int offset, int length) throws IOException;
+
+ /**
+ * Receive some FileDescriptor objects from the process on the other side of
+ * this socket.
+ *
+ * @param descriptors (output parameter) Array of FileDescriptors.
+ * We will fill as many slots as possible with file
+ * descriptors passed from the remote process. The
+ * other slots will contain NULL.
+ * @param jbuf (output parameter) Buffer to read into.
+ * The UNIX domain sockets API requires you to read
+ * at least one byte from the remote process, even
+ * if all you care about is the file descriptors
+ * you will receive.
+ * @param offset Offset into the byte buffer to load data
+ * @param length Length of the byte buffer to use for data
+ *
+ * @return The number of bytes read. This will be -1 if we
+ * reached EOF (similar to SocketInputStream);
+ * otherwise, it will be positive.
+ * @throws IOException if there was an I/O error.
+ */
+ public int receiveFileDescriptors(FileDescriptor[] descriptors,
+ byte jbuf[], int offset, int length) throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ int nBytes = receiveFileDescriptors0(fd, descriptors, jbuf, offset, length);
+ exc = false;
+ return nBytes;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ /**
+ * Receive some FileDescriptor objects from the process on the other side of
+ * this socket, and wrap them in FileInputStream objects.
+ *
+ * See {@link DomainSocket#receiveFileDescriptors(FileDescriptor[], byte[], int, int)} for the underlying semantics.
+ */
+ public int recvFileInputStreams(FileInputStream[] streams, byte buf[],
+ int offset, int length) throws IOException {
+ FileDescriptor descriptors[] = new FileDescriptor[streams.length];
+ boolean success = false;
+ for (int i = 0; i < streams.length; i++) {
+ streams[i] = null;
+ }
+ status.reference();
+ try {
+ int ret = receiveFileDescriptors0(fd, descriptors, buf, offset, length);
+ for (int i = 0, j = 0; i < descriptors.length; i++) {
+ if (descriptors[i] != null) {
+ streams[j++] = new FileInputStream(descriptors[i]);
+ descriptors[i] = null;
+ }
+ }
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ for (int i = 0; i < descriptors.length; i++) {
+ if (descriptors[i] != null) {
+ try {
+ closeFileDescriptor0(descriptors[i]);
+ } catch (Throwable t) {
+ LOG.warn(t);
+ }
+ } else if (streams[i] != null) {
+ try {
+ streams[i].close();
+ } catch (Throwable t) {
+ LOG.warn(t);
+ } finally {
+ streams[i] = null;
+ }
+ }
+ }
+ }
+ status.unreference(!success);
+ }
+ }
+
+ private native static int readArray0(int fd, byte b[], int off, int len)
+ throws IOException;
+
+ private native static int available0(int fd) throws IOException;
+
+ private static native void write0(int fd, int b) throws IOException;
+
+ private static native void writeArray0(int fd, byte b[], int offset, int length)
+ throws IOException;
+
+ private native static int readByteBufferDirect0(int fd, ByteBuffer dst,
+ int position, int remaining) throws IOException;
+
+ /**
+ * Input stream for UNIX domain sockets.
+ */
+ @InterfaceAudience.LimitedPrivate("HDFS")
+ public class DomainInputStream extends InputStream {
+ @Override
+ public int read() throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ byte b[] = new byte[1];
+ int ret = DomainSocket.readArray0(DomainSocket.this.fd, b, 0, 1);
+ exc = false;
+ return (ret >= 0) ? b[0] : -1;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ @Override
+ public int read(byte b[], int off, int len) throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ int nRead = DomainSocket.readArray0(DomainSocket.this.fd, b, off, len);
+ exc = false;
+ return nRead;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ @Override
+ public int available() throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ int nAvailable = DomainSocket.available0(DomainSocket.this.fd);
+ exc = false;
+ return nAvailable;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ DomainSocket.this.close();
+ }
+ }
+
+ /**
+ * Output stream for UNIX domain sockets.
+ */
+ @InterfaceAudience.LimitedPrivate("HDFS")
+ public class DomainOutputStream extends OutputStream {
+ @Override
+ public void close() throws IOException {
+ DomainSocket.this.close();
+ }
+
+ @Override
+ public void write(int val) throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ byte b[] = new byte[1];
+ b[0] = (byte)val;
+ DomainSocket.writeArray0(DomainSocket.this.fd, b, 0, 1);
+ exc = false;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+
+ @Override
+ public void write(byte[] b, int off, int len) throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ DomainSocket.writeArray0(DomainSocket.this.fd, b, off, len);
+ exc = false;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+ }
+
+ @InterfaceAudience.LimitedPrivate("HDFS")
+ public class DomainChannel implements ReadableByteChannel {
+ @Override
+ public boolean isOpen() {
+ return DomainSocket.this.isOpen();
+ }
+
+ @Override
+ public void close() throws IOException {
+ DomainSocket.this.close();
+ }
+
+ @Override
+ public int read(ByteBuffer dst) throws IOException {
+ status.reference();
+ boolean exc = true;
+ try {
+ int nread = 0;
+ if (dst.isDirect()) {
+ nread = DomainSocket.readByteBufferDirect0(DomainSocket.this.fd,
+ dst, dst.position(), dst.remaining());
+ } else if (dst.hasArray()) {
+ nread = DomainSocket.readArray0(DomainSocket.this.fd,
+ dst.array(), dst.position() + dst.arrayOffset(),
+ dst.remaining());
+ } else {
+ throw new AssertionError("we don't support " +
+ "using ByteBuffers that aren't either direct or backed by " +
+ "arrays");
+ }
+ if (nread > 0) {
+ dst.position(dst.position() + nread);
+ }
+ exc = false;
+ return nread;
+ } finally {
+ status.unreference(exc);
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return String.format("DomainSocket(fd=%d,path=%s)", fd, path);
+ }
+}
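The Status inner class above packs a closed flag (bit 30) and a reference count (bits 0-29) into a single AtomicInteger, so reference(), unreference(), and close() need no lock. A standalone, simplified sketch of that technique (not part of the patch; the class and method names are mine):

```java
// Simplified, standalone sketch of the bit-packing technique used by
// DomainSocket.Status: one AtomicInteger holds a "closed" flag in bit 30 and a
// reference count in bits 0-29. Class and method names here are illustrative.
import java.nio.channels.ClosedChannelException;
import java.util.concurrent.atomic.AtomicInteger;

class RefCountedHandle {
  private static final int CLOSED_MASK = 1 << 30;
  private final AtomicInteger bits = new AtomicInteger(0);

  /** Take a reference; fails if the handle has been closed. */
  void acquire() throws ClosedChannelException {
    int cur = bits.incrementAndGet();
    if ((cur & CLOSED_MASK) != 0) {
      bits.decrementAndGet();               // undo the optimistic increment
      throw new ClosedChannelException();
    }
  }

  /** Drop a reference taken by acquire(). */
  void release() {
    bits.decrementAndGet();
  }

  /**
   * Mark the handle closed and return the number of references still
   * outstanding, so the caller can wait for them to drain before freeing
   * the underlying resource.
   */
  int setClosed() throws ClosedChannelException {
    while (true) {
      int cur = bits.get();
      if ((cur & CLOSED_MASK) != 0) {
        throw new ClosedChannelException();  // someone else closed it first
      }
      if (bits.compareAndSet(cur, cur | CLOSED_MASK)) {
        return cur;                          // remaining reference count
      }
    }
  }

  int referenceCount() {
    return bits.get() & ~CLOSED_MASK;
  }
}
```

The optimistic increment in acquire() is safe because close() only releases the descriptor after the count has drained to zero, so a transiently inflated count merely delays the close.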
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java
index aa4fa28ad1f..fde11e72cf4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java
@@ -27,8 +27,10 @@ import java.io.DataInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
+import java.util.Set;
import javax.crypto.SecretKey;
@@ -144,6 +146,10 @@ extends AbstractDelegationTokenIdentifier>
return;
}
+ protected void logExpireToken(TokenIdent ident) throws IOException {
+ return;
+ }
+
/**
* Update the current master key
* This is called once by startThreads before tokenRemoverThread is created,
@@ -363,15 +369,25 @@ extends AbstractDelegationTokenIdentifier>
}
/** Remove expired delegation tokens from cache */
- private synchronized void removeExpiredToken() {
+ private void removeExpiredToken() throws IOException {
long now = Time.now();
- Iterator<DelegationTokenInformation> i = currentTokens.values().iterator();
- while (i.hasNext()) {
- long renewDate = i.next().getRenewDate();
- if (now > renewDate) {
- i.remove();
+ Set<TokenIdent> expiredTokens = new HashSet<TokenIdent>();
+ synchronized (this) {
+ Iterator<Map.Entry<TokenIdent, DelegationTokenInformation>> i =
+ currentTokens.entrySet().iterator();
+ while (i.hasNext()) {
+ Map.Entry<TokenIdent, DelegationTokenInformation> entry = i.next();
+ long renewDate = entry.getValue().getRenewDate();
+ if (renewDate < now) {
+ expiredTokens.add(entry.getKey());
+ i.remove();
+ }
}
}
+ // don't hold lock on 'this' to avoid edit log updates blocking token ops
+ for (TokenIdent ident : expiredTokens) {
+ logExpireToken(ident);
+ }
}
public void stopThreads() {
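The rewritten removeExpiredToken() above narrows the synchronized block: expired entries are collected and removed from currentTokens under the lock, and the potentially slow logExpireToken() calls (which may touch the edit log) run afterwards without holding it. A generic sketch of the same pattern (not part of the patch; types and names are illustrative):

```java
// Sketch of the "collect under lock, act outside the lock" pattern used by
// removeExpiredToken(): mutate the shared map inside synchronized, then do the
// potentially slow logging afterwards. Types and names here are illustrative.
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

class ExpiringCache {
  private final Map<String, Long> expiryByKey = new HashMap<String, Long>();

  void removeExpired(long now) {
    Set<String> expired = new HashSet<String>();
    synchronized (this) {                     // short critical section
      Iterator<Map.Entry<String, Long>> it = expiryByKey.entrySet().iterator();
      while (it.hasNext()) {
        Map.Entry<String, Long> e = it.next();
        if (e.getValue() < now) {
          expired.add(e.getKey());
          it.remove();
        }
      }
    }
    for (String key : expired) {              // slow work, no lock held
      logExpired(key);
    }
  }

  private void logExpired(String key) {
    System.out.println("expired: " + key);    // stand-in for an edit-log write
  }
}
```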
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
index 4a3424bad32..5eb51670829 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java
@@ -104,7 +104,7 @@ public class DataChecksum implements Checksum {
( (bytes[offset+2] & 0xff) << 16 ) |
( (bytes[offset+3] & 0xff) << 8 ) |
( (bytes[offset+4] & 0xff) );
- return newDataChecksum( Type.valueOf(bytes[0]), bytesPerChecksum );
+ return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
}
/**
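The one-character fix above matters because the 5-byte checksum header (one type byte followed by a 4-byte big-endian bytesPerChecksum) may start at a non-zero offset in the caller's buffer; reading the type from bytes[0] silently picks up unrelated data. A small illustration (not part of the patch; the values are made up):

```java
// Illustration of why the checksum header must be read at 'offset': the
// 5-byte header (1 type byte + 4-byte big-endian bytesPerChecksum) may not
// start at position 0 of the buffer. Values below are made up.
public class ChecksumHeaderExample {
  public static void main(String[] args) {
    byte[] buf = new byte[16];
    int offset = 3;                       // header embedded mid-buffer
    buf[offset] = 2;                      // an example checksum type id
    // bytesPerChecksum = 512, big-endian
    buf[offset + 1] = 0;
    buf[offset + 2] = 0;
    buf[offset + 3] = 2;
    buf[offset + 4] = 0;

    int bytesPerChecksum =
        ((buf[offset + 1] & 0xff) << 24) |
        ((buf[offset + 2] & 0xff) << 16) |
        ((buf[offset + 3] & 0xff) << 8)  |
        ( buf[offset + 4] & 0xff);

    System.out.println("type id = " + buf[offset]);               // 2, as intended
    System.out.println("buf[0]  = " + buf[0]);                    // 0: what the old code read
    System.out.println("bytesPerChecksum = " + bytesPerChecksum); // 512
  }
}
```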
diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/exception.c b/hadoop-common-project/hadoop-common/src/main/native/src/exception.c
new file mode 100644
index 00000000000..39a03f9cde0
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/native/src/exception.c
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exception.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+jthrowable newExceptionV(JNIEnv* env, const char *name,
+ const char *fmt, va_list ap)
+{
+ int need;
+ char buf[1], *msg = NULL;
+ va_list ap2;
+ jstring jstr = NULL;
+ jthrowable jthr;
+ jclass clazz;
+ jmethodID excCtor;
+
+ va_copy(ap2, ap);
+ clazz = (*env)->FindClass(env, name);
+ if (!clazz) {
+ jthr = (*env)->ExceptionOccurred(env);
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ excCtor = (*env)->GetMethodID(env,
+ clazz, "", "(Ljava/lang/String;)V");
+ if (!excCtor) {
+ jthr = (*env)->ExceptionOccurred(env);
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ need = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (need < 0) {
+ fmt = "vsnprintf error";
+ need = strlen(fmt);
+ }
+ msg = malloc(need + 1);
+ vsnprintf(msg, need + 1, fmt, ap2);
+ jstr = (*env)->NewStringUTF(env, msg);
+ if (!jstr) {
+ jthr = (*env)->ExceptionOccurred(env);
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ jthr = (*env)->NewObject(env, clazz, excCtor, jstr);
+ if (!jthr) {
+ jthr = (*env)->ExceptionOccurred(env);
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+
+done:
+ free(msg);
+ va_end(ap2);
+ (*env)->DeleteLocalRef(env, jstr);
+ return jthr;
+}
+
+jthrowable newException(JNIEnv* env, const char *name, const char *fmt, ...)
+{
+ va_list ap;
+ jthrowable jthr;
+
+ va_start(ap, fmt);
+ jthr = newExceptionV(env, name, fmt, ap);
+ va_end(ap);
+ return jthr;
+}
+
+jthrowable newRuntimeException(JNIEnv* env, const char *fmt, ...)
+{
+ va_list ap;
+ jthrowable jthr;
+
+ va_start(ap, fmt);
+ jthr = newExceptionV(env, "java/lang/RuntimeException", fmt, ap);
+ va_end(ap);
+ return jthr;
+}
+
+jthrowable newIOException(JNIEnv* env, const char *fmt, ...)
+{
+ va_list ap;
+ jthrowable jthr;
+
+ va_start(ap, fmt);
+ jthr = newExceptionV(env, "java/io/IOException", fmt, ap);
+ va_end(ap);
+ return jthr;
+}
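newExceptionV() builds an arbitrary Java exception from native code: resolve the class by name, look up its single-String constructor, format the message, instantiate, and leave no JNI exception pending. A rough pure-Java analogue of those steps, for readers less familiar with the JNI calls (not part of the patch; purely illustrative):

```java
// Rough pure-Java analogue of what newExceptionV() does through JNI: resolve
// the exception class by name, find its single-String constructor, format the
// message, and instantiate. Purely illustrative; the real helper works on
// jclass/jmethodID handles and never leaves a JNI exception pending.
public class NewExceptionSketch {
  static Throwable newException(String className, String fmt, Object... args) {
    try {
      Class<?> clazz = Class.forName(className);
      java.lang.reflect.Constructor<?> ctor = clazz.getConstructor(String.class);
      return (Throwable) ctor.newInstance(String.format(fmt, args));
    } catch (Exception e) {
      // Loose parallel to the native fallback, which hands back whatever
      // exception was pending when construction failed.
      return new RuntimeException("could not construct " + className, e);
    }
  }

  public static void main(String[] args) {
    Throwable t = newException("java.io.IOException",
        "connect(2) error: %s when trying to connect to '%s'",
        "Connection refused", "/var/run/example.sock");
    System.out.println(t);
  }
}
```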
diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/exception.h b/hadoop-common-project/hadoop-common/src/main/native/src/exception.h
new file mode 100644
index 00000000000..d7af3772c85
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/native/src/exception.h
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HADOOP_MAIN_NATIVE_SRC_EXCEPTION_H
+#define HADOOP_MAIN_NATIVE_SRC_EXCEPTION_H
+
+#include <jni.h> /* for jthrowable */
+#include <stdarg.h> /* for va_list */
+
+/**
+ * Create a new Exception.
+ *
+ * No exceptions will be pending on return.
+ *
+ * @param env The JNI environment
+ * @param name full name of the Java exception class
+ * @param fmt printf-style format string
+ * @param ap printf-style arguments
+ *
+ * @return The new exception
+ */
+jthrowable newExceptionV(JNIEnv* env, const char *name,
+ const char *fmt, va_list ap);
+
+/**
+ * Create a new Exception.
+ *
+ * No exceptions will be pending on return.
+ *
+ * @param env The JNI environment
+ * @param name full name of the Java exception class
+ * @param fmt printf-style format string
+ * @param ... printf-style arguments
+ *
+ * @return The new exception
+ */
+jthrowable newException(JNIEnv* env, const char *name, const char *fmt, ...)
+ __attribute__((format(printf, 3, 4)));
+
+/**
+ * Create a new RuntimeException.
+ *
+ * No exceptions will be pending on return.
+ *
+ * @param env The JNI environment
+ * @param fmt printf-style format string
+ * @param ... printf-style arguments
+ *
+ * @return The RuntimeException
+ */
+jthrowable newRuntimeException(JNIEnv* env, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+
+/**
+ * Create a new IOException.
+ *
+ * No exceptions will be pending on return.
+ *
+ * @param env The JNI environment
+ * @param fmt printf-style format string
+ * @param ... printf-style arguments
+ *
+ * @return The IOException, or another exception if we failed
+ * to create the IOException.
+ */
+jthrowable newIOException(JNIEnv* env, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+
+#endif
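Exceptions created by these helpers (and by the socket-specific wrappers in DomainSocket.c below) reach Java callers as ordinary checked exceptions thrown from the native methods. A sketch of handling them around the DomainSocket API introduced in this patch (not part of the patch; the socket path is illustrative):

```java
// Sketch of how exceptions raised by the native helpers surface to Java
// callers: they arrive as ordinary IOException subclasses thrown from the
// native methods. The socket path below is illustrative.
import java.io.IOException;
import java.net.SocketTimeoutException;
import org.apache.hadoop.net.unix.DomainSocket;

public class DomainSocketClientSketch {
  public static void main(String[] args) {
    if (DomainSocket.getLoadingFailureReason() != null) {
      System.err.println("domain sockets unavailable: "
          + DomainSocket.getLoadingFailureReason());
      return;
    }
    try {
      DomainSocket sock = DomainSocket.connect("/var/run/example.sock");
      try {
        sock.getOutputStream().write(1);
        int b = sock.getInputStream().read();
        System.out.println("read " + b);
      } finally {
        sock.close();
      }
    } catch (SocketTimeoutException e) {
      System.err.println("timed out: " + e.getMessage());  // e.g. EAGAIN/ETIMEDOUT
    } catch (IOException e) {
      System.err.println("I/O error: " + e.getMessage());  // e.g. ConnectException
    }
  }
}
```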
diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c
new file mode 100644
index 00000000000..1c2ea698a00
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c
@@ -0,0 +1,944 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include "exception.h"
+#include "org/apache/hadoop/io/nativeio/file_descriptor.h"
+#include "org_apache_hadoop.h"
+#include "org_apache_hadoop_net_unix_DomainSocket.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <jni.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h> /* for FIONREAD */
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#define SEND_BUFFER_SIZE org_apache_hadoop_net_unix_DomainSocket_SEND_BUFFER_SIZE
+#define RECEIVE_BUFFER_SIZE org_apache_hadoop_net_unix_DomainSocket_RECEIVE_BUFFER_SIZE
+#define SEND_TIMEOUT org_apache_hadoop_net_unix_DomainSocket_SEND_TIMEOUT
+#define RECEIVE_TIMEOUT org_apache_hadoop_net_unix_DomainSocket_RECEIVE_TIMEOUT
+
+#define DEFAULT_RECEIVE_TIMEOUT 120000
+#define DEFAULT_SEND_TIMEOUT 120000
+#define LISTEN_BACKLOG 128
+
+/**
+ * Can't pass more than this number of file descriptors in a single message.
+ */
+#define MAX_PASSED_FDS 16
+
+static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val);
+
+/**
+ * Convert an errno to a socket exception name.
+ *
+ * Note: we assume that all of these exceptions have a one-argument constructor
+ * that takes a string.
+ *
+ * @return The exception class name
+ */
+static const char *errnoToSocketExceptionName(int errnum)
+{
+ switch (errnum) {
+ case EAGAIN:
+ /* accept(2) returns EAGAIN when a socket timeout has been set, and that
+ * timeout elapses without an incoming connection. This error code is also
+ * used in non-blocking I/O, but we don't support that. */
+ case ETIMEDOUT:
+ return "java/net/SocketTimeoutException";
+ case EHOSTDOWN:
+ case EHOSTUNREACH:
+ case ECONNREFUSED:
+ return "java/net/NoRouteToHostException";
+ case ENOTSUP:
+ return "java/lang/UnsupportedOperationException";
+ default:
+ return "java/net/SocketException";
+ }
+}
+
+static jthrowable newSocketException(JNIEnv *env, int errnum,
+ const char *fmt, ...)
+ __attribute__((format(printf, 3, 4)));
+
+static jthrowable newSocketException(JNIEnv *env, int errnum,
+ const char *fmt, ...)
+{
+ va_list ap;
+ jthrowable jthr;
+
+ va_start(ap, fmt);
+ jthr = newExceptionV(env, errnoToSocketExceptionName(errnum), fmt, ap);
+ va_end(ap);
+ return jthr;
+}
+
+static const char* terror(int errnum)
+{
+ if ((errnum < 0) || (errnum >= sys_nerr)) {
+ return "unknown error.";
+ }
+ return sys_errlist[errnum];
+}
+
+/**
+ * Flexible buffer that will try to fit data on the stack, and fall back
+ * to the heap if necessary.
+ */
+struct flexibleBuffer {
+ int8_t *curBuf;
+ int8_t *allocBuf;
+ int8_t stackBuf[8196];
+};
+
+static jthrowable flexBufInit(JNIEnv *env, struct flexibleBuffer *flexBuf, jint length)
+{
+ flexBuf->curBuf = flexBuf->allocBuf = NULL;
+ if (length < sizeof(flexBuf->stackBuf)) {
+ flexBuf->curBuf = flexBuf->stackBuf;
+ return NULL;
+ }
+ flexBuf->allocBuf = malloc(length);
+ if (!flexBuf->allocBuf) {
+ return newException(env, "java/lang/OutOfMemoryError",
+ "OOM allocating space for %d bytes of data.", length);
+ }
+ flexBuf->curBuf = flexBuf->allocBuf;
+ return NULL;
+}
+
+static void flexBufFree(struct flexibleBuffer *flexBuf)
+{
+ free(flexBuf->allocBuf);
+}
+
+static jthrowable setup(JNIEnv *env, int *ofd, jobject jpath, int doConnect)
+{
+ const char *cpath = NULL;
+ struct sockaddr_un addr;
+ jthrowable jthr = NULL;
+ int fd = -1, ret;
+
+ fd = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ ret = errno;
+ jthr = newSocketException(env, ret,
+ "error creating UNIX domain socket with SOCK_STREAM: %s",
+ terror(ret));
+ goto done;
+ }
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ cpath = (*env)->GetStringUTFChars(env, jpath, NULL);
+ if (!cpath) {
+ jthr = (*env)->ExceptionOccurred(env);
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ ret = snprintf(addr.sun_path, sizeof(addr.sun_path),
+ "%s", cpath);
+ if (ret < 0) {
+ ret = errno;
+ jthr = newSocketException(env, EIO,
+ "error computing UNIX domain socket path: error %d (%s)",
+ ret, terror(ret));
+ goto done;
+ }
+ if (ret >= sizeof(addr.sun_path)) {
+ jthr = newSocketException(env, ENAMETOOLONG,
+ "error computing UNIX domain socket path: path too long. "
+ "The longest UNIX domain socket path possible on this host "
+ "is %zd bytes.", sizeof(addr.sun_path) - 1);
+ goto done;
+ }
+ if (doConnect) {
+ RETRY_ON_EINTR(ret, connect(fd,
+ (struct sockaddr*)&addr, sizeof(addr)));
+ if (ret < 0) {
+ ret = errno;
+ jthr = newException(env, "java/net/ConnectException",
+ "connect(2) error: %s when trying to connect to '%s'",
+ terror(ret), addr.sun_path);
+ goto done;
+ }
+ } else {
+ RETRY_ON_EINTR(ret, unlink(addr.sun_path));
+ RETRY_ON_EINTR(ret, bind(fd, (struct sockaddr*)&addr, sizeof(addr)));
+ if (ret < 0) {
+ ret = errno;
+ jthr = newException(env, "java/net/BindException",
+ "bind(2) error: %s when trying to bind to '%s'",
+ terror(ret), addr.sun_path);
+ goto done;
+ }
+ /* We need to make the socket readable and writable for all users in the
+ * system.
+ *
+ * If the system administrator doesn't want the socket to be accessible to
+ * all users, he can simply adjust the +x permissions on one of the socket's
+ * parent directories.
+ *
+ * See HDFS-4485 for more discussion.
+ */
+ if (chmod(addr.sun_path, 0666)) {
+ ret = errno;
+ jthr = newException(env, "java/net/BindException",
+ "chmod(%s, 0666) failed: %s", addr.sun_path, terror(ret));
+ goto done;
+ }
+ if (listen(fd, LISTEN_BACKLOG) < 0) {
+ ret = errno;
+ jthr = newException(env, "java/net/BindException",
+ "listen(2) error: %s when trying to listen to '%s'",
+ terror(ret), addr.sun_path);
+ goto done;
+ }
+ }
+
+done:
+ if (cpath) {
+ (*env)->ReleaseStringUTFChars(env, jpath, cpath);
+ }
+ if (jthr) {
+ if (fd > 0) {
+ RETRY_ON_EINTR(ret, close(fd));
+ fd = -1;
+ }
+ } else {
+ *ofd = fd;
+ }
+ return jthr;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_anchorNative(
+JNIEnv *env, jclass clazz)
+{
+ fd_init(env); // for fd_get, fd_create, etc.
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_validateSocketPathSecurity0(
+JNIEnv *env, jclass clazz, jobject jstr, jint skipComponents)
+{
+ jint utfLength;
+ char path[PATH_MAX], check[PATH_MAX], *token, *rest;
+ struct stat st;
+ int ret, mode, strlenPath;
+ uid_t uid;
+ jthrowable jthr = NULL;
+
+ utfLength = (*env)->GetStringUTFLength(env, jstr);
+ if (utfLength > sizeof(path)) {
+ jthr = newIOException(env, "path is too long! We expected a path "
+ "no longer than %zd UTF-8 bytes.", sizeof(path));
+ goto done;
+ }
+ (*env)->GetStringUTFRegion(env, jstr, 0, utfLength, path);
+ jthr = (*env)->ExceptionOccurred(env);
+ if (jthr) {
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ uid = geteuid();
+ strlenPath = strlen(path);
+ if (strlenPath == 0) {
+ jthr = newIOException(env, "socket path is empty.");
+ goto done;
+ }
+ if (path[strlenPath - 1] == '/') {
+ /* It makes no sense to have a socket path that ends in a slash, since
+ * sockets are not directories. */
+ jthr = newIOException(env, "bad socket path '%s'. The socket path "
+ "must not end in a slash.", path);
+ goto done;
+ }
+ // This loop iterates through all of the path components except for the very
+ // last one. We don't validate the last component, since it's not supposed to
+ // be a directory. (If it is a directory, we will fail to create the socket
+ // later with EISDIR or similar.)
+ for (check[0] = '/', check[1] = '\0', rest = path, token = "";
+ token && rest[0];
+ token = strtok_r(rest, "/", &rest)) {
+ if (strcmp(check, "/") != 0) {
+ // If the previous directory we checked was '/', we skip appending another
+ // slash to the end because it would be unnecessary. Otherwise we do it.
+ strcat(check, "/");
+ }
+ // These strcats are safe because the length of 'check' is the same as the
+ // length of 'path' and we never add more slashes than were in the original
+ // path.
+ strcat(check, token);
+ if (skipComponents > 0) {
+ skipComponents--;
+ continue;
+ }
+ if (stat(check, &st) < 0) {
+ ret = errno;
+ jthr = newIOException(env, "failed to stat a path component: '%s'. "
+ "error code %d (%s)", check, ret, terror(ret));
+ goto done;
+ }
+ mode = st.st_mode & 0777;
+ if (mode & 0002) {
+ jthr = newIOException(env, "the path component: '%s' is "
+ "world-writable. Its permissions are 0%03o. Please fix "
+ "this or select a different socket path.", check, mode);
+ goto done;
+ }
+ if ((mode & 0020) && (st.st_gid != 0)) {
+ jthr = newIOException(env, "the path component: '%s' is "
+ "group-writable, and the group is not root. Its permissions are "
+ "0%03o, and it is owned by gid %d. Please fix this or "
+ "select a different socket path.", check, mode, st.st_gid);
+ goto done;
+ }
+ if ((mode & 0200) && (st.st_uid != 0) &&
+ (st.st_uid != uid)) {
+ jthr = newIOException(env, "the path component: '%s' is "
+ "owned by a user who is not root and not you. Your effective user "
+ "id is %d; the path is owned by user id %d, and its permissions are "
+ "0%03o. Please fix this or select a different socket path.",
+ check, uid, st.st_uid, mode);
+ goto done;
+ }
+ }
+done:
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_bind0(
+JNIEnv *env, jclass clazz, jstring path)
+{
+ int fd;
+ jthrowable jthr = NULL;
+
+ jthr = setup(env, &fd, path, 0);
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+ return fd;
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_accept0(
+JNIEnv *env, jclass clazz, jint fd)
+{
+ int ret, newFd = -1;
+ socklen_t slen;
+ struct sockaddr_un remote;
+ jthrowable jthr = NULL;
+
+ slen = sizeof(remote);
+ do {
+ newFd = accept(fd, (struct sockaddr*)&remote, &slen);
+ } while ((newFd < 0) && (errno == EINTR));
+ if (newFd < 0) {
+ ret = errno;
+ jthr = newSocketException(env, ret, "accept(2) error: %s", terror(ret));
+ goto done;
+ }
+
+done:
+ if (jthr) {
+ if (newFd > 0) {
+ RETRY_ON_EINTR(ret, close(newFd));
+ newFd = -1;
+ }
+ (*env)->Throw(env, jthr);
+ }
+ return newFd;
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_connect0(
+JNIEnv *env, jclass clazz, jstring path)
+{
+ int ret, fd;
+ jthrowable jthr = NULL;
+
+ jthr = setup(env, &fd, path, 1);
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ return -1;
+ }
+ if (((jthr = setAttribute0(env, fd, SEND_TIMEOUT, DEFAULT_SEND_TIMEOUT))) ||
+ ((jthr = setAttribute0(env, fd, RECEIVE_TIMEOUT, DEFAULT_RECEIVE_TIMEOUT)))) {
+ RETRY_ON_EINTR(ret, close(fd));
+ (*env)->Throw(env, jthr);
+ return -1;
+ }
+ return fd;
+}
+
+static void javaMillisToTimeVal(int javaMillis, struct timeval *tv)
+{
+ tv->tv_sec = javaMillis / 1000;
+ tv->tv_usec = (javaMillis - (tv->tv_sec * 1000)) * 1000;
+}
+
+static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val)
+{
+ struct timeval tv;
+ int ret, buf;
+
+ switch (type) {
+ case SEND_BUFFER_SIZE:
+ buf = val;
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, sizeof(buf))) {
+ ret = errno;
+ return newSocketException(env, ret,
+ "setsockopt(SO_SNDBUF) error: %s", terror(ret));
+ }
+ return NULL;
+ case RECEIVE_BUFFER_SIZE:
+ buf = val;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf, sizeof(buf))) {
+ ret = errno;
+ return newSocketException(env, ret,
+ "setsockopt(SO_RCVBUF) error: %s", terror(ret));
+ }
+ return NULL;
+ case SEND_TIMEOUT:
+ javaMillisToTimeVal(val, &tv);
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (struct timeval *)&tv,
+ sizeof(tv))) {
+ ret = errno;
+ return newSocketException(env, ret,
+ "setsockopt(SO_SNDTIMEO) error: %s", terror(ret));
+ }
+ return NULL;
+ case RECEIVE_TIMEOUT:
+ javaMillisToTimeVal(val, &tv);
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (struct timeval *)&tv,
+ sizeof(tv))) {
+ ret = errno;
+ return newSocketException(env, ret,
+ "setsockopt(SO_RCVTIMEO) error: %s", terror(ret));
+ }
+ return NULL;
+ default:
+ break;
+ }
+ return newRuntimeException(env, "Invalid attribute type %d.", type);
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_setAttribute0(
+JNIEnv *env, jclass clazz, jint fd, jint type, jint val)
+{
+ jthrowable jthr = setAttribute0(env, fd, type, val);
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+}
+
+static jint getSockOptBufSizeToJavaBufSize(int size)
+{
+#ifdef __linux__
+ // Linux always doubles the value that you set with setsockopt.
+ // We cut it in half here so that programs can at least read back the same
+ // value they set.
+ size /= 2;
+#endif
+ return size;
+}
+
+static int timeValToJavaMillis(const struct timeval *tv)
+{
+ return (tv->tv_sec * 1000) + (tv->tv_usec / 1000);
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_getAttribute0(
+JNIEnv *env, jclass clazz, jint fd, jint type)
+{
+ struct timeval tv;
+ socklen_t len;
+ int ret, rval = 0;
+
+ switch (type) {
+ case SEND_BUFFER_SIZE:
+ len = sizeof(rval);
+ if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &rval, &len)) {
+ ret = errno;
+ (*env)->Throw(env, newSocketException(env, ret,
+ "getsockopt(SO_SNDBUF) error: %s", terror(ret)));
+ return -1;
+ }
+ return getSockOptBufSizeToJavaBufSize(rval);
+ case RECEIVE_BUFFER_SIZE:
+ len = sizeof(rval);
+ if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rval, &len)) {
+ ret = errno;
+ (*env)->Throw(env, newSocketException(env, ret,
+ "getsockopt(SO_RCVBUF) error: %s", terror(ret)));
+ return -1;
+ }
+ return getSockOptBufSizeToJavaBufSize(rval);
+ case SEND_TIMEOUT:
+ memset(&tv, 0, sizeof(tv));
+ len = sizeof(struct timeval);
+ if (getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &len)) {
+ ret = errno;
+ (*env)->Throw(env, newSocketException(env, ret,
+ "getsockopt(SO_SNDTIMEO) error: %s", terror(ret)));
+ return -1;
+ }
+ return timeValToJavaMillis(&tv);
+ case RECEIVE_TIMEOUT:
+ memset(&tv, 0, sizeof(tv));
+ len = sizeof(struct timeval);
+ if (getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, &len)) {
+ ret = errno;
+ (*env)->Throw(env, newSocketException(env, ret,
+ "getsockopt(SO_RCVTIMEO) error: %s", terror(ret)));
+ return -1;
+ }
+ return timeValToJavaMillis(&tv);
+ default:
+ (*env)->Throw(env, newRuntimeException(env,
+ "Invalid attribute type %d.", type));
+ return -1;
+ }
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_close0(
+JNIEnv *env, jclass clazz, jint fd)
+{
+ int ret;
+
+ RETRY_ON_EINTR(ret, close(fd));
+ if (ret) {
+ ret = errno;
+ (*env)->Throw(env, newSocketException(env, ret,
+ "close(2) error: %s", terror(ret)));
+ }
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_closeFileDescriptor0(
+JNIEnv *env, jclass clazz, jobject jfd)
+{
+ Java_org_apache_hadoop_net_unix_DomainSocket_close0(
+ env, clazz, fd_get(env, jfd));
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_shutdown0(
+JNIEnv *env, jclass clazz, jint fd)
+{
+ int ret;
+
+ RETRY_ON_EINTR(ret, shutdown(fd, SHUT_RDWR));
+ if (ret) {
+ ret = errno;
+ (*env)->Throw(env, newSocketException(env, ret,
+ "shutdown(2) error: %s", terror(ret)));
+ }
+}
+
+/**
+ * Write an entire buffer to a file descriptor.
+ *
+ * @param env The JNI environment.
+ * @param fd The fd to write to.
+ * @param buf The buffer to write
+ * @param amt The length of the buffer to write.
+ * @return NULL on success; or the unraised exception representing
+ * the problem.
+ */
+static jthrowable write_fully(JNIEnv *env, int fd, int8_t *buf, int amt)
+{
+ int err, res;
+
+ while (amt > 0) {
+ res = write(fd, buf, amt);
+ if (res < 0) {
+ err = errno;
+ if (err == EINTR) {
+ continue;
+ }
+ return newSocketException(env, err, "write(2) error: %s", terror(err));
+ }
+ amt -= res;
+ buf += res;
+ }
+ return NULL;
+}
+
+/**
+ * Our auxiliary data setup.
+ *
+ * See man 3 cmsg for more information about auxiliary socket data on UNIX.
+ *
+ * We use __attribute__((packed)) to ensure that the compiler doesn't insert any
+ * padding between 'hdr' and 'fds'.
+ * We use __attribute__((aligned(8))) to ensure that the compiler puts the start
+ * of the structure at an address which is a multiple of 8. If we did not do
+ * this, the attribute((packed)) would cause the compiler to generate a lot of
+ * slow code for accessing unaligned memory.
+ */
+struct cmsghdr_with_fds {
+ struct cmsghdr hdr;
+ int fds[MAX_PASSED_FDS];
+} __attribute__((packed,aligned(8)));
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_sendFileDescriptors0(
+JNIEnv *env, jclass clazz, jint fd, jobject jfds, jobject jbuf,
+jint offset, jint length)
+{
+ struct iovec vec[1];
+ struct flexibleBuffer flexBuf;
+ struct cmsghdr_with_fds aux;
+ jint jfdsLen;
+ int i, ret = -1, auxLen;
+ struct msghdr socketMsg;
+ jthrowable jthr = NULL;
+
+ jthr = flexBufInit(env, &flexBuf, length);
+ if (jthr) {
+ goto done;
+ }
+ if (length <= 0) {
+ jthr = newException(env, "java/lang/IllegalArgumentException",
+ "You must write at least one byte.");
+ goto done;
+ }
+ jfdsLen = (*env)->GetArrayLength(env, jfds);
+ if (jfdsLen <= 0) {
+ jthr = newException(env, "java/lang/IllegalArgumentException",
+ "Called sendFileDescriptors with no file descriptors.");
+ goto done;
+ } else if (jfdsLen > MAX_PASSED_FDS) {
+ jfdsLen = 0;
+ jthr = newException(env, "java/lang/IllegalArgumentException",
+ "Called sendFileDescriptors with an array of %d length. "
+ "The maximum is %d.", jfdsLen, MAX_PASSED_FDS);
+ goto done;
+ }
+ (*env)->GetByteArrayRegion(env, jbuf, offset, length, flexBuf.curBuf);
+ jthr = (*env)->ExceptionOccurred(env);
+ if (jthr) {
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ memset(&vec, 0, sizeof(vec));
+ vec[0].iov_base = flexBuf.curBuf;
+ vec[0].iov_len = length;
+ auxLen = CMSG_LEN(jfdsLen * sizeof(int));
+ memset(&aux, 0, auxLen);
+ memset(&socketMsg, 0, sizeof(socketMsg));
+ socketMsg.msg_iov = vec;
+ socketMsg.msg_iovlen = 1;
+ socketMsg.msg_control = &aux;
+ socketMsg.msg_controllen = auxLen;
+ aux.hdr.cmsg_len = auxLen;
+ aux.hdr.cmsg_level = SOL_SOCKET;
+ aux.hdr.cmsg_type = SCM_RIGHTS;
+ for (i = 0; i < jfdsLen; i++) {
+ jobject jfd = (*env)->GetObjectArrayElement(env, jfds, i);
+ if (!jfd) {
+ jthr = (*env)->ExceptionOccurred(env);
+ if (jthr) {
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ jthr = newException(env, "java/lang/NullPointerException",
+ "element %d of jfds was NULL.", i);
+ goto done;
+ }
+ aux.fds[i] = fd_get(env, jfd);
+ (*env)->DeleteLocalRef(env, jfd);
+ if (jthr) {
+ goto done;
+ }
+ }
+ RETRY_ON_EINTR(ret, sendmsg(fd, &socketMsg, 0));
+ if (ret < 0) {
+ ret = errno;
+ jthr = newSocketException(env, ret, "sendmsg(2) error: %s", terror(ret));
+ goto done;
+ }
+ length -= ret;
+ if (length > 0) {
+ // Write the rest of the bytes we were asked to send.
+ // This time, no fds will be attached.
+ jthr = write_fully(env, fd, flexBuf.curBuf + ret, length);
+ if (jthr) {
+ goto done;
+ }
+ }
+
+done:
+ flexBufFree(&flexBuf);
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_receiveFileDescriptors0(
+JNIEnv *env, jclass clazz, jint fd, jarray jfds, jarray jbuf,
+jint offset, jint length)
+{
+ struct iovec vec[1];
+ struct flexibleBuffer flexBuf;
+ struct cmsghdr_with_fds aux;
+ int i, jRecvFdsLen = 0, auxLen;
+ jint jfdsLen = 0;
+ struct msghdr socketMsg;
+ ssize_t bytesRead = -1;
+ jobject fdObj;
+ jthrowable jthr = NULL;
+
+ jthr = flexBufInit(env, &flexBuf, length);
+ if (jthr) {
+ goto done;
+ }
+ if (length <= 0) {
+ jthr = newRuntimeException(env, "You must read at least one byte.");
+ goto done;
+ }
+ jfdsLen = (*env)->GetArrayLength(env, jfds);
+ if (jfdsLen <= 0) {
+ jthr = newException(env, "java/lang/IllegalArgumentException",
+ "Called receiveFileDescriptors with an array of %d length. "
+ "You must pass at least one fd.", jfdsLen);
+ goto done;
+ } else if (jfdsLen > MAX_PASSED_FDS) {
+ jthr = newException(env, "java/lang/IllegalArgumentException",
+ "Called receiveFileDescriptors with an array of %d length. "
+ "The maximum is %d.", jfdsLen, MAX_PASSED_FDS);
+ jfdsLen = 0;
+ goto done;
+ }
+ for (i = 0; i < jfdsLen; i++) {
+ (*env)->SetObjectArrayElement(env, jfds, i, NULL);
+ }
+ vec[0].iov_base = flexBuf.curBuf;
+ vec[0].iov_len = length;
+ auxLen = CMSG_LEN(jfdsLen * sizeof(int));
+ memset(&aux, 0, auxLen);
+ memset(&socketMsg, 0, sizeof(socketMsg));
+ socketMsg.msg_iov = vec;
+ socketMsg.msg_iovlen = 1;
+ socketMsg.msg_control = &aux;
+ socketMsg.msg_controllen = auxLen;
+ aux.hdr.cmsg_len = auxLen;
+ aux.hdr.cmsg_level = SOL_SOCKET;
+ aux.hdr.cmsg_type = SCM_RIGHTS;
+ RETRY_ON_EINTR(bytesRead, recvmsg(fd, &socketMsg, 0));
+ if (bytesRead < 0) {
+ int ret = errno;
+ if (ret == ECONNABORTED) {
+ // The remote peer disconnected on us. Treat this as an EOF.
+ bytesRead = -1;
+ goto done;
+ }
+ jthr = newSocketException(env, ret, "recvmsg(2) failed: %s",
+ terror(ret));
+ goto done;
+ } else if (bytesRead == 0) {
+ bytesRead = -1;
+ goto done;
+ }
+ jRecvFdsLen = (aux.hdr.cmsg_len - sizeof(struct cmsghdr)) / sizeof(int);
+ for (i = 0; i < jRecvFdsLen; i++) {
+ fdObj = fd_create(env, aux.fds[i]);
+ if (!fdObj) {
+ jthr = (*env)->ExceptionOccurred(env);
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ // Make this -1 so we don't attempt to close it twice in an error path.
+ aux.fds[i] = -1;
+ (*env)->SetObjectArrayElement(env, jfds, i, fdObj);
+ // There is no point keeping around a local reference to the fdObj.
+ // The array continues to reference it.
+ (*env)->DeleteLocalRef(env, fdObj);
+ }
+ (*env)->SetByteArrayRegion(env, jbuf, offset, length, flexBuf.curBuf);
+ jthr = (*env)->ExceptionOccurred(env);
+ if (jthr) {
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+done:
+ flexBufFree(&flexBuf);
+ if (jthr) {
+ // Free any FileDescriptor references we may have created,
+ // or file descriptors we may have been passed.
+ for (i = 0; i < jRecvFdsLen; i++) {
+ if (aux.fds[i] >= 0) {
+ // Use a scratch variable so close() does not clobber the loop index.
+ int cret;
+ RETRY_ON_EINTR(cret, close(aux.fds[i]));
+ aux.fds[i] = -1;
+ }
+ fdObj = (*env)->GetObjectArrayElement(env, jfds, i);
+ if (fdObj) {
+ int ret, afd = fd_get(env, fdObj);
+ if (afd >= 0) {
+ RETRY_ON_EINTR(ret, close(afd));
+ }
+ (*env)->SetObjectArrayElement(env, jfds, i, NULL);
+ (*env)->DeleteLocalRef(env, fdObj);
+ }
+ }
+ (*env)->Throw(env, jthr);
+ }
+ return bytesRead;
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_readArray0(
+JNIEnv *env, jclass clazz, jint fd, jarray b, jint offset, jint length)
+{
+ int ret = -1;
+ struct flexibleBuffer flexBuf;
+ jthrowable jthr;
+
+ jthr = flexBufInit(env, &flexBuf, length);
+ if (jthr) {
+ goto done;
+ }
+ RETRY_ON_EINTR(ret, read(fd, flexBuf.curBuf, length));
+ if (ret < 0) {
+ ret = errno;
+ if (ret == ECONNABORTED) {
+ // The remote peer disconnected on us. Treat this as an EOF.
+ ret = -1;
+ goto done;
+ }
+ jthr = newSocketException(env, ret, "read(2) error: %s",
+ terror(ret));
+ goto done;
+ }
+ if (ret == 0) {
+ goto done;
+ }
+ (*env)->SetByteArrayRegion(env, b, offset, ret, flexBuf.curBuf);
+ jthr = (*env)->ExceptionOccurred(env);
+ if (jthr) {
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+done:
+ flexBufFree(&flexBuf);
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+ return ret == 0 ? -1 : ret; // Java wants -1 on EOF
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_available0(
+JNIEnv *env, jclass clazz, jint fd)
+{
+ int ret, avail = 0;
+ jthrowable jthr = NULL;
+
+ RETRY_ON_EINTR(ret, ioctl(fd, FIONREAD, &avail));
+ if (ret < 0) {
+ ret = errno;
+ jthr = newSocketException(env, ret,
+ "ioctl(%d, FIONREAD) error: %s", fd, terror(ret));
+ goto done;
+ }
+done:
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+ return avail;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_writeArray0(
+JNIEnv *env, jclass clazz, jint fd, jarray b, jint offset, jint length)
+{
+ struct flexibleBuffer flexBuf;
+ jthrowable jthr;
+
+ jthr = flexBufInit(env, &flexBuf, length);
+ if (jthr) {
+ goto done;
+ }
+ (*env)->GetByteArrayRegion(env, b, offset, length, flexBuf.curBuf);
+ jthr = (*env)->ExceptionOccurred(env);
+ if (jthr) {
+ (*env)->ExceptionClear(env);
+ goto done;
+ }
+ jthr = write_fully(env, fd, flexBuf.curBuf, length);
+ if (jthr) {
+ goto done;
+ }
+
+done:
+ flexBufFree(&flexBuf);
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_readByteBufferDirect0(
+JNIEnv *env, jclass clazz, jint fd, jobject dst, jint position, jint remaining)
+{
+ uint8_t *buf;
+ jthrowable jthr = NULL;
+ int res = -1;
+
+ buf = (*env)->GetDirectBufferAddress(env, dst);
+ if (!buf) {
+ jthr = newRuntimeException(env, "GetDirectBufferAddress failed.");
+ goto done;
+ }
+ RETRY_ON_EINTR(res, read(fd, buf + position, remaining));
+ if (res < 0) {
+ res = errno;
+ if (res != ECONNABORTED) {
+ jthr = newSocketException(env, res, "read(2) error: %s",
+ terror(res));
+ goto done;
+ } else {
+ // The remote peer disconnected on us. Treat this as an EOF.
+ res = -1;
+ }
+ }
+done:
+ if (jthr) {
+ (*env)->Throw(env, jthr);
+ }
+ return res;
+}
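
For reference, the two fd-passing JNI functions above follow the standard SCM_RIGHTS ancillary-data pattern for AF_UNIX sockets; the packed, 8-byte-aligned cmsghdr_with_fds structure is just a fixed-capacity version of that control buffer. A minimal standalone sketch of the same idea, independent of the patch and of any JNI plumbing (all names here are illustrative), might look like this:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Illustrative sketch only (not part of the patch): pass a single file
 * descriptor over an already-connected AF_UNIX socket. One byte of
 * ordinary data is carried alongside the SCM_RIGHTS control message
 * (the patch likewise insists on at least one byte). */
static int send_one_fd(int sock, int fd_to_pass)
{
  char dummy = '\0';
  struct iovec vec = { &dummy, 1 };
  union {
    struct cmsghdr hdr;
    char space[CMSG_SPACE(sizeof(int))];
  } aux;
  struct msghdr msg;

  memset(&aux, 0, sizeof(aux));
  memset(&msg, 0, sizeof(msg));
  msg.msg_iov = &vec;
  msg.msg_iovlen = 1;
  msg.msg_control = &aux;
  msg.msg_controllen = CMSG_SPACE(sizeof(int));
  aux.hdr.cmsg_len = CMSG_LEN(sizeof(int));
  aux.hdr.cmsg_level = SOL_SOCKET;
  aux.hdr.cmsg_type = SCM_RIGHTS;
  memcpy(CMSG_DATA(&aux.hdr), &fd_to_pass, sizeof(int));
  return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}

sendFileDescriptors0 generalizes this to up to MAX_PASSED_FDS descriptors and retries interrupted calls with RETRY_ON_EINTR; receiveFileDescriptors0 is the mirror image built around recvmsg(2).
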
diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h b/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h
index bc353e15cf0..92a6b272353 100644
--- a/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h
+++ b/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h
@@ -180,6 +180,10 @@ static FARPROC WINAPI do_dlsym(JNIEnv *env, HMODULE handle, LPCSTR symbol) {
THROW(env, "java/lang/InternalError", exception_msg); \
}
+#define RETRY_ON_EINTR(ret, expr) do { \
+ ret = expr; \
+} while ((ret == -1) && (errno == EINTR));
+
#endif
//vim: sw=2: ts=2: et
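
The RETRY_ON_EINTR macro added here simply re-issues a system call whose only failure was interruption by a signal (errno == EINTR), so callers such as DomainSocket.c never surface EINTR as a real I/O error. A small usage sketch (illustrative only; the macro is repeated so the snippet stands alone):

#include <errno.h>
#include <unistd.h>

#define RETRY_ON_EINTR(ret, expr) do { \
  ret = expr; \
} while ((ret == -1) && (errno == EINTR));

/* Illustrative only: a read(2) wrapper that retries when a signal
 * interrupts the call instead of reporting EINTR to the caller. */
static ssize_t read_retrying(int fd, void *buf, size_t len)
{
  ssize_t nread;
  RETRY_ON_EINTR(nread, read(fd, buf, len));
  return nread;  /* -1 with errno set only for genuine errors */
}
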
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java
new file mode 100644
index 00000000000..35f84b950e4
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java
@@ -0,0 +1,524 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.compress;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.compress.lz4.Lz4Compressor;
+import org.apache.hadoop.io.compress.snappy.SnappyCompressor;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
+import org.apache.hadoop.io.compress.zlib.ZlibCompressor;
+import org.apache.hadoop.io.compress.zlib.ZlibFactory;
+import org.apache.hadoop.util.NativeCodeLoader;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import static org.junit.Assert.*;
+
+public class CompressDecompressTester<T extends Compressor, E extends Decompressor> {
+
+ private static final Logger logger = Logger
+ .getLogger(CompressDecompressTester.class);
+
+ private final byte[] originalRawData;
+
+ private ImmutableList<TesterPair<T, E>> pairs = ImmutableList.of();
+ private ImmutableList.Builder<TesterPair<T, E>> builder = ImmutableList.builder();
+
+ private ImmutableSet<CompressionTestStrategy> stateges = ImmutableSet.of();
+
+ private PreAssertionTester<T, E> assertionDelegate;
+
+ public CompressDecompressTester(byte[] originalRawData) {
+ this.originalRawData = Arrays.copyOf(originalRawData,
+ originalRawData.length);
+ this.assertionDelegate = new PreAssertionTester<T, E>() {
+
+ @Override
+ public ImmutableList<TesterPair<T, E>> filterOnAssumeWhat(
+ ImmutableList<TesterPair<T, E>> pairs) {
+ ImmutableList.Builder<TesterPair<T, E>> builder = ImmutableList
+ .builder();
+
+ for (TesterPair<T, E> pair : pairs) {
+ if (isAvailable(pair))
+ builder.add(pair);
+ }
+ return builder.build();
+ }
+ };
+ }
+
+ private static boolean isNativeSnappyLoadable() {
+ boolean snappyAvailable = false;
+ boolean loaded = false;
+ try {
+ System.loadLibrary("snappy");
+ logger.warn("Snappy native library is available");
+ snappyAvailable = true;
+ boolean hadoopNativeAvailable = NativeCodeLoader.isNativeCodeLoaded();
+ loaded = snappyAvailable && hadoopNativeAvailable;
+ if (loaded) {
+ logger.info("Snappy native library loaded");
+ } else {
+ logger.warn("Snappy native library not loaded");
+ }
+ } catch (Throwable t) {
+ logger.warn("Failed to load snappy: ", t);
+ return false;
+ }
+ return loaded;
+ }
+
+ public static <T extends Compressor, E extends Decompressor> CompressDecompressTester<T, E> of(
+ byte[] rawData) {
+ return new CompressDecompressTester<T, E>(rawData);
+ }
+
+
+ public CompressDecompressTester<T, E> withCompressDecompressPair(
+ T compressor, E decompressor) {
+ addPair(
+ compressor,
+ decompressor,
+ Joiner.on("_").join(compressor.getClass().getCanonicalName(),
+ decompressor.getClass().getCanonicalName()));
+ return this;
+ }
+
+ public CompressDecompressTester<T, E> withTestCases(
+ ImmutableSet<CompressionTestStrategy> stateges) {
+ this.stateges = ImmutableSet.copyOf(stateges);
+ return this;
+ }
+
+ private void addPair(T compressor, E decompressor, String name) {
+ builder.add(new TesterPair<T, E>(name, compressor, decompressor));
+ }
+
+ public void test() throws InstantiationException, IllegalAccessException {
+ pairs = builder.build();
+ pairs = assertionDelegate.filterOnAssumeWhat(pairs);
+
+ for (TesterPair<T, E> pair : pairs) {
+ for (CompressionTestStrategy strategy : stateges) {
+ strategy.getTesterStrategy().assertCompression(pair.getName(),
+ pair.getCompressor(), pair.getDecompressor(),
+ Arrays.copyOf(originalRawData, originalRawData.length));
+ }
+ }
+ endAll(pairs);
+ }
+
+ private void endAll(ImmutableList<TesterPair<T, E>> pairs) {
+ for (TesterPair<T, E> pair : pairs)
+ pair.end();
+ }
+
+ interface PreAssertionTester<T extends Compressor, E extends Decompressor> {
+ ImmutableList<TesterPair<T, E>> filterOnAssumeWhat(
+ ImmutableList<TesterPair<T, E>> pairs);
+ }
+
+ public enum CompressionTestStrategy {
+
+ COMPRESS_DECOMPRESS_ERRORS(new TesterCompressionStrategy() {
+ private final Joiner joiner = Joiner.on("- ");
+
+ @Override
+ public void assertCompression(String name, Compressor compressor,
+ Decompressor decompressor, byte[] rawData) {
+ assertTrue(checkSetInputNullPointerException(compressor));
+ assertTrue(checkSetInputNullPointerException(decompressor));
+
+ assertTrue(checkCompressArrayIndexOutOfBoundsException(compressor,
+ rawData));
+ assertTrue(checkCompressArrayIndexOutOfBoundsException(decompressor,
+ rawData));
+
+ assertTrue(checkCompressNullPointerException(compressor, rawData));
+ assertTrue(checkCompressNullPointerException(decompressor, rawData));
+
+ assertTrue(checkSetInputArrayIndexOutOfBoundsException(compressor));
+ assertTrue(checkSetInputArrayIndexOutOfBoundsException(decompressor));
+ }
+
+ private boolean checkSetInputNullPointerException(Compressor compressor) {
+ try {
+ compressor.setInput(null, 0, 1);
+ } catch (NullPointerException npe) {
+ return true;
+ } catch (Exception ex) {
+ logger.error(joiner.join(compressor.getClass().getCanonicalName(),
+ "checkSetInputNullPointerException error !!!"));
+ }
+ return false;
+ }
+
+ private boolean checkCompressNullPointerException(Compressor compressor,
+ byte[] rawData) {
+ try {
+ compressor.setInput(rawData, 0, rawData.length);
+ compressor.compress(null, 0, 1);
+ } catch (NullPointerException npe) {
+ return true;
+ } catch (Exception ex) {
+ logger.error(joiner.join(compressor.getClass().getCanonicalName(),
+ "checkCompressNullPointerException error !!!"));
+ }
+ return false;
+ }
+
+ private boolean checkCompressNullPointerException(
+ Decompressor decompressor, byte[] rawData) {
+ try {
+ decompressor.setInput(rawData, 0, rawData.length);
+ decompressor.decompress(null, 0, 1);
+ } catch (NullPointerException npe) {
+ return true;
+ } catch (Exception ex) {
+ logger.error(joiner.join(decompressor.getClass().getCanonicalName(),
+ "checkCompressNullPointerException error !!!"));
+ }
+ return false;
+ }
+
+ private boolean checkSetInputNullPointerException(
+ Decompressor decompressor) {
+ try {
+ decompressor.setInput(null, 0, 1);
+ } catch (NullPointerException npe) {
+ return true;
+ } catch (Exception ex) {
+ logger.error(joiner.join(decompressor.getClass().getCanonicalName(),
+ "checkSetInputNullPointerException error !!!"));
+ }
+ return false;
+ }
+
+ private boolean checkSetInputArrayIndexOutOfBoundsException(
+ Compressor compressor) {
+ try {
+ compressor.setInput(new byte[] { (byte) 0 }, 0, -1);
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return true;
+ } catch (Exception e) {
+ logger.error(joiner.join(compressor.getClass().getCanonicalName(),
+ "checkSetInputArrayIndexOutOfBoundsException error !!!"));
+ }
+ return false;
+ }
+
+ private boolean checkCompressArrayIndexOutOfBoundsException(
+ Compressor compressor, byte[] rawData) {
+ try {
+ compressor.setInput(rawData, 0, rawData.length);
+ compressor.compress(new byte[rawData.length], 0, -1);
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return true;
+ } catch (Exception e) {
+ logger.error(joiner.join(compressor.getClass().getCanonicalName(),
+ "checkCompressArrayIndexOutOfBoundsException error !!!"));
+ }
+ return false;
+ }
+
+ private boolean checkCompressArrayIndexOutOfBoundsException(
+ Decompressor decompressor, byte[] rawData) {
+ try {
+ decompressor.setInput(rawData, 0, rawData.length);
+ decompressor.decompress(new byte[rawData.length], 0, -1);
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return true;
+ } catch (Exception e) {
+ logger.error(joiner.join(decompressor.getClass().getCanonicalName(),
+ "checkCompressArrayIndexOutOfBoundsException error !!!"));
+ }
+ return false;
+ }
+
+ private boolean checkSetInputArrayIndexOutOfBoundsException(
+ Decompressor decompressor) {
+ try {
+ decompressor.setInput(new byte[] { (byte) 0 }, 0, -1);
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return true;
+ } catch (Exception e) {
+ logger.error(joiner.join(decompressor.getClass().getCanonicalName(),
+ "checkNullPointerException error !!!"));
+ }
+ return false;
+ }
+
+ }),
+
+ COMPRESS_DECOMPRESS_SINGLE_BLOCK(new TesterCompressionStrategy() {
+ final Joiner joiner = Joiner.on("- ");
+
+ @Override
+ public void assertCompression(String name, Compressor compressor,
+ Decompressor decompressor, byte[] rawData) {
+
+ int cSize = 0;
+ int decompressedSize = 0;
+ byte[] compressedResult = new byte[rawData.length];
+ byte[] decompressedBytes = new byte[rawData.length];
+ try {
+ assertTrue(
+ joiner.join(name, "compressor.needsInput before error !!!"),
+ compressor.needsInput());
+ assertTrue(
+ joiner.join(name, "compressor.getBytesWritten before error !!!"),
+ compressor.getBytesWritten() == 0);
+ compressor.setInput(rawData, 0, rawData.length);
+ compressor.finish();
+ while (!compressor.finished()) {
+ cSize += compressor.compress(compressedResult, 0,
+ compressedResult.length);
+ }
+ compressor.reset();
+
+ assertTrue(
+ joiner.join(name, "decompressor.needsInput() before error !!!"),
+ decompressor.needsInput());
+ decompressor.setInput(compressedResult, 0, cSize);
+ assertFalse(
+ joiner.join(name, "decompressor.needsInput() after error !!!"),
+ decompressor.needsInput());
+ while (!decompressor.finished()) {
+ decompressedSize = decompressor.decompress(decompressedBytes, 0,
+ decompressedBytes.length);
+ }
+ decompressor.reset();
+ assertTrue(joiner.join(name, " byte size not equals error !!!"),
+ decompressedSize == rawData.length);
+ assertArrayEquals(
+ joiner.join(name, " byte arrays not equals error !!!"), rawData,
+ decompressedBytes);
+ } catch (Exception ex) {
+ fail(joiner.join(name, ex.getMessage()));
+ }
+ }
+ }),
+
+ COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM(new TesterCompressionStrategy() {
+ final Joiner joiner = Joiner.on("- ");
+ final ImmutableMap<Class<? extends Compressor>, Integer> emptySize = ImmutableMap
+ .of(Lz4Compressor.class, 4, ZlibCompressor.class, 16,
+ SnappyCompressor.class, 4, BuiltInZlibDeflater.class, 16);
+
+ @Override
+ void assertCompression(String name, Compressor compressor,
+ Decompressor decompressor, byte[] originalRawData) {
+ byte[] buf = null;
+ ByteArrayInputStream bytesIn = null;
+ BlockDecompressorStream blockDecompressorStream = null;
+ ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
+ // close without write
+ try {
+ compressor.reset();
+ // decompressor.end();
+ BlockCompressorStream blockCompressorStream = new BlockCompressorStream(
+ bytesOut, compressor, 1024, 0);
+ blockCompressorStream.close();
+ // check compressed output
+ buf = bytesOut.toByteArray();
+ int emSize = emptySize.get(compressor.getClass());
+ Assert.assertEquals(
+ joiner.join(name, "empty stream compressed output size != "
+ + emSize), emSize, buf.length);
+ // use compressed output as input for decompression
+ bytesIn = new ByteArrayInputStream(buf);
+ // create decompression stream
+ blockDecompressorStream = new BlockDecompressorStream(bytesIn,
+ decompressor, 1024);
+ // no byte is available because stream was closed
+ assertEquals(joiner.join(name, " return value is not -1"), -1,
+ blockDecompressorStream.read());
+ } catch (IOException e) {
+ fail(joiner.join(name, e.getMessage()));
+ } finally {
+ if (blockDecompressorStream != null)
+ try {
+ bytesOut.close();
+ blockDecompressorStream.close();
+ bytesIn.close();
+ } catch (IOException e) {
+ }
+ }
+ }
+
+ }),
+
+ COMPRESS_DECOMPRESS_BLOCK(new TesterCompressionStrategy() {
+ private final Joiner joiner = Joiner.on("- ");
+ private static final int BLOCK_SIZE = 512;
+ private final byte[] operationBlock = new byte[BLOCK_SIZE];
+ // Use default of 512 as bufferSize and compressionOverhead of
+ // (1% of bufferSize + 12 bytes) = 17 bytes (zlib algorithm).
+ private static final int overheadSpace = BLOCK_SIZE / 100 + 12;
+
+ @Override
+ public void assertCompression(String name, Compressor compressor,
+ Decompressor decompressor, byte[] originalRawData) {
+ int off = 0;
+ int len = originalRawData.length;
+ int maxSize = BLOCK_SIZE - overheadSpace;
+ int compresSize = 0;
+ List<Integer> blockLabels = new ArrayList<Integer>();
+ ByteArrayOutputStream compressedOut = new ByteArrayOutputStream();
+ ByteArrayOutputStream decompressOut = new ByteArrayOutputStream();
+ try {
+ if (originalRawData.length > maxSize) {
+ do {
+ int bufLen = Math.min(len, maxSize);
+ compressor.setInput(originalRawData, off, bufLen);
+ compressor.finish();
+ while (!compressor.finished()) {
+ compresSize = compressor.compress(operationBlock, 0,
+ operationBlock.length);
+ compressedOut.write(operationBlock, 0, compresSize);
+ blockLabels.add(compresSize);
+ }
+ compressor.reset();
+ off += bufLen;
+ len -= bufLen;
+ } while (len > 0);
+ }
+
+ off = 0;
+ // compressed bytes
+ byte[] compressedBytes = compressedOut.toByteArray();
+ for (Integer step : blockLabels) {
+ decompressor.setInput(compressedBytes, off, step);
+ while (!decompressor.finished()) {
+ int dSize = decompressor.decompress(operationBlock, 0,
+ operationBlock.length);
+ decompressOut.write(operationBlock, 0, dSize);
+ }
+ decompressor.reset();
+ off = off + step;
+ }
+ assertArrayEquals(
+ joiner.join(name, "byte arrays not equals error !!!"),
+ originalRawData, decompressOut.toByteArray());
+ } catch (Exception ex) {
+ fail(joiner.join(name, ex.getMessage()));
+ } finally {
+ try {
+ compressedOut.close();
+ } catch (IOException e) {
+ }
+ try {
+ decompressOut.close();
+ } catch (IOException e) {
+ }
+ }
+ }
+ });
+
+ private final TesterCompressionStrategy testerStrategy;
+
+ CompressionTestStrategy(TesterCompressionStrategy testStrategy) {
+ this.testerStrategy = testStrategy;
+ }
+
+ public TesterCompressionStrategy getTesterStrategy() {
+ return testerStrategy;
+ }
+ }
+
+ static final class TesterPair<T extends Compressor, E extends Decompressor> {
+ private final T compressor;
+ private final E decompressor;
+ private final String name;
+
+ TesterPair(String name, T compressor, E decompressor) {
+ this.compressor = compressor;
+ this.decompressor = decompressor;
+ this.name = name;
+ }
+
+ public void end() {
+ Configuration cfg = new Configuration();
+ compressor.reinit(cfg);
+ compressor.end();
+ decompressor.end();
+ }
+
+ public T getCompressor() {
+ return compressor;
+ }
+
+ public E getDecompressor() {
+ return decompressor;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
+ /**
+ * Method for compressor availability check
+ */
+ private static boolean isAvailable(TesterPair pair) {
+ Compressor compressor = pair.compressor;
+
+ if (compressor.getClass().isAssignableFrom(Lz4Compressor.class)
+ && (NativeCodeLoader.isNativeCodeLoaded()))
+ return true;
+
+ else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class)
+ && NativeCodeLoader.isNativeCodeLoaded())
+ return true;
+
+ else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) {
+ return ZlibFactory.isNativeZlibLoaded(new Configuration());
+ }
+ else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)
+ && isNativeSnappyLoadable())
+ return true;
+
+ return false;
+ }
+
+ abstract static class TesterCompressionStrategy {
+
+ protected final Logger logger = Logger.getLogger(getClass());
+
+ abstract void assertCompression(String name, Compressor compressor,
+ Decompressor decompressor, byte[] originalRawData);
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java
new file mode 100644
index 00000000000..a8ac993c47b
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.compress;
+
+import static org.junit.Assert.fail;
+import java.util.Random;
+import org.apache.hadoop.io.compress.CompressDecompressTester.CompressionTestStrategy;
+import org.apache.hadoop.io.compress.lz4.Lz4Compressor;
+import org.apache.hadoop.io.compress.lz4.Lz4Decompressor;
+import org.apache.hadoop.io.compress.snappy.SnappyCompressor;
+import org.apache.hadoop.io.compress.snappy.SnappyDecompressor;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
+import org.junit.Test;
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * Test for the compressor/decompressor pairs:
+ * <pre>
+ * SnappyCompressor/SnappyDecompressor
+ * Lz4Compressor/Lz4Decompressor
+ * BuiltInZlibDeflater/BuiltInZlibInflater
+ * </pre>
+ *
+ * Note: ZlibCompressor/ZlibDecompressor is not exercised here because its
+ * constructor can throw an exception when the native libraries are not found;
+ * that pair is covered by {@code TestZlibCompressorDecompressor}.
+ */
+public class TestCompressorDecompressor {
+
+ private static final Random rnd = new Random(12345L);
+
+ @Test
+ public void testCompressorDecompressor() {
+ // no more for this data
+ int SIZE = 44 * 1024;
+
+ byte[] rawData = generate(SIZE);
+ try {
+ CompressDecompressTester.of(rawData)
+ .withCompressDecompressPair(new SnappyCompressor(), new SnappyDecompressor())
+ .withCompressDecompressPair(new Lz4Compressor(), new Lz4Decompressor())
+ .withCompressDecompressPair(new BuiltInZlibDeflater(), new BuiltInZlibInflater())
+ .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM))
+ .test();
+
+ } catch (Exception ex) {
+ fail("testCompressorDecompressor error !!!" + ex);
+ }
+ }
+
+ @Test
+ public void testCompressorDecompressorWithExeedBufferLimit() {
+ int BYTE_SIZE = 100 * 1024;
+ byte[] rawData = generate(BYTE_SIZE);
+ try {
+ CompressDecompressTester.of(rawData)
+ .withCompressDecompressPair(
+ new SnappyCompressor(BYTE_SIZE + BYTE_SIZE / 2),
+ new SnappyDecompressor(BYTE_SIZE + BYTE_SIZE / 2))
+ .withCompressDecompressPair(new Lz4Compressor(BYTE_SIZE),
+ new Lz4Decompressor(BYTE_SIZE))
+ .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM))
+ .test();
+
+ } catch (Exception ex) {
+ fail("testCompressorDecompressorWithExeedBufferLimit error !!!" + ex);
+ }
+ }
+
+ public static byte[] generate(int size) {
+ byte[] array = new byte[size];
+ for (int i = 0; i < size; i++)
+ array[i] = (byte) rnd.nextInt(16);
+ return array;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java
new file mode 100644
index 00000000000..e8555b23887
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java
@@ -0,0 +1,316 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.compress.lz4;
+
+import static org.junit.Assert.*;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.compress.BlockCompressorStream;
+import org.apache.hadoop.io.compress.BlockDecompressorStream;
+import org.apache.hadoop.io.compress.CompressionInputStream;
+import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.Lz4Codec;
+import org.apache.hadoop.io.compress.lz4.Lz4Compressor;
+import org.apache.hadoop.io.compress.lz4.Lz4Decompressor;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assume.*;
+
+public class TestLz4CompressorDecompressor {
+
+ private static final Random rnd = new Random(12345l);
+
+ @Before
+ public void before() {
+ assumeTrue(Lz4Codec.isNativeCodeLoaded());
+ }
+
+ //test on NullPointerException in {@code compressor.setInput()}
+ @Test
+ public void testCompressorSetInputNullPointerException() {
+ try {
+ Lz4Compressor compressor = new Lz4Compressor();
+ compressor.setInput(null, 0, 10);
+ fail("testCompressorSetInputNullPointerException error !!!");
+ } catch (NullPointerException ex) {
+ // expected
+ } catch (Exception e) {
+ fail("testCompressorSetInputNullPointerException ex error !!!");
+ }
+ }
+
+ //test on NullPointerException in {@code decompressor.setInput()}
+ @Test
+ public void testDecompressorSetInputNullPointerException() {
+ try {
+ Lz4Decompressor decompressor = new Lz4Decompressor();
+ decompressor.setInput(null, 0, 10);
+ fail("testDecompressorSetInputNullPointerException error !!!");
+ } catch (NullPointerException ex) {
+ // expected
+ } catch (Exception e) {
+ fail("testDecompressorSetInputNullPointerException ex error !!!");
+ }
+ }
+
+ //test on ArrayIndexOutOfBoundsException in {@code compressor.setInput()}
+ @Test
+ public void testCompressorSetInputAIOBException() {
+ try {
+ Lz4Compressor compressor = new Lz4Compressor();
+ compressor.setInput(new byte[] {}, -5, 10);
+ fail("testCompressorSetInputAIOBException error !!!");
+ } catch (ArrayIndexOutOfBoundsException ex) {
+ // expected
+ } catch (Exception ex) {
+ fail("testCompressorSetInputAIOBException ex error !!!");
+ }
+ }
+
+ //test on ArrayIndexOutOfBoundsException in {@code decompressor.setInput()}
+ @Test
+ public void testDecompressorSetInputAIOUBException() {
+ try {
+ Lz4Decompressor decompressor = new Lz4Decompressor();
+ decompressor.setInput(new byte[] {}, -5, 10);
+ fail("testDecompressorSetInputAIOBException error !!!");
+ } catch (ArrayIndexOutOfBoundsException ex) {
+ // expected
+ } catch (Exception e) {
+ fail("testDecompressorSetInputAIOBException ex error !!!");
+ }
+ }
+
+ //test on NullPointerException in {@code compressor.compress()}
+ @Test
+ public void testCompressorCompressNullPointerException() {
+ try {
+ Lz4Compressor compressor = new Lz4Compressor();
+ byte[] bytes = generate(1024 * 6);
+ compressor.setInput(bytes, 0, bytes.length);
+ compressor.compress(null, 0, 0);
+ fail("testCompressorCompressNullPointerException error !!!");
+ } catch (NullPointerException ex) {
+ // expected
+ } catch (Exception e) {
+ fail("testCompressorCompressNullPointerException ex error !!!");
+ }
+ }
+
+ //test on NullPointerException in {@code decompressor.decompress()}
+ @Test
+ public void testDecompressorCompressNullPointerException() {
+ try {
+ Lz4Decompressor decompressor = new Lz4Decompressor();
+ byte[] bytes = generate(1024 * 6);
+ decompressor.setInput(bytes, 0, bytes.length);
+ decompressor.decompress(null, 0, 0);
+ fail("testDecompressorCompressNullPointerException error !!!");
+ } catch (NullPointerException ex) {
+ // expected
+ } catch (Exception e) {
+ fail("testDecompressorCompressNullPointerException ex error !!!");
+ }
+ }
+
+ //test on ArrayIndexOutOfBoundsException in {@code compressor.compress()}
+ @Test
+ public void testCompressorCompressAIOBException() {
+ try {
+ Lz4Compressor compressor = new Lz4Compressor();
+ byte[] bytes = generate(1024 * 6);
+ compressor.setInput(bytes, 0, bytes.length);
+ compressor.compress(new byte[] {}, 0, -1);
+ fail("testCompressorCompressAIOBException error !!!");
+ } catch (ArrayIndexOutOfBoundsException ex) {
+ // expected
+ } catch (Exception e) {
+ fail("testCompressorCompressAIOBException ex error !!!");
+ }
+ }
+
+ //test on ArrayIndexOutOfBoundsException in decompressor.decompress()
+ @Test
+ public void testDecompressorCompressAIOBException() {
+ try {
+ Lz4Decompressor decompressor = new Lz4Decompressor();
+ byte[] bytes = generate(1024 * 6);
+ decompressor.setInput(bytes, 0, bytes.length);
+ decompressor.decompress(new byte[] {}, 0, -1);
+ fail("testDecompressorCompressAIOBException error !!!");
+ } catch (ArrayIndexOutOfBoundsException ex) {
+ // expected
+ } catch (Exception e) {
+ fail("testDecompressorCompressAIOBException ex error !!!");
+ }
+ }
+
+ // test Lz4Compressor compressor.compress()
+ @Test
+ public void testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize() {
+ int BYTES_SIZE = 1024 * 64 + 1;
+ try {
+ Lz4Compressor compressor = new Lz4Compressor();
+ byte[] bytes = generate(BYTES_SIZE);
+ assertTrue("needsInput error !!!", compressor.needsInput());
+ compressor.setInput(bytes, 0, bytes.length);
+ byte[] emptyBytes = new byte[BYTES_SIZE];
+ int csize = compressor.compress(emptyBytes, 0, bytes.length);
+ assertTrue(
+ "testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize error !!!",
+ csize != 0);
+ } catch (Exception ex) {
+ fail("testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize ex error !!!");
+ }
+ }
+
+ // test compress/decompress process
+ @Test
+ public void testCompressDecompress() {
+ int BYTE_SIZE = 1024 * 54;
+ byte[] bytes = generate(BYTE_SIZE);
+ Lz4Compressor compressor = new Lz4Compressor();
+ try {
+ compressor.setInput(bytes, 0, bytes.length);
+ assertTrue("Lz4CompressDecompress getBytesRead error !!!",
+ compressor.getBytesRead() > 0);
+ assertTrue(
+ "Lz4CompressDecompress getBytesWritten before compress error !!!",
+ compressor.getBytesWritten() == 0);
+
+ byte[] compressed = new byte[BYTE_SIZE];
+ int cSize = compressor.compress(compressed, 0, compressed.length);
+ assertTrue(
+ "Lz4CompressDecompress getBytesWritten after compress error !!!",
+ compressor.getBytesWritten() > 0);
+ Lz4Decompressor decompressor = new Lz4Decompressor();
+ // set as input for decompressor only compressed data indicated with cSize
+ decompressor.setInput(compressed, 0, cSize);
+ byte[] decompressed = new byte[BYTE_SIZE];
+ decompressor.decompress(decompressed, 0, decompressed.length);
+
+ assertTrue("testLz4CompressDecompress finished error !!!", decompressor.finished());
+ assertArrayEquals(bytes, decompressed);
+ compressor.reset();
+ decompressor.reset();
+ assertTrue("decompressor getRemaining error !!!",decompressor.getRemaining() == 0);
+ } catch (Exception e) {
+ fail("testLz4CompressDecompress ex error!!!");
+ }
+ }
+
+ // test compress/decompress with empty stream
+ @Test
+ public void testCompressorDecompressorEmptyStreamLogic() {
+ ByteArrayInputStream bytesIn = null;
+ ByteArrayOutputStream bytesOut = null;
+ byte[] buf = null;
+ BlockDecompressorStream blockDecompressorStream = null;
+ try {
+ // compress empty stream
+ bytesOut = new ByteArrayOutputStream();
+ BlockCompressorStream blockCompressorStream = new BlockCompressorStream(
+ bytesOut, new Lz4Compressor(), 1024, 0);
+ // close without write
+ blockCompressorStream.close();
+ // check compressed output
+ buf = bytesOut.toByteArray();
+ assertEquals("empty stream compressed output size != 4", 4, buf.length);
+ // use compressed output as input for decompression
+ bytesIn = new ByteArrayInputStream(buf);
+ // create decompression stream
+ blockDecompressorStream = new BlockDecompressorStream(bytesIn,
+ new Lz4Decompressor(), 1024);
+ // no byte is available because stream was closed
+ assertEquals("return value is not -1", -1, blockDecompressorStream.read());
+ } catch (Exception e) {
+ fail("testCompressorDecompressorEmptyStreamLogic ex error !!!"
+ + e.getMessage());
+ } finally {
+ if (blockDecompressorStream != null)
+ try {
+ bytesIn.close();
+ bytesOut.close();
+ blockDecompressorStream.close();
+ } catch (IOException e) {
+ }
+ }
+ }
+
+ // test compress/decompress process through CompressionOutputStream/CompressionInputStream api
+ @Test
+ public void testCompressorDecopressorLogicWithCompressionStreams() {
+ DataOutputStream deflateOut = null;
+ DataInputStream inflateIn = null;
+ int BYTE_SIZE = 1024 * 100;
+ byte[] bytes = generate(BYTE_SIZE);
+ int bufferSize = 262144;
+ int compressionOverhead = (bufferSize / 6) + 32;
+ try {
+ DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
+ CompressionOutputStream deflateFilter = new BlockCompressorStream(
+ compressedDataBuffer, new Lz4Compressor(bufferSize), bufferSize,
+ compressionOverhead);
+ deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
+ deflateOut.write(bytes, 0, bytes.length);
+ deflateOut.flush();
+ deflateFilter.finish();
+
+ DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
+ deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0,
+ compressedDataBuffer.getLength());
+
+ CompressionInputStream inflateFilter = new BlockDecompressorStream(
+ deCompressedDataBuffer, new Lz4Decompressor(bufferSize), bufferSize);
+
+ inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
+
+ byte[] result = new byte[BYTE_SIZE];
+ inflateIn.read(result);
+
+ assertArrayEquals("original array not equals compress/decompressed array", result,
+ bytes);
+ } catch (IOException e) {
+ fail("testLz4CompressorDecopressorLogicWithCompressionStreams ex error !!!");
+ } finally {
+ try {
+ if (deflateOut != null)
+ deflateOut.close();
+ if (inflateIn != null)
+ inflateIn.close();
+ } catch (Exception e) {
+ }
+ }
+ }
+
+ public static byte[] generate(int size) {
+ byte[] array = new byte[size];
+ for (int i = 0; i < size; i++)
+ array[i] = (byte)rnd.nextInt(16);
+ return array;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java
new file mode 100644
index 00000000000..6e792d1e4ea
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java
@@ -0,0 +1,362 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.compress.zlib;
+
+import static org.junit.Assert.*;
+import static org.junit.Assume.*;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.compress.CompressDecompressTester;
+import org.apache.hadoop.io.compress.Compressor;
+import org.apache.hadoop.io.compress.Decompressor;
+import org.apache.hadoop.io.compress.DecompressorStream;
+import org.apache.hadoop.io.compress.CompressDecompressTester.CompressionTestStrategy;
+import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
+import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
+import org.junit.Before;
+import org.junit.Test;
+import com.google.common.collect.ImmutableSet;
+
+public class TestZlibCompressorDecompressor {
+
+ private static final Random random = new Random(12345L);
+
+ @Before
+ public void before() {
+ assumeTrue(ZlibFactory.isNativeZlibLoaded(new Configuration()));
+ }
+
+ @Test
+ public void testZlibCompressorDecompressor() {
+ try {
+ int SIZE = 44 * 1024;
+ byte[] rawData = generate(SIZE);
+
+ CompressDecompressTester.of(rawData)
+ .withCompressDecompressPair(new ZlibCompressor(), new ZlibDecompressor())
+ .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM))
+ .test();
+ } catch (Exception ex) {
+ fail("testCompressorDecompressor error !!!" + ex);
+ }
+ }
+
+ @Test
+ public void testCompressorDecompressorWithExeedBufferLimit() {
+ int BYTE_SIZE = 100 * 1024;
+ byte[] rawData = generate(BYTE_SIZE);
+ try {
+ CompressDecompressTester.of(rawData)
+ .withCompressDecompressPair(
+ new ZlibCompressor(
+ org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel.BEST_COMPRESSION,
+ CompressionStrategy.DEFAULT_STRATEGY,
+ org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionHeader.DEFAULT_HEADER,
+ BYTE_SIZE),
+ new ZlibDecompressor(
+ org.apache.hadoop.io.compress.zlib.ZlibDecompressor.CompressionHeader.DEFAULT_HEADER,
+ BYTE_SIZE))
+ .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS,
+ CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM))
+ .test();
+ } catch (Exception ex) {
+ fail("testCompressorDecompressorWithExeedBufferLimit error !!!" + ex);
+ }
+ }
+
+
+ @Test
+ public void testZlibCompressorDecompressorWithConfiguration() {
+ Configuration conf = new Configuration();
+ conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
+ if (ZlibFactory.isNativeZlibLoaded(conf)) {
+ byte[] rawData;
+ int tryNumber = 5;
+ int BYTE_SIZE = 10 * 1024;
+ Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
+ Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+ rawData = generate(BYTE_SIZE);
+ try {
+ for (int i = 0; i < tryNumber; i++)
+ compressDecompressZlib(rawData, (ZlibCompressor) zlibCompressor,
+ (ZlibDecompressor) zlibDecompressor);
+ zlibCompressor.reinit(conf);
+ } catch (Exception ex) {
+ fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex);
+ }
+ } else {
+ assertTrue("ZlibFactory is using native libs against request",
+ ZlibFactory.isNativeZlibLoaded(conf));
+ }
+ }
+
+ @Test
+ public void testZlibCompressDecompress() {
+ byte[] rawData = null;
+ int rawDataSize = 0;
+ rawDataSize = 1024 * 64;
+ rawData = generate(rawDataSize);
+ try {
+ ZlibCompressor compressor = new ZlibCompressor();
+ ZlibDecompressor decompressor = new ZlibDecompressor();
+ assertFalse("testZlibCompressDecompress finished error",
+ compressor.finished());
+ compressor.setInput(rawData, 0, rawData.length);
+ assertTrue("testZlibCompressDecompress getBytesRead before error",
+ compressor.getBytesRead() == 0);
+ compressor.finish();
+
+ byte[] compressedResult = new byte[rawDataSize];
+ int cSize = compressor.compress(compressedResult, 0, rawDataSize);
+ assertTrue("testZlibCompressDecompress getBytesRead ather error",
+ compressor.getBytesRead() == rawDataSize);
+ assertTrue(
+ "testZlibCompressDecompress compressed size no less then original size",
+ cSize < rawDataSize);
+ decompressor.setInput(compressedResult, 0, cSize);
+ byte[] decompressedBytes = new byte[rawDataSize];
+ decompressor.decompress(decompressedBytes, 0, decompressedBytes.length);
+ assertArrayEquals("testZlibCompressDecompress arrays not equals ",
+ rawData, decompressedBytes);
+ compressor.reset();
+ decompressor.reset();
+ } catch (IOException ex) {
+ fail("testZlibCompressDecompress ex !!!" + ex);
+ }
+ }
+
+ @Test
+ public void testZlibCompressorDecompressorSetDictionary() {
+ Configuration conf = new Configuration();
+ conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
+ if (ZlibFactory.isNativeZlibLoaded(conf)) {
+ Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
+ Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+
+ checkSetDictionaryNullPointerException(zlibCompressor);
+ checkSetDictionaryNullPointerException(zlibDecompressor);
+
+ checkSetDictionaryArrayIndexOutOfBoundsException(zlibDecompressor);
+ checkSetDictionaryArrayIndexOutOfBoundsException(zlibCompressor);
+ } else {
+ assertTrue("ZlibFactory is using native libs against request",
+ ZlibFactory.isNativeZlibLoaded(conf));
+ }
+ }
+
+ @Test
+ public void testZlibFactory() {
+ Configuration cfg = new Configuration();
+
+ assertTrue("testZlibFactory compression level error !!!",
+ CompressionLevel.DEFAULT_COMPRESSION == ZlibFactory
+ .getCompressionLevel(cfg));
+
+ assertTrue("testZlibFactory compression strategy error !!!",
+ CompressionStrategy.DEFAULT_STRATEGY == ZlibFactory
+ .getCompressionStrategy(cfg));
+
+ ZlibFactory.setCompressionLevel(cfg, CompressionLevel.BEST_COMPRESSION);
+ assertTrue("testZlibFactory compression strategy error !!!",
+ CompressionLevel.BEST_COMPRESSION == ZlibFactory
+ .getCompressionLevel(cfg));
+
+ ZlibFactory.setCompressionStrategy(cfg, CompressionStrategy.FILTERED);
+ assertTrue("testZlibFactory compression strategy error !!!",
+ CompressionStrategy.FILTERED == ZlibFactory.getCompressionStrategy(cfg));
+ }
+
+
+ private boolean checkSetDictionaryNullPointerException(
+ Decompressor decompressor) {
+ try {
+ decompressor.setDictionary(null, 0, 1);
+ } catch (NullPointerException ex) {
+ return true;
+ } catch (Exception ex) {
+ }
+ return false;
+ }
+
+ private boolean checkSetDictionaryNullPointerException(Compressor compressor) {
+ try {
+ compressor.setDictionary(null, 0, 1);
+ } catch (NullPointerException ex) {
+ return true;
+ } catch (Exception ex) {
+ }
+ return false;
+ }
+
+ private boolean checkSetDictionaryArrayIndexOutOfBoundsException(
+ Compressor compressor) {
+ try {
+ compressor.setDictionary(new byte[] { (byte) 0 }, 0, -1);
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return true;
+ } catch (Exception e) {
+ }
+ return false;
+ }
+
+ private boolean checkSetDictionaryArrayIndexOutOfBoundsException(
+ Decompressor decompressor) {
+ try {
+ decompressor.setDictionary(new byte[] { (byte) 0 }, 0, -1);
+ } catch (ArrayIndexOutOfBoundsException e) {
+ return true;
+ } catch (Exception e) {
+ }
+ return false;
+ }
+
+ private byte[] compressDecompressZlib(byte[] rawData,
+ ZlibCompressor zlibCompressor, ZlibDecompressor zlibDecompressor)
+ throws IOException {
+ int cSize = 0;
+ byte[] compressedByte = new byte[rawData.length];
+ byte[] decompressedRawData = new byte[rawData.length];
+ zlibCompressor.setInput(rawData, 0, rawData.length);
+ zlibCompressor.finish();
+ while (!zlibCompressor.finished()) {
+ cSize = zlibCompressor.compress(compressedByte, 0, compressedByte.length);
+ }
+ zlibCompressor.reset();
+
+ assertTrue(zlibDecompressor.getBytesWritten() == 0);
+ assertTrue(zlibDecompressor.getBytesRead() == 0);
+ assertTrue(zlibDecompressor.needsInput());
+ zlibDecompressor.setInput(compressedByte, 0, cSize);
+ assertFalse(zlibDecompressor.needsInput());
+ while (!zlibDecompressor.finished()) {
+ zlibDecompressor.decompress(decompressedRawData, 0,
+ decompressedRawData.length);
+ }
+ assertTrue(zlibDecompressor.getBytesWritten() == rawData.length);
+ assertTrue(zlibDecompressor.getBytesRead() == cSize);
+ zlibDecompressor.reset();
+ assertTrue(zlibDecompressor.getRemaining() == 0);
+ assertArrayEquals(
+ "testZlibCompressorDecompressorWithConfiguration array equals error",
+ rawData, decompressedRawData);
+
+ return decompressedRawData;
+ }
+
+ @Test
+ public void testBuiltInGzipDecompressorExceptions() {
+ BuiltInGzipDecompressor decompresser = new BuiltInGzipDecompressor();
+ try {
+ decompresser.setInput(null, 0, 1);
+ } catch (NullPointerException ex) {
+ // expected
+ } catch (Exception ex) {
+ fail("testBuiltInGzipDecompressorExceptions npe error " + ex);
+ }
+
+ try {
+ decompresser.setInput(new byte[] { 0 }, 0, -1);
+ } catch (ArrayIndexOutOfBoundsException ex) {
+ // expected
+ } catch (Exception ex) {
+ fail("testBuiltInGzipDecompressorExceptions aioob error" + ex);
+ }
+
+ assertTrue("decompresser.getBytesRead error",
+ decompresser.getBytesRead() == 0);
+ assertTrue("decompresser.getRemaining error",
+ decompresser.getRemaining() == 0);
+ decompresser.reset();
+ decompresser.end();
+
+ InputStream decompStream = null;
+ try {
+ // invalid 0 and 1 bytes , must be 31, -117
+ int buffSize = 1 * 1024;
+ byte buffer[] = new byte[buffSize];
+ Decompressor decompressor = new BuiltInGzipDecompressor();
+ DataInputBuffer gzbuf = new DataInputBuffer();
+ decompStream = new DecompressorStream(gzbuf, decompressor);
+ gzbuf.reset(new byte[] { 0, 0, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
+ decompStream.read(buffer);
+ } catch (IOException ioex) {
+ // expected
+ } catch (Exception ex) {
+ fail("invalid 0 and 1 byte in gzip stream" + ex);
+ }
+
+ // invalid 2 byte, must be 8
+ try {
+ int buffSize = 1 * 1024;
+ byte buffer[] = new byte[buffSize];
+ Decompressor decompressor = new BuiltInGzipDecompressor();
+ DataInputBuffer gzbuf = new DataInputBuffer();
+ decompStream = new DecompressorStream(gzbuf, decompressor);
+ gzbuf.reset(new byte[] { 31, -117, 7, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
+ decompStream.read(buffer);
+ } catch (IOException ioex) {
+ // expected
+ } catch (Exception ex) {
+ fail("invalid 2 byte in gzip stream" + ex);
+ }
+
+ try {
+ int buffSize = 1 * 1024;
+ byte buffer[] = new byte[buffSize];
+ Decompressor decompressor = new BuiltInGzipDecompressor();
+ DataInputBuffer gzbuf = new DataInputBuffer();
+ decompStream = new DecompressorStream(gzbuf, decompressor);
+ gzbuf.reset(new byte[] { 31, -117, 8, -32, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
+ decompStream.read(buffer);
+ } catch (IOException ioex) {
+ // expected
+ } catch (Exception ex) {
+ fail("invalid 3 byte in gzip stream" + ex);
+ }
+ try {
+ int buffSize = 1 * 1024;
+ byte buffer[] = new byte[buffSize];
+ Decompressor decompressor = new BuiltInGzipDecompressor();
+ DataInputBuffer gzbuf = new DataInputBuffer();
+ decompStream = new DecompressorStream(gzbuf, decompressor);
+ gzbuf.reset(new byte[] { 31, -117, 8, 4, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
+ decompStream.read(buffer);
+ } catch (IOException ioex) {
+ // expected
+ } catch (Exception ex) {
+ fail("invalid 3 byte make hasExtraField" + ex);
+ }
+ }
+
+ public static byte[] generate(int size) {
+ byte[] data = new byte[size];
+ for (int i = 0; i < size; i++)
+ data[i] = (byte)random.nextInt(16);
+ return data;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TemporarySocketDirectory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TemporarySocketDirectory.java
new file mode 100644
index 00000000000..1df78d96b7d
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TemporarySocketDirectory.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.net.unix;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.io.FileUtils;
+
+/**
+ * Create a temporary directory in which sockets can be created.
+ * When creating a UNIX domain socket, the name
+ * must be fairly short (around 110 bytes on most platforms).
+ */
+public class TemporarySocketDirectory implements Closeable {
+ private File dir;
+
+ public TemporarySocketDirectory() {
+ String tmp = System.getProperty("java.io.tmpdir", "/tmp");
+ dir = new File(tmp, "socks." + (System.currentTimeMillis() +
+ "." + (new Random().nextInt())));
+ dir.mkdirs();
+ dir.setWritable(true, true);
+ }
+
+ public File getDir() {
+ return dir;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (dir != null) {
+ FileUtils.deleteDirectory(dir);
+ dir = null;
+ }
+ }
+
+ protected void finalize() throws IOException {
+ close();
+ }
+}
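
The "around 110 bytes" limit mentioned in the class comment comes from the fixed-size sun_path field of struct sockaddr_un, typically 108 bytes on Linux and 104 bytes on several BSDs (including the terminating NUL); creating sockets under a short temporary directory keeps paths well inside that limit. A throwaway C check (illustrative only) prints the capacity on the local platform:

#include <stdio.h>
#include <sys/un.h>

/* Illustrative only: show how much room this platform leaves for an
 * AF_UNIX socket path, terminating NUL included. */
int main(void)
{
  struct sockaddr_un addr;
  printf("sun_path capacity: %zu bytes\n", sizeof(addr.sun_path));
  return 0;
}
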
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java
new file mode 100644
index 00000000000..d512027d45d
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java
@@ -0,0 +1,706 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.net.unix;
+
+import java.io.File;
+import java.io.FileDescriptor;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.SocketTimeoutException;
+import java.nio.ByteBuffer;
+import java.nio.channels.AsynchronousCloseException;
+import java.nio.channels.ClosedChannelException;
+import java.util.Arrays;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.unix.DomainSocket.DomainChannel;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.Shell;
+
+import com.google.common.io.Files;
+
+public class TestDomainSocket {
+ private static TemporarySocketDirectory sockDir;
+
+ @BeforeClass
+ public static void init() {
+ sockDir = new TemporarySocketDirectory();
+ DomainSocket.disableBindPathValidation();
+ }
+
+ @AfterClass
+ public static void shutdown() throws IOException {
+ sockDir.close();
+ }
+
+ @Before
+ public void before() {
+ Assume.assumeTrue(DomainSocket.getLoadingFailureReason() == null);
+ }
+
+ /**
+ * Test that we can create a socket and close it, even if it hasn't been
+ * opened.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testSocketCreateAndClose() throws IOException {
+ DomainSocket serv = DomainSocket.bindAndListen(
+ new File(sockDir.getDir(), "test_sock_create_and_close").
+ getAbsolutePath());
+ serv.close();
+ }
+
+ /**
+ * Test DomainSocket path setting and getting.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testSocketPathSetGet() throws IOException {
+ Assert.assertEquals("/var/run/hdfs/sock.100",
+ DomainSocket.getEffectivePath("/var/run/hdfs/sock._PORT", 100));
+ }
+
+ /**
+ * Test that we get a read result of -1 on EOF.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testSocketReadEof() throws Exception {
+ final String TEST_PATH = new File(sockDir.getDir(),
+ "testSocketReadEof").getAbsolutePath();
+ final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
+ ExecutorService exeServ = Executors.newSingleThreadExecutor();
+ Callable<Void> callable = new Callable<Void>() {
+ public Void call(){
+ DomainSocket conn;
+ try {
+ conn = serv.accept();
+ } catch (IOException e) {
+ throw new RuntimeException("unexpected IOException", e);
+ }
+ byte buf[] = new byte[100];
+ for (int i = 0; i < buf.length; i++) {
+ buf[i] = 0;
+ }
+ try {
+ Assert.assertEquals(-1, conn.getInputStream().read());
+ } catch (IOException e) {
+ throw new RuntimeException("unexpected IOException", e);
+ }
+ return null;
+ }
+ };
+ Future<Void> future = exeServ.submit(callable);
+ DomainSocket conn = DomainSocket.connect(serv.getPath());
+ Thread.sleep(50);
+ conn.close();
+ serv.close();
+ future.get(2, TimeUnit.MINUTES);
+ }
+
+ /**
+ * Test that if one thread is blocking in a read or write operation, another
+ * thread can close the socket and stop the accept.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testSocketAcceptAndClose() throws Exception {
+ final String TEST_PATH =
+ new File(sockDir.getDir(), "test_sock_accept_and_close").getAbsolutePath();
+ final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
+ ExecutorService exeServ = Executors.newSingleThreadExecutor();
+ Callable<Void> callable = new Callable<Void>() {
+ public Void call(){
+ try {
+ serv.accept();
+ throw new RuntimeException("expected the accept() to be " +
+ "interrupted and fail");
+ } catch (AsynchronousCloseException e) {
+ return null;
+ } catch (IOException e) {
+ throw new RuntimeException("unexpected IOException", e);
+ }
+ }
+ };
+ Future<Void> future = exeServ.submit(callable);
+ Thread.sleep(500);
+ serv.close();
+ future.get(2, TimeUnit.MINUTES);
+ }
+
+ /**
+ * Test that we get an AsynchronousCloseException when the DomainSocket
+ * we're using is closed during a read or write operation.
+ *
+ * @throws IOException
+ */
+ private void testAsyncCloseDuringIO(final boolean closeDuringWrite)
+ throws Exception {
+ final String TEST_PATH = new File(sockDir.getDir(),
+ "testAsyncCloseDuringIO(" + closeDuringWrite + ")").getAbsolutePath();
+ final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
+ ExecutorService exeServ = Executors.newFixedThreadPool(2);
+ Callable<Void> serverCallable = new Callable<Void>() {
+ public Void call() {
+ DomainSocket serverConn = null;
+ try {
+ serverConn = serv.accept();
+ byte buf[] = new byte[100];
+ for (int i = 0; i < buf.length; i++) {
+ buf[i] = 0;
+ }
+ // The server just continues either writing or reading until someone
+ // asynchronously closes the client's socket. At that point, all our
+ // reads return EOF, and writes get a socket error.
+ if (closeDuringWrite) {
+ try {
+ while (true) {
+ serverConn.getOutputStream().write(buf);
+ }
+ } catch (IOException e) {
+ }
+ } else {
+ do { ; } while
+ (serverConn.getInputStream().read(buf, 0, buf.length) != -1);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("unexpected IOException", e);
+ } finally {
+ IOUtils.cleanup(DomainSocket.LOG, serverConn);
+ }
+ return null;
+ }
+ };
+ Future<Void> serverFuture = exeServ.submit(serverCallable);
+ final DomainSocket clientConn = DomainSocket.connect(serv.getPath());
+ Callable<Void> clientCallable = new Callable<Void>() {
+ public Void call(){
+ // The client writes or reads until another thread
+ // asynchronously closes the socket. At that point, we should
+ // get ClosedChannelException, or possibly its subclass
+ // AsynchronousCloseException.
+ byte buf[] = new byte[100];
+ for (int i = 0; i < buf.length; i++) {
+ buf[i] = 0;
+ }
+ try {
+ if (closeDuringWrite) {
+ while (true) {
+ clientConn.getOutputStream().write(buf);
+ }
+ } else {
+ while (true) {
+ clientConn.getInputStream().read(buf, 0, buf.length);
+ }
+ }
+ } catch (ClosedChannelException e) {
+ return null;
+ } catch (IOException e) {
+ throw new RuntimeException("unexpected IOException", e);
+ }
+ }
+ };
+ Future<Void> clientFuture = exeServ.submit(clientCallable);
+ Thread.sleep(500);
+ clientConn.close();
+ serv.close();
+ clientFuture.get(2, TimeUnit.MINUTES);
+ serverFuture.get(2, TimeUnit.MINUTES);
+ }
+
+ @Test(timeout=180000)
+ public void testAsyncCloseDuringWrite() throws Exception {
+ testAsyncCloseDuringIO(true);
+ }
+
+ @Test(timeout=180000)
+ public void testAsyncCloseDuringRead() throws Exception {
+ testAsyncCloseDuringIO(false);
+ }
+
+ /**
+ * Test that attempting to connect to an invalid path doesn't work.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testInvalidOperations() throws IOException {
+ try {
+ DomainSocket.connect(
+ new File(sockDir.getDir(), "test_sock_invalid_operation").
+ getAbsolutePath());
+ } catch (IOException e) {
+ GenericTestUtils.assertExceptionContains("connect(2) error: ", e);
+ }
+ }
+
+ /**
+ * Test setting some server options.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testServerOptions() throws Exception {
+ final String TEST_PATH = new File(sockDir.getDir(),
+ "test_sock_server_options").getAbsolutePath();
+ DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
+ try {
+ // Let's set a new receive buffer size
+ int bufSize = serv.getAttribute(DomainSocket.RECEIVE_BUFFER_SIZE);
+ int newBufSize = bufSize / 2;
+ serv.setAttribute(DomainSocket.RECEIVE_BUFFER_SIZE, newBufSize);
+ int nextBufSize = serv.getAttribute(DomainSocket.RECEIVE_BUFFER_SIZE);
+ Assert.assertEquals(newBufSize, nextBufSize);
+ // Let's set a server timeout
+ int newTimeout = 1000;
+ serv.setAttribute(DomainSocket.RECEIVE_TIMEOUT, newTimeout);
+ int nextTimeout = serv.getAttribute(DomainSocket.RECEIVE_TIMEOUT);
+ Assert.assertEquals(newTimeout, nextTimeout);
+ try {
+ serv.accept();
+ Assert.fail("expected the accept() to time out and fail");
+ } catch (SocketTimeoutException e) {
+ GenericTestUtils.assertExceptionContains("accept(2) error: ", e);
+ }
+ } finally {
+ serv.close();
+ Assert.assertFalse(serv.isOpen());
+ }
+ }
+
+ /**
+ * A Throwable representing success.
+ *
+ * We can't use null to represent this, because you cannot insert null into
+ * ArrayBlockingQueue.
+ */
+ static class Success extends Throwable {
+ private static final long serialVersionUID = 1L;
+ }
+
+ static interface WriteStrategy {
+ /**
+ * Initialize a WriteStrategy object from a Socket.
+ */
+ public void init(DomainSocket s) throws IOException;
+
+ /**
+ * Write some bytes.
+ */
+ public void write(byte b[]) throws IOException;
+ }
+
+ static class OutputStreamWriteStrategy implements WriteStrategy {
+ private OutputStream outs = null;
+
+ public void init(DomainSocket s) throws IOException {
+ outs = s.getOutputStream();
+ }
+
+ public void write(byte b[]) throws IOException {
+ outs.write(b);
+ }
+ }
+
+ abstract static class ReadStrategy {
+ /**
+ * Initialize a ReadStrategy object from a DomainSocket.
+ */
+ public abstract void init(DomainSocket s) throws IOException;
+
+ /**
+ * Read some bytes.
+ */
+ public abstract int read(byte b[], int off, int length) throws IOException;
+
+ public void readFully(byte buf[], int off, int len) throws IOException {
+ int toRead = len;
+ while (toRead > 0) {
+ int ret = read(buf, off, toRead);
+ if (ret < 0) {
+ throw new IOException( "Premature EOF from inputStream");
+ }
+ toRead -= ret;
+ off += ret;
+ }
+ }
+ }
+
+ static class InputStreamReadStrategy extends ReadStrategy {
+ private InputStream ins = null;
+
+ @Override
+ public void init(DomainSocket s) throws IOException {
+ ins = s.getInputStream();
+ }
+
+ @Override
+ public int read(byte b[], int off, int length) throws IOException {
+ return ins.read(b, off, length);
+ }
+ }
+
+ static class DirectByteBufferReadStrategy extends ReadStrategy {
+ private DomainChannel ch = null;
+
+ @Override
+ public void init(DomainSocket s) throws IOException {
+ ch = s.getChannel();
+ }
+
+ @Override
+ public int read(byte b[], int off, int length) throws IOException {
+ ByteBuffer buf = ByteBuffer.allocateDirect(b.length);
+ int nread = ch.read(buf);
+ if (nread < 0) return nread;
+ buf.flip();
+ buf.get(b, off, nread);
+ return nread;
+ }
+ }
+
+ static class ArrayBackedByteBufferReadStrategy extends ReadStrategy {
+ private DomainChannel ch = null;
+
+ @Override
+ public void init(DomainSocket s) throws IOException {
+ ch = s.getChannel();
+ }
+
+ @Override
+ public int read(byte b[], int off, int length) throws IOException {
+ ByteBuffer buf = ByteBuffer.wrap(b);
+ int nread = ch.read(buf);
+ if (nread < 0) return nread;
+ buf.flip();
+ buf.get(b, off, nread);
+ return nread;
+ }
+ }
+
+ /**
+ * Test a simple client/server interaction.
+ *
+ * @throws IOException
+ */
+ void testClientServer1(final Class<? extends WriteStrategy> writeStrategyClass,
+ final Class<? extends ReadStrategy> readStrategyClass) throws Exception {
+ final String TEST_PATH = new File(sockDir.getDir(),
+ "test_sock_client_server1").getAbsolutePath();
+ final byte clientMsg1[] = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 };
+ final byte serverMsg1[] = new byte[] { 0x9, 0x8, 0x7, 0x6, 0x5 };
+ final byte clientMsg2 = 0x45;
+ final ArrayBlockingQueue<Throwable> threadResults =
+ new ArrayBlockingQueue<Throwable>(2);
+ final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
+ Thread serverThread = new Thread() {
+ public void run(){
+ // Run server
+ DomainSocket conn = null;
+ try {
+ conn = serv.accept();
+ byte in1[] = new byte[clientMsg1.length];
+ ReadStrategy reader = readStrategyClass.newInstance();
+ reader.init(conn);
+ reader.readFully(in1, 0, in1.length);
+ Assert.assertTrue(Arrays.equals(clientMsg1, in1));
+ WriteStrategy writer = writeStrategyClass.newInstance();
+ writer.init(conn);
+ writer.write(serverMsg1);
+ InputStream connInputStream = conn.getInputStream();
+ int in2 = connInputStream.read();
+ Assert.assertEquals((int)clientMsg2, in2);
+ conn.close();
+ } catch (Throwable e) {
+ threadResults.add(e);
+ Assert.fail(e.getMessage());
+ }
+ threadResults.add(new Success());
+ }
+ };
+ serverThread.start();
+
+ Thread clientThread = new Thread() {
+ public void run(){
+ try {
+ DomainSocket client = DomainSocket.connect(TEST_PATH);
+ WriteStrategy writer = writeStrategyClass.newInstance();
+ writer.init(client);
+ writer.write(clientMsg1);
+ ReadStrategy reader = readStrategyClass.newInstance();
+ reader.init(client);
+ byte in1[] = new byte[serverMsg1.length];
+ reader.readFully(in1, 0, in1.length);
+ Assert.assertTrue(Arrays.equals(serverMsg1, in1));
+ OutputStream clientOutputStream = client.getOutputStream();
+ clientOutputStream.write(clientMsg2);
+ client.close();
+ } catch (Throwable e) {
+ threadResults.add(e);
+ }
+ threadResults.add(new Success());
+ }
+ };
+ clientThread.start();
+
+ for (int i = 0; i < 2; i++) {
+ Throwable t = threadResults.take();
+ if (!(t instanceof Success)) {
+ Assert.fail(t.getMessage() + ExceptionUtils.getStackTrace(t));
+ }
+ }
+ serverThread.join(120000);
+ clientThread.join(120000);
+ serv.close();
+ }
+
+ @Test(timeout=180000)
+ public void testClientServerOutStreamInStream() throws Exception {
+ testClientServer1(OutputStreamWriteStrategy.class,
+ InputStreamReadStrategy.class);
+ }
+
+ @Test(timeout=180000)
+ public void testClientServerOutStreamInDbb() throws Exception {
+ testClientServer1(OutputStreamWriteStrategy.class,
+ DirectByteBufferReadStrategy.class);
+ }
+
+ @Test(timeout=180000)
+ public void testClientServerOutStreamInAbb() throws Exception {
+ testClientServer1(OutputStreamWriteStrategy.class,
+ ArrayBackedByteBufferReadStrategy.class);
+ }
+
+ static private class PassedFile {
+ private final int idx;
+ private final byte[] contents;
+ private FileInputStream fis;
+
+ public PassedFile(int idx) throws IOException {
+ this.idx = idx;
+ this.contents = new byte[] { (byte)(idx % 127) };
+ Files.write(contents, new File(getPath()));
+ this.fis = new FileInputStream(getPath());
+ }
+
+ public String getPath() {
+ return new File(sockDir.getDir(), "passed_file" + idx).getAbsolutePath();
+ }
+
+ public FileInputStream getInputStream() throws IOException {
+ return fis;
+ }
+
+ public void cleanup() throws IOException {
+ new File(getPath()).delete();
+ fis.close();
+ }
+
+ public void checkInputStream(FileInputStream fis) throws IOException {
+ byte buf[] = new byte[contents.length];
+ IOUtils.readFully(fis, buf, 0, buf.length);
+ Assert.assertTrue(Arrays.equals(contents, buf));
+ }
+
+ protected void finalize() {
+ try {
+ cleanup();
+ } catch(Throwable t) {
+ // ignore
+ }
+ }
+ }
+
+ /**
+ * Test file descriptor passing.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testFdPassing() throws Exception {
+ final String TEST_PATH =
+ new File(sockDir.getDir(), "test_sock").getAbsolutePath();
+ final byte clientMsg1[] = new byte[] { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 };
+ final byte serverMsg1[] = new byte[] { 0x31, 0x30, 0x32, 0x34, 0x31, 0x33,
+ 0x44, 0x1, 0x1, 0x1, 0x1, 0x1 };
+ final ArrayBlockingQueue<Throwable> threadResults =
+ new ArrayBlockingQueue<Throwable>(2);
+ final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
+ final PassedFile passedFiles[] =
+ new PassedFile[] { new PassedFile(1), new PassedFile(2) };
+ final FileDescriptor passedFds[] = new FileDescriptor[passedFiles.length];
+ for (int i = 0; i < passedFiles.length; i++) {
+ passedFds[i] = passedFiles[i].getInputStream().getFD();
+ }
+ Thread serverThread = new Thread() {
+ public void run(){
+ // Run server
+ DomainSocket conn = null;
+ try {
+ conn = serv.accept();
+ byte in1[] = new byte[clientMsg1.length];
+ InputStream connInputStream = conn.getInputStream();
+ IOUtils.readFully(connInputStream, in1, 0, in1.length);
+ Assert.assertTrue(Arrays.equals(clientMsg1, in1));
+ DomainSocket domainConn = (DomainSocket)conn;
+ domainConn.sendFileDescriptors(passedFds, serverMsg1, 0,
+ serverMsg1.length);
+ conn.close();
+ } catch (Throwable e) {
+ threadResults.add(e);
+ Assert.fail(e.getMessage());
+ }
+ threadResults.add(new Success());
+ }
+ };
+ serverThread.start();
+
+ Thread clientThread = new Thread() {
+ public void run(){
+ try {
+ DomainSocket client = DomainSocket.connect(TEST_PATH);
+ OutputStream clientOutputStream = client.getOutputStream();
+ InputStream clientInputStream = client.getInputStream();
+ clientOutputStream.write(clientMsg1);
+ DomainSocket domainConn = (DomainSocket)client;
+ byte in1[] = new byte[serverMsg1.length];
+ FileInputStream recvFis[] = new FileInputStream[passedFds.length];
+ int r = domainConn.
+ recvFileInputStreams(recvFis, in1, 0, in1.length - 1);
+ Assert.assertTrue(r > 0);
+ IOUtils.readFully(clientInputStream, in1, r, in1.length - r);
+ Assert.assertTrue(Arrays.equals(serverMsg1, in1));
+ for (int i = 0; i < passedFds.length; i++) {
+ Assert.assertNotNull(recvFis[i]);
+ passedFiles[i].checkInputStream(recvFis[i]);
+ }
+ for (FileInputStream fis : recvFis) {
+ fis.close();
+ }
+ client.close();
+ } catch (Throwable e) {
+ threadResults.add(e);
+ }
+ threadResults.add(new Success());
+ }
+ };
+ clientThread.start();
+
+ for (int i = 0; i < 2; i++) {
+ Throwable t = threadResults.take();
+ if (!(t instanceof Success)) {
+ Assert.fail(t.getMessage() + ExceptionUtils.getStackTrace(t));
+ }
+ }
+ serverThread.join(120000);
+ clientThread.join(120000);
+ serv.close();
+ for (PassedFile pf : passedFiles) {
+ pf.cleanup();
+ }
+ }
+
+ /**
+ * Run validateSocketPathSecurity
+ *
+ * @param str The path to validate
+ * @param prefix A prefix to skip validation for
+ * @throws IOException
+ */
+ private static void testValidateSocketPath(String str, String prefix)
+ throws IOException {
+ int skipComponents = 1;
+ File prefixFile = new File(prefix);
+ while (true) {
+ prefixFile = prefixFile.getParentFile();
+ if (prefixFile == null) {
+ break;
+ }
+ skipComponents++;
+ }
+ DomainSocket.validateSocketPathSecurity0(str,
+ skipComponents);
+ }
+
+ /**
+ * Test file descriptor path security.
+ *
+ * @throws IOException
+ */
+ @Test(timeout=180000)
+ public void testFdPassingPathSecurity() throws Exception {
+ TemporarySocketDirectory tmp = new TemporarySocketDirectory();
+ try {
+ String prefix = tmp.getDir().getAbsolutePath();
+ Shell.execCommand(new String [] {
+ "mkdir", "-p", prefix + "/foo/bar/baz" });
+ Shell.execCommand(new String [] {
+ "chmod", "0700", prefix + "/foo/bar/baz" });
+ Shell.execCommand(new String [] {
+ "chmod", "0700", prefix + "/foo/bar" });
+ Shell.execCommand(new String [] {
+ "chmod", "0707", prefix + "/foo" });
+ Shell.execCommand(new String [] {
+ "mkdir", "-p", prefix + "/q1/q2" });
+ Shell.execCommand(new String [] {
+ "chmod", "0700", prefix + "/q1" });
+ Shell.execCommand(new String [] {
+ "chmod", "0700", prefix + "/q1/q2" });
+ testValidateSocketPath(prefix + "/q1/q2", prefix);
+ try {
+ testValidateSocketPath(prefix + "/foo/bar/baz", prefix);
+ } catch (IOException e) {
+ GenericTestUtils.assertExceptionContains("/foo' is world-writable. " +
+ "Its permissions are 0707. Please fix this or select a " +
+ "different socket path.", e);
+ }
+ try {
+ testValidateSocketPath(prefix + "/nope", prefix);
+ } catch (IOException e) {
+ GenericTestUtils.assertExceptionContains("failed to stat a path " +
+ "component: ", e);
+ }
+ // Root should be secure
+ DomainSocket.validateSocketPathSecurity0("/foo", 1);
+ } finally {
+ tmp.close();
+ }
+ }
+}
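The tests above exercise DomainSocket mostly through executor tasks; stripped of the test scaffolding, the basic client/server exchange they verify looks roughly like the sketch below (not part of the patch). The class name and socket path are illustrative, and a real path would also have to pass the path-security validation exercised in testFdPassingPathSecurity.

    import java.io.File;
    import java.io.InputStream;
    import java.io.OutputStream;
    import org.apache.hadoop.net.unix.DomainSocket;

    public class DomainSocketEchoSketch {
      public static void main(String[] args) throws Exception {
        final String path = new File("/tmp", "example_sock").getAbsolutePath();
        final DomainSocket serv = DomainSocket.bindAndListen(path);
        Thread server = new Thread() {
          public void run() {
            try {
              DomainSocket conn = serv.accept();     // wait for the client
              OutputStream out = conn.getOutputStream();
              out.write(42);                         // send one byte back
              conn.close();
            } catch (Exception e) {
              throw new RuntimeException(e);
            }
          }
        };
        server.start();
        DomainSocket client = DomainSocket.connect(path);
        InputStream in = client.getInputStream();
        System.out.println("server sent: " + in.read());
        client.close();
        server.join();
        serv.close();
      }
    }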
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index c0b580cf411..808af4e6b2d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -187,6 +187,9 @@ Trunk (Unreleased)
HDFS-4633 TestDFSClientExcludedNodes fails sporadically if excluded nodes
cache expires too quickly (Chris Nauroth via Sanjay)
+ HDFS-347. DFS read performance suboptimal when client co-located on nodes
+ with data. (Colin Patrick McCabe via todd and atm)
+
OPTIMIZATIONS
BUG FIXES
@@ -355,6 +358,62 @@ Trunk (Unreleased)
HDFS-4674. TestBPOfferService fails on Windows due to failure parsing
datanode data directory as URI. (Chris Nauroth via suresh)
+ BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS
+
+ HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes.
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4354. Create DomainSocket and DomainPeer and associated unit tests.
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths.
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4388. DomainSocket should throw AsynchronousCloseException when appropriate.
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4390. Bypass UNIX domain socket unit tests when they cannot be run.
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4400. DFSInputStream#getBlockReader: last retries should ignore the cache
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4401. Fix bug in DomainSocket path validation
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4402. Some small DomainSocket fixes: avoid findbugs warning, change
+ log level, etc. (Colin Patrick McCabe via todd)
+
+ HDFS-4418. increase default FileInputStreamCache size (todd)
+
+ HDFS-4416. Rename dfs.datanode.domain.socket.path to dfs.domain.socket.path
+ (Colin Patrick McCabe via todd)
+
+ HDFS-4417. Fix case where local reads get disabled incorrectly
+ (Colin Patrick McCabe and todd via todd)
+
+ HDFS-4433. Make TestPeerCache not flaky (Colin Patrick McCabe via todd)
+
+ HDFS-4438. TestDomainSocket fails when system umask is set to 0002. (Colin
+ Patrick McCabe via atm)
+
+ HDFS-4440. Avoid annoying log message when dfs.domain.socket.path is not
+ set. (Colin Patrick McCabe via atm)
+
+ HDFS-4473. Don't create domain socket unless we need it. (Colin Patrick McCabe via atm)
+
+ HDFS-4485. DN should chmod socket path a+w. (Colin Patrick McCabe via atm)
+
+ HDFS-4453. Make a simple doc to describe the usage and design of the
+ shortcircuit read feature. (Colin Patrick McCabe via atm)
+
+ HDFS-4496. DFSClient: don't create a domain socket unless we need it (Colin
+ Patrick McCabe via todd)
+
+ HDFS-347: style cleanups (Colin Patrick McCabe via atm)
+
+ HDFS-4538. Allow use of legacy blockreader (Colin Patrick McCabe via todd)
+
Release 2.0.5-beta - UNRELEASED
INCOMPATIBLE CHANGES
@@ -399,6 +458,9 @@ Release 2.0.5-beta - UNRELEASED
HDFS-3940. Add Gset#clear method and clear the block map when namenode is
shutdown. (suresh)
+ HDFS-4679. Namenode operation checks should be done in a consistent
+ manner. (suresh)
+
OPTIMIZATIONS
BUG FIXES
@@ -500,6 +562,9 @@ Release 2.0.5-beta - UNRELEASED
HDFS-4643. Fix flakiness in TestQuorumJournalManager. (todd)
+ HDFS-4639. startFileInternal() should not increment generation stamp.
+ (Plamen Jeliazkov via shv)
+
Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES
@@ -2473,6 +2538,20 @@ Release 2.0.0-alpha - 05-23-2012
HDFS-3039. Address findbugs and javadoc warnings on branch. (todd via atm)
+Release 0.23.8 - UNRELEASED
+
+ INCOMPATIBLE CHANGES
+
+ NEW FEATURES
+
+ IMPROVEMENTS
+
+ OPTIMIZATIONS
+
+ BUG FIXES
+
+ HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal)
+
Release 0.23.7 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
index c019b10e423..4eaa65c8e4c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
@@ -290,6 +290,14 @@
+
+
+
+
+
+
+
+
diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
index 4116cd4136c..c0d29978d4e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
@@ -515,6 +515,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<exclude>CHANGES.txt</exclude>
<exclude>CHANGES.HDFS-1623.txt</exclude>
+ <exclude>CHANGES.HDFS-347.txt</exclude>
<exclude>.idea/**</exclude>
<exclude>src/main/conf/*</exclude>
<exclude>src/main/docs/**</exclude>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java
index cd6dc2d25ed..17ecb9e30d6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java
@@ -18,10 +18,8 @@
package org.apache.hadoop.hdfs;
import java.io.IOException;
-import java.net.Socket;
import org.apache.hadoop.fs.ByteBufferReadable;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
/**
* A BlockReader is responsible for reading a single block
@@ -43,7 +41,29 @@ public interface BlockReader extends ByteBufferReadable {
*/
long skip(long n) throws IOException;
- void close() throws IOException;
+ /**
+ * Returns an estimate of the number of bytes that can be read
+ * (or skipped over) from this input stream without performing
+ * network I/O.
+ */
+ int available() throws IOException;
+
+ /**
+ * Close the block reader.
+ *
+ * @param peerCache The PeerCache to put the Peer we're using back
+ * into, or null if we should simply close the Peer
+ * we're using (along with its Socket).
+ * Ignored by Readers that don't maintain Peers.
+ * @param fisCache The FileInputStreamCache to put our FileInputStreams
+ * back into, or null if we should simply close them.
+ * Ignored by Readers that don't maintain
+ * FileInputStreams.
+ *
+ * @throws IOException
+ */
+ void close(PeerCache peerCache, FileInputStreamCache fisCache)
+ throws IOException;
/**
* Read exactly the given amount of data, throwing an exception
@@ -60,20 +80,4 @@ public interface BlockReader extends ByteBufferReadable {
* filled or the next call will return EOF.
*/
int readAll(byte[] buf, int offset, int len) throws IOException;
-
- /**
- * Take the socket used to talk to the DN.
- */
- Socket takeSocket();
-
- /**
- * Whether the BlockReader has reached the end of its input stream
- * and successfully sent a status code back to the datanode.
- */
- boolean hasSentStatusCode();
-
- /**
- * @return a reference to the streams this block reader is using.
- */
- IOStreamPair getStreams();
}
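The reworked close() contract above shifts cache management to the caller: a caller that wants reuse hands in its caches, otherwise it passes null. A minimal sketch (not part of the patch) follows; the helper class and method names are hypothetical, and it assumes placement in org.apache.hadoop.hdfs so that PeerCache and FileInputStreamCache are visible.

    package org.apache.hadoop.hdfs;

    import java.io.IOException;

    class BlockReaderCloseSketch {
      /** Finish with a reader, either recycling its resources or releasing them. */
      static void finishRead(BlockReader reader, PeerCache peerCache,
          FileInputStreamCache fisCache, boolean reuse) throws IOException {
        if (reuse) {
          // Readers that hold a Peer or FileInputStreams return them to the caches.
          reader.close(peerCache, fisCache);
        } else {
          // Passing null tells the reader to simply close whatever it holds.
          reader.close(null, null);
        }
      }
    }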
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
index c71f1ced6a6..7fbb1a01d59 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
@@ -17,20 +17,31 @@
*/
package org.apache.hadoop.hdfs;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
-import java.net.Socket;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.DFSClient.Conf;
+import org.apache.hadoop.hdfs.net.Peer;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
-import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
-import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
+import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
+import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
+import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
+import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
-import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
@@ -39,75 +50,182 @@ import org.apache.hadoop.security.token.Token;
*/
@InterfaceAudience.Private
public class BlockReaderFactory {
- /**
- * @see #newBlockReader(Conf, Socket, String, ExtendedBlock, Token, long, long, int, boolean, String)
- */
- public static BlockReader newBlockReader(
- Configuration conf,
- Socket sock, String file,
- ExtendedBlock block, Token<BlockTokenIdentifier> blockToken,
- long startOffset, long len, DataEncryptionKey encryptionKey)
- throws IOException {
- int bufferSize = conf.getInt(DFSConfigKeys.IO_FILE_BUFFER_SIZE_KEY,
- DFSConfigKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
- return newBlockReader(new Conf(conf),
- sock, file, block, blockToken, startOffset,
- len, bufferSize, true, "", encryptionKey, null);
- }
-
/**
* Create a new BlockReader specifically to satisfy a read.
* This method also sends the OP_READ_BLOCK request.
*
* @param conf the DFSClient configuration
- * @param sock An established Socket to the DN. The BlockReader will not close it normally
* @param file File location
* @param block The block object
* @param blockToken The block token for security
* @param startOffset The read offset, relative to block head
- * @param len The number of bytes to read
+ * @param len The number of bytes to read, or -1 to read as many as
+ * possible.
* @param bufferSize The IO buffer size (not the client buffer size)
+ * Ignored except on the legacy BlockReader.
* @param verifyChecksum Whether to verify checksum
- * @param clientName Client name
- * @return New BlockReader instance, or null on error.
+ * @param clientName Client name. Used for log messages.
+ * @param peer The peer
+ * @param datanodeID The datanode that the Peer is connected to
+ * @param domainSocketFactory The DomainSocketFactory to notify if the Peer
+ * is a DomainPeer which turns out to be faulty.
+ * If null, no factory will be notified in this
+ * case.
+ * @param allowShortCircuitLocalReads True if short-circuit local reads
+ * should be allowed.
+ * @return New BlockReader instance
*/
@SuppressWarnings("deprecation")
public static BlockReader newBlockReader(
- Conf conf,
- Socket sock, String file,
+ Configuration conf,
+ String file,
ExtendedBlock block,
+ Token<BlockTokenIdentifier> blockToken,
long startOffset, long len,
- int bufferSize, boolean verifyChecksum,
+ boolean verifyChecksum,
String clientName,
- DataEncryptionKey encryptionKey,
- IOStreamPair ioStreams)
- throws IOException {
-
- if (conf.useLegacyBlockReader) {
- if (encryptionKey != null) {
- throw new RuntimeException("Encryption is not supported with the legacy block reader.");
- }
- return RemoteBlockReader.newBlockReader(
- sock, file, block, blockToken, startOffset, len, bufferSize, verifyChecksum, clientName);
- } else {
- if (ioStreams == null) {
- ioStreams = new IOStreamPair(NetUtils.getInputStream(sock),
- NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT));
- if (encryptionKey != null) {
- IOStreamPair encryptedStreams =
- DataTransferEncryptor.getEncryptedStreams(
- ioStreams.out, ioStreams.in, encryptionKey);
- ioStreams = encryptedStreams;
+ Peer peer,
+ DatanodeID datanodeID,
+ DomainSocketFactory domSockFactory,
+ boolean allowShortCircuitLocalReads)
+ throws IOException {
+ peer.setReadTimeout(conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY,
+ HdfsServerConstants.READ_TIMEOUT));
+ peer.setWriteTimeout(HdfsServerConstants.WRITE_TIMEOUT);
+
+ if (peer.getDomainSocket() != null) {
+ if (allowShortCircuitLocalReads &&
+ (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
+ DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT))) {
+ // If this is a domain socket, and short-circuit local reads are
+ // enabled, try to set up a BlockReaderLocal.
+ BlockReader reader = newShortCircuitBlockReader(conf, file,
+ block, blockToken, startOffset, len, peer, datanodeID,
+ domSockFactory, verifyChecksum);
+ if (reader != null) {
+ // Once we've constructed the short-circuit block reader, we don't
+ // need the socket any more. So let's return it to the cache.
+ PeerCache peerCache = PeerCache.getInstance(
+ conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY,
+ DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT),
+ conf.getLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
+ DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT));
+ peerCache.put(datanodeID, peer);
+ return reader;
}
}
-
+ // If this is a domain socket and we couldn't (or didn't want to) set
+ // up a BlockReaderLocal, check that we are allowed to pass data traffic
+ // over the socket before proceeding.
+ if (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
+ DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT)) {
+ throw new IOException("Because we can't do short-circuit access, " +
+ "and data traffic over domain sockets is disabled, " +
+ "we cannot use this socket to talk to " + datanodeID);
+ }
+ }
+
+ if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER,
+ DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT)) {
+ return RemoteBlockReader.newBlockReader(file,
+ block, blockToken, startOffset, len,
+ conf.getInt(DFSConfigKeys.IO_FILE_BUFFER_SIZE_KEY,
+ DFSConfigKeys.IO_FILE_BUFFER_SIZE_DEFAULT),
+ verifyChecksum, clientName, peer, datanodeID);
+ } else {
return RemoteBlockReader2.newBlockReader(
- sock, file, block, blockToken, startOffset, len, bufferSize,
- verifyChecksum, clientName, encryptionKey, ioStreams);
+ file, block, blockToken, startOffset, len,
+ verifyChecksum, clientName, peer, datanodeID);
}
}
-
+
+ /**
+ * Create a new short-circuit BlockReader.
+ *
+ * Here, we ask the DataNode to pass us file descriptors over our
+ * DomainSocket. If the DataNode declines to do so, we'll return null here;
+ * otherwise, we'll return the BlockReaderLocal. If the DataNode declines,
+ * this function will inform the DomainSocketFactory that short-circuit local
+ * reads are disabled for this DataNode, so that we don't ask again.
+ *
+ * @param conf the configuration.
+ * @param file the file name. Used in log messages.
+ * @param block The block object.
+ * @param blockToken The block token for security.
+ * @param startOffset The read offset, relative to block head.
+ * @param len The number of bytes to read, or -1 to read
+ * as many as possible.
+ * @param peer The peer to use.
+ * @param datanodeID The datanode that the Peer is connected to.
+ * @param domSockFactory The DomainSocketFactory to notify if the Peer
+ * is a DomainPeer which turns out to be faulty.
+ * If null, no factory will be notified in this
+ * case.
+ * @param verifyChecksum True if we should verify the checksums.
+ * Note: even if this is true, when
+ * DFS_CLIENT_READ_CHECKSUM_SKIP_CHECKSUM_KEY is
+ * set, we will skip checksums.
+ *
+ * @return The BlockReaderLocal, or null if the
+ * DataNode declined to provide short-circuit
+ * access.
+ * @throws IOException If there was a communication error.
+ */
+ private static BlockReaderLocal newShortCircuitBlockReader(
+ Configuration conf, String file, ExtendedBlock block,
+ Token<BlockTokenIdentifier> blockToken, long startOffset,
+ long len, Peer peer, DatanodeID datanodeID,
+ DomainSocketFactory domSockFactory, boolean verifyChecksum)
+ throws IOException {
+ final DataOutputStream out =
+ new DataOutputStream(new BufferedOutputStream(
+ peer.getOutputStream()));
+ new Sender(out).requestShortCircuitFds(block, blockToken, 1);
+ DataInputStream in =
+ new DataInputStream(peer.getInputStream());
+ BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
+ PBHelper.vintPrefixed(in));
+ DomainSocket sock = peer.getDomainSocket();
+ switch (resp.getStatus()) {
+ case SUCCESS:
+ BlockReaderLocal reader = null;
+ byte buf[] = new byte[1];
+ FileInputStream fis[] = new FileInputStream[2];
+ sock.recvFileInputStreams(fis, buf, 0, buf.length);
+ try {
+ reader = new BlockReaderLocal(conf, file, block,
+ startOffset, len, fis[0], fis[1], datanodeID, verifyChecksum);
+ } finally {
+ if (reader == null) {
+ IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]);
+ }
+ }
+ return reader;
+ case ERROR_UNSUPPORTED:
+ if (!resp.hasShortCircuitAccessVersion()) {
+ DFSClient.LOG.warn("short-circuit read access is disabled for " +
+ "DataNode " + datanodeID + ". reason: " + resp.getMessage());
+ domSockFactory.disableShortCircuitForPath(sock.getPath());
+ } else {
+ DFSClient.LOG.warn("short-circuit read access for the file " +
+ file + " is disabled for DataNode " + datanodeID +
+ ". reason: " + resp.getMessage());
+ }
+ return null;
+ case ERROR_ACCESS_TOKEN:
+ String msg = "access control error while " +
+ "attempting to set up short-circuit access to " +
+ file + resp.getMessage();
+ DFSClient.LOG.debug(msg);
+ throw new InvalidBlockTokenException(msg);
+ default:
+ DFSClient.LOG.warn("error while attempting to set up short-circuit " +
+ "access to " + file + ": " + resp.getMessage());
+ domSockFactory.disableShortCircuitForPath(sock.getPath());
+ return null;
+ }
+ }
+
/**
* File name to print when accessing a block directly (from servlets)
* @param s Address of the block location
@@ -119,4 +237,24 @@ public class BlockReaderFactory {
final String poolId, final long blockId) {
return s.toString() + ":" + poolId + ":" + blockId;
}
+
+ /**
+ * Get {@link BlockReaderLocalLegacy} for short circuited local reads.
+ * This block reader implements the path-based style of local reads
+ * first introduced in HDFS-2246.
+ */
+ static BlockReader getLegacyBlockReaderLocal(UserGroupInformation ugi,
+ Configuration conf, String src, ExtendedBlock blk,
+ Token<BlockTokenIdentifier> accessToken, DatanodeInfo chosenNode,
+ int socketTimeout, long offsetIntoBlock,
+ boolean connectToDnViaHostname) throws InvalidToken, IOException {
+ try {
+ return BlockReaderLocalLegacy.newBlockReader(ugi, conf, src,
+ blk, accessToken, chosenNode, socketTimeout, offsetIntoBlock,
+ blk.getNumBytes() - offsetIntoBlock, connectToDnViaHostname);
+ } catch (RemoteException re) {
+ throw re.unwrapRemoteException(InvalidToken.class,
+ AccessControlException.class);
+ }
+ }
}
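For context, the factory logic above keys off a handful of client-side switches; the sketch below (not part of the patch) collects them in one place. Only DFSConfigKeys constants that appear in the hunks above are used, and the class name, method name, and chosen values are illustrative assumptions.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class ShortCircuitConfSketch {
      public static Configuration clientConf() {
        Configuration conf = new Configuration();
        // Prefer the new FD-passing readers over the legacy implementations.
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER, false);
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, false);
        // Whether ordinary (non-short-circuit) data traffic may flow over the domain socket.
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false);
        // Buffer used by BlockReaderLocal for checksummed short-circuit reads.
        conf.setInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY, 1024 * 1024);
        return conf;
      }
    }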
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java
index a4d3ac2b229..ea22a9888f8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java
@@ -18,33 +18,20 @@
package org.apache.hadoop.hdfs;
import java.io.DataInputStream;
-import java.io.File;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
-import java.net.Socket;
import java.nio.ByteBuffer;
-import java.security.PrivilegedExceptionAction;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
-import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
-import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
-import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
import org.apache.hadoop.hdfs.util.DirectBufferPool;
-import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DataChecksum;
/**
@@ -56,84 +43,19 @@ import org.apache.hadoop.util.DataChecksum;
* <ul>
* <li>The client performing short circuit reads must be configured at the
* datanode.</li>
- * <li>The client gets the path to the file where block is stored using
- * {@link org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol#getBlockLocalPathInfo(ExtendedBlock, Token)}
- * RPC call</li>
- * <li>Client uses kerberos authentication to connect to the datanode over RPC,
- * if security is enabled.</li>
+ * <li>The client gets the file descriptors for the metadata file and the data
+ * file for the block using
+ * {@link org.apache.hadoop.hdfs.server.datanode.DataXceiver#requestShortCircuitFds}.
+ * </li>
+ * <li>The client reads the file descriptors.</li>
* </ul>
*/
class BlockReaderLocal implements BlockReader {
- private static final Log LOG = LogFactory.getLog(DFSClient.class);
-
- //Stores the cache and proxy for a local datanode.
- private static class LocalDatanodeInfo {
- private ClientDatanodeProtocol proxy = null;
- private final Map<ExtendedBlock, BlockLocalPathInfo> cache;
-
- LocalDatanodeInfo() {
- final int cacheSize = 10000;
- final float hashTableLoadFactor = 0.75f;
- int hashTableCapacity = (int) Math.ceil(cacheSize / hashTableLoadFactor) + 1;
- cache = Collections
- .synchronizedMap(new LinkedHashMap<ExtendedBlock, BlockLocalPathInfo>(
- hashTableCapacity, hashTableLoadFactor, true) {
- private static final long serialVersionUID = 1;
-
- @Override
- protected boolean removeEldestEntry(
- Map.Entry<ExtendedBlock, BlockLocalPathInfo> eldest) {
- return size() > cacheSize;
- }
- });
- }
-
- private synchronized ClientDatanodeProtocol getDatanodeProxy(
- UserGroupInformation ugi, final DatanodeInfo node,
- final Configuration conf, final int socketTimeout,
- final boolean connectToDnViaHostname) throws IOException {
- if (proxy == null) {
- try {
- proxy = ugi.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
- @Override
- public ClientDatanodeProtocol run() throws Exception {
- return DFSUtil.createClientDatanodeProtocolProxy(node, conf,
- socketTimeout, connectToDnViaHostname);
- }
- });
- } catch (InterruptedException e) {
- LOG.warn("encountered exception ", e);
- }
- }
- return proxy;
- }
-
- private synchronized void resetDatanodeProxy() {
- if (null != proxy) {
- RPC.stopProxy(proxy);
- proxy = null;
- }
- }
-
- private BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) {
- return cache.get(b);
- }
-
- private void setBlockLocalPathInfo(ExtendedBlock b, BlockLocalPathInfo info) {
- cache.put(b, info);
- }
-
- private void removeBlockLocalPathInfo(ExtendedBlock b) {
- cache.remove(b);
- }
- }
-
- // Multiple datanodes could be running on the local machine. Store proxies in
- // a map keyed by the ipc port of the datanode.
- private static Map<Integer, LocalDatanodeInfo> localDatanodeInfoMap = new HashMap<Integer, LocalDatanodeInfo>();
+ static final Log LOG = LogFactory.getLog(BlockReaderLocal.class);
private final FileInputStream dataIn; // reader for the data file
private final FileInputStream checksumIn; // reader for the checksum file
+ private final boolean verifyChecksum;
/**
* Offset from the most recent chunk boundary at which the next read should
@@ -153,7 +75,6 @@ class BlockReaderLocal implements BlockReader {
private ByteBuffer slowReadBuff = null;
private ByteBuffer checksumBuff = null;
private DataChecksum checksum;
- private final boolean verifyChecksum;
private static DirectBufferPool bufferPool = new DirectBufferPool();
@@ -163,187 +84,92 @@ class BlockReaderLocal implements BlockReader {
/** offset in block where reader wants to actually read */
private long startOffset;
private final String filename;
+
+ private final DatanodeID datanodeID;
+ private final ExtendedBlock block;
- /**
- * The only way this object can be instantiated.
- */
- static BlockReaderLocal newBlockReader(UserGroupInformation ugi,
- Configuration conf, String file, ExtendedBlock blk,
- Token<BlockTokenIdentifier> token, DatanodeInfo node, int socketTimeout,
- long startOffset, long length, boolean connectToDnViaHostname)
- throws IOException {
+ private static int getSlowReadBufferNumChunks(Configuration conf,
+ int bytesPerChecksum) {
- LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node
- .getIpcPort());
- // check the cache first
- BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk);
- if (pathinfo == null) {
- pathinfo = getBlockPathInfo(ugi, blk, node, conf, socketTimeout, token,
- connectToDnViaHostname);
- }
+ int bufSize =
+ conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
- // check to see if the file exists. It may so happen that the
- // HDFS file has been deleted and this block-lookup is occurring
- // on behalf of a new HDFS file. This time, the block file could
- // be residing in a different portion of the fs.data.dir directory.
- // In this case, we remove this entry from the cache. The next
- // call to this method will re-populate the cache.
- FileInputStream dataIn = null;
- FileInputStream checksumIn = null;
- BlockReaderLocal localBlockReader = null;
- boolean skipChecksumCheck = skipChecksumCheck(conf);
- try {
- // get a local file system
- File blkfile = new File(pathinfo.getBlockPath());
- dataIn = new FileInputStream(blkfile);
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("New BlockReaderLocal for file " + blkfile + " of size "
- + blkfile.length() + " startOffset " + startOffset + " length "
- + length + " short circuit checksum " + !skipChecksumCheck);
- }
-
- if (!skipChecksumCheck) {
- // get the metadata file
- File metafile = new File(pathinfo.getMetaPath());
- checksumIn = new FileInputStream(metafile);
-
- // read and handle the common header here. For now just a version
- BlockMetadataHeader header = BlockMetadataHeader
- .readHeader(new DataInputStream(checksumIn));
- short version = header.getVersion();
- if (version != BlockMetadataHeader.VERSION) {
- LOG.warn("Wrong version (" + version + ") for metadata file for "
- + blk + " ignoring ...");
- }
- DataChecksum checksum = header.getChecksum();
- long firstChunkOffset = startOffset
- - (startOffset % checksum.getBytesPerChecksum());
- localBlockReader = new BlockReaderLocal(conf, file, blk, token,
- startOffset, length, pathinfo, checksum, true, dataIn,
- firstChunkOffset, checksumIn);
- } else {
- localBlockReader = new BlockReaderLocal(conf, file, blk, token,
- startOffset, length, pathinfo, dataIn);
- }
- } catch (IOException e) {
- // remove from cache
- localDatanodeInfo.removeBlockLocalPathInfo(blk);
- DFSClient.LOG.warn("BlockReaderLocal: Removing " + blk
- + " from cache because local file " + pathinfo.getBlockPath()
- + " could not be opened.");
- throw e;
- } finally {
- if (localBlockReader == null) {
- if (dataIn != null) {
- dataIn.close();
- }
- if (checksumIn != null) {
- checksumIn.close();
- }
- }
- }
- return localBlockReader;
- }
-
- private static synchronized LocalDatanodeInfo getLocalDatanodeInfo(int port) {
- LocalDatanodeInfo ldInfo = localDatanodeInfoMap.get(port);
- if (ldInfo == null) {
- ldInfo = new LocalDatanodeInfo();
- localDatanodeInfoMap.put(port, ldInfo);
- }
- return ldInfo;
- }
-
- private static BlockLocalPathInfo getBlockPathInfo(UserGroupInformation ugi,
- ExtendedBlock blk, DatanodeInfo node, Configuration conf, int timeout,
- Token<BlockTokenIdentifier> token, boolean connectToDnViaHostname)
- throws IOException {
- LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node.getIpcPort());
- BlockLocalPathInfo pathinfo = null;
- ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(ugi, node,
- conf, timeout, connectToDnViaHostname);
- try {
- // make RPC to local datanode to find local pathnames of blocks
- pathinfo = proxy.getBlockLocalPathInfo(blk, token);
- if (pathinfo != null) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Cached location of block " + blk + " as " + pathinfo);
- }
- localDatanodeInfo.setBlockLocalPathInfo(blk, pathinfo);
- }
- } catch (IOException e) {
- localDatanodeInfo.resetDatanodeProxy(); // Reset proxy on error
- throw e;
- }
- return pathinfo;
- }
-
- private static boolean skipChecksumCheck(Configuration conf) {
- return conf.getBoolean(
- DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
- DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
- }
-
- private static int getSlowReadBufferNumChunks(Configuration conf, int bytesPerChecksum) {
- int bufferSizeBytes = conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
- DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
-
- if (bufferSizeBytes < bytesPerChecksum) {
- throw new IllegalArgumentException("Configured BlockReaderLocal buffer size (" + bufferSizeBytes + ") " +
- "is not large enough to hold a single chunk (" + bytesPerChecksum + "). Please configure " +
+ if (bufSize < bytesPerChecksum) {
+ throw new IllegalArgumentException("Configured BlockReaderLocal buffer size (" +
+ bufSize + ") is not large enough to hold a single chunk (" +
+ bytesPerChecksum + "). Please configure " +
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + " appropriately");
}
// Round down to nearest chunk size
- return bufferSizeBytes / bytesPerChecksum;
+ return bufSize / bytesPerChecksum;
}
- private BlockReaderLocal(Configuration conf, String hdfsfile,
- ExtendedBlock block, Token<BlockTokenIdentifier> token, long startOffset,
- long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn)
- throws IOException {
- this(conf, hdfsfile, block, token, startOffset, length, pathinfo,
- DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4), false,
- dataIn, startOffset, null);
- }
-
- private BlockReaderLocal(Configuration conf, String hdfsfile,
- ExtendedBlock block, Token<BlockTokenIdentifier> token, long startOffset,
- long length, BlockLocalPathInfo pathinfo, DataChecksum checksum,
- boolean verifyChecksum, FileInputStream dataIn, long firstChunkOffset,
- FileInputStream checksumIn) throws IOException {
- this.filename = hdfsfile;
- this.checksum = checksum;
- this.verifyChecksum = verifyChecksum;
- this.startOffset = Math.max(startOffset, 0);
-
- bytesPerChecksum = this.checksum.getBytesPerChecksum();
- checksumSize = this.checksum.getChecksumSize();
-
+ public BlockReaderLocal(Configuration conf, String filename,
+ ExtendedBlock block, long startOffset, long length,
+ FileInputStream dataIn, FileInputStream checksumIn,
+ DatanodeID datanodeID, boolean verifyChecksum) throws IOException {
this.dataIn = dataIn;
this.checksumIn = checksumIn;
- this.offsetFromChunkBoundary = (int) (startOffset-firstChunkOffset);
+ this.startOffset = Math.max(startOffset, 0);
+ this.filename = filename;
+ this.datanodeID = datanodeID;
+ this.block = block;
- int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum);
- slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead);
- checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead);
- // Initially the buffers have nothing to read.
- slowReadBuff.flip();
- checksumBuff.flip();
+ // read and handle the common header here. For now just a version
+ checksumIn.getChannel().position(0);
+ BlockMetadataHeader header = BlockMetadataHeader
+ .readHeader(new DataInputStream(
+ new BufferedInputStream(checksumIn,
+ BlockMetadataHeader.getHeaderSize())));
+ short version = header.getVersion();
+ if (version != BlockMetadataHeader.VERSION) {
+ throw new IOException("Wrong version (" + version + ") of the " +
+ "metadata file for " + filename + ".");
+ }
+ if (!verifyChecksum) {
+ this.verifyChecksum = false;
+ } else {
+ this.verifyChecksum = !conf.getBoolean(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
+ }
+ long firstChunkOffset;
+ if (this.verifyChecksum) {
+ this.checksum = header.getChecksum();
+ this.bytesPerChecksum = this.checksum.getBytesPerChecksum();
+ this.checksumSize = this.checksum.getChecksumSize();
+ firstChunkOffset = startOffset
+ - (startOffset % checksum.getBytesPerChecksum());
+ this.offsetFromChunkBoundary = (int) (startOffset - firstChunkOffset);
+
+ int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum);
+ slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead);
+ checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead);
+ // Initially the buffers have nothing to read.
+ slowReadBuff.flip();
+ checksumBuff.flip();
+ long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize;
+ IOUtils.skipFully(checksumIn, checkSumOffset);
+ } else {
+ firstChunkOffset = startOffset;
+ this.checksum = null;
+ this.bytesPerChecksum = 0;
+ this.checksumSize = 0;
+ this.offsetFromChunkBoundary = 0;
+ }
+
boolean success = false;
try {
- // Skip both input streams to beginning of the chunk containing startOffset
- IOUtils.skipFully(dataIn, firstChunkOffset);
- if (checksumIn != null) {
- long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize;
- IOUtils.skipFully(checksumIn, checkSumOffset);
- }
+ // Reposition both input streams to the beginning of the chunk
+ // containing startOffset
+ this.dataIn.getChannel().position(firstChunkOffset);
success = true;
} finally {
if (!success) {
- bufferPool.returnBuffer(slowReadBuff);
- bufferPool.returnBuffer(checksumBuff);
+ if (slowReadBuff != null) bufferPool.returnBuffer(slowReadBuff);
+ if (checksumBuff != null) bufferPool.returnBuffer(checksumBuff);
}
}
}
@@ -663,10 +489,17 @@ class BlockReaderLocal implements BlockReader {
}
@Override
- public synchronized void close() throws IOException {
- dataIn.close();
- if (checksumIn != null) {
- checksumIn.close();
+ public synchronized void close(PeerCache peerCache,
+ FileInputStreamCache fisCache) throws IOException {
+ if (fisCache != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("putting FileInputStream for " + filename +
+ " back into FileInputStreamCache");
+ }
+ fisCache.put(datanodeID, block, new FileInputStream[] {dataIn, checksumIn});
+ } else {
+ LOG.debug("closing FileInputStream for " + filename);
+ IOUtils.cleanup(LOG, dataIn, checksumIn);
}
if (slowReadBuff != null) {
bufferPool.returnBuffer(slowReadBuff);
@@ -691,17 +524,8 @@ class BlockReaderLocal implements BlockReader {
}
@Override
- public Socket takeSocket() {
- return null;
- }
-
- @Override
- public boolean hasSentStatusCode() {
- return false;
- }
-
- @Override
- public IOStreamPair getStreams() {
- return null;
+ public int available() throws IOException {
+ // We never do network I/O in BlockReaderLocal.
+ return Integer.MAX_VALUE;
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java
new file mode 100644
index 00000000000..11fb4922fcb
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java
@@ -0,0 +1,704 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.net.Socket;
+import java.nio.ByteBuffer;
+import java.security.PrivilegedExceptionAction;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
+import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
+import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
+import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
+import org.apache.hadoop.hdfs.util.DirectBufferPool;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.util.DataChecksum;
+
+/**
+ * BlockReaderLocalLegacy enables local short circuited reads. If the DFS client is on
+ * the same machine as the datanode, then the client can read files directly
+ * from the local file system rather than going through the datanode for better
+ * performance.
+ *
+ * This is the legacy implementation based on HDFS-2246, which requires
+ * permissions on the datanode to be set so that clients can directly access the
+ * blocks. The new implementation based on HDFS-347 should be preferred on UNIX
+ * systems where the required native code has been implemented.
+ *
+ * {@link BlockReaderLocalLegacy} works as follows:
+ * <ul>
+ * <li>The client performing short circuit reads must be configured at the
+ * datanode.</li>
+ * <li>The client gets the path to the file where block is stored using
+ * {@link org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol#getBlockLocalPathInfo(ExtendedBlock, Token)}
+ * RPC call</li>
+ * <li>Client uses kerberos authentication to connect to the datanode over RPC,
+ * if security is enabled.</li>
+ * </ul>
+ */
+class BlockReaderLocalLegacy implements BlockReader {
+ private static final Log LOG = LogFactory.getLog(DFSClient.class);
+
+ //Stores the cache and proxy for a local datanode.
+ private static class LocalDatanodeInfo {
+ private ClientDatanodeProtocol proxy = null;
+ private final Map<ExtendedBlock, BlockLocalPathInfo> cache;
+
+ LocalDatanodeInfo() {
+ final int cacheSize = 10000;
+ final float hashTableLoadFactor = 0.75f;
+ int hashTableCapacity = (int) Math.ceil(cacheSize / hashTableLoadFactor) + 1;
+ cache = Collections
+ .synchronizedMap(new LinkedHashMap<ExtendedBlock, BlockLocalPathInfo>(
+ hashTableCapacity, hashTableLoadFactor, true) {
+ private static final long serialVersionUID = 1;
+
+ @Override
+ protected boolean removeEldestEntry(
+ Map.Entry<ExtendedBlock, BlockLocalPathInfo> eldest) {
+ return size() > cacheSize;
+ }
+ });
+ }
+
+ private synchronized ClientDatanodeProtocol getDatanodeProxy(
+ UserGroupInformation ugi, final DatanodeInfo node,
+ final Configuration conf, final int socketTimeout,
+ final boolean connectToDnViaHostname) throws IOException {
+ if (proxy == null) {
+ try {
+ proxy = ugi.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
+ @Override
+ public ClientDatanodeProtocol run() throws Exception {
+ return DFSUtil.createClientDatanodeProtocolProxy(node, conf,
+ socketTimeout, connectToDnViaHostname);
+ }
+ });
+ } catch (InterruptedException e) {
+ LOG.warn("encountered exception ", e);
+ }
+ }
+ return proxy;
+ }
+
+ private synchronized void resetDatanodeProxy() {
+ if (null != proxy) {
+ RPC.stopProxy(proxy);
+ proxy = null;
+ }
+ }
+
+ private BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) {
+ return cache.get(b);
+ }
+
+ private void setBlockLocalPathInfo(ExtendedBlock b, BlockLocalPathInfo info) {
+ cache.put(b, info);
+ }
+
+ private void removeBlockLocalPathInfo(ExtendedBlock b) {
+ cache.remove(b);
+ }
+ }
+
+ // Multiple datanodes could be running on the local machine. Store proxies in
+ // a map keyed by the ipc port of the datanode.
+ private static Map<Integer, LocalDatanodeInfo> localDatanodeInfoMap = new HashMap<Integer, LocalDatanodeInfo>();
+
+ private final FileInputStream dataIn; // reader for the data file
+ private final FileInputStream checksumIn; // reader for the checksum file
+
+ /**
+ * Offset from the most recent chunk boundary at which the next read should
+ * take place. Is only set to non-zero at construction time, and is
+ * decremented (usually to 0) by subsequent reads. This avoids having to do a
+ * checksum read at construction to position the read cursor correctly.
+ */
+ private int offsetFromChunkBoundary;
+
+ private byte[] skipBuf = null;
+
+ /**
+ * Used for checksummed reads that need to be staged before copying to their
+ * output buffer because they are either a) smaller than the checksum chunk
+ * size or b) issued by the slower read(byte[]...) path
+ */
+ private ByteBuffer slowReadBuff = null;
+ private ByteBuffer checksumBuff = null;
+ private DataChecksum checksum;
+ private final boolean verifyChecksum;
+
+ private static DirectBufferPool bufferPool = new DirectBufferPool();
+
+ private final int bytesPerChecksum;
+ private final int checksumSize;
+
+ /** offset in block where reader wants to actually read */
+ private long startOffset;
+ private final String filename;
+
+ /**
+ * The only way this object can be instantiated.
+ */
+ static BlockReaderLocalLegacy newBlockReader(UserGroupInformation ugi,
+ Configuration conf, String file, ExtendedBlock blk,
+ Token<BlockTokenIdentifier> token, DatanodeInfo node, int socketTimeout,
+ long startOffset, long length, boolean connectToDnViaHostname)
+ throws IOException {
+
+ LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node
+ .getIpcPort());
+ // check the cache first
+ BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk);
+ if (pathinfo == null) {
+ pathinfo = getBlockPathInfo(ugi, blk, node, conf, socketTimeout, token,
+ connectToDnViaHostname);
+ }
+
+ // check to see if the file exists. It may so happen that the
+ // HDFS file has been deleted and this block-lookup is occurring
+ // on behalf of a new HDFS file. This time, the block file could
+ // be residing in a different portion of the dfs.data.dir directory.
+ // In this case, we remove this entry from the cache. The next
+ // call to this method will re-populate the cache.
+ FileInputStream dataIn = null;
+ FileInputStream checksumIn = null;
+ BlockReaderLocalLegacy localBlockReader = null;
+ boolean skipChecksumCheck = skipChecksumCheck(conf);
+ try {
+ // get a local file system
+ File blkfile = new File(pathinfo.getBlockPath());
+ dataIn = new FileInputStream(blkfile);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("New BlockReaderLocalLegacy for file " + blkfile + " of size "
+ + blkfile.length() + " startOffset " + startOffset + " length "
+ + length + " short circuit checksum " + !skipChecksumCheck);
+ }
+
+ if (!skipChecksumCheck) {
+ // get the metadata file
+ File metafile = new File(pathinfo.getMetaPath());
+ checksumIn = new FileInputStream(metafile);
+
+ // read and handle the common header here. For now just a version
+ BlockMetadataHeader header = BlockMetadataHeader
+ .readHeader(new DataInputStream(checksumIn));
+ short version = header.getVersion();
+ if (version != BlockMetadataHeader.VERSION) {
+ LOG.warn("Wrong version (" + version + ") for metadata file for "
+ + blk + " ignoring ...");
+ }
+ DataChecksum checksum = header.getChecksum();
+ long firstChunkOffset = startOffset
+ - (startOffset % checksum.getBytesPerChecksum());
+ localBlockReader = new BlockReaderLocalLegacy(conf, file, blk, token,
+ startOffset, length, pathinfo, checksum, true, dataIn,
+ firstChunkOffset, checksumIn);
+ } else {
+ localBlockReader = new BlockReaderLocalLegacy(conf, file, blk, token,
+ startOffset, length, pathinfo, dataIn);
+ }
+ } catch (IOException e) {
+ // remove from cache
+ localDatanodeInfo.removeBlockLocalPathInfo(blk);
+ DFSClient.LOG.warn("BlockReaderLocalLegacy: Removing " + blk
+ + " from cache because local file " + pathinfo.getBlockPath()
+ + " could not be opened.");
+ throw e;
+ } finally {
+ if (localBlockReader == null) {
+ if (dataIn != null) {
+ dataIn.close();
+ }
+ if (checksumIn != null) {
+ checksumIn.close();
+ }
+ }
+ }
+ return localBlockReader;
+ }
+
+ private static synchronized LocalDatanodeInfo getLocalDatanodeInfo(int port) {
+ LocalDatanodeInfo ldInfo = localDatanodeInfoMap.get(port);
+ if (ldInfo == null) {
+ ldInfo = new LocalDatanodeInfo();
+ localDatanodeInfoMap.put(port, ldInfo);
+ }
+ return ldInfo;
+ }
+
+ private static BlockLocalPathInfo getBlockPathInfo(UserGroupInformation ugi,
+ ExtendedBlock blk, DatanodeInfo node, Configuration conf, int timeout,
+ Token<BlockTokenIdentifier> token, boolean connectToDnViaHostname)
+ throws IOException {
+ LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node.getIpcPort());
+ BlockLocalPathInfo pathinfo = null;
+ ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(ugi, node,
+ conf, timeout, connectToDnViaHostname);
+ try {
+ // make RPC to local datanode to find local pathnames of blocks
+ pathinfo = proxy.getBlockLocalPathInfo(blk, token);
+ if (pathinfo != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Cached location of block " + blk + " as " + pathinfo);
+ }
+ localDatanodeInfo.setBlockLocalPathInfo(blk, pathinfo);
+ }
+ } catch (IOException e) {
+ localDatanodeInfo.resetDatanodeProxy(); // Reset proxy on error
+ throw e;
+ }
+ return pathinfo;
+ }
+
+ private static boolean skipChecksumCheck(Configuration conf) {
+ return conf.getBoolean(
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
+ }
+
+ private static int getSlowReadBufferNumChunks(Configuration conf, int bytesPerChecksum) {
+ int bufferSizeBytes = conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
+
+ if (bufferSizeBytes < bytesPerChecksum) {
+ throw new IllegalArgumentException("Configured BlockReaderLocalLegacy " +
+ "buffer size (" + bufferSizeBytes + ") is not large enough to hold " +
+ "a single chunk (" + bytesPerChecksum + "). Please configure " +
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY +
+ " appropriately");
+ }
+
+ // Round down to nearest chunk size
+ return bufferSizeBytes / bytesPerChecksum;
+ }
+
+ private BlockReaderLocalLegacy(Configuration conf, String hdfsfile,
+ ExtendedBlock block, Token<BlockTokenIdentifier> token, long startOffset,
+ long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn)
+ throws IOException {
+ this(conf, hdfsfile, block, token, startOffset, length, pathinfo,
+ DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4), false,
+ dataIn, startOffset, null);
+ }
+
+ private BlockReaderLocalLegacy(Configuration conf, String hdfsfile,
+ ExtendedBlock block, Token<BlockTokenIdentifier> token, long startOffset,
+ long length, BlockLocalPathInfo pathinfo, DataChecksum checksum,
+ boolean verifyChecksum, FileInputStream dataIn, long firstChunkOffset,
+ FileInputStream checksumIn) throws IOException {
+ this.filename = hdfsfile;
+ this.checksum = checksum;
+ this.verifyChecksum = verifyChecksum;
+ this.startOffset = Math.max(startOffset, 0);
+
+ bytesPerChecksum = this.checksum.getBytesPerChecksum();
+ checksumSize = this.checksum.getChecksumSize();
+
+ this.dataIn = dataIn;
+ this.checksumIn = checksumIn;
+ this.offsetFromChunkBoundary = (int) (startOffset-firstChunkOffset);
+
+ int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum);
+ slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead);
+ checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead);
+ // Initially the buffers have nothing to read.
+ slowReadBuff.flip();
+ checksumBuff.flip();
+ boolean success = false;
+ try {
+ // Skip both input streams to beginning of the chunk containing startOffset
+ IOUtils.skipFully(dataIn, firstChunkOffset);
+ if (checksumIn != null) {
+ long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize;
+ IOUtils.skipFully(checksumIn, checkSumOffset);
+ }
+ success = true;
+ } finally {
+ if (!success) {
+ bufferPool.returnBuffer(slowReadBuff);
+ bufferPool.returnBuffer(checksumBuff);
+ }
+ }
+ }
+
+ /**
+ * Reads bytes into a buffer until EOF or the buffer's limit is reached
+ */
+ private int fillBuffer(FileInputStream stream, ByteBuffer buf)
+ throws IOException {
+ int bytesRead = stream.getChannel().read(buf);
+ if (bytesRead < 0) {
+ //EOF
+ return bytesRead;
+ }
+ while (buf.remaining() > 0) {
+ int n = stream.getChannel().read(buf);
+ if (n < 0) {
+ //EOF
+ return bytesRead;
+ }
+ bytesRead += n;
+ }
+ return bytesRead;
+ }
+
+ /**
+ * Utility method used by read(ByteBuffer) to partially copy a ByteBuffer into
+ * another.
+ */
+ private void writeSlice(ByteBuffer from, ByteBuffer to, int length) {
+ int oldLimit = from.limit();
+ from.limit(from.position() + length);
+ try {
+ to.put(from);
+ } finally {
+ from.limit(oldLimit);
+ }
+ }
+
+ @Override
+ public synchronized int read(ByteBuffer buf) throws IOException {
+ int nRead = 0;
+ if (verifyChecksum) {
+ // A 'direct' read actually has three phases. The first drains any
+ // remaining bytes from the slow read buffer. After this the read is
+ // guaranteed to be on a checksum chunk boundary. If there are still bytes
+ // to read, the fast direct path is used for as many remaining bytes as
+ // possible, up to a multiple of the checksum chunk size. Finally, any
+ // 'odd' bytes remaining at the end of the read cause another slow read to
+ // be issued, which involves an extra copy.
+
+ // Every 'slow' read tries to fill the slow read buffer in one go for
+ // efficiency's sake. As described above, all non-checksum-chunk-aligned
+ // reads will be served from the slower read path.
+
+ if (slowReadBuff.hasRemaining()) {
+ // There are remaining bytes from a small read available. This usually
+ // means this read is unaligned, which falls back to the slow path.
+ int fromSlowReadBuff = Math.min(buf.remaining(), slowReadBuff.remaining());
+ writeSlice(slowReadBuff, buf, fromSlowReadBuff);
+ nRead += fromSlowReadBuff;
+ }
+
+ if (buf.remaining() >= bytesPerChecksum && offsetFromChunkBoundary == 0) {
+ // Since we have drained the 'small read' buffer, we are guaranteed to
+ // be chunk-aligned
+ int len = buf.remaining() - (buf.remaining() % bytesPerChecksum);
+
+ // There's only enough checksum buffer space available to checksum one
+ // entire slow read buffer. This saves keeping the number of checksum
+ // chunks around.
+ len = Math.min(len, slowReadBuff.capacity());
+ int oldlimit = buf.limit();
+ buf.limit(buf.position() + len);
+ int readResult = 0;
+ try {
+ readResult = doByteBufferRead(buf);
+ } finally {
+ buf.limit(oldlimit);
+ }
+ if (readResult == -1) {
+ return nRead;
+ } else {
+ nRead += readResult;
+ buf.position(buf.position() + readResult);
+ }
+ }
+
+ // offsetFromChunkBoundary > 0 => unaligned read, use slow path to read
+ // until chunk boundary
+ if ((buf.remaining() > 0 && buf.remaining() < bytesPerChecksum) || offsetFromChunkBoundary > 0) {
+ int toRead = Math.min(buf.remaining(), bytesPerChecksum - offsetFromChunkBoundary);
+ int readResult = fillSlowReadBuffer(toRead);
+ if (readResult == -1) {
+ return nRead;
+ } else {
+ int fromSlowReadBuff = Math.min(readResult, buf.remaining());
+ writeSlice(slowReadBuff, buf, fromSlowReadBuff);
+ nRead += fromSlowReadBuff;
+ }
+ }
+ } else {
+ // Non-checksummed reads are much easier; we can just fill the buffer directly.
+ nRead = doByteBufferRead(buf);
+ if (nRead > 0) {
+ buf.position(buf.position() + nRead);
+ }
+ }
+ return nRead;
+ }
+
+ /**
+ * Tries to read as many bytes as possible into supplied buffer, checksumming
+ * each chunk if needed.
+ *
+ * <b>Preconditions:</b>
+ * <ul>
+ * <li>
+ * If checksumming is enabled, buf.remaining must be a multiple of
+ * bytesPerChecksum. Note that this is not a requirement for clients of
+ * read(ByteBuffer) - in the case of non-checksum-sized read requests,
+ * read(ByteBuffer) will substitute a suitably sized buffer to pass to this
+ * method.
+ * </li>
+ * </ul>
+ * <b>Postconditions:</b>
+ * <ul>
+ * <li>buf.limit and buf.mark are unchanged.</li>
+ * <li>buf.position += min(offsetFromChunkBoundary, totalBytesRead) - so the
+ * requested bytes can be read straight from the buffer</li>
+ * </ul>
+ *
+ * @param buf
+ * byte buffer to write bytes to. If checksums are not required, buf
+ * can have any number of bytes remaining, otherwise there must be a
+ * multiple of the checksum chunk size remaining.
+ * @return max(min(totalBytesRead, len) - offsetFromChunkBoundary, 0)
+ * that is, the number of useful bytes (up to the amount
+ * requested) readable from the buffer by the client.
+ */
+ private synchronized int doByteBufferRead(ByteBuffer buf) throws IOException {
+ if (verifyChecksum) {
+ assert buf.remaining() % bytesPerChecksum == 0;
+ }
+ int dataRead = -1;
+
+ int oldpos = buf.position();
+ // Read as much as we can into the buffer.
+ dataRead = fillBuffer(dataIn, buf);
+
+ if (dataRead == -1) {
+ return -1;
+ }
+
+ if (verifyChecksum) {
+ ByteBuffer toChecksum = buf.duplicate();
+ toChecksum.position(oldpos);
+ toChecksum.limit(oldpos + dataRead);
+
+ checksumBuff.clear();
+ // Equivalent to (int)Math.ceil(toChecksum.remaining() * 1.0 / bytesPerChecksum );
+ int numChunks =
+ (toChecksum.remaining() + bytesPerChecksum - 1) / bytesPerChecksum;
+ checksumBuff.limit(checksumSize * numChunks);
+
+ fillBuffer(checksumIn, checksumBuff);
+ checksumBuff.flip();
+
+ checksum.verifyChunkedSums(toChecksum, checksumBuff, filename,
+ this.startOffset);
+ }
+
+ if (dataRead >= 0) {
+ buf.position(oldpos + Math.min(offsetFromChunkBoundary, dataRead));
+ }
+
+ if (dataRead < offsetFromChunkBoundary) {
+ // yikes, didn't even get enough bytes to honour offset. This can happen
+ // even if we are verifying checksums if we are at EOF.
+ offsetFromChunkBoundary -= dataRead;
+ dataRead = 0;
+ } else {
+ dataRead -= offsetFromChunkBoundary;
+ offsetFromChunkBoundary = 0;
+ }
+
+ return dataRead;
+ }
+
+ /**
+ * Ensures that up to len bytes are available and checksummed in the slow read
+ * buffer. The number of bytes available to read is returned. If the buffer is
+ * not already empty, the number of remaining bytes is returned and no actual
+ * read happens.
+ *
+ * @param len
+ * the maximum number of bytes to make available. After len bytes
+ * are read, the underlying bytestream must be at a checksum
+ * boundary, or EOF. That is, (len + currentPosition) %
+ * bytesPerChecksum == 0.
+ * @return the number of bytes available to read, or -1 if EOF.
+ */
+ private synchronized int fillSlowReadBuffer(int len) throws IOException {
+ int nRead = -1;
+ if (slowReadBuff.hasRemaining()) {
+ // Already got data, good to go.
+ nRead = Math.min(len, slowReadBuff.remaining());
+ } else {
+ // Round a complete read of len bytes (plus any implicit offset) to the
+ // next chunk boundary, since we try and read in multiples of a chunk
+ int nextChunk = len + offsetFromChunkBoundary +
+ (bytesPerChecksum - ((len + offsetFromChunkBoundary) % bytesPerChecksum));
+ int limit = Math.min(nextChunk, slowReadBuff.capacity());
+ assert limit % bytesPerChecksum == 0;
+
+ slowReadBuff.clear();
+ slowReadBuff.limit(limit);
+
+ nRead = doByteBufferRead(slowReadBuff);
+
+ if (nRead > 0) {
+ // So that next time we call slowReadBuff.hasRemaining(), we don't get a
+ // false positive.
+ slowReadBuff.limit(nRead + slowReadBuff.position());
+ }
+ }
+ return nRead;
+ }
+
+ @Override
+ public synchronized int read(byte[] buf, int off, int len) throws IOException {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("read off " + off + " len " + len);
+ }
+ if (!verifyChecksum) {
+ return dataIn.read(buf, off, len);
+ }
+
+ int nRead = fillSlowReadBuffer(slowReadBuff.capacity());
+
+ if (nRead > 0) {
+ // Possible that buffer is filled with a larger read than we need, since
+ // we tried to read as much as possible at once
+ nRead = Math.min(len, nRead);
+ slowReadBuff.get(buf, off, nRead);
+ }
+
+ return nRead;
+ }
+
+ @Override
+ public synchronized long skip(long n) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("skip " + n);
+ }
+ if (n <= 0) {
+ return 0;
+ }
+ if (!verifyChecksum) {
+ return dataIn.skip(n);
+ }
+
+ // caller made sure newPosition is not beyond EOF.
+ int remaining = slowReadBuff.remaining();
+ int position = slowReadBuff.position();
+ int newPosition = position + (int)n;
+
+ // if the new offset is already read into dataBuff, just reposition
+ if (n <= remaining) {
+ assert offsetFromChunkBoundary == 0;
+ slowReadBuff.position(newPosition);
+ return n;
+ }
+
+ // for small gap, read through to keep the data/checksum in sync
+ if (n - remaining <= bytesPerChecksum) {
+ slowReadBuff.position(position + remaining);
+ if (skipBuf == null) {
+ skipBuf = new byte[bytesPerChecksum];
+ }
+ int ret = read(skipBuf, 0, (int)(n - remaining));
+ return ret;
+ }
+
+ // optimize for big gap: discard the current buffer, skip to
+ // the beginning of the appropriate checksum chunk and then
+ // read to the middle of that chunk to be in sync with checksums.
+
+ // We can't use this.offsetFromChunkBoundary because we need to know how
+ // many bytes of the offset were really read. Calling read(..) with a
+ // positive this.offsetFromChunkBoundary causes that many bytes to get
+ // silently skipped.
+ int myOffsetFromChunkBoundary = newPosition % bytesPerChecksum;
+ long toskip = n - remaining - myOffsetFromChunkBoundary;
+
+ slowReadBuff.position(slowReadBuff.limit());
+ checksumBuff.position(checksumBuff.limit());
+
+ IOUtils.skipFully(dataIn, toskip);
+ long checkSumOffset = (toskip / bytesPerChecksum) * checksumSize;
+ IOUtils.skipFully(checksumIn, checkSumOffset);
+
+ // read into the middle of the chunk
+ if (skipBuf == null) {
+ skipBuf = new byte[bytesPerChecksum];
+ }
+ assert skipBuf.length == bytesPerChecksum;
+ assert myOffsetFromChunkBoundary < bytesPerChecksum;
+
+ int ret = read(skipBuf, 0, myOffsetFromChunkBoundary);
+
+ if (ret == -1) { // EOS
+ return toskip;
+ } else {
+ return (toskip + ret);
+ }
+ }
+
+ @Override
+ public synchronized void close(PeerCache peerCache,
+ FileInputStreamCache fisCache) throws IOException {
+ IOUtils.cleanup(LOG, dataIn, checksumIn);
+ if (slowReadBuff != null) {
+ bufferPool.returnBuffer(slowReadBuff);
+ slowReadBuff = null;
+ }
+ if (checksumBuff != null) {
+ bufferPool.returnBuffer(checksumBuff);
+ checksumBuff = null;
+ }
+ startOffset = -1;
+ checksum = null;
+ }
+
+ @Override
+ public int readAll(byte[] buf, int offset, int len) throws IOException {
+ return BlockReaderUtil.readAll(this, buf, offset, len);
+ }
+
+ @Override
+ public void readFully(byte[] buf, int off, int len) throws IOException {
+ BlockReaderUtil.readFully(this, buf, off, len);
+ }
+
+ @Override
+ public int available() throws IOException {
+ // We never do network I/O in BlockReaderLocalLegacy.
+ return Integer.MAX_VALUE;
+ }
+}
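A minimal client-side sketch of how the legacy reader above can be selected, based only on the keys added in this change; the class name is illustrative and not part of the patch, and the datanode must separately list the reading user under dfs.block.local-path-access.user.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class LegacyShortCircuitExample {  // illustrative name
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Select the HDFS-2246 style BlockReaderLocalLegacy added in this patch.
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
        FileSystem fs = FileSystem.get(conf);
        // Reads of blocks stored on this machine can now bypass the datanode data path.
        fs.open(new Path(args[0])).close();
      }
    }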
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index b747567b3dd..8a0a7bd243e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -131,7 +131,6 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseP
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
-import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
@@ -196,12 +195,13 @@ public class DFSClient implements java.io.Closeable {
final FileSystem.Statistics stats;
final int hdfsTimeout; // timeout value for a DFS operation.
private final String authority;
- final SocketCache socketCache;
+ final PeerCache peerCache;
final Conf dfsClientConf;
private Random r = new Random();
private SocketAddress[] localInterfaceAddrs;
private DataEncryptionKey encryptionKey;
-
+ private boolean shouldUseLegacyBlockReaderLocal;
+
/**
* DFSClient configuration
*/
@@ -228,11 +228,16 @@ public class DFSClient implements java.io.Closeable {
final short defaultReplication;
final String taskId;
final FsPermission uMask;
- final boolean useLegacyBlockReader;
+ final boolean useLegacyBlockReaderLocal;
final boolean connectToDnViaHostname;
final boolean getHdfsBlocksMetadataEnabled;
final int getFileBlockStorageLocationsNumThreads;
final int getFileBlockStorageLocationsTimeout;
+ final String domainSocketPath;
+ final boolean skipShortCircuitChecksums;
+ final int shortCircuitBufferSize;
+ final boolean shortCircuitLocalReads;
+ final boolean domainSocketDataTraffic;
Conf(Configuration conf) {
maxFailoverAttempts = conf.getInt(
@@ -283,9 +288,9 @@ public class DFSClient implements java.io.Closeable {
.getInt(DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY,
DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_DEFAULT);
uMask = FsPermission.getUMask(conf);
- useLegacyBlockReader = conf.getBoolean(
- DFS_CLIENT_USE_LEGACY_BLOCKREADER,
- DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT);
+ useLegacyBlockReaderLocal = conf.getBoolean(
+ DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
+ DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT);
connectToDnViaHostname = conf.getBoolean(DFS_CLIENT_USE_DN_HOSTNAME,
DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT);
getHdfsBlocksMetadataEnabled = conf.getBoolean(
@@ -297,6 +302,20 @@ public class DFSClient implements java.io.Closeable {
getFileBlockStorageLocationsTimeout = conf.getInt(
DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT,
DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT);
+ domainSocketPath = conf.getTrimmed(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+ DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT);
+ skipShortCircuitChecksums = conf.getBoolean(
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
+ shortCircuitBufferSize = conf.getInt(
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
+ shortCircuitLocalReads = conf.getBoolean(
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT);
+ domainSocketDataTraffic = conf.getBoolean(
+ DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
+ DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT);
}
private DataChecksum.Type getChecksumType(Configuration conf) {
@@ -354,7 +373,7 @@ public class DFSClient implements java.io.Closeable {
private final Map<String, DFSOutputStream> filesBeingWritten
= new HashMap<String, DFSOutputStream>();
- private boolean shortCircuitLocalReads;
+ private final DomainSocketFactory domainSocketFactory;
/**
* Same as this(NameNode.getAddress(conf), conf);
@@ -398,6 +417,11 @@ public class DFSClient implements java.io.Closeable {
throws IOException {
// Copy only the required DFSClient configuration
this.dfsClientConf = new Conf(conf);
+ this.shouldUseLegacyBlockReaderLocal =
+ this.dfsClientConf.useLegacyBlockReaderLocal;
+ if (this.dfsClientConf.useLegacyBlockReaderLocal) {
+ LOG.debug("Using legacy short-circuit local reads.");
+ }
this.conf = conf;
this.stats = stats;
this.socketFactory = NetUtils.getSocketFactory(conf, ClientProtocol.class);
@@ -427,12 +451,8 @@ public class DFSClient implements java.io.Closeable {
}
// read directly from the block file if configured.
- this.shortCircuitLocalReads = conf.getBoolean(
- DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
- DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Short circuit read is " + shortCircuitLocalReads);
- }
+ this.domainSocketFactory = new DomainSocketFactory(dfsClientConf);
+
String localInterfaces[] =
conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES);
localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces);
@@ -442,7 +462,7 @@ public class DFSClient implements java.io.Closeable {
Joiner.on(',').join(localInterfaceAddrs) + "]");
}
- this.socketCache = SocketCache.getInstance(dfsClientConf.socketCacheCapacity, dfsClientConf.socketCacheExpiry);
+ this.peerCache = PeerCache.getInstance(dfsClientConf.socketCacheCapacity, dfsClientConf.socketCacheExpiry);
}
/**
@@ -797,29 +817,11 @@ public class DFSClient implements java.io.Closeable {
AccessControlException.class);
}
}
-
- /**
- * Get {@link BlockReader} for short circuited local reads.
- */
- static BlockReader getLocalBlockReader(UserGroupInformation ugi,
- Configuration conf, String src, ExtendedBlock blk,
- Token<BlockTokenIdentifier> accessToken, DatanodeInfo chosenNode,
- int socketTimeout, long offsetIntoBlock, boolean connectToDnViaHostname)
- throws InvalidToken, IOException {
- try {
- return BlockReaderLocal.newBlockReader(ugi, conf, src, blk, accessToken,
- chosenNode, socketTimeout, offsetIntoBlock, blk.getNumBytes()
- - offsetIntoBlock, connectToDnViaHostname);
- } catch (RemoteException re) {
- throw re.unwrapRemoteException(InvalidToken.class,
- AccessControlException.class);
- }
- }
private static Map<String, Boolean> localAddrMap = Collections
.synchronizedMap(new HashMap<String, Boolean>());
- private static boolean isLocalAddress(InetSocketAddress targetAddr) {
+ static boolean isLocalAddress(InetSocketAddress targetAddr) {
InetAddress addr = targetAddr.getAddress();
Boolean cached = localAddrMap.get(addr.getHostAddress());
if (cached != null) {
@@ -2319,10 +2321,6 @@ public class DFSClient implements java.io.Closeable {
super(in);
}
}
-
- boolean shouldTryShortCircuitRead(InetSocketAddress targetAddr) {
- return shortCircuitLocalReads && isLocalAddress(targetAddr);
- }
void reportChecksumFailure(String file, ExtendedBlock blk, DatanodeInfo dn) {
DatanodeInfo [] dnArr = { dn };
@@ -2346,13 +2344,15 @@ public class DFSClient implements java.io.Closeable {
+ ", ugi=" + ugi + "]";
}
- void disableShortCircuit() {
- LOG.info("Short circuit is disabled");
- shortCircuitLocalReads = false;
+ public DomainSocketFactory getDomainSocketFactory() {
+ return domainSocketFactory;
}
-
- @VisibleForTesting
- boolean getShortCircuitLocalReads() {
- return shortCircuitLocalReads;
+
+ public void disableLegacyBlockReaderLocal() {
+ shouldUseLegacyBlockReaderLocal = false;
+ }
+
+ public boolean useLegacyBlockReaderLocal() {
+ return shouldUseLegacyBlockReaderLocal;
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 4cd2460866b..2c5a40074c5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -265,6 +265,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT = 3;
public static final String DFS_CLIENT_USE_LEGACY_BLOCKREADER = "dfs.client.use.legacy.blockreader";
public static final boolean DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT = false;
+ public static final String DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL = "dfs.client.use.legacy.blockreader.local";
+ public static final boolean DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT = false;
public static final String DFS_BALANCER_MOVEDWINWIDTH_KEY = "dfs.balancer.movedWinWidth";
public static final long DFS_BALANCER_MOVEDWINWIDTH_DEFAULT = 5400*1000L;
public static final String DFS_DATANODE_ADDRESS_KEY = "dfs.datanode.address";
@@ -347,7 +349,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY = "dfs.client.read.shortcircuit.skip.checksum";
public static final boolean DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT = false;
public static final String DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY = "dfs.client.read.shortcircuit.buffer.size";
+ public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY = "dfs.client.read.shortcircuit.streams.cache.size";
+ public static final int DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT = 100;
+ public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY = "dfs.client.read.shortcircuit.streams.cache.expiry.ms";
+ public static final long DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT = 5000;
public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
+ public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
+ public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
// property for fsimage compression
public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress";
@@ -404,6 +412,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_NAMENODE_MAX_OP_SIZE_DEFAULT = 50 * 1024 * 1024;
public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user";
+ public static final String DFS_DOMAIN_SOCKET_PATH_KEY = "dfs.domain.socket.path";
+ public static final String DFS_DOMAIN_SOCKET_PATH_DEFAULT = "";
// HA related configuration
public static final String DFS_HA_NAMENODES_KEY_PREFIX = "dfs.ha.namenodes";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
index 5a3574d1488..685487c8497 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdfs;
+import java.io.FileInputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
@@ -32,18 +33,20 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.commons.io.IOUtils;
import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.ByteBufferReadable;
+import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.UnresolvedLinkException;
-import org.apache.hadoop.hdfs.SocketCache.SocketAndStreams;
+import org.apache.hadoop.hdfs.net.DomainPeer;
+import org.apache.hadoop.hdfs.net.Peer;
+import org.apache.hadoop.hdfs.net.TcpPeerServer;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
@@ -51,20 +54,23 @@ import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.net.unix.DomainSocket;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.token.Token;
+import com.google.common.annotations.VisibleForTesting;
+
/****************************************************************
* DFSInputStream provides bytes from a named file. It handles
* negotiation of the namenode and various datanodes as necessary.
****************************************************************/
@InterfaceAudience.Private
public class DFSInputStream extends FSInputStream implements ByteBufferReadable {
- private final SocketCache socketCache;
-
+ @VisibleForTesting
+ static boolean tcpReadsDisabledForTesting = false;
+ private final PeerCache peerCache;
private final DFSClient dfsClient;
private boolean closed = false;
-
private final String src;
private final long prefetchSize;
private BlockReader blockReader = null;
@@ -76,6 +82,8 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
private long pos = 0;
private long blockEnd = -1;
+ private final FileInputStreamCache fileInputStreamCache;
+
/**
* This variable tracks the number of failures since the start of the
* most recent user-facing operation. That is to say, it should be reset
@@ -110,7 +118,14 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
this.verifyChecksum = verifyChecksum;
this.buffersize = buffersize;
this.src = src;
- this.socketCache = dfsClient.socketCache;
+ this.peerCache = dfsClient.peerCache;
+ this.fileInputStreamCache = new FileInputStreamCache(
+ dfsClient.conf.getInt(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT),
+ dfsClient.conf.getLong(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT));
prefetchSize = dfsClient.getConf().prefetchSize;
timeWindow = dfsClient.getConf().timeWindow;
nCachedConnRetry = dfsClient.getConf().nCachedConnRetry;
@@ -243,7 +258,9 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
locatedBlocks.getFileLength() + lastBlockBeingWrittenLength;
}
- private synchronized boolean blockUnderConstruction() {
+ // Short circuit local reads are forbidden for files that are
+ // under construction. See HDFS-2757.
+ synchronized boolean shortCircuitForbidden() {
return locatedBlocks.isUnderConstruction();
}
@@ -424,7 +441,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
// Will be getting a new BlockReader.
if (blockReader != null) {
- closeBlockReader(blockReader);
+ blockReader.close(peerCache, fileInputStreamCache);
blockReader = null;
}
@@ -462,7 +479,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
return chosenNode;
} catch (AccessControlException ex) {
DFSClient.LOG.warn("Short circuit access failed " + ex);
- dfsClient.disableShortCircuit();
+ dfsClient.disableLegacyBlockReaderLocal();
continue;
} catch (IOException ex) {
if (ex instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
@@ -510,10 +527,11 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
dfsClient.checkOpen();
if (blockReader != null) {
- closeBlockReader(blockReader);
+ blockReader.close(peerCache, fileInputStreamCache);
blockReader = null;
}
super.close();
+ fileInputStreamCache.close();
closed = true;
}
@@ -811,7 +829,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap);
} catch (AccessControlException ex) {
DFSClient.LOG.warn("Short circuit access failed " + ex);
- dfsClient.disableShortCircuit();
+ dfsClient.disableLegacyBlockReaderLocal();
continue;
} catch (IOException e) {
if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
@@ -837,7 +855,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
}
} finally {
if (reader != null) {
- closeBlockReader(reader);
+ reader.close(peerCache, fileInputStreamCache);
}
}
// Put chosen node into dead list, continue
@@ -845,22 +863,34 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
}
}
- /**
- * Close the given BlockReader and cache its socket.
- */
- private void closeBlockReader(BlockReader reader) throws IOException {
- if (reader.hasSentStatusCode()) {
- IOStreamPair ioStreams = reader.getStreams();
- Socket oldSock = reader.takeSocket();
- socketCache.put(oldSock, ioStreams);
+ private Peer newTcpPeer(InetSocketAddress addr) throws IOException {
+ Peer peer = null;
+ boolean success = false;
+ Socket sock = null;
+ try {
+ sock = dfsClient.socketFactory.createSocket();
+ NetUtils.connect(sock, addr,
+ dfsClient.getRandomLocalInterfaceAddr(),
+ dfsClient.getConf().socketTimeout);
+ peer = TcpPeerServer.peerFromSocketAndKey(sock,
+ dfsClient.getDataEncryptionKey());
+ success = true;
+ return peer;
+ } finally {
+ if (!success) {
+ IOUtils.closeQuietly(peer);
+ IOUtils.closeQuietly(sock);
+ }
}
- reader.close();
}
/**
* Retrieve a BlockReader suitable for reading.
* This method will reuse the cached connection to the DN if appropriate.
* Otherwise, it will create a new connection.
+ * Throwing an IOException from this method is basically equivalent to
+ * declaring the DataNode bad, so we try to connect a lot of different ways
+ * before doing that.
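+ * The attempts below are made in this order: cached local FileInputStreams
+ * from the FileInputStreamCache, the legacy BlockReaderLocalLegacy (when
+ * dfs.client.use.legacy.blockreader.local is enabled), a cached domain
+ * socket peer, a newly created domain socket, a cached TCP peer, and
+ * finally a new TCP connection.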
*
* @param dnAddr Address of the datanode
* @param chosenNode Chosen datanode information
@@ -885,82 +915,113 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
boolean verifyChecksum,
String clientName)
throws IOException {
-
- // Can't local read a block under construction, see HDFS-2757
- if (dfsClient.shouldTryShortCircuitRead(dnAddr) &&
- !blockUnderConstruction()) {
- return DFSClient.getLocalBlockReader(dfsClient.ugi, dfsClient.conf,
- src, block, blockToken, chosenNode, dfsClient.hdfsTimeout,
- startOffset, dfsClient.connectToDnViaHostname());
+ // Firstly, we check to see if we have cached any file descriptors for
+ // local blocks. If so, we can just re-use those file descriptors.
+ FileInputStream fis[] = fileInputStreamCache.get(chosenNode, block);
+ if (fis != null) {
+ if (DFSClient.LOG.isDebugEnabled()) {
+ DFSClient.LOG.debug("got FileInputStreams for " + block + " from " +
+ "the FileInputStreamCache.");
+ }
+ return new BlockReaderLocal(dfsClient.conf, file,
+ block, startOffset, len, fis[0], fis[1], chosenNode, verifyChecksum);
}
- IOException err = null;
- boolean fromCache = true;
-
- // Allow retry since there is no way of knowing whether the cached socket
- // is good until we actually use it.
- for (int retries = 0; retries <= nCachedConnRetry && fromCache; ++retries) {
- SocketAndStreams sockAndStreams = null;
- // Don't use the cache on the last attempt - it's possible that there
- // are arbitrarily many unusable sockets in the cache, but we don't
- // want to fail the read.
- if (retries < nCachedConnRetry) {
- sockAndStreams = socketCache.get(dnAddr);
- }
- Socket sock;
- if (sockAndStreams == null) {
- fromCache = false;
-
- sock = dfsClient.socketFactory.createSocket();
-
- // TCP_NODELAY is crucial here because of bad interactions between
- // Nagle's Algorithm and Delayed ACKs. With connection keepalive
- // between the client and DN, the conversation looks like:
- // 1. Client -> DN: Read block X
- // 2. DN -> Client: data for block X
- // 3. Client -> DN: Status OK (successful read)
- // 4. Client -> DN: Read block Y
- // The fact that step #3 and #4 are both in the client->DN direction
- // triggers Nagling. If the DN is using delayed ACKs, this results
- // in a delay of 40ms or more.
- //
- // TCP_NODELAY disables nagling and thus avoids this performance
- // disaster.
- sock.setTcpNoDelay(true);
-
- NetUtils.connect(sock, dnAddr,
- dfsClient.getRandomLocalInterfaceAddr(),
- dfsClient.getConf().socketTimeout);
- sock.setSoTimeout(dfsClient.getConf().socketTimeout);
- } else {
- sock = sockAndStreams.sock;
- }
-
+ // If the legacy local block reader is enabled and we are reading a local
+ // block, try to create a BlockReaderLocalLegacy. The legacy local block
+ // reader implements local reads in the style first introduced by HDFS-2246.
+ if ((dfsClient.useLegacyBlockReaderLocal()) &&
+ DFSClient.isLocalAddress(dnAddr) &&
+ (!shortCircuitForbidden())) {
try {
- // The OP_READ_BLOCK request is sent as we make the BlockReader
- BlockReader reader =
- BlockReaderFactory.newBlockReader(dfsClient.getConf(),
- sock, file, block,
- blockToken,
- startOffset, len,
- bufferSize, verifyChecksum,
- clientName,
- dfsClient.getDataEncryptionKey(),
- sockAndStreams == null ? null : sockAndStreams.ioStreams);
+ return BlockReaderFactory.getLegacyBlockReaderLocal(dfsClient.ugi,
+ dfsClient.conf, clientName, block, blockToken, chosenNode,
+ dfsClient.hdfsTimeout, startOffset, dfsClient.connectToDnViaHostname());
+ } catch (IOException e) {
+ DFSClient.LOG.warn("error creating legacy BlockReaderLocal. " +
+ "Disabling legacy local reads.", e);
+ dfsClient.disableLegacyBlockReaderLocal();
+ }
+ }
+
+ // Look for cached domain peers.
+ int cacheTries = 0;
+ DomainSocketFactory dsFactory = dfsClient.getDomainSocketFactory();
+ BlockReader reader = null;
+ for (; cacheTries < nCachedConnRetry; ++cacheTries) {
+ Peer peer = peerCache.get(chosenNode, true);
+ if (peer == null) break;
+ try {
+ boolean allowShortCircuitLocalReads = dfsClient.getConf().
+ shortCircuitLocalReads && (!shortCircuitForbidden());
+ reader = BlockReaderFactory.newBlockReader(
+ dfsClient.conf, file, block, blockToken, startOffset,
+ len, verifyChecksum, clientName, peer, chosenNode,
+ dsFactory, allowShortCircuitLocalReads);
return reader;
} catch (IOException ex) {
- // Our socket is no good.
- DFSClient.LOG.debug("Error making BlockReader. Closing stale " + sock, ex);
- if (sockAndStreams != null) {
- sockAndStreams.close();
- } else {
- sock.close();
+ DFSClient.LOG.debug("Error making BlockReader with DomainSocket. " +
+ "Closing stale " + peer, ex);
+ } finally {
+ if (reader == null) {
+ IOUtils.closeQuietly(peer);
}
- err = ex;
}
}
- throw err;
+ // Try to create a DomainPeer.
+ DomainSocket domSock = dsFactory.create(dnAddr, this);
+ if (domSock != null) {
+ Peer peer = new DomainPeer(domSock);
+ try {
+ boolean allowShortCircuitLocalReads = dfsClient.getConf().
+ shortCircuitLocalReads && (!shortCircuitForbidden());
+ reader = BlockReaderFactory.newBlockReader(
+ dfsClient.conf, file, block, blockToken, startOffset,
+ len, verifyChecksum, clientName, peer, chosenNode,
+ dsFactory, allowShortCircuitLocalReads);
+ return reader;
+ } catch (IOException e) {
+ DFSClient.LOG.warn("failed to connect to " + domSock, e);
+ } finally {
+ if (reader == null) {
+ // If the Peer that we got the error from was a DomainPeer,
+ // mark the socket path as bad, so that newDataSocket will not try
+ // to re-open this socket for a while.
+ dsFactory.disableDomainSocketPath(domSock.getPath());
+ IOUtils.closeQuietly(peer);
+ }
+ }
+ }
+
+ // Look for cached peers.
+ for (; cacheTries < nCachedConnRetry; ++cacheTries) {
+ Peer peer = peerCache.get(chosenNode, false);
+ if (peer == null) break;
+ try {
+ reader = BlockReaderFactory.newBlockReader(
+ dfsClient.conf, file, block, blockToken, startOffset,
+ len, verifyChecksum, clientName, peer, chosenNode,
+ dsFactory, false);
+ return reader;
+ } catch (IOException ex) {
+ DFSClient.LOG.debug("Error making BlockReader. Closing stale " +
+ peer, ex);
+ } finally {
+ if (reader == null) {
+ IOUtils.closeQuietly(peer);
+ }
+ }
+ }
+ if (tcpReadsDisabledForTesting) {
+ throw new IOException("TCP reads are disabled.");
+ }
+ // Try to create a new remote peer.
+ Peer peer = newTcpPeer(dnAddr);
+ return BlockReaderFactory.newBlockReader(
+ dfsClient.conf, file, block, blockToken, startOffset,
+ len, verifyChecksum, clientName, peer, chosenNode,
+ dsFactory, false);
}
@@ -1094,7 +1155,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
// the TCP buffer, then just eat up the intervening data.
//
int diff = (int)(targetPos - pos);
- if (diff <= DFSClient.TCP_WINDOW_SIZE) {
+ if (diff <= blockReader.available()) {
try {
pos += blockReader.skip(diff);
if (pos == targetPos) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java
new file mode 100644
index 00000000000..ebcb84a810e
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.DFSClient.Conf;
+
+import org.apache.hadoop.net.unix.DomainSocket;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+class DomainSocketFactory {
+ public static final Log LOG = LogFactory.getLog(DomainSocketFactory.class);
+ private final Conf conf;
+
+ enum PathStatus {
+ UNUSABLE,
+ SHORT_CIRCUIT_DISABLED,
+ }
+
+ /**
+ * Information about domain socket paths.
+ */
+ Cache<String, PathStatus> pathInfo =
+ CacheBuilder.newBuilder()
+ .expireAfterWrite(10, TimeUnit.MINUTES)
+ .build();
+
+ public DomainSocketFactory(Conf conf) {
+ this.conf = conf;
+
+ String feature = null;
+ if (conf.shortCircuitLocalReads && (!conf.useLegacyBlockReaderLocal)) {
+ feature = "The short-circuit local reads feature";
+ } else if (conf.domainSocketDataTraffic) {
+ feature = "UNIX domain socket data traffic";
+ }
+ if (feature != null) {
+ if (conf.domainSocketPath.isEmpty()) {
+ LOG.warn(feature + " is disabled because you have not set " +
+ DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
+ } else if (DomainSocket.getLoadingFailureReason() != null) {
+ LOG.warn(feature + " is disabled because " +
+ DomainSocket.getLoadingFailureReason());
+ } else {
+ LOG.debug(feature + " is enabled.");
+ }
+ }
+ }
+
+ /**
+ * Create a DomainSocket.
+ *
+ * @param addr The address of the DataNode
+ * @param stream The DFSInputStream the socket will be created for.
+ *
+ * @return null if the socket could not be created; the
+ * socket otherwise. If there was an error while
+ * creating the socket, we will add the socket path
+ * to our list of failed domain socket paths.
+ */
+ DomainSocket create(InetSocketAddress addr, DFSInputStream stream) {
+ // If there is no domain socket path configured, we can't use domain
+ // sockets.
+ if (conf.domainSocketPath.isEmpty()) return null;
+ // If we can't do anything with the domain socket, don't create it.
+ if ((conf.domainSocketDataTraffic == false) &&
+ ((!conf.shortCircuitLocalReads) || conf.useLegacyBlockReaderLocal)) {
+ return null;
+ }
+ // UNIX domain sockets can only be used to talk to local peers
+ if (!DFSClient.isLocalAddress(addr)) return null;
+ // If the DomainSocket code is not loaded, we can't create
+ // DomainSocket objects.
+ if (DomainSocket.getLoadingFailureReason() != null) return null;
+ String escapedPath = DomainSocket.
+ getEffectivePath(conf.domainSocketPath, addr.getPort());
+ PathStatus info = pathInfo.getIfPresent(escapedPath);
+ if (info == PathStatus.UNUSABLE) {
+ // We tried to connect to this domain socket before, and it was totally
+ // unusable.
+ return null;
+ }
+ if ((!conf.domainSocketDataTraffic) &&
+ ((info == PathStatus.SHORT_CIRCUIT_DISABLED) ||
+ stream.shortCircuitForbidden())) {
+ // If we don't want to pass data over domain sockets, and we don't want
+ // to pass file descriptors over them either, we have no use for domain
+ // sockets.
+ return null;
+ }
+ boolean success = false;
+ DomainSocket sock = null;
+ try {
+ sock = DomainSocket.connect(escapedPath);
+ sock.setAttribute(DomainSocket.RECEIVE_TIMEOUT, conf.socketTimeout);
+ success = true;
+ } catch (IOException e) {
+ LOG.warn("error creating DomainSocket", e);
+ // fall through
+ } finally {
+ if (!success) {
+ if (sock != null) {
+ IOUtils.closeQuietly(sock);
+ }
+ pathInfo.put(escapedPath, PathStatus.UNUSABLE);
+ sock = null;
+ }
+ }
+ return sock;
+ }
+
+ public void disableShortCircuitForPath(String path) {
+ pathInfo.put(path, PathStatus.SHORT_CIRCUIT_DISABLED);
+ }
+
+ public void disableDomainSocketPath(String path) {
+ pathInfo.put(path, PathStatus.UNUSABLE);
+ }
+}
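A minimal sketch of the client configuration that DomainSocketFactory inspects, using only keys defined in this change; the socket path is an example value, the class name is illustrative, and the same path has to be configured on the datanode side for the socket to exist.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class DomainSocketConfExample {  // illustrative name
      public static Configuration shortCircuitConf() {
        Configuration conf = new Configuration();
        // New-style short-circuit reads: file descriptors are passed over a UNIX domain socket.
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
        conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, "/var/run/hdfs/dn_socket");  // example path
        // Keep block data itself off the domain socket (the default added in this patch).
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false);
        return conf;
      }
    }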
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java
new file mode 100644
index 00000000000..ac0af814362
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.FileInputStream;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.Time;
+
+import com.google.common.collect.LinkedListMultimap;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
+/**
+ * FileInputStream cache is used to cache FileInputStream objects that we
+ * have received from the DataNode.
+ */
+class FileInputStreamCache {
+ private final static Log LOG = LogFactory.getLog(FileInputStreamCache.class);
+
+ /**
+ * The executor service that runs the cacheCleaner. There is only one of
+ * these per VM.
+ */
+ private final static ScheduledThreadPoolExecutor executor
+ = new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder().
+ setDaemon(true).setNameFormat("FileInputStreamCache Cleaner").
+ build());
+
+ /**
+ * The CacheCleaner for this FileInputStreamCache. We don't create this
+ * and schedule it until it becomes necessary.
+ */
+ private CacheCleaner cacheCleaner;
+
+ /**
+ * Maximum number of entries to allow in the cache.
+ */
+ private final int maxCacheSize;
+
+ /**
+ * The minimum time in milliseconds to preserve an element in the cache.
+ */
+ private final long expiryTimeMs;
+
+ /**
+ * True if the FileInputStreamCache is closed.
+ */
+ private boolean closed = false;
+
+ /**
+ * Cache entries.
+ */
+ private final LinkedListMultimap<Key, Value> map = LinkedListMultimap.create();
+
+ /**
+ * Expiry thread which makes sure that the file descriptors get closed
+ * after a while.
+ */
+ class CacheCleaner implements Runnable {
+ @Override
+ public void run() {
+ synchronized(FileInputStreamCache.this) {
+ if (closed) return;
+ long curTime = Time.monotonicNow();
+ for (Iterator<Entry<Key, Value>> iter = map.entries().iterator();
+ iter.hasNext();
+ iter = map.entries().iterator()) {
+ Entry<Key, Value> entry = iter.next();
+ if (entry.getValue().getTime() + expiryTimeMs >= curTime) {
+ break;
+ }
+ entry.getValue().close();
+ iter.remove();
+ }
+ }
+ }
+ }
+
+ /**
+ * The key identifying a FileInputStream array.
+ */
+ static class Key {
+ private final DatanodeID datanodeID;
+ private final ExtendedBlock block;
+
+ public Key(DatanodeID datanodeID, ExtendedBlock block) {
+ this.datanodeID = datanodeID;
+ this.block = block;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof FileInputStreamCache.Key)) {
+ return false;
+ }
+ FileInputStreamCache.Key otherKey = (FileInputStreamCache.Key)other;
+ return (block.equals(otherKey.block) &
+ (block.getGenerationStamp() == otherKey.block.getGenerationStamp()) &
+ datanodeID.equals(otherKey.datanodeID));
+ }
+
+ @Override
+ public int hashCode() {
+ return block.hashCode();
+ }
+ }
+
+ /**
+ * The value containing a FileInputStream array and the time it was added to
+ * the cache.
+ */
+ static class Value {
+ private final FileInputStream fis[];
+ private final long time;
+
+ public Value (FileInputStream fis[]) {
+ this.fis = fis;
+ this.time = Time.monotonicNow();
+ }
+
+ public FileInputStream[] getFileInputStreams() {
+ return fis;
+ }
+
+ public long getTime() {
+ return time;
+ }
+
+ public void close() {
+ IOUtils.cleanup(LOG, fis);
+ }
+ }
+
+ /**
+ * Create a new FileInputStreamCache.
+ *
+ * @param maxCacheSize The maximum number of elements to allow in
+ * the cache.
+ * @param expiryTimeMs The minimum time in milliseconds to preserve
+ * elements in the cache.
+ */
+ public FileInputStreamCache(int maxCacheSize, long expiryTimeMs) {
+ this.maxCacheSize = maxCacheSize;
+ this.expiryTimeMs = expiryTimeMs;
+ }
+
+ /**
+ * Put an array of FileInputStream objects into the cache.
+ *
+ * @param datanodeID The DatanodeID to store the streams under.
+ * @param block The Block to store the streams under.
+ * @param fis The streams.
+ */
+ public void put(DatanodeID datanodeID, ExtendedBlock block,
+ FileInputStream fis[]) {
+ boolean inserted = false;
+ try {
+ synchronized(this) {
+ if (closed) return;
+ if (map.size() + 1 > maxCacheSize) {
+ Iterator<Entry<Key, Value>> iter = map.entries().iterator();
+ if (!iter.hasNext()) return;
+ Entry<Key, Value> entry = iter.next();
+ entry.getValue().close();
+ iter.remove();
+ }
+ if (cacheCleaner == null) {
+ cacheCleaner = new CacheCleaner();
+ executor.scheduleAtFixedRate(cacheCleaner, expiryTimeMs, expiryTimeMs,
+ TimeUnit.MILLISECONDS);
+ }
+ map.put(new Key(datanodeID, block), new Value(fis));
+ inserted = true;
+ }
+ } finally {
+ if (!inserted) {
+ IOUtils.cleanup(LOG, fis);
+ }
+ }
+ }
+
+ /**
+ * Find and remove an array of FileInputStream objects from the cache.
+ *
+ * @param datanodeID The DatanodeID to search for.
+ * @param block The Block to search for.
+ *
+ * @return null if no streams can be found; the
+ * array otherwise. If this is non-null, the
+ * array will have been removed from the cache.
+ */
+ public synchronized FileInputStream[] get(DatanodeID datanodeID,
+ ExtendedBlock block) {
+ Key key = new Key(datanodeID, block);
+ List<Value> ret = map.get(key);
+ if (ret.isEmpty()) return null;
+ Value val = ret.get(0);
+ map.remove(key, val);
+ return val.getFileInputStreams();
+ }
+
+ /**
+ * Close the cache and free all associated resources.
+ */
+ public synchronized void close() {
+ if (closed) return;
+ closed = true;
+ if (cacheCleaner != null) {
+ executor.remove(cacheCleaner);
+ }
+ for (Iterator<Entry<Key, Value>> iter = map.entries().iterator();
+ iter.hasNext();
+ iter = map.entries().iterator()) {
+ Entry<Key, Value> entry = iter.next();
+ entry.getValue().close();
+ iter.remove();
+ }
+ }
+
+ public synchronized String toString() {
+ StringBuilder bld = new StringBuilder();
+ bld.append("FileInputStreamCache(");
+ String prefix = "";
+ for (Entry<Key, Value> entry : map.entries()) {
+ bld.append(prefix);
+ bld.append(entry.getKey());
+ prefix = ", ";
+ }
+ bld.append(")");
+ return bld.toString();
+ }
+
+ public long getExpiryTimeMs() {
+ return expiryTimeMs;
+ }
+
+ public int getMaxCacheSize() {
+ return maxCacheSize;
+ }
+}
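
The cache above is keyed by (DatanodeID, ExtendedBlock) and hands descriptors back out at most once per get(). A minimal usage sketch, not part of the patch, assuming a hypothetical openBlockFiles() helper in place of the DFSClient code that actually obtains the descriptors:

    // Sketch only: openBlockFiles() is an assumed helper, not part of this patch.
    void readShortCircuit(FileInputStreamCache cache, DatanodeID dn,
        ExtendedBlock blk) throws IOException {
      FileInputStream[] fis = cache.get(dn, blk);  // non-null means the entry was removed from the cache
      if (fis == null) {
        fis = openBlockFiles(dn, blk);             // e.g. via the new REQUEST_SHORT_CIRCUIT_FDS op
      }
      // ... read from fis[0] (block file) and fis[1] (metadata/checksum file) ...
      cache.put(dn, blk, fis);                     // offer back for reuse; the expiry thread closes them later
    }
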
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java
similarity index 52%
rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java
rename to hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java
index 0ea9100fc94..424b641c8c3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java
@@ -18,92 +18,104 @@
package org.apache.hadoop.hdfs;
-import java.io.Closeable;
-import java.net.Socket;
-import java.net.SocketAddress;
-
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
-import java.io.IOException;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.LinkedListMultimap;
import org.apache.commons.logging.Log;
-import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.util.Daemon;
-import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
/**
* A cache of input stream sockets to Data Node.
*/
-class SocketCache {
- private static final Log LOG = LogFactory.getLog(SocketCache.class);
-
- @InterfaceAudience.Private
- static class SocketAndStreams implements Closeable {
- public final Socket sock;
- public final IOStreamPair ioStreams;
- long createTime;
+class PeerCache {
+ private static final Log LOG = LogFactory.getLog(PeerCache.class);
+
+ private static class Key {
+ final DatanodeID dnID;
+ final boolean isDomain;
- public SocketAndStreams(Socket s, IOStreamPair ioStreams) {
- this.sock = s;
- this.ioStreams = ioStreams;
- this.createTime = Time.monotonicNow();
+ Key(DatanodeID dnID, boolean isDomain) {
+ this.dnID = dnID;
+ this.isDomain = isDomain;
}
@Override
- public void close() {
- if (ioStreams != null) {
- IOUtils.closeStream(ioStreams.in);
- IOUtils.closeStream(ioStreams.out);
+ public boolean equals(Object o) {
+ if (!(o instanceof Key)) {
+ return false;
}
- IOUtils.closeSocket(sock);
+ Key other = (Key)o;
+ return dnID.equals(other.dnID) && isDomain == other.isDomain;
}
- public long getCreateTime() {
- return this.createTime;
+ @Override
+ public int hashCode() {
+ return dnID.hashCode() ^ (isDomain ? 1 : 0);
+ }
+ }
+
+ private static class Value {
+ private final Peer peer;
+ private final long time;
+
+ Value(Peer peer, long time) {
+ this.peer = peer;
+ this.time = time;
+ }
+
+ Peer getPeer() {
+ return peer;
+ }
+
+ long getTime() {
+ return time;
}
}
private Daemon daemon;
/** A map for per user per datanode. */
- private static LinkedListMultimap<SocketAddress, SocketAndStreams> multimap =
+ private final LinkedListMultimap<Key, Value> multimap =
LinkedListMultimap.create();
- private static int capacity;
- private static long expiryPeriod;
- private static SocketCache scInstance = new SocketCache();
- private static boolean isInitedOnce = false;
-
- public static synchronized SocketCache getInstance(int c, long e) {
- // capacity is only initialized once
- if (isInitedOnce == false) {
- capacity = c;
- expiryPeriod = e;
+ private final int capacity;
+ private final long expiryPeriod;
+ private static PeerCache instance = null;
+
+ @VisibleForTesting
+ PeerCache(int c, long e) {
+ this.capacity = c;
+ this.expiryPeriod = e;
- if (capacity == 0 ) {
- LOG.info("SocketCache disabled.");
- }
- else if (expiryPeriod == 0) {
- throw new IllegalStateException("Cannot initialize expiryPeriod to " +
- expiryPeriod + "when cache is enabled.");
- }
- isInitedOnce = true;
+ if (capacity == 0 ) {
+ LOG.info("SocketCache disabled.");
+ }
+ else if (expiryPeriod == 0) {
+ throw new IllegalStateException("Cannot initialize expiryPeriod to " +
+ expiryPeriod + " when cache is enabled.");
+ }
+ }
+
+ public static synchronized PeerCache getInstance(int c, long e) {
+ // capacity is only initialized once
+ if (instance == null) {
+ instance = new PeerCache(c, e);
} else { //already initialized once
- if (capacity != c || expiryPeriod != e) {
- LOG.info("capacity and expiry periods already set to " + capacity +
- " and " + expiryPeriod + " respectively. Cannot set it to " + c +
- " and " + e);
+ if (instance.capacity != c || instance.expiryPeriod != e) {
+ LOG.info("capacity and expiry periods already set to " +
+ instance.capacity + " and " + instance.expiryPeriod +
+ " respectively. Cannot set it to " + c + " and " + e);
}
}
- return scInstance;
+ return instance;
}
private boolean isDaemonStarted() {
@@ -120,44 +132,47 @@ class SocketCache {
@Override
public void run() {
try {
- SocketCache.this.run();
+ PeerCache.this.run();
} catch(InterruptedException e) {
//noop
} finally {
- SocketCache.this.clear();
+ PeerCache.this.clear();
}
}
@Override
public String toString() {
- return String.valueOf(SocketCache.this);
+ return String.valueOf(PeerCache.this);
}
});
daemon.start();
}
/**
- * Get a cached socket to the given address.
- * @param remote Remote address the socket is connected to.
- * @return A socket with unknown state, possibly closed underneath. Or null.
+ * Get a cached peer connected to the given DataNode.
+ * @param dnId The DataNode to get a Peer for.
+ * @param isDomain Whether to retrieve a DomainPeer or not.
+ *
+ * @return An open Peer connected to the DN, or null if none
+ * was found.
*/
- public synchronized SocketAndStreams get(SocketAddress remote) {
+ public synchronized Peer get(DatanodeID dnId, boolean isDomain) {
if (capacity <= 0) { // disabled
return null;
}
- List<SocketAndStreams> sockStreamList = multimap.get(remote);
+ List<Value> sockStreamList = multimap.get(new Key(dnId, isDomain));
if (sockStreamList == null) {
return null;
}
- Iterator<SocketAndStreams> iter = sockStreamList.iterator();
+ Iterator<Value> iter = sockStreamList.iterator();
while (iter.hasNext()) {
- SocketAndStreams candidate = iter.next();
+ Value candidate = iter.next();
iter.remove();
- if (!candidate.sock.isClosed()) {
- return candidate;
+ if (!candidate.getPeer().isClosed()) {
+ return candidate.getPeer();
}
}
return null;
@@ -167,30 +182,23 @@ class SocketCache {
* Give an unused socket to the cache.
* @param sock socket not used by anyone.
*/
- public synchronized void put(Socket sock, IOStreamPair ioStreams) {
-
- Preconditions.checkNotNull(sock);
- SocketAndStreams s = new SocketAndStreams(sock, ioStreams);
+ public synchronized void put(DatanodeID dnId, Peer peer) {
+ Preconditions.checkNotNull(dnId);
+ Preconditions.checkNotNull(peer);
+ if (peer.isClosed()) return;
if (capacity <= 0) {
// Cache disabled.
- s.close();
+ IOUtils.cleanup(LOG, peer);
return;
}
startExpiryDaemon();
- SocketAddress remoteAddr = sock.getRemoteSocketAddress();
- if (remoteAddr == null) {
- LOG.warn("Cannot cache (unconnected) socket with no remote address: " +
- sock);
- IOUtils.closeSocket(sock);
- return;
- }
-
if (capacity == multimap.size()) {
evictOldest();
}
- multimap.put(remoteAddr, s);
+ multimap.put(new Key(dnId, peer.getDomainSocket() != null),
+ new Value(peer, Time.monotonicNow()));
}
public synchronized int size() {
@@ -202,18 +210,17 @@ class SocketCache {
*/
private synchronized void evictExpired(long expiryPeriod) {
while (multimap.size() != 0) {
- Iterator<Entry<SocketAddress, SocketAndStreams>> iter =
+ Iterator<Entry<Key, Value>> iter =
multimap.entries().iterator();
- Entry<SocketAddress, SocketAndStreams> entry = iter.next();
+ Entry<Key, Value> entry = iter.next();
// if oldest socket expired, remove it
if (entry == null ||
- Time.monotonicNow() - entry.getValue().getCreateTime() <
+ Time.monotonicNow() - entry.getValue().getTime() <
expiryPeriod) {
break;
}
+ IOUtils.cleanup(LOG, entry.getValue().getPeer());
iter.remove();
- SocketAndStreams s = entry.getValue();
- s.close();
}
}
@@ -221,16 +228,18 @@ class SocketCache {
* Evict the oldest entry in the cache.
*/
private synchronized void evictOldest() {
- Iterator<Entry<SocketAddress, SocketAndStreams>> iter =
+ // We can get the oldest element immediately, because of an interesting
+ // property of LinkedListMultimap: its iterator traverses entries in the
+ // order that they were added.
+ Iterator<Entry<Key, Value>> iter =
multimap.entries().iterator();
if (!iter.hasNext()) {
throw new IllegalStateException("Cannot evict from empty cache! " +
"capacity: " + capacity);
}
- Entry<SocketAddress, SocketAndStreams> entry = iter.next();
+ Entry<Key, Value> entry = iter.next();
+ IOUtils.cleanup(LOG, entry.getValue().getPeer());
iter.remove();
- SocketAndStreams s = entry.getValue();
- s.close();
}
/**
@@ -255,11 +264,24 @@ class SocketCache {
* Empty the cache, and close all sockets.
*/
@VisibleForTesting
- protected synchronized void clear() {
- for (SocketAndStreams sockAndStream : multimap.values()) {
- sockAndStream.close();
+ synchronized void clear() {
+ for (Value value : multimap.values()) {
+ IOUtils.cleanup(LOG, value.getPeer());
}
multimap.clear();
}
-
+
+ @VisibleForTesting
+ void close() {
+ clear();
+ if (daemon != null) {
+ daemon.interrupt();
+ try {
+ daemon.join();
+ } catch (InterruptedException e) {
+ throw new RuntimeException("failed to join thread");
+ }
+ }
+ daemon = null;
+ }
}
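
A hedged sketch of the new PeerCache call pattern; the capacity and expiry values and the connectToDataNode() helper are illustrative assumptions, not part of the patch:

    // Sketch only: connectToDataNode() is an assumed helper.
    Peer getPeer(DatanodeID dnId) throws IOException {
      PeerCache cache = PeerCache.getInstance(16, 30 * 1000);   // capacity, expiry period in ms
      Peer peer = cache.get(dnId, false /* isDomain: want a TCP peer */);
      if (peer == null) {
        peer = connectToDataNode(dnId);            // dial the DataNode and wrap the socket in a Peer
      }
      return peer;
      // after a successful read, cache.put(dnId, peer) makes the connection available for reuse
    }
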
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java
index f7ac589921e..61120f39edc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java
@@ -29,6 +29,8 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.net.Peer;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
@@ -54,8 +56,8 @@ import org.apache.hadoop.util.DataChecksum;
@InterfaceAudience.Private
@Deprecated
public class RemoteBlockReader extends FSInputChecker implements BlockReader {
-
- Socket dnSock; //for now just sending the status code (e.g. checksumOk) after the read.
+ private final Peer peer;
+ private final DatanodeID datanodeID;
private final DataInputStream in;
private DataChecksum checksum;
@@ -125,9 +127,9 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
// if eos was set in the previous read, send a status code to the DN
if (eos && !eosBefore && nRead >= 0) {
if (needChecksum()) {
- sendReadResult(dnSock, Status.CHECKSUM_OK);
+ sendReadResult(peer, Status.CHECKSUM_OK);
} else {
- sendReadResult(dnSock, Status.SUCCESS);
+ sendReadResult(peer, Status.SUCCESS);
}
}
return nRead;
@@ -321,7 +323,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
private RemoteBlockReader(String file, String bpid, long blockId,
DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
- long startOffset, long firstChunkOffset, long bytesToRead, Socket dnSock) {
+ long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
+ DatanodeID datanodeID) {
// Path is used only for printing block and file information in debug
super(new Path("/blk_" + blockId + ":" + bpid + ":of:"+ file)/*too non path-like?*/,
1, verifyChecksum,
@@ -329,7 +332,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
checksum.getBytesPerChecksum(),
checksum.getChecksumSize());
- this.dnSock = dnSock;
+ this.peer = peer;
+ this.datanodeID = datanodeID;
this.in = in;
this.checksum = checksum;
this.startOffset = Math.max( startOffset, 0 );
@@ -348,13 +352,6 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
checksumSize = this.checksum.getChecksumSize();
}
- public static RemoteBlockReader newBlockReader(Socket sock, String file,
- ExtendedBlock block, Token<BlockTokenIdentifier> blockToken,
- long startOffset, long len, int bufferSize) throws IOException {
- return newBlockReader(sock, file, block, blockToken, startOffset,
- len, bufferSize, true, "");
- }
-
/**
* Create a new BlockReader specifically to satisfy a read.
* This method also sends the OP_READ_BLOCK request.
@@ -370,16 +367,17 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
* @param clientName Client name
* @return New BlockReader instance, or null on error.
*/
- public static RemoteBlockReader newBlockReader( Socket sock, String file,
+ public static RemoteBlockReader newBlockReader(String file,
ExtendedBlock block,
Token<BlockTokenIdentifier> blockToken,
long startOffset, long len,
int bufferSize, boolean verifyChecksum,
- String clientName)
+ String clientName, Peer peer,
+ DatanodeID datanodeID)
throws IOException {
// in and out will be closed when sock is closed (by the caller)
- final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
- NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT)));
+ final DataOutputStream out =
+ new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
verifyChecksum);
@@ -388,12 +386,11 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
//
DataInputStream in = new DataInputStream(
- new BufferedInputStream(NetUtils.getInputStream(sock),
- bufferSize));
+ new BufferedInputStream(peer.getInputStream(), bufferSize));
BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
PBHelper.vintPrefixed(in));
- RemoteBlockReader2.checkSuccess(status, sock, block, file);
+ RemoteBlockReader2.checkSuccess(status, peer, block, file);
ReadOpChecksumInfoProto checksumInfo =
status.getReadOpChecksumInfo();
DataChecksum checksum = DataTransferProtoUtil.fromProto(
@@ -411,15 +408,19 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
}
return new RemoteBlockReader(file, block.getBlockPoolId(), block.getBlockId(),
- in, checksum, verifyChecksum, startOffset, firstChunkOffset, len, sock);
+ in, checksum, verifyChecksum, startOffset, firstChunkOffset, len,
+ peer, datanodeID);
}
@Override
- public synchronized void close() throws IOException {
+ public synchronized void close(PeerCache peerCache,
+ FileInputStreamCache fisCache) throws IOException {
startOffset = -1;
checksum = null;
- if (dnSock != null) {
- dnSock.close();
+ if (peerCache != null && sentStatusCode) {
+ peerCache.put(datanodeID, peer);
+ } else {
+ peer.close();
}
// in will be closed when its Socket is closed.
@@ -436,37 +437,21 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
return readFully(this, buf, offset, len);
}
- @Override
- public Socket takeSocket() {
- assert hasSentStatusCode() :
- "BlockReader shouldn't give back sockets mid-read";
- Socket res = dnSock;
- dnSock = null;
- return res;
- }
-
- @Override
- public boolean hasSentStatusCode() {
- return sentStatusCode;
- }
-
/**
* When the reader reaches end of the read, it sends a status response
* (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN
* closing our connection (which we will re-open), but won't affect
* data correctness.
*/
- void sendReadResult(Socket sock, Status statusCode) {
- assert !sentStatusCode : "already sent status code to " + sock;
+ void sendReadResult(Peer peer, Status statusCode) {
+ assert !sentStatusCode : "already sent status code to " + peer;
try {
- RemoteBlockReader2.writeReadResult(
- NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT),
- statusCode);
+ RemoteBlockReader2.writeReadResult(peer.getOutputStream(), statusCode);
sentStatusCode = true;
} catch (IOException e) {
// It's ok not to be able to send this. But something is probably wrong.
LOG.info("Could not send read status (" + statusCode + ") to datanode " +
- sock.getInetAddress() + ": " + e.getMessage());
+ peer.getRemoteAddressString() + ": " + e.getMessage());
}
}
@@ -486,12 +471,11 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
public int read(ByteBuffer buf) throws IOException {
throw new UnsupportedOperationException("readDirect unsupported in RemoteBlockReader");
}
-
+
@Override
- public IOStreamPair getStreams() {
- // This class doesn't support encryption, which is the only thing this
- // method is used for. See HDFS-3637.
- return null;
+ public int available() throws IOException {
+ // An optimistic estimate of how much data is available
+ // to us without doing network I/O.
+ return DFSClient.TCP_WINDOW_SIZE;
}
-
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java
index 58bb37a724a..4d62734b1fa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java
@@ -23,16 +23,16 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
-import java.net.Socket;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.net.Peer;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
@@ -44,10 +44,11 @@ import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
-import org.apache.hadoop.net.SocketInputWrapper;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DataChecksum;
+import com.google.common.annotations.VisibleForTesting;
+
/**
* This is a wrapper around connection to datanode
* and understands checksum, offset etc.
@@ -79,9 +80,8 @@ public class RemoteBlockReader2 implements BlockReader {
static final Log LOG = LogFactory.getLog(RemoteBlockReader2.class);
- Socket dnSock;
- // for now just sending the status code (e.g. checksumOk) after the read.
- private IOStreamPair ioStreams;
+ final private Peer peer;
+ final private DatanodeID datanodeID;
private final ReadableByteChannel in;
private DataChecksum checksum;
@@ -114,6 +114,11 @@ public class RemoteBlockReader2 implements BlockReader {
/** Amount of unread data in the current received packet */
int dataLeft = 0;
+ @VisibleForTesting
+ public Peer getPeer() {
+ return peer;
+ }
+
@Override
public synchronized int read(byte[] buf, int off, int len)
throws IOException {
@@ -246,13 +251,13 @@ public class RemoteBlockReader2 implements BlockReader {
}
protected RemoteBlockReader2(String file, String bpid, long blockId,
- ReadableByteChannel in, DataChecksum checksum, boolean verifyChecksum,
- long startOffset, long firstChunkOffset, long bytesToRead, Socket dnSock,
- IOStreamPair ioStreams) {
+ DataChecksum checksum, boolean verifyChecksum,
+ long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
+ DatanodeID datanodeID) {
// Path is used only for printing block and file information in debug
- this.dnSock = dnSock;
- this.ioStreams = ioStreams;
- this.in = in;
+ this.peer = peer;
+ this.datanodeID = datanodeID;
+ this.in = peer.getInputStreamChannel();
this.checksum = checksum;
this.verifyChecksum = verifyChecksum;
this.startOffset = Math.max( startOffset, 0 );
@@ -269,39 +274,20 @@ public class RemoteBlockReader2 implements BlockReader {
@Override
- public synchronized void close() throws IOException {
+ public synchronized void close(PeerCache peerCache,
+ FileInputStreamCache fisCache) throws IOException {
packetReceiver.close();
-
startOffset = -1;
checksum = null;
- if (dnSock != null) {
- dnSock.close();
+ if (peerCache != null && sentStatusCode) {
+ peerCache.put(datanodeID, peer);
+ } else {
+ peer.close();
}
// in will be closed when its Socket is closed.
}
- /**
- * Take the socket used to talk to the DN.
- */
- @Override
- public Socket takeSocket() {
- assert hasSentStatusCode() :
- "BlockReader shouldn't give back sockets mid-read";
- Socket res = dnSock;
- dnSock = null;
- return res;
- }
-
- /**
- * Whether the BlockReader has reached the end of its input stream
- * and successfully sent a status code back to the datanode.
- */
- @Override
- public boolean hasSentStatusCode() {
- return sentStatusCode;
- }
-
/**
* When the reader reaches end of the read, it sends a status response
* (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN
@@ -309,14 +295,14 @@ public class RemoteBlockReader2 implements BlockReader {
* data correctness.
*/
void sendReadResult(Status statusCode) {
- assert !sentStatusCode : "already sent status code to " + dnSock;
+ assert !sentStatusCode : "already sent status code to " + peer;
try {
- writeReadResult(ioStreams.out, statusCode);
+ writeReadResult(peer.getOutputStream(), statusCode);
sentStatusCode = true;
} catch (IOException e) {
// It's ok not to be able to send this. But something is probably wrong.
LOG.info("Could not send read status (" + statusCode + ") to datanode " +
- dnSock.getInetAddress() + ": " + e.getMessage());
+ peer.getRemoteAddressString() + ": " + e.getMessage());
}
}
@@ -367,42 +353,34 @@ public class RemoteBlockReader2 implements BlockReader {
* @param blockToken The block token for security
* @param startOffset The read offset, relative to block head
* @param len The number of bytes to read
- * @param bufferSize The IO buffer size (not the client buffer size)
* @param verifyChecksum Whether to verify checksum
* @param clientName Client name
+ * @param peer The Peer to use
+ * @param datanodeID The DatanodeID this peer is connected to
* @return New BlockReader instance, or null on error.
*/
- public static BlockReader newBlockReader(Socket sock, String file,
+ public static BlockReader newBlockReader(String file,
ExtendedBlock block,
Token<BlockTokenIdentifier> blockToken,
long startOffset, long len,
- int bufferSize, boolean verifyChecksum,
+ boolean verifyChecksum,
String clientName,
- DataEncryptionKey encryptionKey,
- IOStreamPair ioStreams)
+ Peer peer, DatanodeID datanodeID)
throws IOException {
-
- ReadableByteChannel ch;
- if (ioStreams.in instanceof SocketInputWrapper) {
- ch = ((SocketInputWrapper)ioStreams.in).getReadableByteChannel();
- } else {
- ch = (ReadableByteChannel) ioStreams.in;
- }
-
// in and out will be closed when sock is closed (by the caller)
final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
- ioStreams.out));
+ peer.getOutputStream()));
new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
verifyChecksum);
//
// Get bytes in block
//
- DataInputStream in = new DataInputStream(ioStreams.in);
+ DataInputStream in = new DataInputStream(peer.getInputStream());
BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
PBHelper.vintPrefixed(in));
- checkSuccess(status, sock, block, file);
+ checkSuccess(status, peer, block, file);
ReadOpChecksumInfoProto checksumInfo =
status.getReadOpChecksumInfo();
DataChecksum checksum = DataTransferProtoUtil.fromProto(
@@ -420,34 +398,36 @@ public class RemoteBlockReader2 implements BlockReader {
}
return new RemoteBlockReader2(file, block.getBlockPoolId(), block.getBlockId(),
- ch, checksum, verifyChecksum, startOffset, firstChunkOffset, len, sock,
- ioStreams);
+ checksum, verifyChecksum, startOffset, firstChunkOffset, len, peer,
+ datanodeID);
}
static void checkSuccess(
- BlockOpResponseProto status, Socket sock,
+ BlockOpResponseProto status, Peer peer,
ExtendedBlock block, String file)
throws IOException {
if (status.getStatus() != Status.SUCCESS) {
if (status.getStatus() == Status.ERROR_ACCESS_TOKEN) {
throw new InvalidBlockTokenException(
"Got access token error for OP_READ_BLOCK, self="
- + sock.getLocalSocketAddress() + ", remote="
- + sock.getRemoteSocketAddress() + ", for file " + file
+ + peer.getLocalAddressString() + ", remote="
+ + peer.getRemoteAddressString() + ", for file " + file
+ ", for pool " + block.getBlockPoolId() + " block "
+ block.getBlockId() + "_" + block.getGenerationStamp());
} else {
throw new IOException("Got error for OP_READ_BLOCK, self="
- + sock.getLocalSocketAddress() + ", remote="
- + sock.getRemoteSocketAddress() + ", for file " + file
+ + peer.getLocalAddressString() + ", remote="
+ + peer.getRemoteAddressString() + ", for file " + file
+ ", for pool " + block.getBlockPoolId() + " block "
+ block.getBlockId() + "_" + block.getGenerationStamp());
}
}
}
-
+
@Override
- public IOStreamPair getStreams() {
- return ioStreams;
+ public int available() throws IOException {
+ // An optimistic estimate of how much data is available
+ // to us without doing network I/O.
+ return DFSClient.TCP_WINDOW_SIZE;
}
}
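
With the Socket parameter gone, callers now supply a Peer and the DatanodeID it is connected to. A sketch of the expected call against the factory method above; all variable values here are illustrative:

    // Sketch only: 'peer' would typically come from PeerCache or TcpPeerServer.peerFromSocketAndKey().
    BlockReader reader = RemoteBlockReader2.newBlockReader(
        src, block, blockToken, startOffset, length,
        true /* verifyChecksum */, clientName, peer, datanodeID);
    // ... read block data ...
    reader.close(peerCache, fisCache);   // returns the peer to the cache if the status code was sent
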
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java
new file mode 100644
index 00000000000..7c6c3f4b6ff
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.net;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.Socket;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.hadoop.net.unix.DomainSocket;
+
+/**
+ * Represents a peer that we communicate with by using a basic Socket
+ * that has no associated Channel.
+ *
+ */
+class BasicInetPeer implements Peer {
+ private final Socket socket;
+ private final OutputStream out;
+ private final InputStream in;
+ private final boolean isLocal;
+
+ public BasicInetPeer(Socket socket) throws IOException {
+ this.socket = socket;
+ this.out = socket.getOutputStream();
+ this.in = socket.getInputStream();
+ this.isLocal = socket.getInetAddress().equals(socket.getLocalAddress());
+ }
+
+ @Override
+ public ReadableByteChannel getInputStreamChannel() {
+ /*
+ * This Socket has no channel, so there's nothing to return here.
+ */
+ return null;
+ }
+
+ @Override
+ public void setReadTimeout(int timeoutMs) throws IOException {
+ socket.setSoTimeout(timeoutMs);
+ }
+
+ @Override
+ public int getReceiveBufferSize() throws IOException {
+ return socket.getReceiveBufferSize();
+ }
+
+ @Override
+ public boolean getTcpNoDelay() throws IOException {
+ return socket.getTcpNoDelay();
+ }
+
+ @Override
+ public void setWriteTimeout(int timeoutMs) {
+ /*
+ * We can't implement write timeouts. :(
+ *
+ * Java provides no facility to set a blocking write timeout on a Socket.
+ * You can simulate a blocking write with a timeout by using
+ * non-blocking I/O. However, we can't use nio here, because this Socket
+ * doesn't have an associated Channel.
+ *
+ * See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4031100 for
+ * more details.
+ */
+ }
+
+ @Override
+ public boolean isClosed() {
+ return socket.isClosed();
+ }
+
+ @Override
+ public void close() throws IOException {
+ socket.close();
+ }
+
+ @Override
+ public String getRemoteAddressString() {
+ return socket.getRemoteSocketAddress().toString();
+ }
+
+ @Override
+ public String getLocalAddressString() {
+ return socket.getLocalSocketAddress().toString();
+ }
+
+ @Override
+ public InputStream getInputStream() throws IOException {
+ return in;
+ }
+
+ @Override
+ public OutputStream getOutputStream() throws IOException {
+ return out;
+ }
+
+ @Override
+ public boolean isLocal() {
+ return isLocal;
+ }
+
+ @Override
+ public String toString() {
+ return "BasicInetPeer(" + socket.toString() + ")";
+ }
+
+ @Override
+ public DomainSocket getDomainSocket() {
+ return null;
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java
new file mode 100644
index 00000000000..46279b62e66
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.net;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Represents a peer that we communicate with by using blocking I/O
+ * on a UNIX domain socket.
+ */
+@InterfaceAudience.Private
+public class DomainPeer implements Peer {
+ private final DomainSocket socket;
+ private final OutputStream out;
+ private final InputStream in;
+ private final ReadableByteChannel channel;
+
+ public DomainPeer(DomainSocket socket) {
+ this.socket = socket;
+ this.out = socket.getOutputStream();
+ this.in = socket.getInputStream();
+ this.channel = socket.getChannel();
+ }
+
+ @Override
+ public ReadableByteChannel getInputStreamChannel() {
+ return channel;
+ }
+
+ @Override
+ public void setReadTimeout(int timeoutMs) throws IOException {
+ socket.setAttribute(DomainSocket.RECEIVE_TIMEOUT, timeoutMs);
+ }
+
+ @Override
+ public int getReceiveBufferSize() throws IOException {
+ return socket.getAttribute(DomainSocket.RECEIVE_BUFFER_SIZE);
+ }
+
+ @Override
+ public boolean getTcpNoDelay() throws IOException {
+ /* No TCP, no TCP_NODELAY. */
+ return false;
+ }
+
+ @Override
+ public void setWriteTimeout(int timeoutMs) throws IOException {
+ socket.setAttribute(DomainSocket.SEND_TIMEOUT, timeoutMs);
+ }
+
+ @Override
+ public boolean isClosed() {
+ return !socket.isOpen();
+ }
+
+ @Override
+ public void close() throws IOException {
+ socket.close();
+ }
+
+ @Override
+ public String getRemoteAddressString() {
+ return "unix:" + socket.getPath();
+ }
+
+ @Override
+ public String getLocalAddressString() {
+ return "";
+ }
+
+ @Override
+ public InputStream getInputStream() throws IOException {
+ return in;
+ }
+
+ @Override
+ public OutputStream getOutputStream() throws IOException {
+ return out;
+ }
+
+ @Override
+ public boolean isLocal() {
+ /* UNIX domain sockets can only be used for local communication. */
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ return "DomainPeer(" + getRemoteAddressString() + ")";
+ }
+
+ @Override
+ public DomainSocket getDomainSocket() {
+ return socket;
+ }
+}
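
Client code is expected to obtain a DomainPeer by connecting a DomainSocket first; a hedged sketch, where the socket path is an assumption and DomainSocket.connect() is taken to be the common-side API this class wraps:

    // Sketch only: path and timeout values are illustrative.
    DomainSocket sock = DomainSocket.connect("/var/run/hdfs/dn_socket");
    Peer peer = new DomainPeer(sock);
    peer.setReadTimeout(60 * 1000);      // maps to DomainSocket.RECEIVE_TIMEOUT
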
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java
new file mode 100644
index 00000000000..dce64621482
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.net;
+
+import java.io.IOException;
+import java.net.SocketTimeoutException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hdfs.net.PeerServer;
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.classification.InterfaceAudience;
+
+@InterfaceAudience.Private
+public class DomainPeerServer implements PeerServer {
+ static Log LOG = LogFactory.getLog(DomainPeerServer.class);
+ private final DomainSocket sock;
+
+ DomainPeerServer(DomainSocket sock) {
+ this.sock = sock;
+ }
+
+ public DomainPeerServer(String path, int port)
+ throws IOException {
+ this(DomainSocket.bindAndListen(DomainSocket.getEffectivePath(path, port)));
+ }
+
+ public String getBindPath() {
+ return sock.getPath();
+ }
+
+ @Override
+ public void setReceiveBufferSize(int size) throws IOException {
+ sock.setAttribute(DomainSocket.RECEIVE_BUFFER_SIZE, size);
+ }
+
+ @Override
+ public Peer accept() throws IOException, SocketTimeoutException {
+ DomainSocket connSock = sock.accept();
+ Peer peer = null;
+ boolean success = false;
+ try {
+ peer = new DomainPeer(connSock);
+ success = true;
+ return peer;
+ } finally {
+ if (!success) {
+ if (peer != null) peer.close();
+ connSock.close();
+ }
+ }
+ }
+
+ @Override
+ public String getListeningString() {
+ return "unix:" + sock.getPath();
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ sock.close();
+ } catch (IOException e) {
+ LOG.error("error closing DomainPeerServer: ", e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "DomainPeerServer(" + getListeningString() + ")";
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java
new file mode 100644
index 00000000000..6b0e506d3cf
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.net;
+
+import java.io.IOException;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor;
+import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
+import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
+import org.apache.hadoop.net.unix.DomainSocket;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.channels.ReadableByteChannel;
+
+/**
+ * Represents a peer that we communicate with by using an encrypted
+ * communications medium.
+ */
+@InterfaceAudience.Private
+public class EncryptedPeer implements Peer {
+ private final Peer enclosedPeer;
+
+ /**
+ * An encrypted InputStream.
+ */
+ private final InputStream in;
+
+ /**
+ * An encrypted OutputStream.
+ */
+ private final OutputStream out;
+
+ /**
+ * An encrypted ReadableByteChannel.
+ */
+ private final ReadableByteChannel channel;
+
+ public EncryptedPeer(Peer enclosedPeer, DataEncryptionKey key)
+ throws IOException {
+ this.enclosedPeer = enclosedPeer;
+ IOStreamPair ios = DataTransferEncryptor.getEncryptedStreams(
+ enclosedPeer.getOutputStream(), enclosedPeer.getInputStream(), key);
+ this.in = ios.in;
+ this.out = ios.out;
+ this.channel = ios.in instanceof ReadableByteChannel ?
+ (ReadableByteChannel)ios.in : null;
+ }
+
+ @Override
+ public ReadableByteChannel getInputStreamChannel() {
+ return channel;
+ }
+
+ @Override
+ public void setReadTimeout(int timeoutMs) throws IOException {
+ enclosedPeer.setReadTimeout(timeoutMs);
+ }
+
+ @Override
+ public int getReceiveBufferSize() throws IOException {
+ return enclosedPeer.getReceiveBufferSize();
+ }
+
+ @Override
+ public boolean getTcpNoDelay() throws IOException {
+ return enclosedPeer.getTcpNoDelay();
+ }
+
+ @Override
+ public void setWriteTimeout(int timeoutMs) throws IOException {
+ enclosedPeer.setWriteTimeout(timeoutMs);
+ }
+
+ @Override
+ public boolean isClosed() {
+ return enclosedPeer.isClosed();
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ in.close();
+ } finally {
+ try {
+ out.close();
+ } finally {
+ enclosedPeer.close();
+ }
+ }
+ }
+
+ @Override
+ public String getRemoteAddressString() {
+ return enclosedPeer.getRemoteAddressString();
+ }
+
+ @Override
+ public String getLocalAddressString() {
+ return enclosedPeer.getLocalAddressString();
+ }
+
+ @Override
+ public InputStream getInputStream() throws IOException {
+ return in;
+ }
+
+ @Override
+ public OutputStream getOutputStream() throws IOException {
+ return out;
+ }
+
+ @Override
+ public boolean isLocal() {
+ return enclosedPeer.isLocal();
+ }
+
+ @Override
+ public String toString() {
+ return "EncryptedPeer(" + enclosedPeer + ")";
+ }
+
+ @Override
+ public DomainSocket getDomainSocket() {
+ return enclosedPeer.getDomainSocket();
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java
new file mode 100644
index 00000000000..1dc9d1d1186
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.net;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.Socket;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.hadoop.net.SocketInputStream;
+import org.apache.hadoop.net.SocketOutputStream;
+import org.apache.hadoop.net.unix.DomainSocket;
+
+/**
+ * Represents a peer that we communicate with by using non-blocking I/O
+ * on a Socket.
+ */
+class NioInetPeer implements Peer {
+ private final Socket socket;
+
+ /**
+ * An InputStream which simulates blocking I/O with timeouts using NIO.
+ */
+ private final SocketInputStream in;
+
+ /**
+ * An OutputStream which simulates blocking I/O with timeouts using NIO.
+ */
+ private final SocketOutputStream out;
+
+ private final boolean isLocal;
+
+ NioInetPeer(Socket socket) throws IOException {
+ this.socket = socket;
+ this.in = new SocketInputStream(socket.getChannel(), 0);
+ this.out = new SocketOutputStream(socket.getChannel(), 0);
+ this.isLocal = socket.getInetAddress().equals(socket.getLocalAddress());
+ }
+
+ @Override
+ public ReadableByteChannel getInputStreamChannel() {
+ return in;
+ }
+
+ @Override
+ public void setReadTimeout(int timeoutMs) throws IOException {
+ in.setTimeout(timeoutMs);
+ }
+
+ @Override
+ public int getReceiveBufferSize() throws IOException {
+ return socket.getReceiveBufferSize();
+ }
+
+ @Override
+ public boolean getTcpNoDelay() throws IOException {
+ return socket.getTcpNoDelay();
+ }
+
+ @Override
+ public void setWriteTimeout(int timeoutMs) throws IOException {
+ out.setTimeout(timeoutMs);
+ }
+
+ @Override
+ public boolean isClosed() {
+ return socket.isClosed();
+ }
+
+ @Override
+ public void close() throws IOException {
+ // We always close the outermost streams-- in this case, 'in' and 'out'
+ // Closing either one of these will also close the Socket.
+ try {
+ in.close();
+ } finally {
+ out.close();
+ }
+ }
+
+ @Override
+ public String getRemoteAddressString() {
+ return socket.getRemoteSocketAddress().toString();
+ }
+
+ @Override
+ public String getLocalAddressString() {
+ return socket.getLocalSocketAddress().toString();
+ }
+
+ @Override
+ public InputStream getInputStream() throws IOException {
+ return in;
+ }
+
+ @Override
+ public OutputStream getOutputStream() throws IOException {
+ return out;
+ }
+
+ @Override
+ public boolean isLocal() {
+ return isLocal;
+ }
+
+ @Override
+ public String toString() {
+ return "NioInetPeer(" + socket.toString() + ")";
+ }
+
+ @Override
+ public DomainSocket getDomainSocket() {
+ return null;
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java
new file mode 100644
index 00000000000..862720e921f
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.net;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.channels.ReadableByteChannel;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.net.unix.DomainSocket;
+
+/**
+ * Represents a connection to a peer.
+ */
+@InterfaceAudience.Private
+public interface Peer extends Closeable {
+ /**
+ * @return The input stream channel associated with this
+ * peer, or null if it has none.
+ */
+ public ReadableByteChannel getInputStreamChannel();
+
+ /**
+ * Set the read timeout on this peer.
+ *
+ * @param timeoutMs The timeout in milliseconds.
+ */
+ public void setReadTimeout(int timeoutMs) throws IOException;
+
+ /**
+ * @return The receive buffer size.
+ */
+ public int getReceiveBufferSize() throws IOException;
+
+ /**
+ * @return True if TCP_NODELAY is turned on.
+ */
+ public boolean getTcpNoDelay() throws IOException;
+
+ /**
+ * Set the write timeout on this peer.
+ *
+ * Note: this is not honored for BasicInetPeer.
+ * See {@link BasicInetPeer#setWriteTimeout} for details.
+ *
+ * @param timeoutMs The timeout in milliseconds.
+ */
+ public void setWriteTimeout(int timeoutMs) throws IOException;
+
+ /**
+ * @return true only if the peer is closed.
+ */
+ public boolean isClosed();
+
+ /**
+ * Close the peer.
+ *
+ * It's safe to re-close a Peer that is already closed.
+ */
+ public void close() throws IOException;
+
+ /**
+ * @return A string representing the remote end of our
+ * connection to the peer.
+ */
+ public String getRemoteAddressString();
+
+ /**
+ * @return A string representing the local end of our
+ * connection to the peer.
+ */
+ public String getLocalAddressString();
+
+ /**
+ * @return An InputStream associated with the Peer.
+ * This InputStream will be valid until you close
+ * this peer with Peer#close.
+ */
+ public InputStream getInputStream() throws IOException;
+
+ /**
+ * @return An OutputStream associated with the Peer.
+ * This OutputStream will be valid until you close
+ * this peer with Peer#close.
+ */
+ public OutputStream getOutputStream() throws IOException;
+
+ /**
+ * @return True if the peer resides on the same
+ * computer as we do.
+ */
+ public boolean isLocal();
+
+ /**
+ * @return The DomainSocket associated with the current
+ * peer, or null if there is none.
+ */
+ public DomainSocket getDomainSocket();
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/PeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/PeerServer.java
new file mode 100644
index 00000000000..c7b6b14df49
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/PeerServer.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.net;
+
+import java.io.Closeable;
+import org.apache.hadoop.classification.InterfaceAudience;
+import java.io.IOException;
+import java.net.SocketTimeoutException;
+
+@InterfaceAudience.Private
+public interface PeerServer extends Closeable {
+ /**
+ * Set the receive buffer size of the PeerServer.
+ *
+ * @param size The receive buffer size.
+ */
+ public void setReceiveBufferSize(int size) throws IOException;
+
+ /**
+ * Listens for a connection to be made to this server and accepts
+ * it. The method blocks until a connection is made.
+ *
+ * @exception IOException if an I/O error occurs when waiting for a
+ * connection.
+ * @exception SecurityException if a security manager exists and its
+ * checkAccept method doesn't allow the operation.
+ * @exception SocketTimeoutException if a timeout was previously set and
+ * the timeout has been reached.
+ */
+ public Peer accept() throws IOException, SocketTimeoutException;
+
+ /**
+ * @return A string representation of the address we're
+ * listening on.
+ */
+ public String getListeningString();
+
+ /**
+ * Free the resources associated with this peer server.
+ * This normally includes sockets, etc.
+ *
+ * @throws IOException If there is an error closing the PeerServer
+ */
+ public void close() throws IOException;
+}
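
Both PeerServer implementations in this patch are consumed through the same accept loop; a minimal sketch, where handle() stands in for the DataXceiver-style per-connection logic and the buffer size is illustrative:

    // Sketch only: handle() is an assumed per-connection handler.
    void serve(PeerServer server) throws IOException {
      server.setReceiveBufferSize(128 * 1024);
      while (true) {
        Peer peer = server.accept();     // blocks until a client connects
        handle(peer);                    // typically handed off to a worker thread
      }
    }
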
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java
new file mode 100644
index 00000000000..29d86634f29
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.net;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.ServerSocket;
+import java.net.Socket;
+import java.net.SocketTimeoutException;
+import java.nio.channels.ServerSocketChannel;
+import java.nio.channels.SocketChannel;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
+import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.ipc.Server;
+
+@InterfaceAudience.Private
+public class TcpPeerServer implements PeerServer {
+ static Log LOG = LogFactory.getLog(TcpPeerServer.class);
+
+ private final ServerSocket serverSocket;
+
+ public static Peer peerFromSocket(Socket socket)
+ throws IOException {
+ Peer peer = null;
+ boolean success = false;
+ try {
+ // TCP_NODELAY is crucial here because of bad interactions between
+ // Nagle's Algorithm and Delayed ACKs. With connection keepalive
+ // between the client and DN, the conversation looks like:
+ // 1. Client -> DN: Read block X
+ // 2. DN -> Client: data for block X
+ // 3. Client -> DN: Status OK (successful read)
+ // 4. Client -> DN: Read block Y
+ // The fact that step #3 and #4 are both in the client->DN direction
+ // triggers Nagling. If the DN is using delayed ACKs, this results
+ // in a delay of 40ms or more.
+ //
+ // TCP_NODELAY disables nagling and thus avoids this performance
+ // disaster.
+ socket.setTcpNoDelay(true);
+ SocketChannel channel = socket.getChannel();
+ if (channel == null) {
+ peer = new BasicInetPeer(socket);
+ } else {
+ peer = new NioInetPeer(socket);
+ }
+ success = true;
+ return peer;
+ } finally {
+ if (!success) {
+ if (peer != null) peer.close();
+ socket.close();
+ }
+ }
+ }
+
+ public static Peer peerFromSocketAndKey(Socket s,
+ DataEncryptionKey key) throws IOException {
+ Peer peer = null;
+ boolean success = false;
+ try {
+ peer = peerFromSocket(s);
+ if (key != null) {
+ peer = new EncryptedPeer(peer, key);
+ }
+ success = true;
+ return peer;
+ } finally {
+ if (!success) {
+ IOUtils.cleanup(null, peer);
+ }
+ }
+ }
+
+ /**
+ * Create a non-secure TcpPeerServer.
+ *
+ * @param socketWriteTimeout The Socket write timeout in ms.
+ * @param bindAddr The address to bind to.
+ * @throws IOException
+ */
+ public TcpPeerServer(int socketWriteTimeout,
+ InetSocketAddress bindAddr) throws IOException {
+ this.serverSocket = (socketWriteTimeout > 0) ?
+ ServerSocketChannel.open().socket() : new ServerSocket();
+ Server.bind(serverSocket, bindAddr, 0);
+ }
+
+ /**
+ * Create a secure TcpPeerServer.
+ *
+ * @param secureResources Security resources.
+ */
+ public TcpPeerServer(SecureResources secureResources) {
+ this.serverSocket = secureResources.getStreamingSocket();
+ }
+
+ /**
+ * @return the IP address which this TcpPeerServer is listening on.
+ */
+ public InetSocketAddress getStreamingAddr() {
+ return new InetSocketAddress(
+ serverSocket.getInetAddress().getHostAddress(),
+ serverSocket.getLocalPort());
+ }
+
+ @Override
+ public void setReceiveBufferSize(int size) throws IOException {
+ this.serverSocket.setReceiveBufferSize(size);
+ }
+
+ @Override
+ public Peer accept() throws IOException, SocketTimeoutException {
+ Peer peer = peerFromSocket(serverSocket.accept());
+ return peer;
+ }
+
+ @Override
+ public String getListeningString() {
+ return serverSocket.getLocalSocketAddress().toString();
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ serverSocket.close();
+ } catch(IOException e) {
+ LOG.error("error closing TcpPeerServer: ", e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "TcpPeerServer(" + getListeningString() + ")";
+ }
+}
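
On the client side, an outbound TCP connection can be wrapped with the static helpers above; a hedged sketch in which conf, datanodeAddr, the timeouts, and the possibly-null encryption key are assumptions:

    // Sketch only: all values are illustrative.
    Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
    NetUtils.connect(s, datanodeAddr, connectTimeoutMs);
    Peer peer = TcpPeerServer.peerFromSocketAndKey(s, encryptionKey);  // encryptionKey may be null
    peer.setReadTimeout(readTimeoutMs);
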
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java
index 7f4463789b4..b89b30af204 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java
@@ -107,6 +107,18 @@ public interface DataTransferProtocol {
final String clientName,
final DatanodeInfo[] targets) throws IOException;
+ /**
+ * Request short circuit access file descriptors from a DataNode.
+ *
+ * @param blk The block to get file descriptors for.
+ * @param blockToken Security token for accessing the block.
+ * @param maxVersion Maximum version of the block data the client
+ * can understand.
+ */
+ public void requestShortCircuitFds(final ExtendedBlock blk,
+ final Token<BlockTokenIdentifier> blockToken,
+ int maxVersion) throws IOException;
+
/**
* Receive a block from a source datanode
* and then notifies the namenode
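
The maxVersion parameter documented above drives a simple compatibility rule: the DataNode only hands out file descriptors if the client can understand the block format it currently serves (see CURRENT_BLOCK_FORMAT_VERSION later in this patch). A minimal restatement of that rule, with hypothetical method and class names:

// Hypothetical helper; only the comparison mirrors the patch
// (requestShortCircuitFdsForRead rejects clients whose maxVersion is lower
// than the server's block format version).
public final class ShortCircuitVersionCheck {
  public static boolean canServe(int serverBlockFormatVersion,
                                 int clientMaxVersion) {
    return clientMaxVersion >= serverBlockFormatVersion;
  }

  public static void main(String[] args) {
    System.out.println(canServe(1, 1));   // true: client understands version 1
    System.out.println(canServe(2, 1));   // false: client is too old
  }
}
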
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java
index 4b9b47fe3db..d64e83e0329 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java
@@ -34,7 +34,8 @@ public enum Op {
REPLACE_BLOCK((byte)83),
COPY_BLOCK((byte)84),
BLOCK_CHECKSUM((byte)85),
- TRANSFER_BLOCK((byte)86);
+ TRANSFER_BLOCK((byte)86),
+ REQUEST_SHORT_CIRCUIT_FDS((byte)87);
/** The code for this operation. */
public final byte code;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java
index a156dfa538a..910938ab44e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReadBlockProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReplaceBlockProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpTransferBlockProto;
+import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpWriteBlockProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
@@ -76,6 +77,9 @@ public abstract class Receiver implements DataTransferProtocol {
case TRANSFER_BLOCK:
opTransferBlock(in);
break;
+ case REQUEST_SHORT_CIRCUIT_FDS:
+ opRequestShortCircuitFds(in);
+ break;
default:
throw new IOException("Unknown op " + op + " in data stream");
}
@@ -117,6 +121,15 @@ public abstract class Receiver implements DataTransferProtocol {
PBHelper.convert(proto.getTargetsList()));
}
+ /** Receive {@link Op#REQUEST_SHORT_CIRCUIT_FDS} */
+ private void opRequestShortCircuitFds(DataInputStream in) throws IOException {
+ final OpRequestShortCircuitAccessProto proto =
+ OpRequestShortCircuitAccessProto.parseFrom(vintPrefixed(in));
+ requestShortCircuitFds(PBHelper.convert(proto.getHeader().getBlock()),
+ PBHelper.convert(proto.getHeader().getToken()),
+ proto.getMaxVersion());
+ }
+
/** Receive OP_REPLACE_BLOCK */
private void opReplaceBlock(DataInputStream in) throws IOException {
OpReplaceBlockProto proto = OpReplaceBlockProto.parseFrom(vintPrefixed(in));
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java
index fb8bee5388b..e8f8cd64eeb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReadBlockProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReplaceBlockProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpTransferBlockProto;
+import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpWriteBlockProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
@@ -140,6 +141,17 @@ public class Sender implements DataTransferProtocol {
send(out, Op.TRANSFER_BLOCK, proto);
}
+ @Override
+ public void requestShortCircuitFds(final ExtendedBlock blk,
+      final Token<BlockTokenIdentifier> blockToken,
+ int maxVersion) throws IOException {
+ OpRequestShortCircuitAccessProto proto =
+ OpRequestShortCircuitAccessProto.newBuilder()
+ .setHeader(DataTransferProtoUtil.buildBaseHeader(
+ blk, blockToken)).setMaxVersion(maxVersion).build();
+ send(out, Op.REQUEST_SHORT_CIRCUIT_FDS, proto);
+ }
+
@Override
public void replaceBlock(final ExtendedBlock blk,
      final Token<BlockTokenIdentifier> blockToken,
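
Sender builds an OpRequestShortCircuitAccessProto and hands it to send(), while Receiver parses it back with vintPrefixed(in), so the payload travels as a varint-length-prefixed protobuf following the op code. The sketch below is a simplified, protobuf-free illustration of that framing idea only; the exact layout shown (op byte, varint length, payload) is an assumption for illustration and omits the protocol version header and other fields the real Sender and Receiver handle.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

// Illustration of op-code + varint-length-prefixed payload framing.
// REQUEST_SHORT_CIRCUIT_FDS is (byte) 87 per Op.java in this patch.
public class FramingSketch {
  static final byte REQUEST_SHORT_CIRCUIT_FDS = 87;

  // Standard base-128 varint, least-significant group first.
  static void writeVarInt(OutputStream out, int value) throws IOException {
    while ((value & ~0x7F) != 0) {
      out.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    out.write(value);
  }

  static void writeFrame(OutputStream out, byte opCode, byte[] payload)
      throws IOException {
    out.write(opCode);
    writeVarInt(out, payload.length);
    out.write(payload);
    out.flush();
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    byte[] fakePayload = new byte[] {0x0A, 0x03, 'f', 'o', 'o'};  // made-up bytes
    writeFrame(buf, REQUEST_SHORT_CIRCUIT_FDS, fakePayload);
    System.out.println("frame length: " + buf.size());  // 1 + 1 + 5 = 7
  }
}
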
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
index e233c816f9a..ebb3f5d242d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
@@ -310,6 +310,23 @@ public class DelegationTokenSecretManager
namesystem.logUpdateMasterKey(key);
}
}
+
+  @Override //AbstractDelegationTokenSecretManager
+ protected void logExpireToken(final DelegationTokenIdentifier dtId)
+ throws IOException {
+ synchronized (noInterruptsLock) {
+ // The edit logging code will fail catastrophically if it
+ // is interrupted during a logSync, since the interrupt
+ // closes the edit log files. Doing this inside the
+ // above lock and then checking interruption status
+ // prevents this bug.
+ if (Thread.interrupted()) {
+ throw new InterruptedIOException(
+ "Interrupted before expiring delegation token");
+ }
+ namesystem.logExpireDelegationToken(dtId);
+ }
+ }
/** A utility method for creating credentials. */
public static Credentials createCredentials(final NameNode namenode,
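
The comment in logExpireToken() above captures a general pattern: when a critical section must not be entered by a thread that has already been interrupted (here, because an interrupt during logSync would close the edit log files), check and clear the interrupt status inside the lock before doing the work. A generic, Hadoop-free sketch of that pattern; the lock object and runnable are placeholders.

import java.io.InterruptedIOException;

// Generic restatement of the check-interrupt-inside-the-lock pattern used by
// logExpireToken()/logUpdateMasterKey(); names here are illustrative only.
public class NoInterruptSection {
  private final Object noInterruptsLock = new Object();

  public void runUninterruptibly(Runnable criticalWork)
      throws InterruptedIOException {
    synchronized (noInterruptsLock) {
      // Thread.interrupted() both tests and clears the flag, so the critical
      // work below cannot observe a pending interrupt.
      if (Thread.interrupted()) {
        throw new InterruptedIOException(
            "Interrupted before entering critical section");
      }
      criticalWork.run();
    }
  }

  public static void main(String[] args) throws Exception {
    new NoInterruptSection().runUninterruptibly(
        () -> System.out.println("logging edit..."));
  }
}
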
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java
index b569ca686e6..123cce5b6ba 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockReader;
import org.apache.hadoop.hdfs.BlockReaderFactory;
import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.net.TcpPeerServer;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
@@ -208,9 +210,12 @@ public class JspHelper {
// Use the block name for file name.
String file = BlockReaderFactory.getFileName(addr, poolId, blockId);
BlockReader blockReader = BlockReaderFactory.newBlockReader(
- conf, s, file,
+ conf, file,
new ExtendedBlock(poolId, blockId, 0, genStamp), blockToken,
- offsetIntoBlock, amtToRead, encryptionKey);
+ offsetIntoBlock, amtToRead, true,
+ "JspHelper", TcpPeerServer.peerFromSocketAndKey(s, encryptionKey),
+ new DatanodeID(addr.getAddress().toString(),
+ addr.getHostName(), poolId, addr.getPort(), 0, 0), null, false);
byte[] buf = new byte[(int)amtToRead];
int readOffset = 0;
@@ -229,8 +234,7 @@ public class JspHelper {
amtToRead -= numRead;
readOffset += numRead;
}
- blockReader = null;
- s.close();
+ blockReader.close(null, null);
out.print(HtmlQuoting.quoteHtmlChars(new String(buf, Charsets.UTF_8)));
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index dae6a108bb3..7427e834dba 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -53,19 +53,18 @@ import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.InetSocketAddress;
-import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.UnknownHostException;
import java.nio.channels.ClosedByInterruptException;
-import java.nio.channels.ServerSocketChannel;
import java.nio.channels.SocketChannel;
import java.security.PrivilegedExceptionAction;
import java.util.AbstractList;
@@ -93,6 +92,8 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HDFSPolicyProvider;
import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.net.DomainPeerServer;
+import org.apache.hadoop.hdfs.net.TcpPeerServer;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
@@ -151,11 +152,11 @@ import org.apache.hadoop.io.ReadaheadPool;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
-import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.net.unix.DomainSocket;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
@@ -235,6 +236,7 @@ public class DataNode extends Configured
LogFactory.getLog(DataNode.class.getName() + ".clienttrace");
private static final String USAGE = "Usage: java DataNode [-rollback | -regular]";
+ static final int CURRENT_BLOCK_FORMAT_VERSION = 1;
/**
* Use {@link NetUtils#createSocketAddr(String)} instead.
@@ -252,6 +254,7 @@ public class DataNode extends Configured
public final static String EMPTY_DEL_HINT = "";
AtomicInteger xmitsInProgress = new AtomicInteger();
Daemon dataXceiverServer = null;
+ Daemon localDataXceiverServer = null;
ThreadGroup threadGroup = null;
private DNConf dnConf;
private volatile boolean heartbeatsDisabledForTests = false;
@@ -263,6 +266,7 @@ public class DataNode extends Configured
private String hostName;
private DatanodeID id;
+ final private String fileDescriptorPassingDisabledReason;
boolean isBlockTokenEnabled;
BlockPoolTokenSecretManager blockPoolTokenSecretManager;
private boolean hasAnyBlockPoolRegistered = false;
@@ -311,6 +315,24 @@ public class DataNode extends Configured
this.getHdfsBlockLocationsEnabled = conf.getBoolean(
DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
+
+ // Determine whether we should try to pass file descriptors to clients.
+ if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) {
+ String reason = DomainSocket.getLoadingFailureReason();
+ if (reason != null) {
+ LOG.warn("File descriptor passing is disabled because " + reason);
+ this.fileDescriptorPassingDisabledReason = reason;
+ } else {
+ LOG.info("File descriptor passing is enabled.");
+ this.fileDescriptorPassingDisabledReason = null;
+ }
+ } else {
+ this.fileDescriptorPassingDisabledReason =
+ "File descriptor passing was not configured.";
+ LOG.debug(this.fileDescriptorPassingDisabledReason);
+ }
+
try {
hostName = getHostName(conf);
LOG.info("Configured hostname is " + hostName);
@@ -525,25 +547,63 @@ public class DataNode extends Configured
private void initDataXceiver(Configuration conf) throws IOException {
// find free port or use privileged port provided
- ServerSocket ss;
- if (secureResources == null) {
- InetSocketAddress addr = DataNode.getStreamingAddr(conf);
- ss = (dnConf.socketWriteTimeout > 0) ?
- ServerSocketChannel.open().socket() : new ServerSocket();
- Server.bind(ss, addr, 0);
+ TcpPeerServer tcpPeerServer;
+ if (secureResources != null) {
+ tcpPeerServer = new TcpPeerServer(secureResources);
} else {
- ss = secureResources.getStreamingSocket();
+ tcpPeerServer = new TcpPeerServer(dnConf.socketWriteTimeout,
+ DataNode.getStreamingAddr(conf));
}
- ss.setReceiveBufferSize(HdfsConstants.DEFAULT_DATA_SOCKET_SIZE);
-
- streamingAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(),
- ss.getLocalPort());
-
+ tcpPeerServer.setReceiveBufferSize(HdfsConstants.DEFAULT_DATA_SOCKET_SIZE);
+ streamingAddr = tcpPeerServer.getStreamingAddr();
LOG.info("Opened streaming server at " + streamingAddr);
this.threadGroup = new ThreadGroup("dataXceiverServer");
this.dataXceiverServer = new Daemon(threadGroup,
- new DataXceiverServer(ss, conf, this));
+ new DataXceiverServer(tcpPeerServer, conf, this));
this.threadGroup.setDaemon(true); // auto destroy when empty
+
+ if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT) ||
+ conf.getBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
+ DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT)) {
+ DomainPeerServer domainPeerServer =
+ getDomainPeerServer(conf, streamingAddr.getPort());
+ if (domainPeerServer != null) {
+ this.localDataXceiverServer = new Daemon(threadGroup,
+ new DataXceiverServer(domainPeerServer, conf, this));
+ LOG.info("Listening on UNIX domain socket: " +
+ domainPeerServer.getBindPath());
+ }
+ }
+ }
+
+ static DomainPeerServer getDomainPeerServer(Configuration conf,
+ int port) throws IOException {
+ String domainSocketPath =
+ conf.getTrimmed(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+ DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT);
+ if (domainSocketPath.isEmpty()) {
+ if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
+ DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT) &&
+ (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL,
+ DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT))) {
+ LOG.warn("Although short-circuit local reads are configured, " +
+ "they are disabled because you didn't configure " +
+ DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY);
+ }
+ return null;
+ }
+ if (DomainSocket.getLoadingFailureReason() != null) {
+ throw new RuntimeException("Although a UNIX domain socket " +
+ "path is configured as " + domainSocketPath + ", we cannot " +
+ "start a localDataXceiverServer because " +
+ DomainSocket.getLoadingFailureReason());
+ }
+ DomainPeerServer domainPeerServer =
+ new DomainPeerServer(domainSocketPath, port);
+ domainPeerServer.setReceiveBufferSize(
+ HdfsConstants.DEFAULT_DATA_SOCKET_SIZE);
+ return domainPeerServer;
}
// calls specific to BP
@@ -1044,6 +1104,42 @@ public class DataNode extends Configured
return info;
}
+ @InterfaceAudience.LimitedPrivate("HDFS")
+ static public class ShortCircuitFdsUnsupportedException extends IOException {
+ private static final long serialVersionUID = 1L;
+ public ShortCircuitFdsUnsupportedException(String msg) {
+ super(msg);
+ }
+ }
+
+ @InterfaceAudience.LimitedPrivate("HDFS")
+ static public class ShortCircuitFdsVersionException extends IOException {
+ private static final long serialVersionUID = 1L;
+ public ShortCircuitFdsVersionException(String msg) {
+ super(msg);
+ }
+ }
+
+ FileInputStream[] requestShortCircuitFdsForRead(final ExtendedBlock blk,
+      final Token<BlockTokenIdentifier> token, int maxVersion)
+ throws ShortCircuitFdsUnsupportedException,
+ ShortCircuitFdsVersionException, IOException {
+ if (fileDescriptorPassingDisabledReason != null) {
+ throw new ShortCircuitFdsUnsupportedException(
+ fileDescriptorPassingDisabledReason);
+ }
+ checkBlockToken(blk, token, BlockTokenSecretManager.AccessMode.READ);
+ int blkVersion = CURRENT_BLOCK_FORMAT_VERSION;
+ if (maxVersion < blkVersion) {
+ throw new ShortCircuitFdsVersionException("Your client is too old " +
+ "to read this block! Its format version is " +
+ blkVersion + ", but the highest format version you can read is " +
+ maxVersion);
+ }
+ metrics.incrBlocksGetLocalPathInfo();
+ return data.getShortCircuitFdsForRead(blk);
+ }
+
@Override
   public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks,
       List<Token<BlockTokenIdentifier>> tokens) throws IOException,
@@ -1118,32 +1214,45 @@ public class DataNode extends Configured
if (dataXceiverServer != null) {
((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
this.dataXceiverServer.interrupt();
-
- // wait for all data receiver threads to exit
- if (this.threadGroup != null) {
- int sleepMs = 2;
- while (true) {
- this.threadGroup.interrupt();
- LOG.info("Waiting for threadgroup to exit, active threads is " +
- this.threadGroup.activeCount());
- if (this.threadGroup.activeCount() == 0) {
- break;
- }
- try {
- Thread.sleep(sleepMs);
- } catch (InterruptedException e) {}
- sleepMs = sleepMs * 3 / 2; // exponential backoff
- if (sleepMs > 1000) {
- sleepMs = 1000;
- }
+ }
+ if (localDataXceiverServer != null) {
+ ((DataXceiverServer) this.localDataXceiverServer.getRunnable()).kill();
+ this.localDataXceiverServer.interrupt();
+ }
+ // wait for all data receiver threads to exit
+ if (this.threadGroup != null) {
+ int sleepMs = 2;
+ while (true) {
+ this.threadGroup.interrupt();
+ LOG.info("Waiting for threadgroup to exit, active threads is " +
+ this.threadGroup.activeCount());
+ if (this.threadGroup.activeCount() == 0) {
+ break;
+ }
+ try {
+ Thread.sleep(sleepMs);
+ } catch (InterruptedException e) {}
+ sleepMs = sleepMs * 3 / 2; // exponential backoff
+ if (sleepMs > 1000) {
+ sleepMs = 1000;
}
}
- // wait for dataXceiveServer to terminate
+ this.threadGroup = null;
+ }
+ if (this.dataXceiverServer != null) {
+ // wait for dataXceiverServer to terminate
try {
this.dataXceiverServer.join();
} catch (InterruptedException ie) {
}
}
+ if (this.localDataXceiverServer != null) {
+ // wait for localDataXceiverServer to terminate
+ try {
+ this.localDataXceiverServer.join();
+ } catch (InterruptedException ie) {
+ }
+ }
if(blockPoolManager != null) {
try {
@@ -1538,6 +1647,9 @@ public class DataNode extends Configured
// start dataXceiveServer
dataXceiverServer.start();
+ if (localDataXceiverServer != null) {
+ localDataXceiverServer.start();
+ }
ipcServer.start();
startPlugins(conf);
}
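
getDomainPeerServer() above decides whether a local (UNIX domain socket) xceiver server should be started at all: nothing is started when no socket path is configured, and a configured path combined with an unloadable native DomainSocket library is treated as a hard error. Below is a simplified, Hadoop-free restatement of that decision using java.util.Properties in place of Configuration; the key strings, the nativeLoadError flag, and the example path are stand-ins, not the actual DFSConfigKeys constants.

import java.util.Properties;

// Simplified sketch of the getDomainPeerServer() decision logic.
// Key names and the nativeLoadError flag are placeholders for illustration.
public class DomainSocketDecisionSketch {
  static final String SHORT_CIRCUIT_KEY = "dfs.client.read.shortcircuit";
  static final String DOMAIN_SOCKET_PATH_KEY = "dfs.domain.socket.path";

  /** Returns the socket path to listen on, or null if none should be opened. */
  static String decide(Properties conf, String nativeLoadError) {
    String path = conf.getProperty(DOMAIN_SOCKET_PATH_KEY, "").trim();
    if (path.isEmpty()) {
      if (Boolean.parseBoolean(conf.getProperty(SHORT_CIRCUIT_KEY, "false"))) {
        System.err.println("short-circuit reads configured, but no "
            + DOMAIN_SOCKET_PATH_KEY + " set; falling back to TCP only");
      }
      return null;                        // no local server
    }
    if (nativeLoadError != null) {
      // A configured path that cannot be honoured is a configuration error.
      throw new RuntimeException("cannot listen on " + path + ": "
          + nativeLoadError);
    }
    return path;
  }

  public static void main(String[] args) {
    Properties conf = new Properties();
    System.out.println(decide(conf, null));          // null: nothing configured
    conf.setProperty(DOMAIN_SOCKET_PATH_KEY, "/var/run/hdfs/dn_socket");
    System.out.println(decide(conf, null));          // the configured path
  }
}
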
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
index 1d4c1c3fc70..466635b5e88 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.datanode;
import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR;
+import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR_UNSUPPORTED;
import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR_ACCESS_TOKEN;
import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.SUCCESS;
import static org.apache.hadoop.util.Time.now;
@@ -28,6 +29,8 @@ import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
+import java.io.FileDescriptor;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
@@ -39,6 +42,7 @@ import java.nio.channels.ClosedChannelException;
import java.util.Arrays;
import org.apache.commons.logging.Log;
+import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -59,12 +63,13 @@ import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
+import org.apache.hadoop.hdfs.server.datanode.DataNode.ShortCircuitFdsUnsupportedException;
+import org.apache.hadoop.hdfs.server.datanode.DataNode.ShortCircuitFdsVersionException;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.net.NetUtils;
-import org.apache.hadoop.net.SocketInputWrapper;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DataChecksum;
@@ -79,8 +84,7 @@ class DataXceiver extends Receiver implements Runnable {
public static final Log LOG = DataNode.LOG;
static final Log ClientTraceLog = DataNode.ClientTraceLog;
- private final Socket s;
- private final boolean isLocal; //is a local connection?
+ private final Peer peer;
private final String remoteAddress; // address of remote side
private final String localAddress; // local address of this daemon
private final DataNode datanode;
@@ -88,7 +92,7 @@ class DataXceiver extends Receiver implements Runnable {
private final DataXceiverServer dataXceiverServer;
private final boolean connectToDnViaHostname;
private long opStartTime; //the start time of receiving an Op
- private final SocketInputWrapper socketIn;
+ private final InputStream socketIn;
private OutputStream socketOut;
/**
@@ -97,25 +101,23 @@ class DataXceiver extends Receiver implements Runnable {
*/
private String previousOpClientName;
- public static DataXceiver create(Socket s, DataNode dn,
+ public static DataXceiver create(Peer peer, DataNode dn,
DataXceiverServer dataXceiverServer) throws IOException {
- return new DataXceiver(s, dn, dataXceiverServer);
+ return new DataXceiver(peer, dn, dataXceiverServer);
}
- private DataXceiver(Socket s,
- DataNode datanode,
+ private DataXceiver(Peer peer, DataNode datanode,
DataXceiverServer dataXceiverServer) throws IOException {
- this.s = s;
+ this.peer = peer;
this.dnConf = datanode.getDnConf();
- this.socketIn = NetUtils.getInputStream(s);
- this.socketOut = NetUtils.getOutputStream(s, dnConf.socketWriteTimeout);
- this.isLocal = s.getInetAddress().equals(s.getLocalAddress());
+ this.socketIn = peer.getInputStream();
+ this.socketOut = peer.getOutputStream();
this.datanode = datanode;
this.dataXceiverServer = dataXceiverServer;
this.connectToDnViaHostname = datanode.getDnConf().connectToDnViaHostname;
- remoteAddress = s.getRemoteSocketAddress().toString();
- localAddress = s.getLocalSocketAddress().toString();
+ remoteAddress = peer.getRemoteAddressString();
+ localAddress = peer.getLocalAddressString();
if (LOG.isDebugEnabled()) {
LOG.debug("Number of active connections is: "
@@ -155,11 +157,10 @@ class DataXceiver extends Receiver implements Runnable {
public void run() {
int opsProcessed = 0;
Op op = null;
-
- dataXceiverServer.childSockets.add(s);
-
+
+ dataXceiverServer.addPeer(peer);
try {
-
+ peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout);
InputStream input = socketIn;
if (dnConf.encryptDataTransfer) {
IOStreamPair encryptedStreams = null;
@@ -169,8 +170,9 @@ class DataXceiver extends Receiver implements Runnable {
dnConf.encryptionAlgorithm);
} catch (InvalidMagicNumberException imne) {
LOG.info("Failed to read expected encryption handshake from client " +
- "at " + s.getInetAddress() + ". Perhaps the client is running an " +
- "older version of Hadoop which does not support encryption");
+ "at " + peer.getRemoteAddressString() + ". Perhaps the client " +
+ "is running an older version of Hadoop which does not support " +
+ "encryption");
return;
}
input = encryptedStreams.in;
@@ -189,9 +191,9 @@ class DataXceiver extends Receiver implements Runnable {
try {
if (opsProcessed != 0) {
assert dnConf.socketKeepaliveTimeout > 0;
- socketIn.setTimeout(dnConf.socketKeepaliveTimeout);
+ peer.setReadTimeout(dnConf.socketKeepaliveTimeout);
} else {
- socketIn.setTimeout(dnConf.socketTimeout);
+ peer.setReadTimeout(dnConf.socketTimeout);
}
op = readOp();
} catch (InterruptedIOException ignored) {
@@ -202,7 +204,7 @@ class DataXceiver extends Receiver implements Runnable {
if (opsProcessed > 0 &&
(err instanceof EOFException || err instanceof ClosedChannelException)) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Cached " + s.toString() + " closing after " + opsProcessed + " ops");
+ LOG.debug("Cached " + peer + " closing after " + opsProcessed + " ops");
}
} else {
throw err;
@@ -212,13 +214,13 @@ class DataXceiver extends Receiver implements Runnable {
// restore normal timeout
if (opsProcessed != 0) {
- s.setSoTimeout(dnConf.socketTimeout);
+ peer.setReadTimeout(dnConf.socketTimeout);
}
opStartTime = now();
processOp(op);
++opsProcessed;
- } while (!s.isClosed() && dnConf.socketKeepaliveTimeout > 0);
+ } while (!peer.isClosed() && dnConf.socketKeepaliveTimeout > 0);
} catch (Throwable t) {
LOG.error(datanode.getDisplayName() + ":DataXceiver error processing " +
((op == null) ? "unknown" : op.name()) + " operation " +
@@ -230,9 +232,70 @@ class DataXceiver extends Receiver implements Runnable {
+ datanode.getXceiverCount());
}
updateCurrentThreadName("Cleaning up");
+ dataXceiverServer.closePeer(peer);
IOUtils.closeStream(in);
- IOUtils.closeSocket(s);
- dataXceiverServer.childSockets.remove(s);
+ }
+ }
+
+ @Override
+ public void requestShortCircuitFds(final ExtendedBlock blk,
+      final Token<BlockTokenIdentifier> token,
+ int maxVersion) throws IOException {
+ updateCurrentThreadName("Passing file descriptors for block " + blk);
+ BlockOpResponseProto.Builder bld = BlockOpResponseProto.newBuilder();
+ FileInputStream fis[] = null;
+ try {
+ if (peer.getDomainSocket() == null) {
+ throw new IOException("You cannot pass file descriptors over " +
+ "anything but a UNIX domain socket.");
+ }
+ fis = datanode.requestShortCircuitFdsForRead(blk, token, maxVersion);
+ bld.setStatus(SUCCESS);
+ bld.setShortCircuitAccessVersion(DataNode.CURRENT_BLOCK_FORMAT_VERSION);
+ } catch (ShortCircuitFdsVersionException e) {
+ bld.setStatus(ERROR_UNSUPPORTED);
+ bld.setShortCircuitAccessVersion(DataNode.CURRENT_BLOCK_FORMAT_VERSION);
+ bld.setMessage(e.getMessage());
+ } catch (ShortCircuitFdsUnsupportedException e) {
+ bld.setStatus(ERROR_UNSUPPORTED);
+ bld.setMessage(e.getMessage());
+ } catch (InvalidToken e) {
+ bld.setStatus(ERROR_ACCESS_TOKEN);
+ bld.setMessage(e.getMessage());
+ } catch (IOException e) {
+ bld.setStatus(ERROR);
+ bld.setMessage(e.getMessage());
+ }
+ try {
+ bld.build().writeDelimitedTo(socketOut);
+ if (fis != null) {
+ FileDescriptor fds[] = new FileDescriptor[fis.length];
+ for (int i = 0; i < fds.length; i++) {
+ fds[i] = fis[i].getFD();
+ }
+ byte buf[] = new byte[] { (byte)0 };
+ peer.getDomainSocket().
+ sendFileDescriptors(fds, buf, 0, buf.length);
+ }
+ } finally {
+ if (ClientTraceLog.isInfoEnabled()) {
+ DatanodeRegistration dnR = datanode.getDNRegistrationForBP(blk
+ .getBlockPoolId());
+          BlockSender.ClientTraceLog.info(
+            String.format(
+              "src: %s, dest: %s, op: %s, blockid: %s, srvID: %s, " +
+                "success: %b",
+              "127.0.0.1",                   // src IP
+              "127.0.0.1",                   // dst IP
+              "REQUEST_SHORT_CIRCUIT_FDS",   // operation
+              blk.getBlockId(),              // block id
+              dnR.getStorageID(),
+              (fis != null)
+            ));
+ }
+ if (fis != null) {
+ IOUtils.cleanup(LOG, fis);
+ }
}
}
@@ -287,8 +350,9 @@ class DataXceiver extends Receiver implements Runnable {
ClientReadStatusProto stat = ClientReadStatusProto.parseFrom(
PBHelper.vintPrefixed(in));
if (!stat.hasStatus()) {
- LOG.warn("Client " + s.getInetAddress() + " did not send a valid status " +
- "code after reading. Will close connection.");
+ LOG.warn("Client " + peer.getRemoteAddressString() +
+ " did not send a valid status code after reading. " +
+ "Will close connection.");
IOUtils.closeStream(out);
}
} catch (IOException ioe) {
@@ -321,7 +385,7 @@ class DataXceiver extends Receiver implements Runnable {
//update metrics
datanode.metrics.addReadBlockOp(elapsed());
- datanode.metrics.incrReadsFromClient(isLocal);
+ datanode.metrics.incrReadsFromClient(peer.isLocal());
}
@Override
@@ -359,8 +423,8 @@ class DataXceiver extends Receiver implements Runnable {
LOG.debug("isDatanode=" + isDatanode
+ ", isClient=" + isClient
+ ", isTransfer=" + isTransfer);
- LOG.debug("writeBlock receive buf size " + s.getReceiveBufferSize() +
- " tcp no delay " + s.getTcpNoDelay());
+ LOG.debug("writeBlock receive buf size " + peer.getReceiveBufferSize() +
+ " tcp no delay " + peer.getTcpNoDelay());
}
// We later mutate block's generation stamp and length, but we need to
@@ -391,8 +455,8 @@ class DataXceiver extends Receiver implements Runnable {
stage != BlockConstructionStage.PIPELINE_CLOSE_RECOVERY) {
// open a block receiver
blockReceiver = new BlockReceiver(block, in,
- s.getRemoteSocketAddress().toString(),
- s.getLocalSocketAddress().toString(),
+ peer.getRemoteAddressString(),
+ peer.getLocalAddressString(),
stage, latestGenerationStamp, minBytesRcvd, maxBytesRcvd,
clientname, srcDataNode, datanode, requestedChecksum);
} else {
@@ -547,7 +611,7 @@ class DataXceiver extends Receiver implements Runnable {
//update metrics
datanode.metrics.addWriteBlockOp(elapsed());
- datanode.metrics.incrWritesFromClient(isLocal);
+ datanode.metrics.incrWritesFromClient(peer.isLocal());
}
@Override
@@ -555,7 +619,7 @@ class DataXceiver extends Receiver implements Runnable {
      final Token<BlockTokenIdentifier> blockToken,
final String clientName,
final DatanodeInfo[] targets) throws IOException {
- checkAccess(null, true, blk, blockToken,
+ checkAccess(socketOut, true, blk, blockToken,
Op.TRANSFER_BLOCK, BlockTokenSecretManager.AccessMode.COPY);
previousOpClientName = clientName;
updateCurrentThreadName(Op.TRANSFER_BLOCK + " " + blk);
@@ -642,8 +706,9 @@ class DataXceiver extends Receiver implements Runnable {
}
if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start
- String msg = "Not able to copy block " + block.getBlockId() + " to "
- + s.getRemoteSocketAddress() + " because threads quota is exceeded.";
+ String msg = "Not able to copy block " + block.getBlockId() + " " +
+ "to " + peer.getRemoteAddressString() + " because threads " +
+ "quota is exceeded.";
LOG.info(msg);
sendResponse(ERROR, msg);
return;
@@ -672,7 +737,7 @@ class DataXceiver extends Receiver implements Runnable {
datanode.metrics.incrBytesRead((int) read);
datanode.metrics.incrBlocksRead();
- LOG.info("Copied " + block + " to " + s.getRemoteSocketAddress());
+ LOG.info("Copied " + block + " to " + peer.getRemoteAddressString());
} catch (IOException ioe) {
isOpSuccess = false;
LOG.info("opCopyBlock " + block + " received exception " + ioe);
@@ -717,8 +782,9 @@ class DataXceiver extends Receiver implements Runnable {
}
if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start
- String msg = "Not able to receive block " + block.getBlockId() + " from "
- + s.getRemoteSocketAddress() + " because threads quota is exceeded.";
+ String msg = "Not able to receive block " + block.getBlockId() +
+ " from " + peer.getRemoteAddressString() + " because threads " +
+ "quota is exceeded.";
LOG.warn(msg);
sendResponse(ERROR, msg);
return;
@@ -795,7 +861,7 @@ class DataXceiver extends Receiver implements Runnable {
// notify name node
datanode.notifyNamenodeReceivedBlock(block, delHint);
- LOG.info("Moved " + block + " from " + s.getRemoteSocketAddress());
+ LOG.info("Moved " + block + " from " + peer.getRemoteAddressString());
} catch (IOException ioe) {
opStatus = ERROR;
@@ -818,7 +884,7 @@ class DataXceiver extends Receiver implements Runnable {
try {
sendResponse(opStatus, errMsg);
} catch (IOException ioe) {
- LOG.warn("Error writing reply back to " + s.getRemoteSocketAddress());
+ LOG.warn("Error writing reply back to " + peer.getRemoteAddressString());
}
IOUtils.closeStream(proxyOut);
IOUtils.closeStream(blockReceiver);
@@ -872,7 +938,7 @@ class DataXceiver extends Receiver implements Runnable {
}
- private void checkAccess(DataOutputStream out, final boolean reply,
+ private void checkAccess(OutputStream out, final boolean reply,
final ExtendedBlock blk,
      final Token<BlockTokenIdentifier> t,
final Op op,
@@ -887,11 +953,6 @@ class DataXceiver extends Receiver implements Runnable {
} catch(InvalidToken e) {
try {
if (reply) {
- if (out == null) {
- out = new DataOutputStream(
- NetUtils.getOutputStream(s, dnConf.socketWriteTimeout));
- }
-
BlockOpResponseProto.Builder resp = BlockOpResponseProto.newBuilder()
.setStatus(ERROR_ACCESS_TOKEN);
if (mode == BlockTokenSecretManager.AccessMode.WRITE) {
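
requestShortCircuitFds() above never lets an exception escape before a response has been composed: each failure mode is mapped to a status code (version mismatch and unsupported configuration to ERROR_UNSUPPORTED, a bad token to ERROR_ACCESS_TOKEN, anything else to ERROR), and the descriptors are only sent after the response. The sketch below isolates that exception-to-status mapping with stand-in exception and status types; it illustrates the control flow only, not the real protobuf response.

// Stand-in types that mirror the cases handled in requestShortCircuitFds().
enum Status { SUCCESS, ERROR, ERROR_UNSUPPORTED, ERROR_ACCESS_TOKEN }

class VersionTooOldException extends Exception {}
class FdPassingUnsupportedException extends Exception {}
class InvalidTokenException extends Exception {}

public class ShortCircuitResponseSketch {
  interface FdRequest { void run() throws Exception; }

  static Status statusFor(FdRequest request) {
    try {
      request.run();
      return Status.SUCCESS;
    } catch (VersionTooOldException e) {
      return Status.ERROR_UNSUPPORTED;      // client too old for this format
    } catch (FdPassingUnsupportedException e) {
      return Status.ERROR_UNSUPPORTED;      // FD passing disabled or unavailable
    } catch (InvalidTokenException e) {
      return Status.ERROR_ACCESS_TOKEN;     // block token did not check out
    } catch (Exception e) {
      return Status.ERROR;                  // anything else
    }
  }

  public static void main(String[] args) {
    System.out.println(statusFor(() -> {}));                                      // SUCCESS
    System.out.println(statusFor(() -> { throw new InvalidTokenException(); }));  // ERROR_ACCESS_TOKEN
  }
}
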
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
index bb0f7fd81b4..2755eb415f8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
@@ -18,18 +18,16 @@
package org.apache.hadoop.hdfs.server.datanode;
import java.io.IOException;
-import java.net.ServerSocket;
-import java.net.Socket;
import java.net.SocketTimeoutException;
import java.nio.channels.AsynchronousCloseException;
-import java.util.Collections;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.net.Peer;
+import org.apache.hadoop.hdfs.net.PeerServer;
import org.apache.hadoop.hdfs.server.balancer.Balancer;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.io.IOUtils;
@@ -45,11 +43,9 @@ import org.apache.hadoop.util.Daemon;
class DataXceiverServer implements Runnable {
public static final Log LOG = DataNode.LOG;
- ServerSocket ss;
- DataNode datanode;
- // Record all sockets opened for data transfer
-  Set<Socket> childSockets = Collections.synchronizedSet(
-       new HashSet<Socket>());
+ private final PeerServer peerServer;
+ private final DataNode datanode;
+  private final Set<Peer> peers = new HashSet<Peer>();
/**
* Maximal number of concurrent xceivers per node.
@@ -109,10 +105,10 @@ class DataXceiverServer implements Runnable {
long estimateBlockSize;
- DataXceiverServer(ServerSocket ss, Configuration conf,
+ DataXceiverServer(PeerServer peerServer, Configuration conf,
DataNode datanode) {
- this.ss = ss;
+ this.peerServer = peerServer;
this.datanode = datanode;
this.maxXceiverCount =
@@ -130,12 +126,10 @@ class DataXceiverServer implements Runnable {
@Override
public void run() {
+ Peer peer = null;
while (datanode.shouldRun) {
- Socket s = null;
try {
- s = ss.accept();
- s.setTcpNoDelay(true);
- // Timeouts are set within DataXceiver.run()
+ peer = peerServer.accept();
// Make sure the xceiver count is not exceeded
int curXceiverCount = datanode.getXceiverCount();
@@ -146,7 +140,7 @@ class DataXceiverServer implements Runnable {
}
new Daemon(datanode.threadGroup,
- DataXceiver.create(s, datanode, this))
+ DataXceiver.create(peer, datanode, this))
.start();
} catch (SocketTimeoutException ignored) {
// wake up to see if should continue to run
@@ -157,10 +151,10 @@ class DataXceiverServer implements Runnable {
LOG.warn(datanode.getDisplayName() + ":DataXceiverServer: ", ace);
}
} catch (IOException ie) {
- IOUtils.closeSocket(s);
+ IOUtils.cleanup(null, peer);
LOG.warn(datanode.getDisplayName() + ":DataXceiverServer: ", ie);
} catch (OutOfMemoryError ie) {
- IOUtils.closeSocket(s);
+ IOUtils.cleanup(null, peer);
// DataNode can run out of memory if there is too many transfers.
// Log the event, Sleep for 30 seconds, other transfers may complete by
// then.
@@ -176,33 +170,35 @@ class DataXceiverServer implements Runnable {
datanode.shouldRun = false;
}
}
+ synchronized (this) {
+ for (Peer p : peers) {
+ IOUtils.cleanup(LOG, p);
+ }
+ }
try {
- ss.close();
+ peerServer.close();
} catch (IOException ie) {
LOG.warn(datanode.getDisplayName()
+ " :DataXceiverServer: close exception", ie);
}
}
-
+
void kill() {
assert datanode.shouldRun == false :
"shoudRun should be set to false before killing";
try {
- this.ss.close();
+ this.peerServer.close();
} catch (IOException ie) {
LOG.warn(datanode.getDisplayName() + ":DataXceiverServer.kill(): ", ie);
}
+ }
+
+ synchronized void addPeer(Peer peer) {
+ peers.add(peer);
+ }
- // close all the sockets that were accepted earlier
- synchronized (childSockets) {
-      for (Iterator<Socket> it = childSockets.iterator();
- it.hasNext();) {
- Socket thissock = it.next();
- try {
- thissock.close();
- } catch (IOException e) {
- }
- }
- }
+ synchronized void closePeer(Peer peer) {
+ peers.remove(peer);
+ IOUtils.cleanup(null, peer);
}
}
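
The DataXceiverServer changes replace the synchronized childSockets set with a peers set guarded by the server's own monitor: accepted peers are registered via addPeer(), removed and closed via closePeer(), and whatever is still registered is closed when the accept loop exits. A condensed, standalone sketch of that bookkeeping using plain sockets; the port and the omitted worker thread are placeholders.

import java.io.IOException;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.HashSet;
import java.util.Set;

// Standalone sketch of the accept-loop / tracked-connection pattern from
// DataXceiverServer. Error handling and threading are intentionally minimal.
public class TrackedConnectionServer implements Runnable {
  private final ServerSocket serverSocket;
  private final Set<Socket> peers = new HashSet<Socket>();
  private volatile boolean shouldRun = true;

  public TrackedConnectionServer(int port) throws IOException {
    this.serverSocket = new ServerSocket(port);
  }

  synchronized void addPeer(Socket s) { peers.add(s); }

  synchronized void closePeer(Socket s) {
    peers.remove(s);
    try { s.close(); } catch (IOException ignored) {}
  }

  @Override
  public void run() {
    while (shouldRun) {
      try {
        Socket s = serverSocket.accept();
        addPeer(s);
        // A real server would hand s to a worker thread here and have the
        // worker call closePeer(s) when it is done.
      } catch (IOException e) {
        if (shouldRun) System.err.println("accept failed: " + e);
      }
    }
    synchronized (this) {                  // close anything still registered
      for (Socket s : peers) {
        try { s.close(); } catch (IOException ignored) {}
      }
      peers.clear();
    }
    try { serverSocket.close(); } catch (IOException ignored) {}
  }

  void kill() throws IOException {
    shouldRun = false;
    serverSocket.close();                  // unblocks accept()
  }
}
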
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java
index 13ef752750b..57e8887be81 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
@@ -386,4 +387,6 @@ public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
   public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks)
throws IOException;
+ FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block)
+ throws IOException;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 86e9797fcfa..dfbd52c2070 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -76,6 +76,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.VolumeChoosingPolicy;
import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
+import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.util.DataChecksum;
@@ -1700,6 +1701,26 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
return info;
}
+ @Override // FsDatasetSpi
+ public FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block)
+ throws IOException {
+ File datafile = getBlockFile(block);
+ File metafile = FsDatasetUtil.getMetaFile(datafile,
+ block.getGenerationStamp());
+ FileInputStream fis[] = new FileInputStream[2];
+ boolean success = false;
+ try {
+ fis[0] = new FileInputStream(datafile);
+ fis[1] = new FileInputStream(metafile);
+ success = true;
+ return fis;
+ } finally {
+ if (!success) {
+ IOUtils.cleanup(null, fis);
+ }
+ }
+ }
+
@Override // FsDatasetSpi
   public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks)
throws IOException {
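
getShortCircuitFdsForRead() above opens two streams (block data and block metadata) and relies on a success flag to close both if the second open fails. That all-or-nothing open is worth a standalone illustration, since leaking the first descriptor when the second open throws is an easy mistake; the file names used here are made up for the example.

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

// Standalone sketch of the open-both-or-close-both idiom used by
// getShortCircuitFdsForRead(). Paths are placeholders.
public class PairedOpenSketch {
  static FileInputStream[] openDataAndMeta(File data, File meta)
      throws IOException {
    FileInputStream[] fis = new FileInputStream[2];
    boolean success = false;
    try {
      fis[0] = new FileInputStream(data);
      fis[1] = new FileInputStream(meta);   // if this throws, fis[0] must not leak
      success = true;
      return fis;
    } finally {
      if (!success) {
        for (FileInputStream in : fis) {
          if (in != null) {
            try { in.close(); } catch (IOException ignored) {}
          }
        }
      }
    }
  }

  public static void main(String[] args) throws IOException {
    FileInputStream[] streams =
        openDataAndMeta(new File("blk_1001"), new File("blk_1001.meta"));
    System.out.println("opened " + streams.length + " streams");
    for (FileInputStream in : streams) in.close();
  }
}
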
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index a2dfca3329b..650e29745e7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -246,8 +246,7 @@ public class FSDirectory implements Closeable {
long preferredBlockSize,
String clientName,
String clientMachine,
- DatanodeDescriptor clientNode,
- long generationStamp)
+ DatanodeDescriptor clientNode)
throws FileAlreadyExistsException, QuotaExceededException,
UnresolvedLinkException, SnapshotAccessControlException {
waitForReady();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index ee9a05776e0..dd652ec94b2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1221,7 +1221,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
writeLock();
try {
checkOperation(OperationCategory.WRITE);
-
if (isInSafeMode()) {
throw new SafeModeException("Cannot set permission for " + src, safeMode);
}
@@ -1259,7 +1258,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
writeLock();
try {
checkOperation(OperationCategory.WRITE);
-
if (isInSafeMode()) {
throw new SafeModeException("Cannot set owner for " + src, safeMode);
}
@@ -1313,9 +1311,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
LocatedBlocks getBlockLocations(String src, long offset, long length,
boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode)
throws FileNotFoundException, UnresolvedLinkException, IOException {
- FSPermissionChecker pc = getPermissionChecker();
try {
- return getBlockLocationsInt(pc, src, offset, length, doAccessTime,
+ return getBlockLocationsInt(src, offset, length, doAccessTime,
needBlockToken, checkSafeMode);
} catch (AccessControlException e) {
logAuditEvent(false, "open", src);
@@ -1323,14 +1320,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
}
- private LocatedBlocks getBlockLocationsInt(FSPermissionChecker pc,
- String src, long offset, long length, boolean doAccessTime,
- boolean needBlockToken, boolean checkSafeMode)
+ private LocatedBlocks getBlockLocationsInt(String src, long offset,
+ long length, boolean doAccessTime, boolean needBlockToken,
+ boolean checkSafeMode)
throws FileNotFoundException, UnresolvedLinkException, IOException {
- if (isPermissionEnabled) {
- checkPathAccess(pc, src, FsAction.READ);
- }
-
if (offset < 0) {
throw new HadoopIllegalArgumentException(
"Negative offset is not supported. File: " + src);
@@ -1358,13 +1351,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* Get block locations within the specified range, updating the
* access times if necessary.
*/
- private LocatedBlocks getBlockLocationsUpdateTimes(String src,
- long offset,
- long length,
- boolean doAccessTime,
- boolean needBlockToken)
- throws FileNotFoundException, UnresolvedLinkException, IOException {
-
+ private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset,
+ long length, boolean doAccessTime, boolean needBlockToken)
+ throws FileNotFoundException,
+ UnresolvedLinkException, IOException {
+ FSPermissionChecker pc = getPermissionChecker();
for (int attempt = 0; attempt < 2; attempt++) {
boolean isReadOp = (attempt == 0);
if (isReadOp) { // first attempt is with readlock
@@ -1380,6 +1371,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
} else {
checkOperation(OperationCategory.WRITE);
}
+ if (isPermissionEnabled) {
+ checkPathAccess(pc, src, FsAction.READ);
+ }
// if the namenode is in safemode, then do not update access time
if (isInSafeMode()) {
@@ -1426,6 +1420,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
void concat(String target, String [] srcs)
throws IOException, UnresolvedLinkException {
+ if(FSNamesystem.LOG.isDebugEnabled()) {
+ FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
+ " to " + target);
+ }
try {
concatInt(target, srcs);
} catch (AccessControlException e) {
@@ -1436,11 +1434,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private void concatInt(String target, String [] srcs)
throws IOException, UnresolvedLinkException {
- if(FSNamesystem.LOG.isDebugEnabled()) {
- FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) +
- " to " + target);
- }
-
// verify args
if(target.isEmpty()) {
throw new IllegalArgumentException("Target file name is empty");
@@ -1594,6 +1587,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
void setTimes(String src, long mtime, long atime)
throws IOException, UnresolvedLinkException {
+ if (!isAccessTimeSupported() && atime != -1) {
+ throw new IOException("Access time for hdfs is not configured. " +
+ " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
+ }
try {
setTimesInt(src, mtime, atime);
} catch (AccessControlException e) {
@@ -1604,16 +1601,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private void setTimesInt(String src, long mtime, long atime)
throws IOException, UnresolvedLinkException {
- if (!isAccessTimeSupported() && atime != -1) {
- throw new IOException("Access time for hdfs is not configured. " +
- " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter.");
- }
HdfsFileStatus resultingStat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
+ if (isInSafeMode()) {
+ throw new SafeModeException("Cannot set times " + src, safeMode);
+ }
// Write access is required to set access and modification times
if (isPermissionEnabled) {
@@ -1639,6 +1635,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
void createSymlink(String target, String link,
PermissionStatus dirPerms, boolean createParent)
throws IOException, UnresolvedLinkException {
+ if (!DFSUtil.isValidName(link)) {
+ throw new InvalidPathException("Invalid file name: " + link);
+ }
try {
createSymlinkInt(target, link, dirPerms, createParent);
} catch (AccessControlException e) {
@@ -1650,17 +1649,34 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private void createSymlinkInt(String target, String link,
PermissionStatus dirPerms, boolean createParent)
throws IOException, UnresolvedLinkException {
+ if (NameNode.stateChangeLog.isDebugEnabled()) {
+ NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target="
+ + target + " link=" + link);
+ }
HdfsFileStatus resultingStat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
-
+ if (isInSafeMode()) {
+ throw new SafeModeException("Cannot create symlink " + link, safeMode);
+ }
if (!createParent) {
verifyParentDir(link);
}
- createSymlinkInternal(pc, target, link, dirPerms, createParent);
+ if (!dir.isValidToCreate(link)) {
+ throw new IOException("failed to create link " + link
+ +" either because the filename is invalid or the file exists");
+ }
+ if (isPermissionEnabled) {
+ checkAncestorAccess(pc, link, FsAction.WRITE);
+ }
+ // validate that we have enough inodes.
+ checkFsObjectLimit();
+
+ // add symbolic link to namespace
+ dir.addSymlink(link, target, dirPerms, createParent);
resultingStat = getAuditFileInfo(link, false);
} finally {
writeUnlock();
@@ -1669,37 +1685,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
logAuditEvent(true, "createSymlink", link, target, resultingStat);
}
- /**
- * Create a symbolic link.
- */
- private void createSymlinkInternal(FSPermissionChecker pc, String target,
- String link, PermissionStatus dirPerms, boolean createParent)
- throws IOException, UnresolvedLinkException {
- assert hasWriteLock();
- if (NameNode.stateChangeLog.isDebugEnabled()) {
- NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target=" +
- target + " link=" + link);
- }
- if (isInSafeMode()) {
- throw new SafeModeException("Cannot create symlink " + link, safeMode);
- }
- if (!DFSUtil.isValidName(link)) {
- throw new InvalidPathException("Invalid file name: " + link);
- }
- if (!dir.isValidToCreate(link)) {
- throw new IOException("failed to create link " + link
- +" either because the filename is invalid or the file exists");
- }
- if (isPermissionEnabled) {
- checkAncestorAccess(pc, link, FsAction.WRITE);
- }
- // validate that we have enough inodes.
- checkFsObjectLimit();
-
- // add symbolic link to namespace
- dir.addSymlink(link, target, dirPerms, createParent);
- }
-
/**
* Set replication for an existing file.
*
@@ -1819,13 +1804,24 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
throws AccessControlException, SafeModeException,
FileAlreadyExistsException, UnresolvedLinkException,
FileNotFoundException, ParentNotDirectoryException, IOException {
+ if (NameNode.stateChangeLog.isDebugEnabled()) {
+ NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src
+ + ", holder=" + holder
+ + ", clientMachine=" + clientMachine
+ + ", createParent=" + createParent
+ + ", replication=" + replication
+ + ", createFlag=" + flag.toString());
+ }
+ if (!DFSUtil.isValidName(src)) {
+ throw new InvalidPathException(src);
+ }
+
boolean skipSync = false;
final HdfsFileStatus stat;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
- checkOperation(OperationCategory.WRITE);
startFileInternal(pc, src, permissions, holder, clientMachine, flag,
createParent, replication, blockSize);
stat = dir.getFileInfo(src, false);
@@ -1868,21 +1864,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
AccessControlException, UnresolvedLinkException, FileNotFoundException,
ParentNotDirectoryException, IOException {
assert hasWriteLock();
- if (NameNode.stateChangeLog.isDebugEnabled()) {
- NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src
- + ", holder=" + holder
- + ", clientMachine=" + clientMachine
- + ", createParent=" + createParent
- + ", replication=" + replication
- + ", createFlag=" + flag.toString());
- }
+ checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) {
throw new SafeModeException("Cannot create file" + src, safeMode);
}
- if (!DFSUtil.isValidName(src)) {
- throw new InvalidPathException(src);
- }
-
// Verify that the destination does not exist as a directory already.
final INodesInPath iip = dir.getINodesInPath4Write(src);
final INode inode = iip.getLastINode();
@@ -1945,9 +1930,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
checkFsObjectLimit();
// increment global generation stamp
- long genstamp = nextGenerationStamp();
INodeFileUnderConstruction newNode = dir.addFile(src, permissions,
- replication, blockSize, holder, clientMachine, clientNode, genstamp);
+ replication, blockSize, holder, clientMachine, clientNode);
if (newNode == null) {
throw new IOException("DIR* NameSystem.startFile: " +
"Unable to add file to namespace.");
@@ -2013,21 +1997,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
boolean recoverLease(String src, String holder, String clientMachine)
throws IOException {
+ if (!DFSUtil.isValidName(src)) {
+ throw new IOException("Invalid file name: " + src);
+ }
+
boolean skipSync = false;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
-
if (isInSafeMode()) {
throw new SafeModeException(
"Cannot recover the lease of " + src, safeMode);
}
- if (!DFSUtil.isValidName(src)) {
- throw new IOException("Invalid file name: " + src);
- }
-
final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
if (!inode.isUnderConstruction()) {
return true;
@@ -2151,13 +2134,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
"Append is not enabled on this NameNode. Use the " +
DFS_SUPPORT_APPEND_KEY + " configuration option to enable it.");
}
+ if (NameNode.stateChangeLog.isDebugEnabled()) {
+ NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src
+ + ", holder=" + holder
+ + ", clientMachine=" + clientMachine);
+ }
+ if (!DFSUtil.isValidName(src)) {
+ throw new InvalidPathException(src);
+ }
+
LocatedBlock lb = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
writeLock();
try {
- checkOperation(OperationCategory.WRITE);
-
lb = startFileInternal(pc, src, null, holder, clientMachine,
EnumSet.of(CreateFlag.APPEND),
false, blockManager.maxReplication, 0);
@@ -2448,21 +2438,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
boolean abandonBlock(ExtendedBlock b, String src, String holder)
throws LeaseExpiredException, FileNotFoundException,
UnresolvedLinkException, IOException {
+ if(NameNode.stateChangeLog.isDebugEnabled()) {
+ NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b
+ + "of file " + src);
+ }
checkOperation(OperationCategory.WRITE);
writeLock();
try {
checkOperation(OperationCategory.WRITE);
- //
- // Remove the block from the pending creates list
- //
- if(NameNode.stateChangeLog.isDebugEnabled()) {
- NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: "
- +b+"of file "+src);
- }
if (isInSafeMode()) {
throw new SafeModeException("Cannot abandon block " + b +
" for fle" + src, safeMode);
}
+ //
+ // Remove the block from the pending creates list
+ //
INodeFileUnderConstruction file = checkLease(src, holder);
dir.removeBlock(src, file, ExtendedBlock.getLocalBlock(b));
if(NameNode.stateChangeLog.isDebugEnabled()) {
@@ -2524,19 +2514,23 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
boolean completeFile(String src, String holder, ExtendedBlock last)
throws SafeModeException, UnresolvedLinkException, IOException {
+ if (NameNode.stateChangeLog.isDebugEnabled()) {
+ NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
+ src + " for " + holder);
+ }
checkBlock(last);
boolean success = false;
checkOperation(OperationCategory.WRITE);
writeLock();
try {
- checkOperation(OperationCategory.WRITE);
-
- success = completeFileInternal(src, holder,
- ExtendedBlock.getLocalBlock(last));
+ success = completeFileInternal(src, holder,
+ ExtendedBlock.getLocalBlock(last));
} finally {
writeUnlock();
}
getEditLog().logSync();
+ NameNode.stateChangeLog.info("DIR* completeFile: " + src + " is closed by "
+ + holder);
return success;
}
@@ -2544,10 +2538,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
String holder, Block last) throws SafeModeException,
UnresolvedLinkException, IOException {
assert hasWriteLock();
- if (NameNode.stateChangeLog.isDebugEnabled()) {
- NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " +
- src + " for " + holder);
- }
+ checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) {
throw new SafeModeException("Cannot complete file " + src, safeMode);
}
@@ -2588,9 +2579,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
finalizeINodeFileUnderConstruction(src, pendingFile,
iip.getLatestSnapshot());
-
- NameNode.stateChangeLog.info("DIR* completeFile: " + src + " is closed by "
- + holder);
return true;
}
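
The FSNamesystem hunks in this patch repeatedly apply one refactoring: cheap, state-independent work (argument validation such as DFSUtil.isValidName, and debug logging of the request) is hoisted out of the write lock, while checks that depend on namesystem state (safe mode, permissions) stay inside it. A generic sketch of that split using a plain ReentrantReadWriteLock; the method and field names are illustrative, not FSNamesystem's.

import java.io.IOException;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Generic illustration of the validate-outside / check-state-inside-the-lock
// split used throughout the FSNamesystem changes. Names are placeholders.
public class LockHoistingSketch {
  private final ReentrantReadWriteLock fsLock = new ReentrantReadWriteLock();
  private volatile boolean inSafeMode = false;

  public void rename(String src, String dst) throws IOException {
    // 1. State-independent validation and logging: no lock needed.
    if (dst == null || dst.isEmpty() || dst.contains("//")) {
      throw new IOException("Invalid name: " + dst);
    }
    System.out.println("DEBUG rename " + src + " to " + dst);

    // 2. State-dependent checks and the mutation: under the write lock.
    fsLock.writeLock().lock();
    try {
      if (inSafeMode) {
        throw new IOException("Cannot rename " + src + ": in safe mode");
      }
      // ... perform the actual namespace update here ...
    } finally {
      fsLock.writeLock().unlock();
    }
  }
}
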
@@ -2691,18 +2679,19 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private boolean renameToInt(String src, String dst)
throws IOException, UnresolvedLinkException {
- boolean status = false;
- HdfsFileStatus resultingStat = null;
if (NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src +
" to " + dst);
}
+ if (!DFSUtil.isValidName(dst)) {
+ throw new IOException("Invalid name: " + dst);
+ }
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
+ boolean status = false;
+ HdfsFileStatus resultingStat = null;
writeLock();
try {
- checkOperation(OperationCategory.WRITE);
-
status = renameToInternal(pc, src, dst);
if (status) {
resultingStat = getAuditFileInfo(dst, false);
@@ -2722,12 +2711,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private boolean renameToInternal(FSPermissionChecker pc, String src, String dst)
throws IOException, UnresolvedLinkException {
assert hasWriteLock();
+ checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) {
throw new SafeModeException("Cannot rename " + src, safeMode);
}
- if (!DFSUtil.isValidName(dst)) {
- throw new IOException("Invalid name: " + dst);
- }
if (isPermissionEnabled) {
//We should not be doing this. This is move() not renameTo().
//but for now,
@@ -2749,16 +2736,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
/** Rename src to dst */
void renameTo(String src, String dst, Options.Rename... options)
throws IOException, UnresolvedLinkException {
- HdfsFileStatus resultingStat = null;
if (NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - "
+ src + " to " + dst);
}
+ if (!DFSUtil.isValidName(dst)) {
+ throw new InvalidPathException("Invalid name: " + dst);
+ }
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
+ HdfsFileStatus resultingStat = null;
writeLock();
try {
- checkOperation(OperationCategory.WRITE);
renameToInternal(pc, src, dst, options);
resultingStat = getAuditFileInfo(dst, false);
} finally {
@@ -2777,12 +2766,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private void renameToInternal(FSPermissionChecker pc, String src, String dst,
Options.Rename... options) throws IOException {
assert hasWriteLock();
+ checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) {
throw new SafeModeException("Cannot rename " + src, safeMode);
}
- if (!DFSUtil.isValidName(dst)) {
- throw new InvalidPathException("Invalid name: " + dst);
- }
if (isPermissionEnabled) {
checkParentAccess(pc, src, FsAction.WRITE);
checkAncestorAccess(pc, dst, FsAction.WRITE);
@@ -2969,16 +2956,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
HdfsFileStatus getFileInfo(String src, boolean resolveLink)
throws AccessControlException, UnresolvedLinkException,
StandbyException, IOException {
+ if (!DFSUtil.isValidName(src)) {
+ throw new InvalidPathException("Invalid file name: " + src);
+ }
HdfsFileStatus stat = null;
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.READ);
readLock();
try {
checkOperation(OperationCategory.READ);
-
- if (!DFSUtil.isValidName(src)) {
- throw new InvalidPathException("Invalid file name: " + src);
- }
if (isPermissionEnabled) {
checkTraverse(pc, src);
}
@@ -3035,16 +3021,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private boolean mkdirsInt(String src, PermissionStatus permissions,
boolean createParent) throws IOException, UnresolvedLinkException {
- HdfsFileStatus resultingStat = null;
- boolean status = false;
if(NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src);
}
+ if (!DFSUtil.isValidName(src)) {
+ throw new InvalidPathException(src);
+ }
FSPermissionChecker pc = getPermissionChecker();
checkOperation(OperationCategory.WRITE);
+ HdfsFileStatus resultingStat = null;
+ boolean status = false;
writeLock();
try {
- checkOperation(OperationCategory.WRITE);
status = mkdirsInternal(pc, src, permissions, createParent);
if (status) {
resultingStat = dir.getFileInfo(src, false);
@@ -3066,6 +3054,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
PermissionStatus permissions, boolean createParent)
throws IOException, UnresolvedLinkException {
assert hasWriteLock();
+ checkOperation(OperationCategory.WRITE);
if (isInSafeMode()) {
throw new SafeModeException("Cannot create directory " + src, safeMode);
}
@@ -3077,9 +3066,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// a new directory is not created.
return true;
}
- if (!DFSUtil.isValidName(src)) {
- throw new InvalidPathException(src);
- }
if (isPermissionEnabled) {
checkAncestorAccess(pc, src, FsAction.WRITE);
}
@@ -3355,8 +3341,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
boolean closeFile, boolean deleteblock, DatanodeID[] newtargets,
String[] newtargetstorages)
throws IOException, UnresolvedLinkException {
- String src = "";
+ LOG.info("commitBlockSynchronization(lastblock=" + lastblock
+ + ", newgenerationstamp=" + newgenerationstamp
+ + ", newlength=" + newlength
+ + ", newtargets=" + Arrays.asList(newtargets)
+ + ", closeFile=" + closeFile
+ + ", deleteBlock=" + deleteblock
+ + ")");
checkOperation(OperationCategory.WRITE);
+ String src = "";
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@@ -3368,13 +3361,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
"Cannot commitBlockSynchronization while in safe mode",
safeMode);
}
- LOG.info("commitBlockSynchronization(lastblock=" + lastblock
- + ", newgenerationstamp=" + newgenerationstamp
- + ", newlength=" + newlength
- + ", newtargets=" + Arrays.asList(newtargets)
- + ", closeFile=" + closeFile
- + ", deleteBlock=" + deleteblock
- + ")");
final BlockInfo storedBlock = blockManager.getStoredBlock(ExtendedBlock
.getLocalBlock(lastblock));
if (storedBlock == null) {
@@ -3465,7 +3451,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
writeLock();
try {
checkOperation(OperationCategory.WRITE);
-
if (isInSafeMode()) {
throw new SafeModeException("Cannot renew lease for " + holder, safeMode);
}
@@ -4951,11 +4936,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
void reportBadBlocks(LocatedBlock[] blocks) throws IOException {
checkOperation(OperationCategory.WRITE);
+ NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
writeLock();
try {
checkOperation(OperationCategory.WRITE);
-
- NameNode.stateChangeLog.info("*DIR* reportBadBlocks");
for (int i = 0; i < blocks.length; i++) {
ExtendedBlock blk = blocks[i].getBlock();
DatanodeInfo[] nodes = blocks[i].getLocations();
@@ -5018,6 +5002,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
ExtendedBlock newBlock, DatanodeID[] newNodes)
throws IOException {
checkOperation(OperationCategory.WRITE);
+ LOG.info("updatePipeline(block=" + oldBlock
+ + ", newGenerationStamp=" + newBlock.getGenerationStamp()
+ + ", newLength=" + newBlock.getNumBytes()
+ + ", newNodes=" + Arrays.asList(newNodes)
+ + ", clientName=" + clientName
+ + ")");
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@@ -5027,12 +5017,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and "
+ oldBlock + " has different block identifier";
- LOG.info("updatePipeline(block=" + oldBlock
- + ", newGenerationStamp=" + newBlock.getGenerationStamp()
- + ", newLength=" + newBlock.getNumBytes()
- + ", newNodes=" + Arrays.asList(newNodes)
- + ", clientName=" + clientName
- + ")");
updatePipelineInternal(clientName, oldBlock, newBlock, newNodes);
} finally {
writeUnlock();
@@ -5284,7 +5268,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
writeLock();
try {
checkOperation(OperationCategory.WRITE);
-
if (isInSafeMode()) {
throw new SafeModeException("Cannot issue delegation token", safeMode);
}
@@ -5409,6 +5392,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
getEditLog().logSync();
}
+ /**
+ * Log the cancellation of expired tokens to edit logs
+ *
+ * @param id token identifier to cancel
+ */
+ public void logExpireDelegationToken(DelegationTokenIdentifier id) {
+ assert !isInSafeMode() :
+ "this should never be called while in safemode, since we stop " +
+ "the DT manager before entering safemode!";
+ // No need to hold FSN lock since we don't access any internal
+ // structures, and this is stopped before the FSN shuts itself
+ // down, etc.
+ getEditLog().logCancelDelegationToken(id);
+ }
+
private void logReassignLease(String leaseHolder, String src,
String newHolder) {
assert hasWriteLock();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 90542124076..e66611f8dab 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.BlockReaderFactory;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.net.TcpPeerServer;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@@ -559,9 +560,10 @@ public class NamenodeFsck {
String file = BlockReaderFactory.getFileName(targetAddr, block.getBlockPoolId(),
block.getBlockId());
blockReader = BlockReaderFactory.newBlockReader(
- conf, s, file, block, lblock
- .getBlockToken(), 0, -1,
- namenode.getRpcServer().getDataEncryptionKey());
+ conf, file, block, lblock.getBlockToken(), 0, -1, true, "fsck",
+ TcpPeerServer.peerFromSocketAndKey(s, namenode.getRpcServer().
+ getDataEncryptionKey()),
+ chosenNode, null, false);
} catch (IOException ex) {
// Put chosen node into dead list, continue
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto
index ca24f7a4a44..7659f2ee8fc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto
@@ -74,6 +74,8 @@ message DeleteBlockPoolResponseProto {
* Gets the file information where block and its metadata is stored
* block - block for which path information is being requested
* token - block token
+ *
+ * This message is deprecated in favor of file descriptor passing.
*/
message GetBlockLocalPathInfoRequestProto {
required ExtendedBlockProto block = 1;
@@ -84,6 +86,8 @@ message GetBlockLocalPathInfoRequestProto {
* block - block for which file path information is being returned
* localPath - file path where the block data is stored
* localMetaPath - file path where the block meta data is stored
+ *
+ * This message is deprecated in favor of file descriptor passing.
*/
message GetBlockLocalPathInfoResponseProto {
required ExtendedBlockProto block = 1;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto
index 0e78e7b3d58..968ce8f17bc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto
@@ -115,6 +115,16 @@ message OpBlockChecksumProto {
required BaseHeaderProto header = 1;
}
+message OpRequestShortCircuitAccessProto {
+ required BaseHeaderProto header = 1;
+
+ /** In order to get short-circuit access to block data, clients must set this
+ * to the highest version of the block data that they can understand.
+ * Currently 1 is the only version, but more versions may exist in the future
+ * if the on-disk format changes.
+ */
+ required uint32 maxVersion = 2;
+}
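
A brief illustrative sketch of how a client could populate this message using the Java classes protoc generates for this file; the DataTransferProtos outer class and the builder calls follow Hadoop's usual protobuf conventions and are assumptions, not code taken from this patch.

    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BaseHeaderProto;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto;

    public class ShortCircuitRequestSketch {
      // maxVersion advertises the highest block-data format version the client
      // understands; version 1 is the only one defined so far.
      static OpRequestShortCircuitAccessProto buildRequest(BaseHeaderProto header) {
        return OpRequestShortCircuitAccessProto.newBuilder()
            .setHeader(header)
            .setMaxVersion(1)
            .build();
      }
    }
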
message PacketHeaderProto {
// All fields must be fixed-length!
@@ -133,6 +143,7 @@ enum Status {
ERROR_EXISTS = 4;
ERROR_ACCESS_TOKEN = 5;
CHECKSUM_OK = 6;
+ ERROR_UNSUPPORTED = 7;
}
message PipelineAckProto {
@@ -165,6 +176,16 @@ message BlockOpResponseProto {
/** explanatory text which may be useful to log on the client side */
optional string message = 5;
+
+ /** If the server chooses to agree to the request of a client for
+ * short-circuit access, it will send a response message with the relevant
+ * file descriptors attached.
+ *
+ * In the body of the message, this version number will be set to the
+ * specific version number of the block data that the client is about to
+ * read.
+ */
+ optional uint32 shortCircuitAccessVersion = 6;
}
/**
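
To show how the new status code and version field fit together on the read path, here is a small hedged Java sketch of client-side response handling. It assumes the message's existing required status field and Hadoop's generated DataTransferProtos classes; it is an illustration, not code from this patch.

    import java.io.IOException;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;

    public class ShortCircuitResponseSketch {
      // Returns the block-data version the server agreed to serve, or throws if
      // the DataNode rejected or cannot support the short-circuit request.
      static int negotiatedVersion(BlockOpResponseProto resp) throws IOException {
        if (resp.getStatus() == Status.ERROR_UNSUPPORTED) {
          throw new IOException("short-circuit reads unsupported by this DataNode"
              + (resp.hasMessage() ? ": " + resp.getMessage() : ""));
        }
        if (resp.getStatus() != Status.SUCCESS) {
          throw new IOException("block op failed with status " + resp.getStatus());
        }
        return resp.getShortCircuitAccessVersion();
      }
    }
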
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index d10b1816c0b..30f2bc9dd0d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -1231,6 +1231,17 @@
+<property>
+  <name>dfs.domain.socket.path</name>
+  <value></value>
+  <description>
+    Optional. This is a path to a UNIX domain socket that will be used for
+    communication between the DataNode and local HDFS clients.
+    If the string "_PORT" is present in this path, it will be replaced by the
+    TCP port of the DataNode.
+  </description>
+</property>
+
  <name>dfs.datanode.fsdataset.volume.choosing.balanced-space-threshold</name>
  <value>10737418240</value>
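
The same key can also be set programmatically on a client or test Configuration. The snippet below is a minimal sketch and not part of this patch; the socket path is a made-up example, and the "_PORT" token is expanded by the DataNode as described in the property text above.

    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class DomainSocketPathExample {
      public static void main(String[] args) {
        HdfsConfiguration conf = new HdfsConfiguration();
        // Hypothetical path; the DataNode substitutes its TCP port for "_PORT".
        conf.set("dfs.domain.socket.path", "/var/run/hadoop-hdfs/dn._PORT.sock");
        System.out.println(conf.get("dfs.domain.socket.path"));
      }
    }
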
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm
new file mode 100644
index 00000000000..6792f156b38
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm
@@ -0,0 +1,68 @@
+
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~ http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+ ---
+ Hadoop Distributed File System-${project.version} - Short-Circuit Local Reads
+ ---
+ ---
+ ${maven.build.timestamp}
+
+HDFS Short-Circuit Local Reads
+
+ \[ {{{./index.html}Go Back}} \]
+
+%{toc|section=1|fromDepth=0}
+
+* {Background}
+
+ In <<<HDFS>>>, reads normally go through the <<<DataNode>>>. Thus, when the
+ client asks the <<<DataNode>>> to read a file, the <<<DataNode>>> reads that
+ file off of the disk and sends the data to the client over a TCP socket.
+ So-called "short-circuit" reads bypass the <<<DataNode>>>, allowing the client
+ to read the file directly. Obviously, this is only possible in cases where
+ the client is co-located with the data. Short-circuit reads provide a
+ substantial performance boost to many applications.
+
+* {Configuration}
+
+ To configure short-circuit local reads, you will need to enable
+ <<<libhadoop.so>>>. See
+ {{{../hadoop-common/NativeLibraries.html}Native
+ Libraries}} for details on enabling this library.
+
+ Short-circuit reads make use of a UNIX domain socket. This is a special path
+ in the filesystem that allows the client and the DataNodes to communicate.
+ You will need to set a path to this socket. The DataNode needs to be able to
+ create this path. On the other hand, it should not be possible for any user
+ except the hdfs user or root to create this path. For this reason, paths
+ under <<</var/run>>> or <<</var/lib>>> are often used.
+
+ Short-circuit local reads need to be configured on both the <<<DataNode>>>
+ and the client.
+
+* {Example Configuration}
+
+ Here is an example configuration.
+
+----
+<configuration>
+  <property>
+    <name>dfs.client.read.shortcircuit</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>dfs.domain.socket.path</name>
+    <value>/var/lib/hadoop-hdfs/dn_socket</value>
+  </property>
+</configuration>
+----
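
For context, a minimal hedged Java sketch of a client using this configuration follows. The HDFS path is hypothetical; the read falls back to the normal TCP path whenever no local replica is available.

    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.HdfsConfiguration;
    import org.apache.hadoop.io.IOUtils;

    public class ShortCircuitReadSketch {
      public static void main(String[] args) throws Exception {
        HdfsConfiguration conf = new HdfsConfiguration();
        // Same two settings as the example configuration above.
        conf.setBoolean("dfs.client.read.shortcircuit", true);
        conf.set("dfs.domain.socket.path", "/var/lib/hadoop-hdfs/dn_socket");
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(new Path("/some/replicated/file.dat"));
        byte[] buf = new byte[4096];
        int n = in.read(buf); // served locally when a replica is on this host
        System.out.println("read " + n + " bytes");
        IOUtils.closeStream(in);
      }
    }
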
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java
index 29d8063426e..f96849e64cd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java
@@ -28,9 +28,9 @@ import java.net.Socket;
import java.util.List;
import java.util.Random;
-import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.net.TcpPeerServer;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
@@ -150,12 +150,12 @@ public class BlockReaderTestUtil {
sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT);
return BlockReaderFactory.newBlockReader(
- new DFSClient.Conf(conf),
- sock, targetAddr.toString()+ ":" + block.getBlockId(), block,
+ conf,
+ targetAddr.toString()+ ":" + block.getBlockId(), block,
testBlock.getBlockToken(),
offset, lenToRead,
- conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
- true, "", null, null);
+ true, "BlockReaderTestUtil", TcpPeerServer.peerFromSocket(sock),
+ nodes[0], null, false);
}
/**
@@ -166,5 +166,4 @@ public class BlockReaderTestUtil {
int ipcport = nodes[0].getIpcPort();
return cluster.getDataNode(ipcport);
}
-
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
index 9d18c1d6433..60d89627c0a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
@@ -2188,14 +2188,27 @@ public class MiniDFSCluster {
/**
* Get file corresponding to a block
* @param storageDir storage directory
- * @param blk block to be corrupted
- * @return file corresponding to the block
+ * @param blk the block
+ * @return data file corresponding to the block
*/
public static File getBlockFile(File storageDir, ExtendedBlock blk) {
return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()),
blk.getBlockName());
}
+ /**
+ * Get the latest metadata file corresponding to a block
+ * @param storageDir storage directory
+ * @param blk the block
+ * @return metadata file corresponding to the block
+ */
+ public static File getBlockMetadataFile(File storageDir, ExtendedBlock blk) {
+ return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()),
+ blk.getBlockName() + "_" + blk.getGenerationStamp() +
+ Block.METADATA_EXTENSION);
+
+ }
+
/**
* Shut down a cluster if it is not null
* @param cluster cluster reference or null
@@ -2223,7 +2236,7 @@ public class MiniDFSCluster {
}
/**
- * Get files related to a block for a given datanode
+ * Get the block data file for a block from a given datanode
* @param dnIndex Index of the datanode to get block files for
* @param block block for which corresponding files are needed
*/
@@ -2238,6 +2251,24 @@ public class MiniDFSCluster {
}
return null;
}
+
+ /**
+ * Get the block metadata file for a block from a given datanode
+ *
+ * @param dnIndex Index of the datanode to get block files for
+ * @param block block for which corresponding files are needed
+ */
+ public static File getBlockMetadataFile(int dnIndex, ExtendedBlock block) {
+ // Check for block file in the two storage directories of the datanode
+ for (int i = 0; i <=1 ; i++) {
+ File storageDir = MiniDFSCluster.getStorageDir(dnIndex, i);
+ File blockMetaFile = getBlockMetadataFile(storageDir, block);
+ if (blockMetaFile.exists()) {
+ return blockMetaFile;
+ }
+ }
+ return null;
+ }
/**
* Throw an exception if the MiniDFSCluster is not started with a single
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java
index 7ccd5b6f52f..e35da42a7d4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java
@@ -17,90 +17,333 @@
*/
package org.apache.hadoop.hdfs;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
+import java.util.concurrent.TimeoutException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.Assert;
import org.junit.Test;
public class TestBlockReaderLocal {
- static MiniDFSCluster cluster;
- static HdfsConfiguration conf;
-
- @BeforeClass
- public static void setupCluster() throws IOException {
- conf = new HdfsConfiguration();
-
- conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
- conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
- false);
- conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
- UserGroupInformation.getCurrentUser().getShortUserName());
-
- cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
- }
-
- @AfterClass
- public static void teardownCluster() {
- cluster.shutdown();
+ public static void assertArrayRegionsEqual(byte []buf1, int off1, byte []buf2,
+ int off2, int len) {
+ for (int i = 0; i < len; i++) {
+ if (buf1[off1 + i] != buf2[off2 + i]) {
+ Assert.fail("arrays differ at byte " + i + ". " +
+ "The first array has " + (int)buf1[off1 + i] +
+ ", but the second array has " + (int)buf2[off2 + i]);
+ }
+ }
}
/**
- * Test that, in the case of an error, the position and limit of a ByteBuffer
- * are left unchanged. This is not mandated by ByteBufferReadable, but clients
- * of this class might immediately issue a retry on failure, so it's polite.
+ * Similar to IOUtils#readFully(). Reads bytes in a loop.
+ *
+ * @param reader The BlockReaderLocal to read bytes from
+ * @param buf The ByteBuffer to read into
+ * @param off The offset in the buffer to read into
+ * @param len The number of bytes to read.
+ *
+ * @throws IOException If it could not read the requested number of bytes
*/
+ private static void readFully(BlockReaderLocal reader,
+ ByteBuffer buf, int off, int len) throws IOException {
+ int amt = len;
+ while (amt > 0) {
+ buf.limit(off + len);
+ buf.position(off);
+ long ret = reader.read(buf);
+ if (ret < 0) {
+ throw new EOFException( "Premature EOF from BlockReaderLocal " +
+ "after reading " + (len - amt) + " byte(s).");
+ }
+ amt -= ret;
+ off += ret;
+ }
+ }
+
+ private static interface BlockReaderLocalTest {
+ final int TEST_LENGTH = 12345;
+ public void setup(File blockFile, boolean usingChecksums)
+ throws IOException;
+ public void doTest(BlockReaderLocal reader, byte original[])
+ throws IOException;
+ }
+
+ public void runBlockReaderLocalTest(BlockReaderLocalTest test,
+ boolean checksum) throws IOException {
+ MiniDFSCluster cluster = null;
+ HdfsConfiguration conf = new HdfsConfiguration();
+ conf.setBoolean(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, !checksum);
+ conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C");
+ FileInputStream dataIn = null, checkIn = null;
+ final Path TEST_PATH = new Path("/a");
+ final long RANDOM_SEED = 4567L;
+ BlockReaderLocal blockReaderLocal = null;
+ FSDataInputStream fsIn = null;
+ byte original[] = new byte[BlockReaderLocalTest.TEST_LENGTH];
+
+ try {
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+ cluster.waitActive();
+ FileSystem fs = cluster.getFileSystem();
+ DFSTestUtil.createFile(fs, TEST_PATH,
+ BlockReaderLocalTest.TEST_LENGTH, (short)1, RANDOM_SEED);
+ try {
+ DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
+ } catch (InterruptedException e) {
+ Assert.fail("unexpected InterruptedException during " +
+ "waitReplication: " + e);
+ } catch (TimeoutException e) {
+ Assert.fail("unexpected TimeoutException during " +
+ "waitReplication: " + e);
+ }
+ fsIn = fs.open(TEST_PATH);
+ IOUtils.readFully(fsIn, original, 0,
+ BlockReaderLocalTest.TEST_LENGTH);
+ fsIn.close();
+ fsIn = null;
+ ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
+ File dataFile = MiniDFSCluster.getBlockFile(0, block);
+ File metaFile = MiniDFSCluster.getBlockMetadataFile(0, block);
+
+ DatanodeID datanodeID = cluster.getDataNodes().get(0).getDatanodeId();
+ cluster.shutdown();
+ cluster = null;
+ test.setup(dataFile, checksum);
+ dataIn = new FileInputStream(dataFile);
+ checkIn = new FileInputStream(metaFile);
+ blockReaderLocal = new BlockReaderLocal(conf,
+ TEST_PATH.getName(), block, 0, -1,
+ dataIn, checkIn, datanodeID, checksum);
+ dataIn = null;
+ checkIn = null;
+ test.doTest(blockReaderLocal, original);
+ } finally {
+ if (fsIn != null) fsIn.close();
+ if (cluster != null) cluster.shutdown();
+ if (dataIn != null) dataIn.close();
+ if (checkIn != null) checkIn.close();
+ if (blockReaderLocal != null) blockReaderLocal.close(null, null);
+ }
+ }
+
+ private static class TestBlockReaderLocalImmediateClose
+ implements BlockReaderLocalTest {
+ @Override
+ public void setup(File blockFile, boolean usingChecksums)
+ throws IOException { }
+ @Override
+ public void doTest(BlockReaderLocal reader, byte original[])
+ throws IOException { }
+ }
+
@Test
- public void testStablePositionAfterCorruptRead() throws Exception {
- final short REPL_FACTOR = 1;
- final long FILE_LENGTH = 512L;
- cluster.waitActive();
- FileSystem fs = cluster.getFileSystem();
+ public void testBlockReaderLocalImmediateClose() throws IOException {
+ runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), true);
+ runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), false);
+ }
+
+ private static class TestBlockReaderSimpleReads
+ implements BlockReaderLocalTest {
+ @Override
+ public void setup(File blockFile, boolean usingChecksums)
+ throws IOException { }
+ @Override
+ public void doTest(BlockReaderLocal reader, byte original[])
+ throws IOException {
+ byte buf[] = new byte[TEST_LENGTH];
+ reader.readFully(buf, 0, 512);
+ assertArrayRegionsEqual(original, 0, buf, 0, 512);
+ reader.readFully(buf, 512, 512);
+ assertArrayRegionsEqual(original, 512, buf, 512, 512);
+ reader.readFully(buf, 1024, 513);
+ assertArrayRegionsEqual(original, 1024, buf, 1024, 513);
+ reader.readFully(buf, 1537, 514);
+ assertArrayRegionsEqual(original, 1537, buf, 1537, 514);
+ }
+ }
+
+ @Test
+ public void testBlockReaderSimpleReads() throws IOException {
+ runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), true);
+ }
- Path path = new Path("/corrupted");
+ @Test
+ public void testBlockReaderSimpleReadsNoChecksum() throws IOException {
+ runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), false);
+ }
+
+ private static class TestBlockReaderLocalArrayReads2
+ implements BlockReaderLocalTest {
+ @Override
+ public void setup(File blockFile, boolean usingChecksums)
+ throws IOException { }
+ @Override
+ public void doTest(BlockReaderLocal reader, byte original[])
+ throws IOException {
+ byte buf[] = new byte[TEST_LENGTH];
+ reader.readFully(buf, 0, 10);
+ assertArrayRegionsEqual(original, 0, buf, 0, 10);
+ reader.readFully(buf, 10, 100);
+ assertArrayRegionsEqual(original, 10, buf, 10, 100);
+ reader.readFully(buf, 110, 700);
+ assertArrayRegionsEqual(original, 110, buf, 110, 700);
+ reader.readFully(buf, 810, 1); // from offset 810 to offset 811
+ reader.readFully(buf, 811, 5);
+ assertArrayRegionsEqual(original, 811, buf, 811, 5);
+ reader.readFully(buf, 816, 900); // skip from offset 816 to offset 1716
+ reader.readFully(buf, 1716, 5);
+ assertArrayRegionsEqual(original, 1716, buf, 1716, 5);
+ }
+ }
+
+ @Test
+ public void testBlockReaderLocalArrayReads2() throws IOException {
+ runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(),
+ true);
+ }
- DFSTestUtil.createFile(fs, path, FILE_LENGTH, REPL_FACTOR, 12345L);
- DFSTestUtil.waitReplication(fs, path, REPL_FACTOR);
+ @Test
+ public void testBlockReaderLocalArrayReads2NoChecksum()
+ throws IOException {
+ runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(),
+ false);
+ }
- ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, path);
- int blockFilesCorrupted = cluster.corruptBlockOnDataNodes(block);
- assertEquals("All replicas not corrupted", REPL_FACTOR, blockFilesCorrupted);
+ private static class TestBlockReaderLocalByteBufferReads
+ implements BlockReaderLocalTest {
+ @Override
+ public void setup(File blockFile, boolean usingChecksums)
+ throws IOException { }
+ @Override
+ public void doTest(BlockReaderLocal reader, byte original[])
+ throws IOException {
+ ByteBuffer buf = ByteBuffer.wrap(new byte[TEST_LENGTH]);
+ readFully(reader, buf, 0, 10);
+ assertArrayRegionsEqual(original, 0, buf.array(), 0, 10);
+ readFully(reader, buf, 10, 100);
+ assertArrayRegionsEqual(original, 10, buf.array(), 10, 100);
+ readFully(reader, buf, 110, 700);
+ assertArrayRegionsEqual(original, 110, buf.array(), 110, 700);
+ reader.skip(1); // skip from offset 810 to offset 811
+ readFully(reader, buf, 811, 5);
+ assertArrayRegionsEqual(original, 811, buf.array(), 811, 5);
+ }
+ }
+
+ @Test
+ public void testBlockReaderLocalByteBufferReads()
+ throws IOException {
+ runBlockReaderLocalTest(
+ new TestBlockReaderLocalByteBufferReads(), true);
+ }
- FSDataInputStream dis = cluster.getFileSystem().open(path);
- ByteBuffer buf = ByteBuffer.allocateDirect((int)FILE_LENGTH);
- boolean sawException = false;
- try {
- dis.read(buf);
- } catch (ChecksumException ex) {
- sawException = true;
+ @Test
+ public void testBlockReaderLocalByteBufferReadsNoChecksum()
+ throws IOException {
+ runBlockReaderLocalTest(
+ new TestBlockReaderLocalByteBufferReads(), false);
+ }
+
+ private static class TestBlockReaderLocalReadCorruptStart
+ implements BlockReaderLocalTest {
+ boolean usingChecksums = false;
+ @Override
+ public void setup(File blockFile, boolean usingChecksums)
+ throws IOException {
+ RandomAccessFile bf = null;
+ this.usingChecksums = usingChecksums;
+ try {
+ bf = new RandomAccessFile(blockFile, "rw");
+ bf.write(new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0});
+ } finally {
+ if (bf != null) bf.close();
+ }
}
- assertTrue(sawException);
- assertEquals(0, buf.position());
- assertEquals(buf.capacity(), buf.limit());
-
- dis = cluster.getFileSystem().open(path);
- buf.position(3);
- buf.limit(25);
- sawException = false;
- try {
- dis.read(buf);
- } catch (ChecksumException ex) {
- sawException = true;
+ public void doTest(BlockReaderLocal reader, byte original[])
+ throws IOException {
+ byte buf[] = new byte[TEST_LENGTH];
+ if (usingChecksums) {
+ try {
+ reader.readFully(buf, 0, 10);
+ Assert.fail("did not detect corruption");
+ } catch (IOException e) {
+ // expected
+ }
+ } else {
+ reader.readFully(buf, 0, 10);
+ }
+ }
+ }
+
+ @Test
+ public void testBlockReaderLocalReadCorruptStart()
+ throws IOException {
+ runBlockReaderLocalTest(new TestBlockReaderLocalReadCorruptStart(), true);
+ }
+
+ private static class TestBlockReaderLocalReadCorrupt
+ implements BlockReaderLocalTest {
+ boolean usingChecksums = false;
+ @Override
+ public void setup(File blockFile, boolean usingChecksums)
+ throws IOException {
+ RandomAccessFile bf = null;
+ this.usingChecksums = usingChecksums;
+ try {
+ bf = new RandomAccessFile(blockFile, "rw");
+ bf.seek(1539);
+ bf.write(new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0});
+ } finally {
+ if (bf != null) bf.close();
+ }
}
- assertTrue(sawException);
- assertEquals(3, buf.position());
- assertEquals(25, buf.limit());
+ public void doTest(BlockReaderLocal reader, byte original[])
+ throws IOException {
+ byte buf[] = new byte[TEST_LENGTH];
+ try {
+ reader.readFully(buf, 0, 10);
+ assertArrayRegionsEqual(original, 0, buf, 0, 10);
+ reader.readFully(buf, 10, 100);
+ assertArrayRegionsEqual(original, 10, buf, 10, 100);
+ reader.readFully(buf, 110, 700);
+ assertArrayRegionsEqual(original, 110, buf, 110, 700);
+ reader.skip(1); // skip from offset 810 to offset 811
+ reader.readFully(buf, 811, 5);
+ assertArrayRegionsEqual(original, 811, buf, 811, 5);
+ reader.readFully(buf, 816, 900);
+ if (usingChecksums) {
+ // We should detect the corruption when using a checksum file.
+ Assert.fail("did not detect corruption");
+ }
+ } catch (ChecksumException e) {
+ if (!usingChecksums) {
+ Assert.fail("didn't expect to get ChecksumException: not " +
+ "using checksums.");
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testBlockReaderLocalReadCorrupt()
+ throws IOException {
+ runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), true);
+ runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), false);
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocalLegacy.java
new file mode 100644
index 00000000000..718b349c5bb
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocalLegacy.java
@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.net.unix.TemporarySocketDirectory;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestBlockReaderLocalLegacy {
+ @BeforeClass
+ public static void setupCluster() throws IOException {
+ DFSInputStream.tcpReadsDisabledForTesting = true;
+ DomainSocket.disableBindPathValidation();
+ }
+
+ private static HdfsConfiguration getConfiguration(
+ TemporarySocketDirectory socketDir) throws IOException {
+ HdfsConfiguration conf = new HdfsConfiguration();
+ if (socketDir == null) {
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, "");
+ } else {
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+ new File(socketDir.getDir(), "TestBlockReaderLocalLegacy.%d.sock").
+ getAbsolutePath());
+ }
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
+ false);
+ conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
+ UserGroupInformation.getCurrentUser().getShortUserName());
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false);
+ return conf;
+ }
+
+ /**
+ * Test that, in the case of an error, the position and limit of a ByteBuffer
+ * are left unchanged. This is not mandated by ByteBufferReadable, but clients
+ * of this class might immediately issue a retry on failure, so it's polite.
+ */
+ @Test
+ public void testStablePositionAfterCorruptRead() throws Exception {
+ final short REPL_FACTOR = 1;
+ final long FILE_LENGTH = 512L;
+
+ HdfsConfiguration conf = getConfiguration(null);
+ MiniDFSCluster cluster =
+ new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+ cluster.waitActive();
+ FileSystem fs = cluster.getFileSystem();
+
+ Path path = new Path("/corrupted");
+
+ DFSTestUtil.createFile(fs, path, FILE_LENGTH, REPL_FACTOR, 12345L);
+ DFSTestUtil.waitReplication(fs, path, REPL_FACTOR);
+
+ ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, path);
+ int blockFilesCorrupted = cluster.corruptBlockOnDataNodes(block);
+ assertEquals("All replicas not corrupted", REPL_FACTOR, blockFilesCorrupted);
+
+ FSDataInputStream dis = cluster.getFileSystem().open(path);
+ ByteBuffer buf = ByteBuffer.allocateDirect((int)FILE_LENGTH);
+ boolean sawException = false;
+ try {
+ dis.read(buf);
+ } catch (ChecksumException ex) {
+ sawException = true;
+ }
+
+ assertTrue(sawException);
+ assertEquals(0, buf.position());
+ assertEquals(buf.capacity(), buf.limit());
+
+ dis = cluster.getFileSystem().open(path);
+ buf.position(3);
+ buf.limit(25);
+ sawException = false;
+ try {
+ dis.read(buf);
+ } catch (ChecksumException ex) {
+ sawException = true;
+ }
+
+ assertTrue(sawException);
+ assertEquals(3, buf.position());
+ assertEquals(25, buf.limit());
+ cluster.shutdown();
+ }
+
+ @Test
+ public void testBothOldAndNewShortCircuitConfigured() throws Exception {
+ final short REPL_FACTOR = 1;
+ final int FILE_LENGTH = 512;
+ Assume.assumeTrue(null == DomainSocket.getLoadingFailureReason());
+ TemporarySocketDirectory socketDir = new TemporarySocketDirectory();
+ HdfsConfiguration conf = getConfiguration(socketDir);
+ MiniDFSCluster cluster =
+ new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+ cluster.waitActive();
+ socketDir.close();
+ FileSystem fs = cluster.getFileSystem();
+
+ Path path = new Path("/foo");
+ byte orig[] = new byte[FILE_LENGTH];
+ for (int i = 0; i < orig.length; i++) {
+ orig[i] = (byte)(i%10);
+ }
+ FSDataOutputStream fos = fs.create(path, (short)1);
+ fos.write(orig);
+ fos.close();
+ DFSTestUtil.waitReplication(fs, path, REPL_FACTOR);
+ FSDataInputStream fis = cluster.getFileSystem().open(path);
+ byte buf[] = new byte[FILE_LENGTH];
+ IOUtils.readFully(fis, buf, 0, FILE_LENGTH);
+ fis.close();
+ Assert.assertArrayEquals(orig, buf);
+ Arrays.equals(orig, buf);
+ cluster.shutdown();
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java
index 8dd3d6fd38a..4a88e0b1d30 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java
@@ -61,7 +61,7 @@ public class TestClientBlockVerification {
util.getBlockReader(testBlock, 0, FILE_SIZE_K * 1024));
util.readAndCheckEOS(reader, FILE_SIZE_K * 1024, true);
verify(reader).sendReadResult(Status.CHECKSUM_OK);
- reader.close();
+ reader.close(null, null);
}
/**
@@ -76,7 +76,7 @@ public class TestClientBlockVerification {
// We asked the blockreader for the whole file, and only read
// half of it, so no CHECKSUM_OK
verify(reader, never()).sendReadResult(Status.CHECKSUM_OK);
- reader.close();
+ reader.close(null, null);
}
/**
@@ -92,7 +92,7 @@ public class TestClientBlockVerification {
// And read half the file
util.readAndCheckEOS(reader, FILE_SIZE_K * 1024 / 2, true);
verify(reader).sendReadResult(Status.CHECKSUM_OK);
- reader.close();
+ reader.close(null, null);
}
/**
@@ -111,7 +111,7 @@ public class TestClientBlockVerification {
util.getBlockReader(testBlock, startOffset, length));
util.readAndCheckEOS(reader, length, true);
verify(reader).sendReadResult(Status.CHECKSUM_OK);
- reader.close();
+ reader.close(null, null);
}
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java
index d9020e0bd0a..b140bae1109 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java
@@ -18,28 +18,20 @@
package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.spy;
import java.io.IOException;
import java.net.InetSocketAddress;
-import java.net.Socket;
-import java.security.PrivilegedExceptionAction;
+
+import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
-import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
-import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.security.token.Token;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
import org.junit.Test;
import org.mockito.Matchers;
import org.mockito.Mockito;
@@ -55,59 +47,31 @@ public class TestConnCache {
static final int BLOCK_SIZE = 4096;
static final int FILE_SIZE = 3 * BLOCK_SIZE;
- final static int CACHE_SIZE = 4;
- final static long CACHE_EXPIRY_MS = 200;
- static Configuration conf = null;
- static MiniDFSCluster cluster = null;
- static FileSystem fs = null;
- static SocketCache cache;
-
- static final Path testFile = new Path("/testConnCache.dat");
- static byte authenticData[] = null;
-
- static BlockReaderTestUtil util = null;
-
/**
* A mock Answer to remember the BlockReader used.
*
* It verifies that all invocation to DFSInputStream.getBlockReader()
- * use the same socket.
+ * use the same peer.
*/
private class MockGetBlockReader implements Answer {
public RemoteBlockReader2 reader = null;
- private Socket sock = null;
+ private Peer peer = null;
@Override
public RemoteBlockReader2 answer(InvocationOnMock invocation) throws Throwable {
RemoteBlockReader2 prevReader = reader;
reader = (RemoteBlockReader2) invocation.callRealMethod();
- if (sock == null) {
- sock = reader.dnSock;
+ if (peer == null) {
+ peer = reader.getPeer();
} else if (prevReader != null) {
- assertSame("DFSInputStream should use the same socket",
- sock, reader.dnSock);
+ Assert.assertSame("DFSInputStream should use the same peer",
+ peer, reader.getPeer());
}
return reader;
}
}
- @BeforeClass
- public static void setupCluster() throws Exception {
- final int REPLICATION_FACTOR = 1;
-
- /* create a socket cache. There is only one socket cache per jvm */
- cache = SocketCache.getInstance(CACHE_SIZE, CACHE_EXPIRY_MS);
-
- util = new BlockReaderTestUtil(REPLICATION_FACTOR);
- cluster = util.getCluster();
- conf = util.getConf();
- fs = cluster.getFileSystem();
-
- authenticData = util.writeFile(testFile, FILE_SIZE / 1024);
- }
-
-
/**
* (Optionally) seek to position, read and verify data.
*
@@ -117,9 +81,10 @@ public class TestConnCache {
long pos,
byte[] buffer,
int offset,
- int length)
+ int length,
+ byte[] authenticData)
throws IOException {
- assertTrue("Test buffer too small", buffer.length >= offset + length);
+ Assert.assertTrue("Test buffer too small", buffer.length >= offset + length);
if (pos >= 0)
in.seek(pos);
@@ -129,7 +94,7 @@ public class TestConnCache {
while (length > 0) {
int cnt = in.read(buffer, offset, length);
- assertTrue("Error in read", cnt > 0);
+ Assert.assertTrue("Error in read", cnt > 0);
offset += cnt;
length -= cnt;
}
@@ -144,116 +109,23 @@ public class TestConnCache {
}
}
- /**
- * Test the SocketCache itself.
- */
- @Test
- public void testSocketCache() throws Exception {
- // Make a client
- InetSocketAddress nnAddr =
- new InetSocketAddress("localhost", cluster.getNameNodePort());
- DFSClient client = new DFSClient(nnAddr, conf);
-
- // Find out the DN addr
- LocatedBlock block =
- client.getNamenode().getBlockLocations(
- testFile.toString(), 0, FILE_SIZE)
- .getLocatedBlocks().get(0);
- DataNode dn = util.getDataNode(block);
- InetSocketAddress dnAddr = dn.getXferAddress();
-
-
- // Make some sockets to the DN
- Socket[] dnSockets = new Socket[CACHE_SIZE];
- for (int i = 0; i < dnSockets.length; ++i) {
- dnSockets[i] = client.socketFactory.createSocket(
- dnAddr.getAddress(), dnAddr.getPort());
- }
-
-
- // Insert a socket to the NN
- Socket nnSock = new Socket(nnAddr.getAddress(), nnAddr.getPort());
- cache.put(nnSock, null);
- assertSame("Read the write", nnSock, cache.get(nnAddr).sock);
- cache.put(nnSock, null);
-
- // Insert DN socks
- for (Socket dnSock : dnSockets) {
- cache.put(dnSock, null);
- }
-
- assertEquals("NN socket evicted", null, cache.get(nnAddr));
- assertTrue("Evicted socket closed", nnSock.isClosed());
-
- // Lookup the DN socks
- for (Socket dnSock : dnSockets) {
- assertEquals("Retrieve cached sockets", dnSock, cache.get(dnAddr).sock);
- dnSock.close();
- }
-
- assertEquals("Cache is empty", 0, cache.size());
- }
-
-
- /**
- * Test the SocketCache expiry.
- * Verify that socket cache entries expire after the set
- * expiry time.
- */
- @Test
- public void testSocketCacheExpiry() throws Exception {
- // Make a client
- InetSocketAddress nnAddr =
- new InetSocketAddress("localhost", cluster.getNameNodePort());
- DFSClient client = new DFSClient(nnAddr, conf);
-
- // Find out the DN addr
- LocatedBlock block =
- client.getNamenode().getBlockLocations(
- testFile.toString(), 0, FILE_SIZE)
- .getLocatedBlocks().get(0);
- DataNode dn = util.getDataNode(block);
- InetSocketAddress dnAddr = dn.getXferAddress();
-
-
- // Make some sockets to the DN and put in cache
- Socket[] dnSockets = new Socket[CACHE_SIZE];
- for (int i = 0; i < dnSockets.length; ++i) {
- dnSockets[i] = client.socketFactory.createSocket(
- dnAddr.getAddress(), dnAddr.getPort());
- cache.put(dnSockets[i], null);
- }
-
- // Client side still has the sockets cached
- assertEquals(CACHE_SIZE, client.socketCache.size());
-
- //sleep for a second and see if it expired
- Thread.sleep(CACHE_EXPIRY_MS + 1000);
-
- // Client side has no sockets cached
- assertEquals(0, client.socketCache.size());
-
- //sleep for another second and see if
- //the daemon thread runs fine on empty cache
- Thread.sleep(CACHE_EXPIRY_MS + 1000);
- }
-
-
/**
* Read a file served entirely from one DN. Seek around and read from
* different offsets. And verify that they all use the same socket.
- *
- * @throws java.io.IOException
+ * @throws Exception
*/
@Test
@SuppressWarnings("unchecked")
- public void testReadFromOneDN() throws IOException {
- LOG.info("Starting testReadFromOneDN()");
+ public void testReadFromOneDN() throws Exception {
+ BlockReaderTestUtil util = new BlockReaderTestUtil(1,
+ new HdfsConfiguration());
+ final Path testFile = new Path("/testConnCache.dat");
+ byte authenticData[] = util.writeFile(testFile, FILE_SIZE / 1024);
DFSClient client = new DFSClient(
- new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
- DFSInputStream in = spy(client.open(testFile.toString()));
+ new InetSocketAddress("localhost",
+ util.getCluster().getNameNodePort()), util.getConf());
+ DFSInputStream in = Mockito.spy(client.open(testFile.toString()));
LOG.info("opened " + testFile.toString());
-
byte[] dataBuf = new byte[BLOCK_SIZE];
MockGetBlockReader answer = new MockGetBlockReader();
@@ -270,18 +142,15 @@ public class TestConnCache {
Matchers.anyString());
// Initial read
- pread(in, 0, dataBuf, 0, dataBuf.length);
+ pread(in, 0, dataBuf, 0, dataBuf.length, authenticData);
// Read again and verify that the socket is the same
- pread(in, FILE_SIZE - dataBuf.length, dataBuf, 0, dataBuf.length);
- pread(in, 1024, dataBuf, 0, dataBuf.length);
- pread(in, -1, dataBuf, 0, dataBuf.length); // No seek; just read
- pread(in, 64, dataBuf, 0, dataBuf.length / 2);
+ pread(in, FILE_SIZE - dataBuf.length, dataBuf, 0, dataBuf.length,
+ authenticData);
+ pread(in, 1024, dataBuf, 0, dataBuf.length, authenticData);
+ // No seek; just read
+ pread(in, -1, dataBuf, 0, dataBuf.length, authenticData);
+ pread(in, 64, dataBuf, 0, dataBuf.length / 2, authenticData);
in.close();
}
-
- @AfterClass
- public static void teardownCluster() throws Exception {
- util.shutdown();
- }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java
index b0879683637..2e7c8b47d36 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java
@@ -53,7 +53,7 @@ import com.google.common.base.Joiner;
*/
public class TestDFSUpgrade {
- private static final int EXPECTED_TXID = 49;
+ private static final int EXPECTED_TXID = 45;
private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName());
private Configuration conf;
private int testCounter = 0;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java
index 1522a533933..3c9ee25b111 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
+import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
@@ -70,7 +71,7 @@ public class TestDataTransferKeepalive {
.numDataNodes(1).build();
fs = cluster.getFileSystem();
dfsClient = ((DistributedFileSystem)fs).dfs;
- dfsClient.socketCache.clear();
+ dfsClient.peerCache.clear();
String poolId = cluster.getNamesystem().getBlockPoolId();
dn = cluster.getDataNodes().get(0);
@@ -93,13 +94,13 @@ public class TestDataTransferKeepalive {
DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
// Clients that write aren't currently re-used.
- assertEquals(0, dfsClient.socketCache.size());
+ assertEquals(0, dfsClient.peerCache.size());
assertXceiverCount(0);
// Reads the file, so we should get a
// cached socket, and should have an xceiver on the other side.
DFSTestUtil.readFile(fs, TEST_FILE);
- assertEquals(1, dfsClient.socketCache.size());
+ assertEquals(1, dfsClient.peerCache.size());
assertXceiverCount(1);
// Sleep for a bit longer than the keepalive timeout
@@ -110,13 +111,13 @@ public class TestDataTransferKeepalive {
// The socket is still in the cache, because we don't
// notice that it's closed until we try to read
// from it again.
- assertEquals(1, dfsClient.socketCache.size());
+ assertEquals(1, dfsClient.peerCache.size());
// Take it out of the cache - reading should
// give an EOF.
- Socket s = dfsClient.socketCache.get(dnAddr).sock;
- assertNotNull(s);
- assertEquals(-1, NetUtils.getInputStream(s).read());
+ Peer peer = dfsClient.peerCache.get(dn.getDatanodeId(), false);
+ assertNotNull(peer);
+ assertEquals(-1, peer.getInputStream().read());
}
/**
@@ -175,14 +176,14 @@ public class TestDataTransferKeepalive {
}
DFSClient client = ((DistributedFileSystem)fs).dfs;
- assertEquals(5, client.socketCache.size());
+ assertEquals(5, client.peerCache.size());
// Let all the xceivers timeout
Thread.sleep(1500);
assertXceiverCount(0);
// Client side still has the sockets cached
- assertEquals(5, client.socketCache.size());
+ assertEquals(5, client.peerCache.size());
// Reading should not throw an exception.
DFSTestUtil.readFile(fs, TEST_FILE);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java
new file mode 100644
index 00000000000..f7fb128bb1b
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Test;
+
+/**
+ * This class tests disabling client connection caching in a single node
+ * mini-cluster.
+ */
+public class TestDisableConnCache {
+ static final Log LOG = LogFactory.getLog(TestDisableConnCache.class);
+
+ static final int BLOCK_SIZE = 4096;
+ static final int FILE_SIZE = 3 * BLOCK_SIZE;
+
+ /**
+ * Test that the socket cache can be disabled by setting the capacity to
+ * 0. Regression test for HDFS-3365.
+ * @throws Exception
+ */
+ @Test
+ public void testDisableCache() throws Exception {
+ HdfsConfiguration confWithoutCache = new HdfsConfiguration();
+ // Configure a new instance with no peer caching, ensure that it doesn't
+ // cache anything
+ confWithoutCache.setInt(
+ DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY, 0);
+ BlockReaderTestUtil util = new BlockReaderTestUtil(1, confWithoutCache);
+ final Path testFile = new Path("/testConnCache.dat");
+ util.writeFile(testFile, FILE_SIZE / 1024);
+ FileSystem fsWithoutCache = FileSystem.newInstance(util.getConf());
+ try {
+ DFSTestUtil.readFile(fsWithoutCache, testFile);
+ assertEquals(0, ((DistributedFileSystem)fsWithoutCache).dfs.peerCache.size());
+ } finally {
+ fsWithoutCache.close();
+ util.shutdown();
+ }
+ }
+}
\ No newline at end of file
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java
new file mode 100644
index 00000000000..7f28a43ceb4
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.unix.TemporarySocketDirectory;
+import org.junit.Test;
+
+public class TestFileInputStreamCache {
+ static final Log LOG = LogFactory.getLog(TestFileInputStreamCache.class);
+
+ @Test
+ public void testCreateAndDestroy() throws Exception {
+ FileInputStreamCache cache = new FileInputStreamCache(10, 1000);
+ cache.close();
+ }
+
+ private static class TestFileDescriptorPair {
+ TemporarySocketDirectory dir = new TemporarySocketDirectory();
+ FileInputStream fis[];
+
+ public TestFileDescriptorPair() throws IOException {
+ fis = new FileInputStream[2];
+ for (int i = 0; i < 2; i++) {
+ String name = dir.getDir() + "/file" + i;
+ FileOutputStream fos = new FileOutputStream(name);
+ fos.write(1);
+ fos.close();
+ fis[i] = new FileInputStream(name);
+ }
+ }
+
+ public FileInputStream[] getFileInputStreams() {
+ return fis;
+ }
+
+ public void close() throws IOException {
+ IOUtils.cleanup(LOG, fis);
+ dir.close();
+ }
+
+ public boolean compareWith(FileInputStream other[]) {
+ if ((other == null) || (fis == null)) {
+ return other == fis;
+ }
+ if (fis.length != other.length) return false;
+ for (int i = 0; i < fis.length; i++) {
+ if (fis[i] != other[i]) return false;
+ }
+ return true;
+ }
+ }
+
+ @Test
+ public void testAddAndRetrieve() throws Exception {
+ FileInputStreamCache cache = new FileInputStreamCache(1, 1000000);
+ DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
+ "xyzzy", 8080, 9090, 7070);
+ ExtendedBlock block = new ExtendedBlock("poolid", 123);
+ TestFileDescriptorPair pair = new TestFileDescriptorPair();
+ cache.put(dnId, block, pair.getFileInputStreams());
+ FileInputStream fis[] = cache.get(dnId, block);
+ Assert.assertTrue(pair.compareWith(fis));
+ pair.close();
+ cache.close();
+ }
+
+ @Test
+ public void testExpiry() throws Exception {
+ FileInputStreamCache cache = new FileInputStreamCache(1, 10);
+ DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
+ "xyzzy", 8080, 9090, 7070);
+ ExtendedBlock block = new ExtendedBlock("poolid", 123);
+ TestFileDescriptorPair pair = new TestFileDescriptorPair();
+ cache.put(dnId, block, pair.getFileInputStreams());
+ Thread.sleep(cache.getExpiryTimeMs() * 100);
+ FileInputStream fis[] = cache.get(dnId, block);
+ Assert.assertNull(fis);
+ pair.close();
+ cache.close();
+ }
+
+ @Test
+ public void testEviction() throws Exception {
+ FileInputStreamCache cache = new FileInputStreamCache(1, 10000000);
+ DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
+ "xyzzy", 8080, 9090, 7070);
+ ExtendedBlock block = new ExtendedBlock("poolid", 123);
+ TestFileDescriptorPair pair = new TestFileDescriptorPair();
+ cache.put(dnId, block, pair.getFileInputStreams());
+ DatanodeID dnId2 = new DatanodeID("127.0.0.1", "localhost",
+ "xyzzy", 8081, 9091, 7071);
+ TestFileDescriptorPair pair2 = new TestFileDescriptorPair();
+ cache.put(dnId2, block, pair2.getFileInputStreams());
+ FileInputStream fis[] = cache.get(dnId, block);
+ Assert.assertNull(fis);
+ FileInputStream fis2[] = cache.get(dnId2, block);
+ Assert.assertTrue(pair2.compareWith(fis2));
+ pair.close();
+ cache.close();
+ }
+}
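The three behaviours exercised above (retrieve, expiry, eviction) amount to a bounded-size, expire-after-write cache keyed by datanode and block. FileInputStreamCache is a hand-rolled cache inside HDFS; purely as a hypothetical analogy, the semantics the tests assert can be sketched with Guava's CacheBuilder (the size-1 bound and 10 ms expiry mirror the test parameters; this is not the HDFS implementation):

import java.io.FileInputStream;
import java.util.concurrent.TimeUnit;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

public class StreamCacheSketch {
  public static void main(String[] args) throws Exception {
    Cache<String, FileInputStream[]> cache = CacheBuilder.newBuilder()
        .maximumSize(1)                               // size bound of 1: a second entry evicts the first (cf. testEviction)
        .expireAfterWrite(10, TimeUnit.MILLISECONDS)  // entries lapse after 10 ms (cf. testExpiry)
        .build();
    cache.put("dn1/poolid/123", new FileInputStream[0]);
    // Immediately after the put, the entry is retrievable (cf. testAddAndRetrieve).
    System.out.println(cache.getIfPresent("dn1/poolid/123") != null);
    Thread.sleep(1000);
    // Well past the expiry interval, the entry is gone.
    System.out.println(cache.getIfPresent("dn1/poolid/123") == null);
  }
}
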
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java
index fa384cde7a6..c06fab50d11 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java
@@ -17,53 +17,30 @@
*/
package org.apache.hadoop.hdfs;
-import java.io.IOException;
-
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol;
import org.apache.log4j.Level;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Test;
public class TestParallelRead extends TestParallelReadUtil {
-
@BeforeClass
static public void setupCluster() throws Exception {
- setupCluster(DEFAULT_REPLICATION_FACTOR, new HdfsConfiguration());
+ // This is a test of the normal (TCP) read path. For this reason, we turn
+ // off both short-circuit local reads and UNIX domain socket data traffic.
+ HdfsConfiguration conf = new HdfsConfiguration();
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false);
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
+ false);
+ // dfs.domain.socket.path should be ignored because the previous two keys
+ // were set to false. This is a regression test for HDFS-4473.
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, "/will/not/be/created");
+
+ setupCluster(DEFAULT_REPLICATION_FACTOR, conf);
}
@AfterClass
static public void teardownCluster() throws Exception {
TestParallelReadUtil.teardownCluster();
}
-
- /**
- * Do parallel read several times with different number of files and threads.
- *
- * Note that while this is the only "test" in a junit sense, we're actually
- * dispatching a lot more. Failures in the other methods (and other threads)
- * need to be manually collected, which is inconvenient.
- */
- @Test
- public void testParallelReadCopying() throws IOException {
- runTestWorkload(new CopyingReadWorkerHelper());
- }
-
- @Test
- public void testParallelReadByteBuffer() throws IOException {
- runTestWorkload(new DirectReadWorkerHelper());
- }
-
- @Test
- public void testParallelReadMixed() throws IOException {
- runTestWorkload(new MixedWorkloadHelper());
- }
-
- @Test
- public void testParallelNoChecksums() throws IOException {
- verifyChecksums = false;
- runTestWorkload(new MixedWorkloadHelper());
- }
-
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java
index 51c3200d2ce..5965fec2874 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java
@@ -32,12 +32,18 @@ import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
+import org.junit.Assume;
+import org.junit.Ignore;
+import org.junit.Test;
/**
* Driver class for testing the use of DFSInputStream by multiple concurrent
- * readers, using the different read APIs. See subclasses for the actual test
- * cases.
+ * readers, using the different read APIs.
+ *
+ * This class is marked as @Ignore so that JUnit does not try to execute the
+ * tests in this class directly; they are executed from its subclasses.
*/
+@Ignore
public class TestParallelReadUtil {
static final Log LOG = LogFactory.getLog(TestParallelReadUtil.class);
@@ -388,4 +394,31 @@ public class TestParallelReadUtil {
util.shutdown();
}
+ /**
+   * Do parallel reads several times with different numbers of files and threads.
+   *
+   * Note that while each of these is a single "test" in the JUnit sense, it
+   * actually dispatches a lot more work. Failures in the worker methods (and
+   * worker threads) need to be collected manually, which is inconvenient.
+ */
+ @Test
+ public void testParallelReadCopying() throws IOException {
+ runTestWorkload(new CopyingReadWorkerHelper());
+ }
+
+ @Test
+ public void testParallelReadByteBuffer() throws IOException {
+ runTestWorkload(new DirectReadWorkerHelper());
+ }
+
+ @Test
+ public void testParallelReadMixed() throws IOException {
+ runTestWorkload(new MixedWorkloadHelper());
+ }
+
+ @Test
+ public void testParallelNoChecksums() throws IOException {
+ verifyChecksums = false;
+ runTestWorkload(new MixedWorkloadHelper());
+ }
}
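The @Ignore-plus-subclass arrangement that the javadoc above describes is standard JUnit 4. A stripped-down sketch with invented class names (the real ones are TestParallelReadUtil and the TestParallel* subclasses in this patch):

// File: AbstractReadSuite.java -- base driver, skipped when run on its own
import org.junit.Ignore;
import org.junit.Test;

@Ignore // JUnit skips this class directly; subclasses still run the inherited tests.
public class AbstractReadSuite {
  protected static String mode = "unset";

  @Test
  public void testWorkload() {
    // In the real driver this dispatches many reader threads.
    System.out.println("running workload in mode " + mode);
  }
}

// File: TcpReadSuite.java -- one concrete variant
import org.junit.BeforeClass;

public class TcpReadSuite extends AbstractReadSuite {
  @BeforeClass
  public static void setup() {
    mode = "tcp"; // each subclass contributes only its configuration
  }
}
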
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitLegacyRead.java
similarity index 61%
rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelLocalRead.java
rename to hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitLegacyRead.java
index 05702d2aa4b..4ba31bdbb61 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelLocalRead.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitLegacyRead.java
@@ -17,25 +17,25 @@
*/
package org.apache.hadoop.hdfs;
-import java.io.IOException;
-
+import org.apache.hadoop.net.unix.DomainSocket;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class TestParallelLocalRead extends TestParallelReadUtil {
+public class TestParallelShortCircuitLegacyRead extends TestParallelReadUtil {
@BeforeClass
static public void setupCluster() throws Exception {
+ DFSInputStream.tcpReadsDisabledForTesting = true;
HdfsConfiguration conf = new HdfsConfiguration();
-
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, "");
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false);
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
- conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
- false);
+ conf.setBoolean(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
UserGroupInformation.getCurrentUser().getShortUserName());
-
+ DomainSocket.disableBindPathValidation();
setupCluster(1, conf);
}
@@ -43,26 +43,4 @@ public class TestParallelLocalRead extends TestParallelReadUtil {
static public void teardownCluster() throws Exception {
TestParallelReadUtil.teardownCluster();
}
-
- /**
- * Do parallel read several times with different number of files and threads.
- *
- * Note that while this is the only "test" in a junit sense, we're actually
- * dispatching a lot more. Failures in the other methods (and other threads)
- * need to be manually collected, which is inconvenient.
- */
- @Test
- public void testParallelReadCopying() throws IOException {
- runTestWorkload(new CopyingReadWorkerHelper());
- }
-
- @Test
- public void testParallelReadByteBuffer() throws IOException {
- runTestWorkload(new DirectReadWorkerHelper());
- }
-
- @Test
- public void testParallelReadMixed() throws IOException {
- runTestWorkload(new MixedWorkloadHelper());
- }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java
new file mode 100644
index 00000000000..a0450fa1a23
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.File;
+
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.net.unix.TemporarySocketDirectory;
+import org.junit.AfterClass;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import static org.hamcrest.CoreMatchers.*;
+
+public class TestParallelShortCircuitRead extends TestParallelReadUtil {
+ private static TemporarySocketDirectory sockDir;
+
+ @BeforeClass
+ static public void setupCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ DFSInputStream.tcpReadsDisabledForTesting = true;
+ sockDir = new TemporarySocketDirectory();
+ HdfsConfiguration conf = new HdfsConfiguration();
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+ new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath());
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
+ conf.setBoolean(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
+ DomainSocket.disableBindPathValidation();
+ setupCluster(1, conf);
+ }
+
+ @Before
+ public void before() {
+ Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null));
+ }
+
+ @AfterClass
+ static public void teardownCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ sockDir.close();
+ TestParallelReadUtil.teardownCluster();
+ }
+}
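The before() guard used in this and the other domain-socket tests is ordinary JUnit Assume machinery: if the native library is unavailable, the tests are reported as skipped rather than failed. A stripped-down, hypothetical version of the guard (DomainSocket.getLoadingFailureReason() is the real Hadoop call used above; the class and test names are illustrative):

import static org.hamcrest.CoreMatchers.equalTo;

import org.apache.hadoop.net.unix.DomainSocket;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;

public class NativeGuardedTest {
  @Before
  public void requireNativeDomainSockets() {
    // Null means libhadoop's DomainSocket support loaded; any non-null reason
    // fails the assumption, so JUnit marks the tests as skipped, not failed.
    Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null));
  }

  @Test
  public void testSomethingNeedingNativeCode() {
    // test body runs only when the assumption above holds
  }
}
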
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java
new file mode 100644
index 00000000000..172af31ce17
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.File;
+
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.net.unix.TemporarySocketDirectory;
+import org.junit.AfterClass;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import static org.hamcrest.CoreMatchers.*;
+
+public class TestParallelShortCircuitReadNoChecksum extends TestParallelReadUtil {
+ private static TemporarySocketDirectory sockDir;
+
+ @BeforeClass
+ static public void setupCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ DFSInputStream.tcpReadsDisabledForTesting = true;
+ sockDir = new TemporarySocketDirectory();
+ HdfsConfiguration conf = new HdfsConfiguration();
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+ new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath());
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
+ conf.setBoolean(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, true);
+ DomainSocket.disableBindPathValidation();
+ setupCluster(1, conf);
+ }
+
+ @Before
+ public void before() {
+ Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null));
+ }
+
+ @AfterClass
+ static public void teardownCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ sockDir.close();
+ TestParallelReadUtil.teardownCluster();
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadUnCached.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadUnCached.java
new file mode 100644
index 00000000000..634eef8a7e2
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadUnCached.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.File;
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.net.unix.TemporarySocketDirectory;
+import org.junit.AfterClass;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import static org.hamcrest.CoreMatchers.*;
+
+/**
+ * This class tests short-circuit local reads without any FileInputStream or
+ * Socket caching. This is a regression test for HDFS-4417.
+ */
+public class TestParallelShortCircuitReadUnCached extends TestParallelReadUtil {
+ private static TemporarySocketDirectory sockDir;
+
+ @BeforeClass
+ static public void setupCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ sockDir = new TemporarySocketDirectory();
+ HdfsConfiguration conf = new HdfsConfiguration();
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+ new File(sockDir.getDir(),
+ "TestParallelShortCircuitReadUnCached._PORT.sock").getAbsolutePath());
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
+ conf.setBoolean(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
+ conf.setBoolean(DFSConfigKeys.
+ DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, true);
+ // We want to test reading from stale sockets.
+ conf.setInt(DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 1);
+ conf.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
+ 5 * 60 * 1000);
+ conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY, 32);
+ // Avoid using the FileInputStreamCache.
+ conf.setInt(DFSConfigKeys.
+ DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY, 0);
+ DomainSocket.disableBindPathValidation();
+ DFSInputStream.tcpReadsDisabledForTesting = true;
+ setupCluster(1, conf);
+ }
+
+ @Before
+ public void before() {
+ Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null));
+ }
+
+ @AfterClass
+ static public void teardownCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ sockDir.close();
+ TestParallelReadUtil.teardownCluster();
+ }
+}
\ No newline at end of file
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java
new file mode 100644
index 00000000000..00c65f9e61e
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.File;
+
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.net.unix.TemporarySocketDirectory;
+import org.junit.AfterClass;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import static org.hamcrest.CoreMatchers.*;
+
+public class TestParallelUnixDomainRead extends TestParallelReadUtil {
+ private static TemporarySocketDirectory sockDir;
+
+ @BeforeClass
+ static public void setupCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ DFSInputStream.tcpReadsDisabledForTesting = true;
+ sockDir = new TemporarySocketDirectory();
+ HdfsConfiguration conf = new HdfsConfiguration();
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
+ new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath());
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false);
+ conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, true);
+ DomainSocket.disableBindPathValidation();
+ setupCluster(1, conf);
+ }
+
+ @Before
+ public void before() {
+ Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null));
+ }
+
+ @AfterClass
+ static public void teardownCluster() throws Exception {
+ if (DomainSocket.getLoadingFailureReason() != null) return;
+ sockDir.close();
+ TestParallelReadUtil.teardownCluster();
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java
new file mode 100644
index 00000000000..7836bc66805
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java
@@ -0,0 +1,288 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.net.Peer;
+import org.apache.hadoop.net.unix.DomainSocket;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import com.google.common.collect.HashMultiset;
+
+public class TestPeerCache {
+ static final Log LOG = LogFactory.getLog(TestPeerCache.class);
+
+ private static class FakePeer implements Peer {
+ private boolean closed = false;
+ private final boolean hasDomain;
+
+ private DatanodeID dnId;
+
+ public FakePeer(DatanodeID dnId, boolean hasDomain) {
+ this.dnId = dnId;
+ this.hasDomain = hasDomain;
+ }
+
+ @Override
+ public ReadableByteChannel getInputStreamChannel() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setReadTimeout(int timeoutMs) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getReceiveBufferSize() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean getTcpNoDelay() throws IOException {
+ return false;
+ }
+
+ @Override
+ public void setWriteTimeout(int timeoutMs) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean isClosed() {
+ return closed;
+ }
+
+ @Override
+ public void close() throws IOException {
+ closed = true;
+ }
+
+ @Override
+ public String getRemoteAddressString() {
+ return dnId.getInfoAddr();
+ }
+
+ @Override
+ public String getLocalAddressString() {
+ return "127.0.0.1:123";
+ }
+
+ @Override
+ public InputStream getInputStream() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public OutputStream getOutputStream() throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean isLocal() {
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ return "FakePeer(dnId=" + dnId + ")";
+ }
+
+ @Override
+ public DomainSocket getDomainSocket() {
+ if (!hasDomain) return null;
+ // Return a mock which throws an exception whenever any function is
+ // called.
+ return Mockito.mock(DomainSocket.class,
+ new Answer