HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths. Contributed by Colin Patrick McCabe.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1432335 13f79535-47bb-0310-9956-ffa450edef68

commit 9a4030e0e8 (parent d94621a0cd)
@@ -111,7 +111,7 @@ public class DomainSocket implements Closeable {
    * Disable validation of the server bind paths.
    */
   @VisibleForTesting
-  static void disableBindPathValidation() {
+  public static void disableBindPathValidation() {
     validateBindPaths = false;
   }
 
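The only change in this hunk is visibility: the test-only hook on DomainSocket becomes public so tests outside org.apache.hadoop.net.unix can call it. A minimal sketch of the intended use, assuming a JUnit test class (the class name and wiring below are illustrative, not part of this patch):

```java
import org.apache.hadoop.net.unix.DomainSocket;
import org.junit.BeforeClass;

public class ShortCircuitReadTestSketch {          // illustrative test class
  @BeforeClass
  public static void init() {
    // Tests typically bind domain sockets under a temporary directory whose
    // permissions would fail the normal bind-path validation, so they turn
    // the check off once, before any socket is created.
    DomainSocket.disableBindPathValidation();
  }
}
```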
@@ -104,7 +104,7 @@ public class DataChecksum implements Checksum {
                      ( (bytes[offset+2] & 0xff) << 16 ) |
                      ( (bytes[offset+3] & 0xff) << 8 )  |
                      ( (bytes[offset+4] & 0xff) );
-    return newDataChecksum( Type.valueOf(bytes[0]), bytesPerChecksum );
+    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
   }
 
   /**
@@ -7,3 +7,6 @@ HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes.
 
 HDFS-4354. Create DomainSocket and DomainPeer and associated unit tests.
 (Colin Patrick McCabe via todd)
+
+HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths.
+(Colin Patrick McCabe via todd)
@@ -290,6 +290,14 @@
       <Method name="persistPaxosData" />
       <Bug pattern="OS_OPEN_STREAM" />
     </Match>
 
+    <!-- getShortCircuitFdsForRead is supposed to return open streams. -->
+    <Match>
+      <Class name="org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl" />
+      <Method name="getShortCircuitFdsForRead" />
+      <Bug pattern="OBL_UNSATISFIED_OBLIGATION" />
+    </Match>
+
     <!-- Don't complain about LocalDatanodeInfo's anonymous class -->
     <Match>
       <Class name="org.apache.hadoop.hdfs.BlockReaderLocal$LocalDatanodeInfo$1" />
@@ -41,18 +41,29 @@ public interface BlockReader extends ByteBufferReadable {
    */
   long skip(long n) throws IOException;
 
+  /**
+   * Returns an estimate of the number of bytes that can be read
+   * (or skipped over) from this input stream without performing
+   * network I/O.
+   */
+  int available() throws IOException;
+
   /**
    * Close the block reader.
    *
    * @param peerCache      The PeerCache to put the Peer we're using back
    *                       into, or null if we should simply close the Peer
    *                       we're using (along with its Socket).
-   *                       Some block readers, like BlockReaderLocal, may
-   *                       not make use of this parameter.
+   *                       Ignored by Readers that don't maintain Peers.
+   * @param fisCache       The FileInputStreamCache to put our FileInputStreams
+   *                       back into, or null if we should simply close them.
+   *                       Ignored by Readers that don't maintain
+   *                       FileInputStreams.
    *
    * @throws IOException
    */
-  void close(PeerCache peerCache) throws IOException;
+  void close(PeerCache peerCache, FileInputStreamCache fisCache)
+      throws IOException;
 
   /**
    * Read exactly the given amount of data, throwing an exception
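For callers, the new close() contract is: hand the Peer back to peerCache and the two FileInputStreams back to fisCache when those caches are supplied, otherwise just close everything. A hedged caller-side sketch, assumed to live in the org.apache.hadoop.hdfs package next to these classes; only the interface methods shown above are taken from the patch:

```java
import java.io.IOException;

class BlockReaderCloseSketch {
  // Reads a small prefix of the current block, then recycles the reader's
  // resources instead of tearing them down.
  static void readAndRecycle(BlockReader reader, PeerCache peerCache,
      FileInputStreamCache fisCache) throws IOException {
    byte[] buf = new byte[4096];
    reader.readFully(buf, 0, buf.length);
    // A remote reader may return its Peer to peerCache; BlockReaderLocal may
    // park its data/checksum streams in fisCache. Passing null for either
    // cache makes close() simply close that resource.
    reader.close(peerCache, fisCache);
  }
}
```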
@@ -17,22 +17,26 @@
  */
 package org.apache.hadoop.hdfs;
 
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.net.InetSocketAddress;
-import java.net.Socket;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.DFSClient.Conf;
 import org.apache.hadoop.hdfs.net.Peer;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
-import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
-import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
+import org.apache.hadoop.hdfs.protocol.HdfsProtoUtil;
+import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
+import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
+import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
-import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.unix.DomainSocket;
 import org.apache.hadoop.security.token.Token;
 
 
@@ -58,6 +62,12 @@ public class BlockReaderFactory {
    * @param clientName  Client name.  Used for log messages.
    * @param peer  The peer
    * @param datanodeID  The datanode that the Peer is connected to
+   * @param domainSocketFactory  The DomainSocketFactory to notify if the Peer
+   *                             is a DomainPeer which turns out to be faulty.
+   *                             If null, no factory will be notified in this
+   *                             case.
+   * @param allowShortCircuitLocalReads  True if short-circuit local reads
+   *                                     should be allowed.
    * @return New BlockReader instance, or null on error.
    */
   @SuppressWarnings("deprecation")
@@ -70,11 +80,44 @@ public class BlockReaderFactory {
                                      boolean verifyChecksum,
                                      String clientName,
                                      Peer peer,
-                                     DatanodeID datanodeID)
-                                     throws IOException {
+                                     DatanodeID datanodeID,
+                                     DomainSocketFactory domSockFactory,
+                                     boolean allowShortCircuitLocalReads)
+                                     throws IOException {
     peer.setReadTimeout(conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY,
         HdfsServerConstants.READ_TIMEOUT));
     peer.setWriteTimeout(HdfsServerConstants.WRITE_TIMEOUT);
 
+    if (peer.getDomainSocket() != null) {
+      if (allowShortCircuitLocalReads) {
+        // If this is a domain socket, and short-circuit local reads are
+        // enabled, try to set up a BlockReaderLocal.
+        BlockReader reader = newShortCircuitBlockReader(conf, file,
+            block, blockToken, startOffset, len, peer, datanodeID,
+            domSockFactory, verifyChecksum);
+        if (reader != null) {
+          // One we've constructed the short-circuit block reader, we don't
+          // need the socket any more.  So let's return it to the cache.
+          PeerCache peerCache = PeerCache.getInstance(
+              conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY,
+                  DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT),
+              conf.getLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
+                  DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT));
+          peerCache.put(datanodeID, peer);
+          return reader;
+        }
+      }
+      // If this is a domain socket and we couldn't (or didn't want to) set
+      // up a BlockReaderLocal, check that we are allowed to pass data traffic
+      // over the socket before proceeding.
+      if (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
+            DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT)) {
+        throw new IOException("Because we can't do short-circuit access, " +
+          "and data traffic over domain sockets is disabled, " +
+          "we cannot use this socket to talk to " + datanodeID);
+      }
+    }
+
     if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER,
         DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT)) {
       return RemoteBlockReader.newBlockReader(file,
@@ -88,7 +131,94 @@ public class BlockReaderFactory {
                                        verifyChecksum, clientName, peer, datanodeID);
     }
   }
 
+  /**
+   * Create a new short-circuit BlockReader.
+   *
+   * Here, we ask the DataNode to pass us file descriptors over our
+   * DomainSocket.  If the DataNode declines to do so, we'll return null here;
+   * otherwise, we'll return the BlockReaderLocal.  If the DataNode declines,
+   * this function will inform the DomainSocketFactory that short-circuit local
+   * reads are disabled for this DataNode, so that we don't ask again.
+   *
+   * @param conf               the configuration.
+   * @param file               the file name. Used in log messages.
+   * @param block              The block object.
+   * @param blockToken         The block token for security.
+   * @param startOffset        The read offset, relative to block head.
+   * @param len                The number of bytes to read, or -1 to read
+   *                           as many as possible.
+   * @param peer               The peer to use.
+   * @param datanodeID         The datanode that the Peer is connected to.
+   * @param domSockFactory     The DomainSocketFactory to notify if the Peer
+   *                           is a DomainPeer which turns out to be faulty.
+   *                           If null, no factory will be notified in this
+   *                           case.
+   * @param verifyChecksum     True if we should verify the checksums.
+   *                           Note: even if this is true, when
+   *                           DFS_CLIENT_READ_CHECKSUM_SKIP_CHECKSUM_KEY is
+   *                           set, we will skip checksums.
+   *
+   * @return                   The BlockReaderLocal, or null if the
+   *                           DataNode declined to provide short-circuit
+   *                           access.
+   * @throws IOException       If there was a communication error.
+   */
+  private static BlockReaderLocal newShortCircuitBlockReader(
+      Configuration conf, String file, ExtendedBlock block,
+      Token<BlockTokenIdentifier> blockToken, long startOffset,
+      long len, Peer peer, DatanodeID datanodeID,
+      DomainSocketFactory domSockFactory, boolean verifyChecksum)
+          throws IOException {
+    final DataOutputStream out =
+        new DataOutputStream(new BufferedOutputStream(
+          peer.getOutputStream()));
+    new Sender(out).requestShortCircuitFds(block, blockToken, 1);
+    DataInputStream in =
+        new DataInputStream(peer.getInputStream());
+    BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
+        HdfsProtoUtil.vintPrefixed(in));
+    DomainSocket sock = peer.getDomainSocket();
+    switch (resp.getStatus()) {
+    case SUCCESS:
+      BlockReaderLocal reader = null;
+      byte buf[] = new byte[1];
+      FileInputStream fis[] = new FileInputStream[2];
+      sock.recvFileInputStreams(fis, buf, 0, buf.length);
+      try {
+        reader = new BlockReaderLocal(conf, file, block,
+            startOffset, len, fis[0], fis[1], datanodeID, verifyChecksum);
+      } finally {
+        if (reader == null) {
+          IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]);
+        }
+      }
+      return reader;
+    case ERROR_UNSUPPORTED:
+      if (!resp.hasShortCircuitAccessVersion()) {
+        DFSClient.LOG.warn("short-circuit read access is disabled for " +
+            "DataNode " + datanodeID + ".  reason: " + resp.getMessage());
+        domSockFactory.disableShortCircuitForPath(sock.getPath());
+      } else {
+        DFSClient.LOG.warn("short-circuit read access for the file " +
+            file + " is disabled for DataNode " + datanodeID +
+            ".  reason: " + resp.getMessage());
+      }
+      return null;
+    case ERROR_ACCESS_TOKEN:
+      String msg = "access control error while " +
+          "attempting to set up short-circuit access to " +
+          file + resp.getMessage();
+      DFSClient.LOG.debug(msg);
+      throw new InvalidBlockTokenException(msg);
+    default:
+      DFSClient.LOG.warn("error while attempting to set up short-circuit " +
+          "access to " + file + ": " + resp.getMessage());
+      domSockFactory.disableShortCircuitForPath(sock.getPath());
+      return null;
+    }
+  }
+
   /**
    * File name to print when accessing a block directly (from servlets)
    * @param s Address of the block location
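Everything above hinges on one primitive added earlier in this branch (HDFS-4354): receiving already-open file descriptors as ancillary data on a UNIX domain socket. Isolated below as a hedged sketch; only the single call shown in the method above is taken from this patch.

```java
// 'sock' is an already-connected org.apache.hadoop.net.unix.DomainSocket,
// e.g. peer.getDomainSocket() as in newShortCircuitBlockReader() above.
FileInputStream[] fis = new FileInputStream[2]; // filled in by the receive call
byte[] carrier = new byte[1];                   // descriptors ride along with ordinary data
sock.recvFileInputStreams(fis, carrier, 0, carrier.length);
// On success, fis[0] and fis[1] wrap descriptors the DataNode opened on the
// block data file and the block metadata file, in the order that the
// BlockReaderLocal(conf, file, block, ..., fis[0], fis[1], ...) call expects.
```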
@@ -18,30 +18,18 @@
 package org.apache.hadoop.hdfs;
 
 import java.io.DataInputStream;
-import java.io.File;
+import org.apache.hadoop.conf.Configuration;
 import java.io.FileInputStream;
 import java.io.IOException;
-import java.net.Socket;
 import java.nio.ByteBuffer;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
-import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
-import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
-import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
 import org.apache.hadoop.hdfs.util.DirectBufferPool;
-import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.DataChecksum;
 
 /**
@@ -53,74 +41,19 @@ import org.apache.hadoop.util.DataChecksum;
  * <ul>
  * <li>The client performing short circuit reads must be configured at the
  * datanode.</li>
- * <li>The client gets the path to the file where block is stored using
- * {@link org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol#getBlockLocalPathInfo(ExtendedBlock, Token)}
- * RPC call</li>
- * <li>Client uses kerberos authentication to connect to the datanode over RPC,
- * if security is enabled.</li>
+ * <li>The client gets the file descriptors for the metadata file and the data
+ * file for the block using
+ * {@link org.apache.hadoop.hdfs.server.datanode.DataXceiver#requestShortCircuitFds}.
+ * </li>
+ * <li>The client reads the file descriptors.</li>
  * </ul>
  */
 class BlockReaderLocal implements BlockReader {
-  private static final Log LOG = LogFactory.getLog(DFSClient.class);
+  static final Log LOG = LogFactory.getLog(DFSClient.class);
 
-  //Stores the cache and proxy for a local datanode.
-  private static class LocalDatanodeInfo {
-    private ClientDatanodeProtocol proxy = null;
-    private final Map<ExtendedBlock, BlockLocalPathInfo> cache;
-
-    LocalDatanodeInfo() {
-      final int cacheSize = 10000;
-      final float hashTableLoadFactor = 0.75f;
-      int hashTableCapacity = (int) Math.ceil(cacheSize / hashTableLoadFactor) + 1;
-      cache = Collections
-          .synchronizedMap(new LinkedHashMap<ExtendedBlock, BlockLocalPathInfo>(
-              hashTableCapacity, hashTableLoadFactor, true) {
-            private static final long serialVersionUID = 1;
-
-            @Override
-            protected boolean removeEldestEntry(
-                Map.Entry<ExtendedBlock, BlockLocalPathInfo> eldest) {
-              return size() > cacheSize;
-            }
-          });
-    }
-
-    private synchronized ClientDatanodeProtocol getDatanodeProxy(
-        DatanodeInfo node, Configuration conf, int socketTimeout,
-        boolean connectToDnViaHostname) throws IOException {
-      if (proxy == null) {
-        proxy = DFSUtil.createClientDatanodeProtocolProxy(node, conf,
-            socketTimeout, connectToDnViaHostname);
-      }
-      return proxy;
-    }
-
-    private synchronized void resetDatanodeProxy() {
-      if (null != proxy) {
-        RPC.stopProxy(proxy);
-        proxy = null;
-      }
-    }
-
-    private BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) {
-      return cache.get(b);
-    }
-
-    private void setBlockLocalPathInfo(ExtendedBlock b, BlockLocalPathInfo info) {
-      cache.put(b, info);
-    }
-
-    private void removeBlockLocalPathInfo(ExtendedBlock b) {
-      cache.remove(b);
-    }
-  }
-
-  // Multiple datanodes could be running on the local machine. Store proxies in
-  // a map keyed by the ipc port of the datanode.
-  private static Map<Integer, LocalDatanodeInfo> localDatanodeInfoMap = new HashMap<Integer, LocalDatanodeInfo>();
-
   private final FileInputStream dataIn; // reader for the data file
   private final FileInputStream checksumIn; // reader for the checksum file
+  private final boolean verifyChecksum;
 
   /**
    * Offset from the most recent chunk boundary at which the next read should
@@ -140,7 +73,6 @@ class BlockReaderLocal implements BlockReader {
   private ByteBuffer slowReadBuff = null;
   private ByteBuffer checksumBuff = null;
   private DataChecksum checksum;
-  private final boolean verifyChecksum;
 
   private static DirectBufferPool bufferPool = new DirectBufferPool();
 
@ -150,186 +82,90 @@ class BlockReaderLocal implements BlockReader {
|
||||||
/** offset in block where reader wants to actually read */
|
/** offset in block where reader wants to actually read */
|
||||||
private long startOffset;
|
private long startOffset;
|
||||||
private final String filename;
|
private final String filename;
|
||||||
|
|
||||||
|
private final DatanodeID datanodeID;
|
||||||
|
private final ExtendedBlock block;
|
||||||
|
|
||||||
/**
|
private static int getSlowReadBufferNumChunks(Configuration conf,
|
||||||
* The only way this object can be instantiated.
|
int bytesPerChecksum) {
|
||||||
*/
|
|
||||||
static BlockReaderLocal newBlockReader(Configuration conf, String file,
|
|
||||||
ExtendedBlock blk, Token<BlockTokenIdentifier> token, DatanodeInfo node,
|
|
||||||
int socketTimeout, long startOffset, long length,
|
|
||||||
boolean connectToDnViaHostname) throws IOException {
|
|
||||||
|
|
||||||
LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node
|
int bufSize =
|
||||||
.getIpcPort());
|
conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
|
||||||
// check the cache first
|
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
|
||||||
BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk);
|
|
||||||
if (pathinfo == null) {
|
|
||||||
pathinfo = getBlockPathInfo(blk, node, conf, socketTimeout, token,
|
|
||||||
connectToDnViaHostname);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check to see if the file exists. It may so happen that the
|
if (bufSize < bytesPerChecksum) {
|
||||||
// HDFS file has been deleted and this block-lookup is occurring
|
throw new IllegalArgumentException("Configured BlockReaderLocal buffer size (" +
|
||||||
// on behalf of a new HDFS file. This time, the block file could
|
bufSize + ") is not large enough to hold a single chunk (" +
|
||||||
// be residing in a different portion of the fs.data.dir directory.
|
bytesPerChecksum + "). Please configure " +
|
||||||
// In this case, we remove this entry from the cache. The next
|
|
||||||
// call to this method will re-populate the cache.
|
|
||||||
FileInputStream dataIn = null;
|
|
||||||
FileInputStream checksumIn = null;
|
|
||||||
BlockReaderLocal localBlockReader = null;
|
|
||||||
boolean skipChecksumCheck = skipChecksumCheck(conf);
|
|
||||||
try {
|
|
||||||
// get a local file system
|
|
||||||
File blkfile = new File(pathinfo.getBlockPath());
|
|
||||||
dataIn = new FileInputStream(blkfile);
|
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("New BlockReaderLocal for file " + blkfile + " of size "
|
|
||||||
+ blkfile.length() + " startOffset " + startOffset + " length "
|
|
||||||
+ length + " short circuit checksum " + !skipChecksumCheck);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!skipChecksumCheck) {
|
|
||||||
// get the metadata file
|
|
||||||
File metafile = new File(pathinfo.getMetaPath());
|
|
||||||
checksumIn = new FileInputStream(metafile);
|
|
||||||
|
|
||||||
// read and handle the common header here. For now just a version
|
|
||||||
BlockMetadataHeader header = BlockMetadataHeader
|
|
||||||
.readHeader(new DataInputStream(checksumIn));
|
|
||||||
short version = header.getVersion();
|
|
||||||
if (version != BlockMetadataHeader.VERSION) {
|
|
||||||
LOG.warn("Wrong version (" + version + ") for metadata file for "
|
|
||||||
+ blk + " ignoring ...");
|
|
||||||
}
|
|
||||||
DataChecksum checksum = header.getChecksum();
|
|
||||||
long firstChunkOffset = startOffset
|
|
||||||
- (startOffset % checksum.getBytesPerChecksum());
|
|
||||||
localBlockReader = new BlockReaderLocal(conf, file, blk, token,
|
|
||||||
startOffset, length, pathinfo, checksum, true, dataIn,
|
|
||||||
firstChunkOffset, checksumIn);
|
|
||||||
} else {
|
|
||||||
localBlockReader = new BlockReaderLocal(conf, file, blk, token,
|
|
||||||
startOffset, length, pathinfo, dataIn);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
// remove from cache
|
|
||||||
localDatanodeInfo.removeBlockLocalPathInfo(blk);
|
|
||||||
DFSClient.LOG.warn("BlockReaderLocal: Removing " + blk
|
|
||||||
+ " from cache because local file " + pathinfo.getBlockPath()
|
|
||||||
+ " could not be opened.");
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (localBlockReader == null) {
|
|
||||||
if (dataIn != null) {
|
|
||||||
dataIn.close();
|
|
||||||
}
|
|
||||||
if (checksumIn != null) {
|
|
||||||
checksumIn.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return localBlockReader;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static synchronized LocalDatanodeInfo getLocalDatanodeInfo(int port) {
|
|
||||||
LocalDatanodeInfo ldInfo = localDatanodeInfoMap.get(port);
|
|
||||||
if (ldInfo == null) {
|
|
||||||
ldInfo = new LocalDatanodeInfo();
|
|
||||||
localDatanodeInfoMap.put(port, ldInfo);
|
|
||||||
}
|
|
||||||
return ldInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static BlockLocalPathInfo getBlockPathInfo(ExtendedBlock blk,
|
|
||||||
DatanodeInfo node, Configuration conf, int timeout,
|
|
||||||
Token<BlockTokenIdentifier> token, boolean connectToDnViaHostname)
|
|
||||||
throws IOException {
|
|
||||||
LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node.getIpcPort());
|
|
||||||
BlockLocalPathInfo pathinfo = null;
|
|
||||||
ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(node,
|
|
||||||
conf, timeout, connectToDnViaHostname);
|
|
||||||
try {
|
|
||||||
// make RPC to local datanode to find local pathnames of blocks
|
|
||||||
pathinfo = proxy.getBlockLocalPathInfo(blk, token);
|
|
||||||
if (pathinfo != null) {
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("Cached location of block " + blk + " as " + pathinfo);
|
|
||||||
}
|
|
||||||
localDatanodeInfo.setBlockLocalPathInfo(blk, pathinfo);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
localDatanodeInfo.resetDatanodeProxy(); // Reset proxy on error
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
return pathinfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean skipChecksumCheck(Configuration conf) {
|
|
||||||
return conf.getBoolean(
|
|
||||||
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
|
||||||
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int getSlowReadBufferNumChunks(Configuration conf, int bytesPerChecksum) {
|
|
||||||
int bufferSizeBytes = conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
|
|
||||||
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
|
|
||||||
|
|
||||||
if (bufferSizeBytes < bytesPerChecksum) {
|
|
||||||
throw new IllegalArgumentException("Configured BlockReaderLocal buffer size (" + bufferSizeBytes + ") " +
|
|
||||||
"is not large enough to hold a single chunk (" + bytesPerChecksum + "). Please configure " +
|
|
||||||
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + " appropriately");
|
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + " appropriately");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Round down to nearest chunk size
|
// Round down to nearest chunk size
|
||||||
return bufferSizeBytes / bytesPerChecksum;
|
return bufSize / bytesPerChecksum;
|
||||||
}
|
}
|
||||||
|
|
||||||
private BlockReaderLocal(Configuration conf, String hdfsfile,
|
public BlockReaderLocal(Configuration conf, String filename,
|
||||||
ExtendedBlock block, Token<BlockTokenIdentifier> token, long startOffset,
|
ExtendedBlock block, long startOffset, long length,
|
||||||
long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn)
|
FileInputStream dataIn, FileInputStream checksumIn,
|
||||||
throws IOException {
|
DatanodeID datanodeID, boolean verifyChecksum) throws IOException {
|
||||||
this(conf, hdfsfile, block, token, startOffset, length, pathinfo,
|
|
||||||
DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4), false,
|
|
||||||
dataIn, startOffset, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
private BlockReaderLocal(Configuration conf, String hdfsfile,
|
|
||||||
ExtendedBlock block, Token<BlockTokenIdentifier> token, long startOffset,
|
|
||||||
long length, BlockLocalPathInfo pathinfo, DataChecksum checksum,
|
|
||||||
boolean verifyChecksum, FileInputStream dataIn, long firstChunkOffset,
|
|
||||||
FileInputStream checksumIn) throws IOException {
|
|
||||||
this.filename = hdfsfile;
|
|
||||||
this.checksum = checksum;
|
|
||||||
this.verifyChecksum = verifyChecksum;
|
|
||||||
this.startOffset = Math.max(startOffset, 0);
|
|
||||||
|
|
||||||
bytesPerChecksum = this.checksum.getBytesPerChecksum();
|
|
||||||
checksumSize = this.checksum.getChecksumSize();
|
|
||||||
|
|
||||||
this.dataIn = dataIn;
|
this.dataIn = dataIn;
|
||||||
this.checksumIn = checksumIn;
|
this.checksumIn = checksumIn;
|
||||||
this.offsetFromChunkBoundary = (int) (startOffset-firstChunkOffset);
|
this.startOffset = Math.max(startOffset, 0);
|
||||||
|
this.filename = filename;
|
||||||
|
this.datanodeID = datanodeID;
|
||||||
|
this.block = block;
|
||||||
|
|
||||||
int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum);
|
// read and handle the common header here. For now just a version
|
||||||
slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead);
|
checksumIn.getChannel().position(0);
|
||||||
checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead);
|
BlockMetadataHeader header = BlockMetadataHeader
|
||||||
// Initially the buffers have nothing to read.
|
.readHeader(new DataInputStream(checksumIn));
|
||||||
slowReadBuff.flip();
|
short version = header.getVersion();
|
||||||
checksumBuff.flip();
|
if (version != BlockMetadataHeader.VERSION) {
|
||||||
|
throw new IOException("Wrong version (" + version + ") of the " +
|
||||||
|
"metadata file for " + filename + ".");
|
||||||
|
}
|
||||||
|
if (!verifyChecksum) {
|
||||||
|
this.verifyChecksum = false;
|
||||||
|
} else {
|
||||||
|
this.verifyChecksum = !conf.getBoolean(DFSConfigKeys.
|
||||||
|
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
||||||
|
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
|
||||||
|
}
|
||||||
|
long firstChunkOffset;
|
||||||
|
if (this.verifyChecksum) {
|
||||||
|
this.checksum = header.getChecksum();
|
||||||
|
this.bytesPerChecksum = this.checksum.getBytesPerChecksum();
|
||||||
|
this.checksumSize = this.checksum.getChecksumSize();
|
||||||
|
firstChunkOffset = startOffset
|
||||||
|
- (startOffset % checksum.getBytesPerChecksum());
|
||||||
|
this.offsetFromChunkBoundary = (int) (startOffset - firstChunkOffset);
|
||||||
|
|
||||||
|
int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum);
|
||||||
|
slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead);
|
||||||
|
checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead);
|
||||||
|
// Initially the buffers have nothing to read.
|
||||||
|
slowReadBuff.flip();
|
||||||
|
checksumBuff.flip();
|
||||||
|
long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize;
|
||||||
|
IOUtils.skipFully(checksumIn, checkSumOffset);
|
||||||
|
} else {
|
||||||
|
firstChunkOffset = startOffset;
|
||||||
|
this.checksum = null;
|
||||||
|
this.bytesPerChecksum = 0;
|
||||||
|
this.checksumSize = 0;
|
||||||
|
this.offsetFromChunkBoundary = 0;
|
||||||
|
}
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
// Skip both input streams to beginning of the chunk containing startOffset
|
// Reposition both input streams to the beginning of the chunk
|
||||||
IOUtils.skipFully(dataIn, firstChunkOffset);
|
// containing startOffset
|
||||||
if (checksumIn != null) {
|
this.dataIn.getChannel().position(firstChunkOffset);
|
||||||
long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize;
|
|
||||||
IOUtils.skipFully(checksumIn, checkSumOffset);
|
|
||||||
}
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
bufferPool.returnBuffer(slowReadBuff);
|
if (slowReadBuff != null) bufferPool.returnBuffer(slowReadBuff);
|
||||||
bufferPool.returnBuffer(checksumBuff);
|
if (checksumBuff != null) bufferPool.returnBuffer(checksumBuff);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -649,9 +485,17 @@ class BlockReaderLocal implements BlockReader {
   }
 
   @Override
-  public synchronized void close(PeerCache peerCache) throws IOException {
-    dataIn.close();
-    if (checksumIn != null) {
+  public synchronized void close(PeerCache peerCache,
+      FileInputStreamCache fisCache) throws IOException {
+    if (fisCache != null) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("putting FileInputStream for " + filename +
+            " back into FileInputStreamCache");
+      }
+      fisCache.put(datanodeID, block, new FileInputStream[] {dataIn, checksumIn});
+    } else {
+      LOG.debug("closing FileInputStream for " + filename);
+      dataIn.close();
       checksumIn.close();
     }
     if (slowReadBuff != null) {
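This put() is one half of the new recycling path; the matching get() is added to DFSInputStream.getBlockReader() further down in this diff. Below is a hedged end-to-end sketch of the cache's lifecycle as this patch wires it up. The FileInputStreamCache class itself is introduced by this change, so the constructor arguments shown (mirroring the new defaults of 10 entries and 60000 ms) and the free variables are illustrative only.

```java
// Illustrative lifecycle of FileInputStreamCache, stitched together from the
// call sites in this patch (BlockReaderLocal.close and DFSInputStream).
FileInputStreamCache fisCache = new FileInputStreamCache(10, 60000L);

// 1. On close, BlockReaderLocal parks its two open streams under (datanode, block).
fisCache.put(datanodeID, block, new FileInputStream[] { dataIn, checksumIn });

// 2. On a later read of the same block, DFSInputStream asks for them back and,
//    if they are still cached, builds a BlockReaderLocal with no RPC and no
//    new open() calls.
FileInputStream[] fis = fisCache.get(datanodeID, block);
if (fis != null) {
  BlockReaderLocal reader = new BlockReaderLocal(conf, file, block,
      startOffset, len, fis[0], fis[1], datanodeID, verifyChecksum);
}

// 3. When the stream is closed for good, the cache itself is closed so any
//    descriptors still parked in it can be released.
fisCache.close();
```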
@@ -675,4 +519,10 @@ class BlockReaderLocal implements BlockReader {
   public void readFully(byte[] buf, int off, int len) throws IOException {
     BlockReaderUtil.readFully(this, buf, off, len);
   }
+
+  @Override
+  public int available() throws IOException {
+    // We never do network I/O in BlockReaderLocal.
+    return Integer.MAX_VALUE;
+  }
 }
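BlockReaderLocal can answer available() with Integer.MAX_VALUE because everything it serves comes from local files. A small, hypothetical caller-side helper showing how such a hint could be used (not part of this patch):

```java
// Hypothetical helper: drain whatever can be read without further network I/O
// before deciding whether to schedule another remote fetch.
static int drainCheapBytes(BlockReader reader, byte[] dst) throws IOException {
  int cheap = Math.min(reader.available(), dst.length);
  if (cheap > 0) {
    reader.readFully(dst, 0, cheap);
  }
  return cheap;
}
```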
@@ -128,7 +128,6 @@ import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
-import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
 import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
@@ -227,6 +226,11 @@ public class DFSClient implements java.io.Closeable {
     final boolean getHdfsBlocksMetadataEnabled;
     final int getFileBlockStorageLocationsNumThreads;
     final int getFileBlockStorageLocationsTimeout;
+    final String domainSocketPath;
+    final boolean skipShortCircuitChecksums;
+    final int shortCircuitBufferSize;
+    final boolean shortCircuitLocalReads;
+    final boolean domainSocketDataTraffic;
 
     Conf(Configuration conf) {
       maxFailoverAttempts = conf.getInt(
@@ -288,6 +292,19 @@ public class DFSClient implements java.io.Closeable {
       getFileBlockStorageLocationsTimeout = conf.getInt(
           DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT,
           DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT);
+      domainSocketPath = conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY);
+      skipShortCircuitChecksums = conf.getBoolean(
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT);
+      shortCircuitBufferSize = conf.getInt(
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT);
+      shortCircuitLocalReads = conf.getBoolean(
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT);
+      domainSocketDataTraffic = conf.getBoolean(
+          DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC,
+          DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT);
     }
 
     private DataChecksum.Type getChecksumType(Configuration conf) {
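The five new Conf fields are what the DomainSocketFactory and BlockReaderFactory changes consult. A hypothetical helper showing how they are meant to combine; DomainSocketFactory itself is added elsewhere in the HDFS-347 branch, so its real policy may differ from this sketch:

```java
// Hypothetical summary of the client-side policy implied by the new fields.
static boolean domainSocketWorthCreating(DFSClient.Conf conf) {
  // A domain socket only pays off if we may do short-circuit reads or are
  // allowed to push ordinary read traffic over it, and only if a DataNode
  // socket path has been configured at all.
  boolean anyUse = conf.shortCircuitLocalReads || conf.domainSocketDataTraffic;
  boolean pathConfigured =
      conf.domainSocketPath != null && !conf.domainSocketPath.isEmpty();
  return anyUse && pathConfigured;
}
```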
@@ -345,7 +362,7 @@ public class DFSClient implements java.io.Closeable {
   private final Map<String, DFSOutputStream> filesBeingWritten
       = new HashMap<String, DFSOutputStream>();
 
-  private boolean shortCircuitLocalReads;
+  private final DomainSocketFactory domainSocketFactory;
 
   /**
    * Same as this(NameNode.getAddress(conf), conf);
@@ -417,12 +434,8 @@ public class DFSClient implements java.io.Closeable {
     }
 
     // read directly from the block file if configured.
-    this.shortCircuitLocalReads = conf.getBoolean(
-        DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
-        DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Short circuit read is " + shortCircuitLocalReads);
-    }
+    this.domainSocketFactory = new DomainSocketFactory(dfsClientConf);
+
     String localInterfaces[] =
       conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES);
     localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces);
@@ -787,28 +800,11 @@ public class DFSClient implements java.io.Closeable {
                                      AccessControlException.class);
     }
   }
 
-  /**
-   * Get {@link BlockReader} for short circuited local reads.
-   */
-  static BlockReader getLocalBlockReader(Configuration conf,
-      String src, ExtendedBlock blk, Token<BlockTokenIdentifier> accessToken,
-      DatanodeInfo chosenNode, int socketTimeout, long offsetIntoBlock,
-      boolean connectToDnViaHostname) throws InvalidToken, IOException {
-    try {
-      return BlockReaderLocal.newBlockReader(conf, src, blk, accessToken,
-          chosenNode, socketTimeout, offsetIntoBlock, blk.getNumBytes()
-          - offsetIntoBlock, connectToDnViaHostname);
-    } catch (RemoteException re) {
-      throw re.unwrapRemoteException(InvalidToken.class,
-          AccessControlException.class);
-    }
-  }
-
   private static Map<String, Boolean> localAddrMap = Collections
       .synchronizedMap(new HashMap<String, Boolean>());
 
-  private static boolean isLocalAddress(InetSocketAddress targetAddr) {
+  static boolean isLocalAddress(InetSocketAddress targetAddr) {
     InetAddress addr = targetAddr.getAddress();
     Boolean cached = localAddrMap.get(addr.getHostAddress());
     if (cached != null) {
@@ -2108,10 +2104,6 @@ public class DFSClient implements java.io.Closeable {
       super(in);
     }
   }
 
-  boolean shouldTryShortCircuitRead(InetSocketAddress targetAddr) {
-    return shortCircuitLocalReads && isLocalAddress(targetAddr);
-  }
-
   void reportChecksumFailure(String file, ExtendedBlock blk, DatanodeInfo dn) {
     DatanodeInfo [] dnArr = { dn };
@@ -2135,7 +2127,7 @@ public class DFSClient implements java.io.Closeable {
         + ", ugi=" + ugi + "]";
   }
 
-  void disableShortCircuit() {
-    shortCircuitLocalReads = false;
+  public DomainSocketFactory getDomainSocketFactory() {
+    return domainSocketFactory;
   }
 }
@@ -342,7 +342,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY = "dfs.client.read.shortcircuit.skip.checksum";
   public static final boolean DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT = false;
   public static final String DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY = "dfs.client.read.shortcircuit.buffer.size";
+  public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY = "dfs.client.read.shortcircuit.streams.cache.size";
+  public static final int DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT = 10;
+  public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY = "dfs.client.read.shortcircuit.streams.cache.expiry.ms";
+  public static final long DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT = 60000;
   public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
+  public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
+  public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
 
   // property for fsimage compression
   public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress";
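Put together, the new keys give a client-side configuration roughly like the following. The values are the defaults from this hunk or clearly illustrative; dfs.client.read.shortcircuit itself predates this patch, and the socket path shown is a placeholder that must match what the DataNode was started with.

```java
// Illustrative client configuration for short-circuit reads over domain sockets.
Configuration conf = new Configuration();
conf.setBoolean("dfs.client.read.shortcircuit", true);                   // pre-existing key
conf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", false);
conf.setInt("dfs.client.read.shortcircuit.buffer.size", 1024 * 1024);
conf.setInt("dfs.client.read.shortcircuit.streams.cache.size", 10);
conf.setLong("dfs.client.read.shortcircuit.streams.cache.expiry.ms", 60000L);
conf.setBoolean("dfs.client.domain.socket.data.traffic", false);
conf.set("dfs.datanode.domain.socket.path", "/var/run/hdfs/dn_socket");  // illustrative path
```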
@@ -393,6 +399,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY = "dfs.web.authentication.kerberos.keytab";
 
   public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user";
+  public static final String DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY = "dfs.datanode.domain.socket.path";
 
   // HA related configuration
   public static final String DFS_HA_NAMENODES_KEY_PREFIX = "dfs.ha.namenodes";
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs;
 
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.Socket;
@@ -38,7 +39,7 @@ import org.apache.hadoop.fs.ChecksumException;
 import org.apache.hadoop.fs.ByteBufferReadable;
 import org.apache.hadoop.fs.FSInputStream;
 import org.apache.hadoop.fs.UnresolvedLinkException;
-import org.apache.hadoop.hdfs.net.EncryptedPeer;
+import org.apache.hadoop.hdfs.net.DomainPeer;
 import org.apache.hadoop.hdfs.net.Peer;
 import org.apache.hadoop.hdfs.net.TcpPeerServer;
 import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
@@ -46,17 +47,16 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
-import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair;
 import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
-import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
 import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.net.NetUtils;
-import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.net.unix.DomainSocket;
 import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.hdfs.FileInputStreamCache;
 
 /****************************************************************
  * DFSInputStream provides bytes from a named file.  It handles
@@ -80,6 +80,8 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
   private long pos = 0;
   private long blockEnd = -1;
 
+  private final FileInputStreamCache fileInputStreamCache;
+
   /**
    * This variable tracks the number of failures since the start of the
    * most recent user-facing operation. That is to say, it should be reset
@@ -115,6 +117,13 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
     this.buffersize = buffersize;
     this.src = src;
     this.peerCache = dfsClient.peerCache;
+    this.fileInputStreamCache = new FileInputStreamCache(
+        dfsClient.conf.getInt(DFSConfigKeys.
+            DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY,
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT),
+        dfsClient.conf.getLong(DFSConfigKeys.
+            DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT));
     prefetchSize = dfsClient.getConf().prefetchSize;
     timeWindow = dfsClient.getConf().timeWindow;
     nCachedConnRetry = dfsClient.getConf().nCachedConnRetry;
@@ -247,7 +256,9 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
         locatedBlocks.getFileLength() + lastBlockBeingWrittenLength;
   }
 
-  private synchronized boolean blockUnderConstruction() {
+  // Short circuit local reads are forbidden for files that are
+  // under construction.  See HDFS-2757.
+  synchronized boolean shortCircuitForbidden() {
     return locatedBlocks.isUnderConstruction();
   }
 
@@ -428,7 +439,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
 
     // Will be getting a new BlockReader.
     if (blockReader != null) {
-      blockReader.close(peerCache);
+      blockReader.close(peerCache, fileInputStreamCache);
       blockReader = null;
     }
 
@@ -510,10 +521,11 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
     dfsClient.checkOpen();
 
     if (blockReader != null) {
-      blockReader.close(peerCache);
+      blockReader.close(peerCache, fileInputStreamCache);
       blockReader = null;
     }
     super.close();
+    fileInputStreamCache.close();
     closed = true;
   }
 
@@ -809,10 +821,6 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
             e.getPos() + " from " + chosenNode);
         // we want to remember what we have tried
         addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap);
-      } catch (AccessControlException ex) {
-        DFSClient.LOG.warn("Short circuit access failed ", ex);
-        dfsClient.disableShortCircuit();
-        continue;
       } catch (IOException e) {
         if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
           DFSClient.LOG.info("Will fetch a new encryption key and retry, "
|
@ -837,7 +845,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (reader != null) {
|
if (reader != null) {
|
||||||
reader.close(peerCache);
|
reader.close(peerCache, fileInputStreamCache);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Put chosen node into dead list, continue
|
// Put chosen node into dead list, continue
|
||||||
|
@@ -849,19 +857,29 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
     Peer peer = null;
     boolean success = false;
     Socket sock = null;
+    DomainSocket domSock = null;
+
     try {
-      sock = dfsClient.socketFactory.createSocket();
-      NetUtils.connect(sock, addr,
-        dfsClient.getRandomLocalInterfaceAddr(),
-        dfsClient.getConf().socketTimeout);
-      peer = TcpPeerServer.peerFromSocketAndKey(sock,
-        dfsClient.getDataEncryptionKey());
+      domSock = dfsClient.getDomainSocketFactory().create(addr, this);
+      if (domSock != null) {
+        // Create a UNIX Domain peer.
+        peer = new DomainPeer(domSock);
+      } else {
+        // Create a conventional TCP-based Peer.
+        sock = dfsClient.socketFactory.createSocket();
+        NetUtils.connect(sock, addr,
+          dfsClient.getRandomLocalInterfaceAddr(),
+          dfsClient.getConf().socketTimeout);
+        peer = TcpPeerServer.peerFromSocketAndKey(sock,
+          dfsClient.getDataEncryptionKey());
+      }
       success = true;
       return peer;
     } finally {
       if (!success) {
         IOUtils.closeQuietly(peer);
         IOUtils.closeQuietly(sock);
+        IOUtils.closeQuietly(domSock);
       }
     }
   }
@@ -895,49 +913,77 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
       String clientName)
       throws IOException {
 
-    // Can't local read a block under construction, see HDFS-2757
-    if (dfsClient.shouldTryShortCircuitRead(dnAddr) &&
-        !blockUnderConstruction()) {
-      return DFSClient.getLocalBlockReader(dfsClient.conf, src, block,
-          blockToken, chosenNode, dfsClient.hdfsTimeout, startOffset,
-          dfsClient.connectToDnViaHostname());
-    }
-
     IOException err = null;
-    boolean fromCache = true;
 
-    // Allow retry since there is no way of knowing whether the cached socket
-    // is good until we actually use it.
-    for (int retries = 0; retries <= nCachedConnRetry && fromCache; ++retries) {
+    // Firstly, we check to see if we have cached any file descriptors for
+    // local blocks.  If so, we can just re-use those file descriptors.
+    FileInputStream fis[] = fileInputStreamCache.get(chosenNode, block);
+    if (fis != null) {
+      if (DFSClient.LOG.isDebugEnabled()) {
+        DFSClient.LOG.debug("got FileInputStreams for " + block + " from " +
+            "the FileInputStreamCache.");
+      }
+      return new BlockReaderLocal(dfsClient.conf, file,
+          block, startOffset, len, fis[0], fis[1], chosenNode, verifyChecksum);
+    }
+
+    // We retry several times here.
+    // On the first nCachedConnRetry times, we try to fetch a socket from
+    // the socketCache and use it.  This may fail, since the old socket may
+    // have been closed by the peer.
+    // After that, we try to create a new socket using newPeer().
+    // This may create either a TCP socket or a UNIX domain socket, depending
+    // on the configuration and whether the peer is remote.
+    // If we try to create a UNIX domain socket and fail, we will not try that
+    // again.  Instead, we'll try to create a TCP socket.  Only after we've
+    // failed to create a TCP-based BlockReader will we throw an IOException
+    // from this function.  Throwing an IOException from here is basically
+    // equivalent to declaring the DataNode bad.
+    boolean triedNonDomainSocketReader = false;
+    for (int retries = 0;
+          retries < nCachedConnRetry && (!triedNonDomainSocketReader);
+          ++retries) {
       Peer peer = null;
-      // Don't use the cache on the last attempt - it's possible that there
-      // are arbitrarily many unusable sockets in the cache, but we don't
-      // want to fail the read.
       if (retries < nCachedConnRetry) {
        peer = peerCache.get(chosenNode);
       }
       if (peer == null) {
         peer = newPeer(dnAddr);
-        fromCache = false;
+        if (peer.getDomainSocket() == null) {
+          triedNonDomainSocketReader = true;
+        }
       }
+      boolean success = false;
       try {
-        // The OP_READ_BLOCK request is sent as we make the BlockReader
-        BlockReader reader =
-            BlockReaderFactory.newBlockReader(dfsClient.conf,
-                file, block,
-                blockToken,
-                startOffset, len,
-                verifyChecksum,
-                clientName,
-                peer,
-                chosenNode);
-        return reader;
+        boolean allowShortCircuitLocalReads =
+          (peer.getDomainSocket() != null) &&
+          dfsClient.getConf().shortCircuitLocalReads &&
+          (!shortCircuitForbidden());
+        // Here we will try to send either an OP_READ_BLOCK request or an
+        // OP_REQUEST_SHORT_CIRCUIT_FDS, depending on what kind of block reader
+        // we're trying to create.
+        BlockReader blockReader = BlockReaderFactory.newBlockReader(
+            dfsClient.conf, file, block, blockToken, startOffset,
+            len, verifyChecksum, clientName, peer, chosenNode,
+            dfsClient.getDomainSocketFactory(), allowShortCircuitLocalReads);
+        success = true;
+        return blockReader;
       } catch (IOException ex) {
         // Our socket is no good.
-        DFSClient.LOG.debug("Error making BlockReader. Closing stale " + peer, ex);
-        IOUtils.closeQuietly(peer);
+        DFSClient.LOG.debug("Error making BlockReader. " +
            "Closing stale " + peer, ex);
+        if (peer.getDomainSocket() != null) {
+          // If the Peer that we got the error from was a DomainPeer,
+          // mark the socket path as bad, so that newDataSocket will not try
+          // to re-open this socket for a while.
+          dfsClient.getDomainSocketFactory().
+              disableDomainSocketPath(peer.getDomainSocket().getPath());
+        }
         err = ex;
+      } finally {
+        if (!success) {
+          IOUtils.closeQuietly(peer);
+        }
       }
     }
 
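The rewritten getBlockReader() above consults the FileInputStreamCache before doing any network work. Below is a minimal stand-alone sketch of that cache-first pattern, assuming a hypothetical FdCacheSketch class and string keys; it is not the DFSClient API, only an illustration of "reuse cached descriptors, otherwise fall back to the slower open path".

    import java.io.FileInputStream;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.Callable;

    // Hypothetical stand-in for FileInputStreamCache: reuse descriptors if present,
    // otherwise fall back to whatever slower path creates them.
    class FdCacheSketch {
      private final Map<String, FileInputStream[]> cache = new HashMap<>();

      FileInputStream[] getOrOpen(String key, Callable<FileInputStream[]> opener)
          throws Exception {
        FileInputStream[] cached = cache.remove(key);  // remove: streams have one owner at a time
        if (cached != null) {
          return cached;        // fast path: no network round trip, no new open()
        }
        return opener.call();   // slow path: e.g. ask the DataNode over a socket
      }

      void giveBack(String key, FileInputStream[] fis) {
        cache.put(key, fis);    // done by the local reader's close() in the real code
      }
    }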
@@ -1075,7 +1121,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
       // the TCP buffer, then just eat up the intervening data.
       //
       int diff = (int)(targetPos - pos);
-      if (diff <= DFSClient.TCP_WINDOW_SIZE) {
+      if (diff <= blockReader.available()) {
         try {
           pos += blockReader.skip(diff);
           if (pos == targetPos) {
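The seek hunk above swaps the fixed DFSClient.TCP_WINDOW_SIZE bound for BlockReader.available(), so a local short-circuit reader, which can report a much larger value, is skipped in place instead of being torn down and reopened. A rough sketch of that decision using plain java.io types rather than the HDFS classes:

    import java.io.IOException;
    import java.io.InputStream;

    class SeekSketch {
      // Skip forward inside the current reader only when the gap can be satisfied
      // without extra network I/O; otherwise the caller reconnects to the block.
      static long seekForward(InputStream reader, long pos, long targetPos) throws IOException {
        int diff = (int) (targetPos - pos);
        if (diff > 0 && diff <= reader.available()) {
          pos += reader.skip(diff);
        }
        return pos;
      }
    }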
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.DFSClient.Conf;
+
+import org.apache.hadoop.net.unix.DomainSocket;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+class DomainSocketFactory {
+  public static final Log LOG = LogFactory.getLog(DomainSocketFactory.class);
+  private final Conf conf;
+
+  enum PathStatus {
+    UNUSABLE,
+    SHORT_CIRCUIT_DISABLED,
+  }
+
+  /**
+   * Information about domain socket paths.
+   */
+  Cache<String, PathStatus> pathInfo =
+      CacheBuilder.newBuilder()
+      .expireAfterWrite(10, TimeUnit.MINUTES)
+      .build();
+
+  public DomainSocketFactory(Conf conf) {
+    this.conf = conf;
+
+    String feature = null;
+    if (conf.shortCircuitLocalReads) {
+      feature = "The short-circuit local reads feature";
+    } else if (conf.domainSocketDataTraffic) {
+      feature = "UNIX domain socket data traffic";
+    }
+    if (feature != null) {
+      if (conf.domainSocketPath == null) {
+        LOG.warn(feature + " is disabled because you have not set " +
+            DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY);
+      } else if (DomainSocket.getLoadingFailureReason() != null) {
+        LOG.error(feature + " is disabled because " +
+            DomainSocket.getLoadingFailureReason());
+      } else {
+        LOG.debug(feature + " is enabled.");
+      }
+    }
+  }
+
+  /**
+   * Create a DomainSocket.
+   *
+   * @param addr        The address of the DataNode
+   * @param stream      The DFSInputStream the socket will be created for.
+   *
+   * @return            null if the socket could not be created; the
+   *                    socket otherwise.  If there was an error while
+   *                    creating the socket, we will add the socket path
+   *                    to our list of failed domain socket paths.
+   */
+  DomainSocket create(InetSocketAddress addr, DFSInputStream stream) {
+    // If there is no domain socket path configured, we can't use domain
+    // sockets.
+    if (conf.domainSocketPath == null) return null;
+    // UNIX domain sockets can only be used to talk to local peers
+    if (!DFSClient.isLocalAddress(addr)) return null;
+    // If the DomainSocket code is not loaded, we can't create
+    // DomainSocket objects.
+    if (DomainSocket.getLoadingFailureReason() != null) return null;
+    String escapedPath = DomainSocket.
+        getEffectivePath(conf.domainSocketPath, addr.getPort());
+    PathStatus info = pathInfo.getIfPresent(escapedPath);
+    if (info == PathStatus.UNUSABLE) {
+      // We tried to connect to this domain socket before, and it was totally
+      // unusable.
+      return null;
+    }
+    if ((!conf.domainSocketDataTraffic) &&
+        ((info == PathStatus.SHORT_CIRCUIT_DISABLED) ||
+            stream.shortCircuitForbidden())) {
+      // If we don't want to pass data over domain sockets, and we don't want
+      // to pass file descriptors over them either, we have no use for domain
+      // sockets.
+      return null;
+    }
+    boolean success = false;
+    DomainSocket sock = null;
+    try {
+      sock = DomainSocket.connect(escapedPath);
+      sock.setAttribute(DomainSocket.RCV_TIMEO, conf.socketTimeout);
+      success = true;
+    } catch (IOException e) {
+      LOG.error("error creating DomainSocket", e);
+      // fall through
+    } finally {
+      if (!success) {
+        if (sock != null) {
+          IOUtils.closeQuietly(sock);
+        }
+        pathInfo.put(escapedPath, PathStatus.UNUSABLE);
+        sock = null;
+      }
+    }
+    return sock;
+  }
+
+  public void disableShortCircuitForPath(String path) {
+    pathInfo.put(path, PathStatus.SHORT_CIRCUIT_DISABLED);
+  }
+
+  public void disableDomainSocketPath(String path) {
+    pathInfo.put(path, PathStatus.UNUSABLE);
+  }
+}
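DomainSocketFactory keeps its deny-list in a Guava Cache so that a path marked UNUSABLE is retried automatically once the entry expires. A self-contained sketch of that time-bounded negative-cache idiom (class and method names here are illustrative, not part of the patch):

    import java.util.concurrent.TimeUnit;
    import com.google.common.cache.Cache;
    import com.google.common.cache.CacheBuilder;

    // Time-bounded negative cache: remember that a path was bad, but only for a
    // while, so a repaired DataNode gets retried once the entry expires.
    class BadPathCache {
      enum Status { UNUSABLE, SHORT_CIRCUIT_DISABLED }

      private final Cache<String, Status> paths = CacheBuilder.newBuilder()
          .expireAfterWrite(10, TimeUnit.MINUTES)
          .build();

      void markUnusable(String path) {
        paths.put(path, Status.UNUSABLE);
      }

      boolean isUsable(String path) {
        return paths.getIfPresent(path) != Status.UNUSABLE;
      }
    }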
@@ -0,0 +1,265 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.FileInputStream;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.Time;
+
+import com.google.common.collect.LinkedListMultimap;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
+/**
+ * FileInputStream cache is used to cache FileInputStream objects that we
+ * have received from the DataNode.
+ */
+class FileInputStreamCache {
+  private final static Log LOG = LogFactory.getLog(FileInputStreamCache.class);
+
+  /**
+   * The executor service that runs the cacheCleaner.  There is only one of
+   * these per VM.
+   */
+  private final static ScheduledThreadPoolExecutor executor
+      = new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder().
+          setDaemon(true).setNameFormat("FileInputStreamCache Cleaner").
+          build());
+
+  /**
+   * The CacheCleaner for this FileInputStreamCache.  We don't create this
+   * and schedule it until it becomes necessary.
+   */
+  private CacheCleaner cacheCleaner;
+
+  /**
+   * Maximum number of entries to allow in the cache.
+   */
+  private final int maxCacheSize;
+
+  /**
+   * The minimum time in milliseconds to preserve an element in the cache.
+   */
+  private final long expiryTimeMs;
+
+  /**
+   * True if the FileInputStreamCache is closed.
+   */
+  private boolean closed = false;
+
+  /**
+   * Cache entries.
+   */
+  private final LinkedListMultimap<Key, Value> map = LinkedListMultimap.create();
+
+  /**
+   * Expiry thread which makes sure that the file descriptors get closed
+   * after a while.
+   */
+  class CacheCleaner implements Runnable {
+    @Override
+    public void run() {
+      synchronized(FileInputStreamCache.this) {
+        if (closed) return;
+        long curTime = Time.monotonicNow();
+        for (Iterator<Entry<Key, Value>> iter = map.entries().iterator();
+              iter.hasNext();
+              iter = map.entries().iterator()) {
+          Entry<Key, Value> entry = iter.next();
+          if (entry.getValue().getTime() + expiryTimeMs >= curTime) {
+            break;
+          }
+          entry.getValue().close();
+          iter.remove();
+        }
+      }
+    }
+  }
+
+  /**
+   * The key identifying a FileInputStream array.
+   */
+  static class Key {
+    private final DatanodeID datanodeID;
+    private final ExtendedBlock block;
+
+    public Key(DatanodeID datanodeID, ExtendedBlock block) {
+      this.datanodeID = datanodeID;
+      this.block = block;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (!(other instanceof FileInputStreamCache.Key)) {
+        return false;
+      }
+      FileInputStreamCache.Key otherKey = (FileInputStreamCache.Key)other;
+      return (block.equals(otherKey.block) &&
+          (block.getGenerationStamp() == otherKey.block.getGenerationStamp()) &&
+          datanodeID.equals(otherKey.datanodeID));
+    }
+
+    @Override
+    public int hashCode() {
+      return block.hashCode();
+    }
+  }
+
+  /**
+   * The value containing a FileInputStream array and the time it was added to
+   * the cache.
+   */
+  static class Value {
+    private final FileInputStream fis[];
+    private final long time;
+
+    public Value (FileInputStream fis[]) {
+      this.fis = fis;
+      this.time = Time.monotonicNow();
+    }
+
+    public FileInputStream[] getFileInputStreams() {
+      return fis;
+    }
+
+    public long getTime() {
+      return time;
+    }
+
+    public void close() {
+      IOUtils.cleanup(LOG, fis);
+    }
+  }
+
+  /**
+   * Create a new FileInputStream
+   *
+   * @param maxCacheSize         The maximum number of elements to allow in
+   *                             the cache.
+   * @param expiryTimeMs         The minimum time in milliseconds to preserve
+   *                             elements in the cache.
+   */
+  public FileInputStreamCache(int maxCacheSize, long expiryTimeMs) {
+    this.maxCacheSize = maxCacheSize;
+    this.expiryTimeMs = expiryTimeMs;
+  }
+
+  /**
+   * Put an array of FileInputStream objects into the cache.
+   *
+   * @param datanodeID          The DatanodeID to store the streams under.
+   * @param block               The Block to store the streams under.
+   * @param fis                 The streams.
+   */
+  public void put(DatanodeID datanodeID, ExtendedBlock block,
+      FileInputStream fis[]) {
+    boolean inserted = false;
+    try {
+      synchronized(this) {
+        if (closed) return;
+        if (map.size() + 1 > maxCacheSize) {
+          Iterator<Entry<Key, Value>> iter = map.entries().iterator();
+          if (!iter.hasNext()) return;
+          Entry<Key, Value> entry = iter.next();
+          entry.getValue().close();
+          iter.remove();
+        }
+        if (cacheCleaner == null) {
+          cacheCleaner = new CacheCleaner();
+          executor.scheduleAtFixedRate(cacheCleaner, expiryTimeMs, expiryTimeMs,
+              TimeUnit.MILLISECONDS);
+        }
+        map.put(new Key(datanodeID, block), new Value(fis));
+        inserted = true;
+      }
+    } finally {
+      if (!inserted) {
+        IOUtils.cleanup(LOG, fis);
+      }
+    }
+  }
+
+  /**
+   * Find and remove an array of FileInputStream objects from the cache.
+   *
+   * @param datanodeID          The DatanodeID to search for.
+   * @param block               The Block to search for.
+   *
+   * @return                    null if no streams can be found; the
+   *                            array otherwise.  If this is non-null, the
+   *                            array will have been removed from the cache.
+   */
+  public synchronized FileInputStream[] get(DatanodeID datanodeID,
+      ExtendedBlock block) {
+    Key key = new Key(datanodeID, block);
+    List<Value> ret = map.get(key);
+    if (ret.isEmpty()) return null;
+    Value val = ret.get(0);
+    map.remove(key, val);
+    return val.getFileInputStreams();
+  }
+
+  /**
+   * Close the cache and free all associated resources.
+   */
+  public synchronized void close() {
+    if (closed) return;
+    closed = true;
+    if (cacheCleaner != null) {
+      executor.remove(cacheCleaner);
+    }
+    for (Iterator<Entry<Key, Value>> iter = map.entries().iterator();
+          iter.hasNext();
+          iter = map.entries().iterator()) {
+      Entry<Key, Value> entry = iter.next();
+      entry.getValue().close();
+      iter.remove();
+    }
+  }
+
+  public synchronized String toString() {
+    StringBuilder bld = new StringBuilder();
+    bld.append("FileInputStreamCache(");
+    String prefix = "";
+    for (Entry<Key, Value> entry : map.entries()) {
+      bld.append(prefix);
+      bld.append(entry.getKey());
+      prefix = ", ";
+    }
+    bld.append(")");
+    return bld.toString();
+  }
+
+  public long getExpiryTimeMs() {
+    return expiryTimeMs;
+  }
+
+  public int getMaxCacheSize() {
+    return maxCacheSize;
+  }
+}
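FileInputStreamCache leans on LinkedListMultimap preserving insertion order: the first entry returned by entries() is always the oldest, which makes both size-based eviction in put() and time-based expiry in CacheCleaner a matter of walking from the front. A small sketch of that oldest-first idiom in isolation (names are illustrative):

    import java.util.Iterator;
    import java.util.Map.Entry;
    import com.google.common.collect.LinkedListMultimap;

    // LinkedListMultimap keeps entries in insertion order, so evicting "the
    // oldest" is just removing the first entry the iterator yields.
    class OldestFirstEviction {
      private final LinkedListMultimap<String, Long> map = LinkedListMultimap.create();

      void add(String key, long insertedAtMs, int maxSize) {
        if (map.size() + 1 > maxSize) {
          Iterator<Entry<String, Long>> iter = map.entries().iterator();
          if (iter.hasNext()) {
            iter.next();
            iter.remove();      // drops the oldest insertion
          }
        }
        map.put(key, insertedAtMs);
      }
    }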
@@ -413,7 +413,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
   }
 
   @Override
-  public synchronized void close(PeerCache peerCache) throws IOException {
+  public synchronized void close(PeerCache peerCache,
+      FileInputStreamCache fisCache) throws IOException {
     startOffset = -1;
     checksum = null;
     if (peerCache != null & sentStatusCode) {
@@ -470,4 +471,11 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader {
   public int read(ByteBuffer buf) throws IOException {
     throw new UnsupportedOperationException("readDirect unsupported in RemoteBlockReader");
   }
+
+  @Override
+  public int available() throws IOException {
+    // An optimistic estimate of how much data is available
+    // to us without doing network I/O.
+    return DFSClient.TCP_WINDOW_SIZE;
+  }
 }
@@ -275,7 +275,8 @@ public class RemoteBlockReader2 implements BlockReader {
 
 
   @Override
-  public synchronized void close(PeerCache peerCache) throws IOException {
+  public synchronized void close(PeerCache peerCache,
+      FileInputStreamCache fisCache) throws IOException {
     packetReceiver.close();
     startOffset = -1;
     checksum = null;
@@ -422,4 +423,11 @@ public class RemoteBlockReader2 implements BlockReader {
       }
     }
   }
+
+  @Override
+  public int available() throws IOException {
+    // An optimistic estimate of how much data is available
+    // to us without doing network I/O.
+    return DFSClient.TCP_WINDOW_SIZE;
+  }
 }
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hdfs.net;
 
-import java.io.Closeable;
 import java.io.IOException;
 import java.net.SocketTimeoutException;
 
@@ -27,8 +26,10 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.hdfs.net.PeerServer;
 import org.apache.hadoop.net.unix.DomainSocket;
+import org.apache.hadoop.classification.InterfaceAudience;
 
-class DomainPeerServer implements PeerServer {
+@InterfaceAudience.Private
+public class DomainPeerServer implements PeerServer {
   static Log LOG = LogFactory.getLog(DomainPeerServer.class);
   private final DomainSocket sock;
@@ -104,6 +104,18 @@ public interface DataTransferProtocol {
       final String clientName,
       final DatanodeInfo[] targets) throws IOException;
 
+  /**
+   * Request short circuit access file descriptors from a DataNode.
+   *
+   * @param blk             The block to get file descriptors for.
+   * @param blockToken      Security token for accessing the block.
+   * @param maxVersion      Maximum version of the block data the client
+   *                        can understand.
+   */
+  public void requestShortCircuitFds(final ExtendedBlock blk,
+      final Token<BlockTokenIdentifier> blockToken,
+      int maxVersion) throws IOException;
+
   /**
    * Receive a block from a source datanode
    * and then notifies the namenode
@@ -34,7 +34,8 @@ public enum Op {
   REPLACE_BLOCK((byte)83),
   COPY_BLOCK((byte)84),
   BLOCK_CHECKSUM((byte)85),
-  TRANSFER_BLOCK((byte)86);
+  TRANSFER_BLOCK((byte)86),
+  REQUEST_SHORT_CIRCUIT_FDS((byte)87);
 
   /** The code for this operation. */
   public final byte code;
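The new opcode is wired in as byte 87. As a hedged illustration of how such one-byte codes are typically mapped back to the enum on the receiving side (the real Op class has its own read helper, which is not part of this hunk), consider:

    // Illustrative opcode lookup; mirrors the byte codes added in the enum above.
    enum OpSketch {
      TRANSFER_BLOCK((byte) 86),
      REQUEST_SHORT_CIRCUIT_FDS((byte) 87);

      final byte code;

      OpSketch(byte code) {
        this.code = code;
      }

      static OpSketch fromByte(byte b) {
        for (OpSketch op : values()) {
          if (op.code == b) {
            return op;
          }
        }
        return null;   // unknown op; the real Receiver throws IOException here
      }
    }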
@@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReadBlockProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReplaceBlockProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpTransferBlockProto;
+import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpWriteBlockProto;
 
 /** Receiver */
@@ -77,6 +78,9 @@ public abstract class Receiver implements DataTransferProtocol {
     case TRANSFER_BLOCK:
       opTransferBlock(in);
       break;
+    case REQUEST_SHORT_CIRCUIT_FDS:
+      opRequestShortCircuitFds(in);
+      break;
     default:
       throw new IOException("Unknown op " + op + " in data stream");
     }
@@ -117,6 +121,15 @@ public abstract class Receiver implements DataTransferProtocol {
         fromProtos(proto.getTargetsList()));
   }
 
+  /** Receive {@link Op#REQUEST_SHORT_CIRCUIT_FDS} */
+  private void opRequestShortCircuitFds(DataInputStream in) throws IOException {
+    final OpRequestShortCircuitAccessProto proto =
+      OpRequestShortCircuitAccessProto.parseFrom(vintPrefixed(in));
+    requestShortCircuitFds(fromProto(proto.getHeader().getBlock()),
+        fromProto(proto.getHeader().getToken()),
+        proto.getMaxVersion());
+  }
+
   /** Receive OP_REPLACE_BLOCK */
   private void opReplaceBlock(DataInputStream in) throws IOException {
     OpReplaceBlockProto proto = OpReplaceBlockProto.parseFrom(vintPrefixed(in));
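opRequestShortCircuitFds() parses its request with the same vint-length-prefixed framing the other ops use. A rough, standalone sketch of what that framing looks like on the wire, using nothing beyond java.io (the real code delegates to vintPrefixed() and the generated parseFrom()):

    import java.io.DataInputStream;
    import java.io.IOException;

    class VintFrameSketch {
      // Read a protobuf-style varint length, then exactly that many payload bytes.
      static byte[] readFrame(DataInputStream in) throws IOException {
        int length = 0;
        int shift = 0;
        while (true) {
          int b = in.readUnsignedByte();
          length |= (b & 0x7f) << shift;
          if ((b & 0x80) == 0) {
            break;              // high bit clear: last byte of the varint
          }
          shift += 7;
        }
        byte[] payload = new byte[length];
        in.readFully(payload);
        return payload;
      }
    }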
@@ -36,6 +36,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReadBlockProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReplaceBlockProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpTransferBlockProto;
+import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpWriteBlockProto;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.security.token.Token;
 
@@ -135,6 +136,17 @@ public class Sender implements DataTransferProtocol {
     send(out, Op.TRANSFER_BLOCK, proto);
   }
 
+  @Override
+  public void requestShortCircuitFds(final ExtendedBlock blk,
+      final Token<BlockTokenIdentifier> blockToken,
+      int maxVersion) throws IOException {
+    OpRequestShortCircuitAccessProto proto =
+        OpRequestShortCircuitAccessProto.newBuilder()
+          .setHeader(DataTransferProtoUtil.buildBaseHeader(
+            blk, blockToken)).setMaxVersion(maxVersion).build();
+    send(out, Op.REQUEST_SHORT_CIRCUIT_FDS, proto);
+  }
+
   @Override
   public void replaceBlock(final ExtendedBlock blk,
       final Token<BlockTokenIdentifier> blockToken,
@@ -213,7 +213,7 @@ public class JspHelper {
         offsetIntoBlock, amtToRead,  true,
         "JspHelper", TcpPeerServer.peerFromSocketAndKey(s, encryptionKey),
         new DatanodeID(addr.getAddress().toString(),
-            addr.getHostName(), poolId, addr.getPort(), 0, 0));
+            addr.getHostName(), poolId, addr.getPort(), 0, 0), null, false);
 
     byte[] buf = new byte[(int)amtToRead];
     int readOffset = 0;
@@ -232,8 +232,7 @@ public class JspHelper {
       amtToRead -= numRead;
       readOffset += numRead;
     }
-    blockReader = null;
-    s.close();
+    blockReader.close(null, null);
 
     out.print(HtmlQuoting.quoteHtmlChars(new String(buf)));
   }
@@ -53,16 +53,15 @@ import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintStream;
 import java.net.InetSocketAddress;
-import java.net.ServerSocket;
 import java.net.Socket;
 import java.net.URI;
 import java.net.UnknownHostException;
-import java.nio.channels.ServerSocketChannel;
 import java.nio.channels.SocketChannel;
 import java.security.PrivilegedExceptionAction;
 import java.util.AbstractList;
@@ -90,6 +89,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HDFSPolicyProvider;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.net.DomainPeerServer;
 import org.apache.hadoop.hdfs.net.TcpPeerServer;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
@@ -149,11 +149,11 @@ import org.apache.hadoop.io.ReadaheadPool;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.RemoteException;
-import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.net.DNS;
 import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.net.unix.DomainSocket;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -233,6 +233,7 @@ public class DataNode extends Configured
     LogFactory.getLog(DataNode.class.getName() + ".clienttrace");
 
   private static final String USAGE = "Usage: java DataNode [-rollback | -regular]";
+  static final int CURRENT_BLOCK_FORMAT_VERSION = 1;
 
   /**
    * Use {@link NetUtils#createSocketAddr(String)} instead.
@@ -250,6 +251,7 @@ public class DataNode extends Configured
   public final static String EMPTY_DEL_HINT = "";
   AtomicInteger xmitsInProgress = new AtomicInteger();
   Daemon dataXceiverServer = null;
+  Daemon localDataXceiverServer = null;
   ThreadGroup threadGroup = null;
   private DNConf dnConf;
   private volatile boolean heartbeatsDisabledForTests = false;
@@ -261,6 +263,7 @@ public class DataNode extends Configured
   private String hostName;
   private DatanodeID id;
 
+  final private String fileDescriptorPassingDisabledReason;
   boolean isBlockTokenEnabled;
   BlockPoolTokenSecretManager blockPoolTokenSecretManager;
   private boolean hasAnyBlockPoolRegistered = false;
@@ -309,6 +312,24 @@ public class DataNode extends Configured
     this.getHdfsBlockLocationsEnabled = conf.getBoolean(
         DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
         DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
+
+    // Determine whether we should try to pass file descriptors to clients.
+    if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
+          DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) {
+      String reason = DomainSocket.getLoadingFailureReason();
+      if (reason != null) {
+        LOG.warn("File descriptor passing is disabled because " + reason);
+        this.fileDescriptorPassingDisabledReason = reason;
+      } else {
+        LOG.info("File descriptor passing is enabled.");
+        this.fileDescriptorPassingDisabledReason = null;
+      }
+    } else {
+      this.fileDescriptorPassingDisabledReason =
+          "File descriptor passing was not configured.";
+      LOG.debug(this.fileDescriptorPassingDisabledReason);
+    }
+
     try {
       hostName = getHostName(conf);
       LOG.info("Configured hostname is " + hostName);
@@ -537,6 +558,41 @@ public class DataNode extends Configured
     this.dataXceiverServer = new Daemon(threadGroup,
         new DataXceiverServer(tcpPeerServer, conf, this));
     this.threadGroup.setDaemon(true); // auto destroy when empty
+
+    DomainPeerServer domainPeerServer =
+        getDomainPeerServer(conf, streamingAddr.getPort());
+    if (domainPeerServer != null) {
+      this.localDataXceiverServer = new Daemon(threadGroup,
+          new DataXceiverServer(domainPeerServer, conf, this));
+      LOG.info("Listening on UNIX domain socket: " +
+          domainPeerServer.getBindPath());
+    }
+  }
+
+  static DomainPeerServer getDomainPeerServer(Configuration conf,
+      int port) throws IOException {
+    String domainSocketPath =
+        conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY);
+    if (domainSocketPath == null) {
+      if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
+            DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) {
+        LOG.warn("Although short-circuit local reads are configured, " +
+            "they are disabled because you didn't configure " +
+            DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY);
+      }
+      return null;
+    }
+    if (DomainSocket.getLoadingFailureReason() != null) {
+      throw new RuntimeException("Although a UNIX domain socket " +
+          "path is configured as " + domainSocketPath + ", we cannot " +
+          "start a localDataXceiverServer because " +
+          DomainSocket.getLoadingFailureReason());
+    }
+    DomainPeerServer domainPeerServer =
+        new DomainPeerServer(domainSocketPath, port);
+    domainPeerServer.setReceiveBufferSize(
+        HdfsConstants.DEFAULT_DATA_SOCKET_SIZE);
+    return domainPeerServer;
   }
 
   // calls specific to BP
@@ -1039,6 +1095,42 @@ public class DataNode extends Configured
     return info;
   }
 
+  @InterfaceAudience.LimitedPrivate("HDFS")
+  static public class ShortCircuitFdsUnsupportedException extends IOException {
+    private static final long serialVersionUID = 1L;
+    public ShortCircuitFdsUnsupportedException(String msg) {
+      super(msg);
+    }
+  }
+
+  @InterfaceAudience.LimitedPrivate("HDFS")
+  static public class ShortCircuitFdsVersionException extends IOException {
+    private static final long serialVersionUID = 1L;
+    public ShortCircuitFdsVersionException(String msg) {
+      super(msg);
+    }
+  }
+
+  FileInputStream[] requestShortCircuitFdsForRead(final ExtendedBlock blk,
+      final Token<BlockTokenIdentifier> token, int maxVersion)
+          throws ShortCircuitFdsUnsupportedException,
+            ShortCircuitFdsVersionException, IOException {
+    if (fileDescriptorPassingDisabledReason != null) {
+      throw new ShortCircuitFdsUnsupportedException(
+          fileDescriptorPassingDisabledReason);
+    }
+    checkBlockToken(blk, token, BlockTokenSecretManager.AccessMode.READ);
+    int blkVersion = CURRENT_BLOCK_FORMAT_VERSION;
+    if (maxVersion < blkVersion) {
+      throw new ShortCircuitFdsVersionException("Your client is too old " +
+          "to read this block!  Its format version is " +
+          blkVersion + ", but the highest format version you can read is " +
+          maxVersion);
+    }
+    metrics.incrBlocksGetLocalPathInfo();
+    return data.getShortCircuitFdsForRead(blk);
+  }
+
   @Override
   public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks,
       List<Token<BlockTokenIdentifier>> tokens) throws IOException,
@@ -1113,32 +1205,45 @@ public class DataNode extends Configured
     if (dataXceiverServer != null) {
       ((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
       this.dataXceiverServer.interrupt();
-
-      // wait for all data receiver threads to exit
-      if (this.threadGroup != null) {
-        int sleepMs = 2;
-        while (true) {
-          this.threadGroup.interrupt();
-          LOG.info("Waiting for threadgroup to exit, active threads is " +
-                   this.threadGroup.activeCount());
-          if (this.threadGroup.activeCount() == 0) {
-            break;
-          }
-          try {
-            Thread.sleep(sleepMs);
-          } catch (InterruptedException e) {}
-          sleepMs = sleepMs * 3 / 2; // exponential backoff
-          if (sleepMs > 1000) {
-            sleepMs = 1000;
-          }
-        }
-      }
-      // wait for dataXceiveServer to terminate
-      try {
-        this.dataXceiverServer.join();
-      } catch (InterruptedException ie) {
-      }
-    }
+    }
+    if (localDataXceiverServer != null) {
+      ((DataXceiverServer) this.localDataXceiverServer.getRunnable()).kill();
+      this.localDataXceiverServer.interrupt();
+    }
+    // wait for all data receiver threads to exit
+    if (this.threadGroup != null) {
+      int sleepMs = 2;
+      while (true) {
+        this.threadGroup.interrupt();
+        LOG.info("Waiting for threadgroup to exit, active threads is " +
+                 this.threadGroup.activeCount());
+        if (this.threadGroup.activeCount() == 0) {
+          break;
+        }
+        try {
+          Thread.sleep(sleepMs);
+        } catch (InterruptedException e) {}
+        sleepMs = sleepMs * 3 / 2; // exponential backoff
+        if (sleepMs > 1000) {
+          sleepMs = 1000;
+        }
+      }
+      this.threadGroup = null;
+    }
+    if (this.dataXceiverServer != null) {
+      // wait for dataXceiverServer to terminate
+      try {
+        this.dataXceiverServer.join();
+      } catch (InterruptedException ie) {
+      }
+    }
+    if (this.localDataXceiverServer != null) {
+      // wait for localDataXceiverServer to terminate
+      try {
+        this.localDataXceiverServer.join();
+      } catch (InterruptedException ie) {
+      }
+    }
 
     if(blockPoolManager != null) {
       try {
@@ -1523,6 +1628,9 @@ public class DataNode extends Configured
 
     // start dataXceiveServer
     dataXceiverServer.start();
+    if (localDataXceiverServer != null) {
+      localDataXceiverServer.start();
+    }
     ipcServer.start();
     startPlugins(conf);
   }
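The reworked shutdown path keeps the existing interrupt-and-wait loop, now shared by both xceiver servers: interrupt the thread group, then sleep with a multiplicative backoff capped at one second. The loop in isolation, as a plain-Java sketch:

    class ShutdownBackoffSketch {
      // Interrupt the group and wait for its threads, backing off 2ms, 3ms, 4ms, ...
      // with a 1 second cap between checks.
      static void waitForThreads(ThreadGroup group) {
        int sleepMs = 2;
        while (group.activeCount() > 0) {
          group.interrupt();
          try {
            Thread.sleep(sleepMs);
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return;
          }
          sleepMs = Math.min(sleepMs * 3 / 2, 1000);  // exponential backoff, 1s cap
        }
      }
    }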
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR;
+import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR_UNSUPPORTED;
 import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR_ACCESS_TOKEN;
 import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.SUCCESS;
 import static org.apache.hadoop.util.Time.now;
@@ -28,6 +29,8 @@ import java.io.BufferedOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.EOFException;
+import java.io.FileDescriptor;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InterruptedIOException;
@@ -60,11 +63,14 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
+import org.apache.hadoop.hdfs.server.datanode.DataNode.ShortCircuitFdsUnsupportedException;
+import org.apache.hadoop.hdfs.server.datanode.DataNode.ShortCircuitFdsVersionException;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.net.unix.DomainSocket;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.DataChecksum;
@@ -232,6 +238,68 @@ class DataXceiver extends Receiver implements Runnable {
     }
   }
 
+  @Override
+  public void requestShortCircuitFds(final ExtendedBlock blk,
+      final Token<BlockTokenIdentifier> token,
+      int maxVersion) throws IOException {
+    updateCurrentThreadName("Passing file descriptors for block " + blk);
+    BlockOpResponseProto.Builder bld = BlockOpResponseProto.newBuilder();
+    FileInputStream fis[] = null;
+    try {
+      if (peer.getDomainSocket() == null) {
+        throw new IOException("You cannot pass file descriptors over " +
+            "anything but a UNIX domain socket.");
+      }
+      fis = datanode.requestShortCircuitFdsForRead(blk, token, maxVersion);
+      bld.setStatus(SUCCESS);
+      bld.setShortCircuitAccessVersion(DataNode.CURRENT_BLOCK_FORMAT_VERSION);
+    } catch (ShortCircuitFdsVersionException e) {
+      bld.setStatus(ERROR_UNSUPPORTED);
+      bld.setShortCircuitAccessVersion(DataNode.CURRENT_BLOCK_FORMAT_VERSION);
+      bld.setMessage(e.getMessage());
+    } catch (ShortCircuitFdsUnsupportedException e) {
+      bld.setStatus(ERROR_UNSUPPORTED);
+      bld.setMessage(e.getMessage());
+    } catch (InvalidToken e) {
+      bld.setStatus(ERROR_ACCESS_TOKEN);
+      bld.setMessage(e.getMessage());
+    } catch (IOException e) {
+      bld.setStatus(ERROR);
+      bld.setMessage(e.getMessage());
+    }
+    try {
+      bld.build().writeDelimitedTo(socketOut);
+      if (fis != null) {
+        FileDescriptor fds[] = new FileDescriptor[fis.length];
+        for (int i = 0; i < fds.length; i++) {
+          fds[i] = fis[i].getFD();
+        }
+        byte buf[] = new byte[] { (byte)0 };
+        peer.getDomainSocket().
+            sendFileDescriptors(fds, buf, 0, buf.length);
+      }
+    } finally {
+      if (ClientTraceLog.isInfoEnabled()) {
+        DatanodeRegistration dnR = datanode.getDNRegistrationForBP(blk
+            .getBlockPoolId());
+        BlockSender.ClientTraceLog.info(String.format(
+            "src: %s, dest: %s, op: %s, blockid: %s, srvID: %s, " +
+            "success: %b",
+            "127.0.0.1",                   // src IP
+            "127.0.0.1",                   // dst IP
+            "REQUEST_SHORT_CIRCUIT_FDS",   // operation
+            blk.getBlockId(),              // block id
+            dnR.getStorageID(),
+            (fis != null)));
+      }
+      if (fis != null) {
+        IOUtils.cleanup(LOG, fis);
+      }
+    }
+  }
+
   @Override
   public void readBlock(final ExtendedBlock block,
       final Token<BlockTokenIdentifier> blockToken,
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset;
 
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.List;
@@ -386,4 +387,6 @@ public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
   public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks)
       throws IOException;
 
+  FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block)
+      throws IOException;
 }
@@ -75,6 +75,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.VolumeChoosingPolicy;
 import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
 import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
 import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
@@ -1668,6 +1669,26 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     return info;
   }
 
+  @Override // FsDatasetSpi
+  public FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block)
+      throws IOException {
+    File datafile = getBlockFile(block);
+    File metafile = FsDatasetUtil.getMetaFile(datafile,
+        block.getGenerationStamp());
+    FileInputStream fis[] = new FileInputStream[2];
+    boolean success = false;
+    try {
+      fis[0] = new FileInputStream(datafile);
+      fis[1] = new FileInputStream(metafile);
+      success = true;
+      return fis;
+    } finally {
+      if (!success) {
+        IOUtils.cleanup(null, fis);
+      }
+    }
+  }
+
   @Override // FsDatasetSpi
   public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks)
       throws IOException {
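getShortCircuitFdsForRead() must not leak the block-data stream if opening the metadata stream fails, hence the success flag and the cleanup in finally. The same pattern in generic form, using only java.io (the real code closes via IOUtils.cleanup):

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;

    class PairedOpenSketch {
      // Open several streams; close whatever was opened if any later open throws.
      static FileInputStream[] openBoth(File data, File meta) throws IOException {
        FileInputStream[] fis = new FileInputStream[2];
        boolean success = false;
        try {
          fis[0] = new FileInputStream(data);
          fis[1] = new FileInputStream(meta);
          success = true;
          return fis;
        } finally {
          if (!success) {
            for (FileInputStream f : fis) {
              if (f != null) {
                try { f.close(); } catch (IOException ignored) { }  // best-effort cleanup
              }
            }
          }
        }
      }
    }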
@@ -563,7 +563,7 @@ public class NamenodeFsck {
           conf, file, block, lblock.getBlockToken(), 0, -1, true, "fsck",
           TcpPeerServer.peerFromSocketAndKey(s, namenode.getRpcServer().
               getDataEncryptionKey()),
-          chosenNode);
+          chosenNode, null, false);
 
       }  catch (IOException ex) {
         // Put chosen node into dead list, continue
@@ -74,6 +74,8 @@ message DeleteBlockPoolResponseProto {
 * Gets the file information where block and its metadata is stored
 * block - block for which path information is being requested
 * token - block token
+*
+* This message is deprecated in favor of file descriptor passing.
 */
 message GetBlockLocalPathInfoRequestProto {
   required ExtendedBlockProto block = 1;
@@ -84,6 +86,8 @@ message GetBlockLocalPathInfoRequestProto {
 * block - block for which file path information is being returned
 * localPath - file path where the block data is stored
 * localMetaPath - file path where the block meta data is stored
+*
+* This message is deprecated in favor of file descriptor passing.
 */
 message GetBlockLocalPathInfoResponseProto {
   required ExtendedBlockProto block = 1;
@ -114,6 +114,16 @@ message OpBlockChecksumProto {
|
||||||
required BaseHeaderProto header = 1;
|
required BaseHeaderProto header = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message OpRequestShortCircuitAccessProto {
|
||||||
|
required BaseHeaderProto header = 1;
|
||||||
|
|
||||||
|
/** In order to get short-circuit access to block data, clients must set this
|
||||||
|
* to the highest version of the block data that they can understand.
|
||||||
|
* Currently 1 is the only version, but more versions may exist in the future
|
||||||
|
* if the on-disk format changes.
|
||||||
|
*/
|
||||||
|
required uint32 maxVersion = 2;
|
||||||
|
}
|
||||||
|
|
||||||
message PacketHeaderProto {
|
message PacketHeaderProto {
|
||||||
// All fields must be fixed-length!
|
// All fields must be fixed-length!
|
||||||
|
@ -132,6 +142,7 @@ enum Status {
|
||||||
ERROR_EXISTS = 4;
|
ERROR_EXISTS = 4;
|
||||||
ERROR_ACCESS_TOKEN = 5;
|
ERROR_ACCESS_TOKEN = 5;
|
||||||
CHECKSUM_OK = 6;
|
CHECKSUM_OK = 6;
|
||||||
|
ERROR_UNSUPPORTED = 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
message PipelineAckProto {
|
message PipelineAckProto {
|
||||||
|
@ -164,6 +175,16 @@ message BlockOpResponseProto {
|
||||||
|
|
||||||
/** explanatory text which may be useful to log on the client side */
|
/** explanatory text which may be useful to log on the client side */
|
||||||
optional string message = 5;
|
optional string message = 5;
|
||||||
|
|
||||||
|
/** If the server chooses to agree to the request of a client for
|
||||||
|
* short-circuit access, it will send a response message with the relevant
|
||||||
|
* file descriptors attached.
|
||||||
|
*
|
||||||
|
* In the body of the message, this version number will be set to the
|
||||||
|
* specific version number of the block data that the client is about to
|
||||||
|
* read.
|
||||||
|
*/
|
||||||
|
optional uint32 shortCircuitAccessVersion = 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
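Taken together, the protobuf hunks above define a small version handshake for short-circuit reads: the client sends OpRequestShortCircuitAccessProto with maxVersion set to the newest block-data format it understands, and the DataNode either answers SUCCESS, filling in shortCircuitAccessVersion and passing the file descriptors, or returns an error status such as the newly added ERROR_UNSUPPORTED. The sketch below is only an illustration, not code from this patch; the generated-class names under DataTransferProtos and the pre-built BaseHeaderProto are assumptions.

    // Hedged sketch: generated-class names and the header argument are assumed.
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BaseHeaderProto;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;

    class ShortCircuitVersionSketch {
      /** The client advertises the newest block-data format version it understands. */
      static OpRequestShortCircuitAccessProto buildRequest(BaseHeaderProto header) {
        return OpRequestShortCircuitAccessProto.newBuilder()
            .setHeader(header)     // block + token, built elsewhere
            .setMaxVersion(1)      // currently 1 is the only defined version
            .build();
      }

      /** SUCCESS carries the concrete version being served; anything else means fall back. */
      static int negotiatedVersion(BlockOpResponseProto response) {
        if (response.getStatus() == Status.SUCCESS
            && response.hasShortCircuitAccessVersion()) {
          return response.getShortCircuitAccessVersion();
        }
        // ERROR_UNSUPPORTED (new in this patch) or any other status: the caller
        // should fall back to a remote (TCP) block reader.
        return -1;
      }
    }

A real client would stream these messages over the data-transfer socket; only the version bookkeeping is shown here.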
@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A helper class to setup the cluster, and get to BlockReader and DataNode for a block.
|
* A helper class to setup the cluster, and get to BlockReader and DataNode for a block.
|
||||||
|
@ -156,7 +157,7 @@ public class BlockReaderTestUtil {
|
||||||
testBlock.getBlockToken(),
|
testBlock.getBlockToken(),
|
||||||
offset, lenToRead,
|
offset, lenToRead,
|
||||||
true, "BlockReaderTestUtil", TcpPeerServer.peerFromSocket(sock),
|
true, "BlockReaderTestUtil", TcpPeerServer.peerFromSocket(sock),
|
||||||
nodes[0]);
|
nodes[0], null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -168,4 +169,12 @@ public class BlockReaderTestUtil {
|
||||||
return cluster.getDataNode(ipcport);
|
return cluster.getDataNode(ipcport);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean haveRequiredResources() {
|
||||||
|
if (conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY) != null) {
|
||||||
|
// To use UNIX Domain sockets, we must have the native code loaded.
|
||||||
|
return DomainSocket.getLoadingFailureReason() == null;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
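haveRequiredResources() exists so that domain-socket tests can skip rather than fail on platforms where the native library is not loaded. A minimal sketch of the intended call site follows; "util" is assumed to be a BlockReaderTestUtil set up elsewhere, mirroring the Assume-based guard that TestParallelReadUtil adopts later in this patch.

    // Sketch only: "util" is an assumed BlockReaderTestUtil field.
    @Test
    public void testOverDomainSockets() throws IOException {
      // Skip (rather than fail) the test when the native code is unavailable.
      org.junit.Assume.assumeTrue(util.haveRequiredResources());
      // ... exercise short-circuit / domain-socket reads here ...
    }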
@ -2189,14 +2189,27 @@ public class MiniDFSCluster {
|
||||||
/**
|
/**
|
||||||
* Get file corresponding to a block
|
* Get file corresponding to a block
|
||||||
* @param storageDir storage directory
|
* @param storageDir storage directory
|
||||||
* @param blk block to be corrupted
|
* @param blk the block
|
||||||
* @return file corresponding to the block
|
* @return data file corresponding to the block
|
||||||
*/
|
*/
|
||||||
public static File getBlockFile(File storageDir, ExtendedBlock blk) {
|
public static File getBlockFile(File storageDir, ExtendedBlock blk) {
|
||||||
return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()),
|
return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()),
|
||||||
blk.getBlockName());
|
blk.getBlockName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the latest metadata file corresponding to a block
|
||||||
|
* @param storageDir storage directory
|
||||||
|
* @param blk the block
|
||||||
|
* @return metadata file corresponding to the block
|
||||||
|
*/
|
||||||
|
public static File getBlockMetadataFile(File storageDir, ExtendedBlock blk) {
|
||||||
|
return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()),
|
||||||
|
blk.getBlockName() + "_" + blk.getGenerationStamp() +
|
||||||
|
Block.METADATA_EXTENSION);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shut down a cluster if it is not null
|
* Shut down a cluster if it is not null
|
||||||
* @param cluster cluster reference or null
|
* @param cluster cluster reference or null
|
||||||
|
@ -2224,7 +2237,7 @@ public class MiniDFSCluster {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get files related to a block for a given datanode
|
* Get the block data file for a block from a given datanode
|
||||||
* @param dnIndex Index of the datanode to get block files for
|
* @param dnIndex Index of the datanode to get block files for
|
||||||
* @param block block for which corresponding files are needed
|
* @param block block for which corresponding files are needed
|
||||||
*/
|
*/
|
||||||
|
@ -2239,6 +2252,24 @@ public class MiniDFSCluster {
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the block metadata file for a block from a given datanode
|
||||||
|
*
|
||||||
|
* @param dnIndex Index of the datanode to get block files for
|
||||||
|
* @param block block for which corresponding files are needed
|
||||||
|
*/
|
||||||
|
public static File getBlockMetadataFile(int dnIndex, ExtendedBlock block) {
|
||||||
|
// Check for block file in the two storage directories of the datanode
|
||||||
|
for (int i = 0; i <=1 ; i++) {
|
||||||
|
File storageDir = MiniDFSCluster.getStorageDir(dnIndex, i);
|
||||||
|
File blockMetaFile = getBlockMetadataFile(storageDir, block);
|
||||||
|
if (blockMetaFile.exists()) {
|
||||||
|
return blockMetaFile;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Throw an exception if the MiniDFSCluster is not started with a single
|
* Throw an exception if the MiniDFSCluster is not started with a single
|
||||||
|
|
|
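The new MiniDFSCluster helpers resolve a block's on-disk data and metadata files purely by name. A minimal illustration of how the tests later in this patch use them, assuming a started cluster, a FileSystem handle "fs", and a one-block file at /a; the concrete file names shown in the comments are assumptions based on the usual blk_<id> / .meta layout, not something these hunks spell out.

    // Hedged illustration, not part of the patch.
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, new Path("/a"));
    File dataFile = MiniDFSCluster.getBlockFile(0, blk);           // e.g. .../finalized/blk_1073741825
    File metaFile = MiniDFSCluster.getBlockMetadataFile(0, blk);   // e.g. .../finalized/blk_1073741825_1001.meta

The BlockReaderLocal tests below open exactly these two files to hand raw FileInputStreams to the reader.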
@ -17,90 +17,333 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs;
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import java.io.EOFException;
|
||||||
import static org.junit.Assert.assertTrue;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.ChecksumException;
|
import org.apache.hadoop.fs.ChecksumException;
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.junit.AfterClass;
|
import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestBlockReaderLocal {
|
public class TestBlockReaderLocal {
|
||||||
static MiniDFSCluster cluster;
|
public static void assertArrayRegionsEqual(byte []buf1, int off1, byte []buf2,
|
||||||
static HdfsConfiguration conf;
|
int off2, int len) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
@BeforeClass
|
if (buf1[off1 + i] != buf2[off2 + i]) {
|
||||||
public static void setupCluster() throws IOException {
|
Assert.fail("arrays differ at byte " + i + ". " +
|
||||||
conf = new HdfsConfiguration();
|
"The first array has " + (int)buf1[off1 + i] +
|
||||||
|
", but the second array has " + (int)buf2[off2 + i]);
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
}
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
}
|
||||||
false);
|
|
||||||
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
|
|
||||||
UserGroupInformation.getCurrentUser().getShortUserName());
|
|
||||||
|
|
||||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
|
||||||
}
|
|
||||||
|
|
||||||
@AfterClass
|
|
||||||
public static void teardownCluster() {
|
|
||||||
cluster.shutdown();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that, in the case of an error, the position and limit of a ByteBuffer
|
* Similar to IOUtils#readFully(). Reads bytes in a loop.
|
||||||
* are left unchanged. This is not mandated by ByteBufferReadable, but clients
|
*
|
||||||
* of this class might immediately issue a retry on failure, so it's polite.
|
* @param reader The BlockReaderLocal to read bytes from
|
||||||
|
* @param buf The ByteBuffer to read into
|
||||||
|
* @param off The offset in the buffer to read into
|
||||||
|
* @param len The number of bytes to read.
|
||||||
|
*
|
||||||
|
* @throws IOException If it could not read the requested number of bytes
|
||||||
*/
|
*/
|
||||||
|
private static void readFully(BlockReaderLocal reader,
|
||||||
|
ByteBuffer buf, int off, int len) throws IOException {
|
||||||
|
int amt = len;
|
||||||
|
while (amt > 0) {
|
||||||
|
buf.limit(off + len);
|
||||||
|
buf.position(off);
|
||||||
|
long ret = reader.read(buf);
|
||||||
|
if (ret < 0) {
|
||||||
|
throw new EOFException( "Premature EOF from BlockReaderLocal " +
|
||||||
|
"after reading " + (len - amt) + " byte(s).");
|
||||||
|
}
|
||||||
|
amt -= ret;
|
||||||
|
off += ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static interface BlockReaderLocalTest {
|
||||||
|
final int TEST_LENGTH = 12345;
|
||||||
|
public void setup(File blockFile, boolean usingChecksums)
|
||||||
|
throws IOException;
|
||||||
|
public void doTest(BlockReaderLocal reader, byte original[])
|
||||||
|
throws IOException;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void runBlockReaderLocalTest(BlockReaderLocalTest test,
|
||||||
|
boolean checksum) throws IOException {
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
|
conf.setBoolean(DFSConfigKeys.
|
||||||
|
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, !checksum);
|
||||||
|
conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C");
|
||||||
|
FileInputStream dataIn = null, checkIn = null;
|
||||||
|
final Path TEST_PATH = new Path("/a");
|
||||||
|
final long RANDOM_SEED = 4567L;
|
||||||
|
BlockReaderLocal blockReaderLocal = null;
|
||||||
|
FSDataInputStream fsIn = null;
|
||||||
|
byte original[] = new byte[BlockReaderLocalTest.TEST_LENGTH];
|
||||||
|
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
FileSystem fs = cluster.getFileSystem();
|
||||||
|
DFSTestUtil.createFile(fs, TEST_PATH,
|
||||||
|
BlockReaderLocalTest.TEST_LENGTH, (short)1, RANDOM_SEED);
|
||||||
|
try {
|
||||||
|
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("unexpected InterruptedException during " +
|
||||||
|
"waitReplication: " + e);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
Assert.fail("unexpected TimeoutException during " +
|
||||||
|
"waitReplication: " + e);
|
||||||
|
}
|
||||||
|
fsIn = fs.open(TEST_PATH);
|
||||||
|
IOUtils.readFully(fsIn, original, 0,
|
||||||
|
BlockReaderLocalTest.TEST_LENGTH);
|
||||||
|
fsIn.close();
|
||||||
|
fsIn = null;
|
||||||
|
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
|
||||||
|
File dataFile = MiniDFSCluster.getBlockFile(0, block);
|
||||||
|
File metaFile = MiniDFSCluster.getBlockMetadataFile(0, block);
|
||||||
|
|
||||||
|
DatanodeID datanodeID = cluster.getDataNodes().get(0).getDatanodeId();
|
||||||
|
cluster.shutdown();
|
||||||
|
cluster = null;
|
||||||
|
test.setup(dataFile, checksum);
|
||||||
|
dataIn = new FileInputStream(dataFile);
|
||||||
|
checkIn = new FileInputStream(metaFile);
|
||||||
|
blockReaderLocal = new BlockReaderLocal(conf,
|
||||||
|
TEST_PATH.getName(), block, 0, -1,
|
||||||
|
dataIn, checkIn, datanodeID, checksum);
|
||||||
|
dataIn = null;
|
||||||
|
checkIn = null;
|
||||||
|
test.doTest(blockReaderLocal, original);
|
||||||
|
} finally {
|
||||||
|
if (fsIn != null) fsIn.close();
|
||||||
|
if (cluster != null) cluster.shutdown();
|
||||||
|
if (dataIn != null) dataIn.close();
|
||||||
|
if (checkIn != null) checkIn.close();
|
||||||
|
if (blockReaderLocal != null) blockReaderLocal.close(null, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class TestBlockReaderLocalImmediateClose
|
||||||
|
implements BlockReaderLocalTest {
|
||||||
|
@Override
|
||||||
|
public void setup(File blockFile, boolean usingChecksums)
|
||||||
|
throws IOException { }
|
||||||
|
@Override
|
||||||
|
public void doTest(BlockReaderLocal reader, byte original[])
|
||||||
|
throws IOException { }
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStablePositionAfterCorruptRead() throws Exception {
|
public void testBlockReaderLocalImmediateClose() throws IOException {
|
||||||
final short REPL_FACTOR = 1;
|
runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), true);
|
||||||
final long FILE_LENGTH = 512L;
|
runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), false);
|
||||||
cluster.waitActive();
|
}
|
||||||
FileSystem fs = cluster.getFileSystem();
|
|
||||||
|
private static class TestBlockReaderSimpleReads
|
||||||
|
implements BlockReaderLocalTest {
|
||||||
|
@Override
|
||||||
|
public void setup(File blockFile, boolean usingChecksums)
|
||||||
|
throws IOException { }
|
||||||
|
@Override
|
||||||
|
public void doTest(BlockReaderLocal reader, byte original[])
|
||||||
|
throws IOException {
|
||||||
|
byte buf[] = new byte[TEST_LENGTH];
|
||||||
|
reader.readFully(buf, 0, 512);
|
||||||
|
assertArrayRegionsEqual(original, 0, buf, 0, 512);
|
||||||
|
reader.readFully(buf, 512, 512);
|
||||||
|
assertArrayRegionsEqual(original, 512, buf, 512, 512);
|
||||||
|
reader.readFully(buf, 1024, 513);
|
||||||
|
assertArrayRegionsEqual(original, 1024, buf, 1024, 513);
|
||||||
|
reader.readFully(buf, 1537, 514);
|
||||||
|
assertArrayRegionsEqual(original, 1537, buf, 1537, 514);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBlockReaderSimpleReads() throws IOException {
|
||||||
|
runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), true);
|
||||||
|
}
|
||||||
|
|
||||||
Path path = new Path("/corrupted");
|
@Test
|
||||||
|
public void testBlockReaderSimpleReadsNoChecksum() throws IOException {
|
||||||
|
runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class TestBlockReaderLocalArrayReads2
|
||||||
|
implements BlockReaderLocalTest {
|
||||||
|
@Override
|
||||||
|
public void setup(File blockFile, boolean usingChecksums)
|
||||||
|
throws IOException { }
|
||||||
|
@Override
|
||||||
|
public void doTest(BlockReaderLocal reader, byte original[])
|
||||||
|
throws IOException {
|
||||||
|
byte buf[] = new byte[TEST_LENGTH];
|
||||||
|
reader.readFully(buf, 0, 10);
|
||||||
|
assertArrayRegionsEqual(original, 0, buf, 0, 10);
|
||||||
|
reader.readFully(buf, 10, 100);
|
||||||
|
assertArrayRegionsEqual(original, 10, buf, 10, 100);
|
||||||
|
reader.readFully(buf, 110, 700);
|
||||||
|
assertArrayRegionsEqual(original, 110, buf, 110, 700);
|
||||||
|
reader.readFully(buf, 810, 1); // from offset 810 to offset 811
|
||||||
|
reader.readFully(buf, 811, 5);
|
||||||
|
assertArrayRegionsEqual(original, 811, buf, 811, 5);
|
||||||
|
reader.readFully(buf, 816, 900); // skip from offset 816 to offset 1716
|
||||||
|
reader.readFully(buf, 1716, 5);
|
||||||
|
assertArrayRegionsEqual(original, 1716, buf, 1716, 5);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBlockReaderLocalArrayReads2() throws IOException {
|
||||||
|
runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(),
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
DFSTestUtil.createFile(fs, path, FILE_LENGTH, REPL_FACTOR, 12345L);
|
@Test
|
||||||
DFSTestUtil.waitReplication(fs, path, REPL_FACTOR);
|
public void testBlockReaderLocalArrayReads2NoChecksum()
|
||||||
|
throws IOException {
|
||||||
|
runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(),
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, path);
|
private static class TestBlockReaderLocalByteBufferReads
|
||||||
int blockFilesCorrupted = cluster.corruptBlockOnDataNodes(block);
|
implements BlockReaderLocalTest {
|
||||||
assertEquals("All replicas not corrupted", REPL_FACTOR, blockFilesCorrupted);
|
@Override
|
||||||
|
public void setup(File blockFile, boolean usingChecksums)
|
||||||
|
throws IOException { }
|
||||||
|
@Override
|
||||||
|
public void doTest(BlockReaderLocal reader, byte original[])
|
||||||
|
throws IOException {
|
||||||
|
ByteBuffer buf = ByteBuffer.wrap(new byte[TEST_LENGTH]);
|
||||||
|
readFully(reader, buf, 0, 10);
|
||||||
|
assertArrayRegionsEqual(original, 0, buf.array(), 0, 10);
|
||||||
|
readFully(reader, buf, 10, 100);
|
||||||
|
assertArrayRegionsEqual(original, 10, buf.array(), 10, 100);
|
||||||
|
readFully(reader, buf, 110, 700);
|
||||||
|
assertArrayRegionsEqual(original, 110, buf.array(), 110, 700);
|
||||||
|
reader.skip(1); // skip from offset 810 to offset 811
|
||||||
|
readFully(reader, buf, 811, 5);
|
||||||
|
assertArrayRegionsEqual(original, 811, buf.array(), 811, 5);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBlockReaderLocalByteBufferReads()
|
||||||
|
throws IOException {
|
||||||
|
runBlockReaderLocalTest(
|
||||||
|
new TestBlockReaderLocalByteBufferReads(), true);
|
||||||
|
}
|
||||||
|
|
||||||
FSDataInputStream dis = cluster.getFileSystem().open(path);
|
@Test
|
||||||
ByteBuffer buf = ByteBuffer.allocateDirect((int)FILE_LENGTH);
|
public void testBlockReaderLocalByteBufferReadsNoChecksum()
|
||||||
boolean sawException = false;
|
throws IOException {
|
||||||
try {
|
runBlockReaderLocalTest(
|
||||||
dis.read(buf);
|
new TestBlockReaderLocalByteBufferReads(), false);
|
||||||
} catch (ChecksumException ex) {
|
}
|
||||||
sawException = true;
|
|
||||||
|
private static class TestBlockReaderLocalReadCorruptStart
|
||||||
|
implements BlockReaderLocalTest {
|
||||||
|
boolean usingChecksums = false;
|
||||||
|
@Override
|
||||||
|
public void setup(File blockFile, boolean usingChecksums)
|
||||||
|
throws IOException {
|
||||||
|
RandomAccessFile bf = null;
|
||||||
|
this.usingChecksums = usingChecksums;
|
||||||
|
try {
|
||||||
|
bf = new RandomAccessFile(blockFile, "rw");
|
||||||
|
bf.write(new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0});
|
||||||
|
} finally {
|
||||||
|
if (bf != null) bf.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(sawException);
|
public void doTest(BlockReaderLocal reader, byte original[])
|
||||||
assertEquals(0, buf.position());
|
throws IOException {
|
||||||
assertEquals(buf.capacity(), buf.limit());
|
byte buf[] = new byte[TEST_LENGTH];
|
||||||
|
if (usingChecksums) {
|
||||||
dis = cluster.getFileSystem().open(path);
|
try {
|
||||||
buf.position(3);
|
reader.readFully(buf, 0, 10);
|
||||||
buf.limit(25);
|
Assert.fail("did not detect corruption");
|
||||||
sawException = false;
|
} catch (IOException e) {
|
||||||
try {
|
// expected
|
||||||
dis.read(buf);
|
}
|
||||||
} catch (ChecksumException ex) {
|
} else {
|
||||||
sawException = true;
|
reader.readFully(buf, 0, 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBlockReaderLocalReadCorruptStart()
|
||||||
|
throws IOException {
|
||||||
|
runBlockReaderLocalTest(new TestBlockReaderLocalReadCorruptStart(), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class TestBlockReaderLocalReadCorrupt
|
||||||
|
implements BlockReaderLocalTest {
|
||||||
|
boolean usingChecksums = false;
|
||||||
|
@Override
|
||||||
|
public void setup(File blockFile, boolean usingChecksums)
|
||||||
|
throws IOException {
|
||||||
|
RandomAccessFile bf = null;
|
||||||
|
this.usingChecksums = usingChecksums;
|
||||||
|
try {
|
||||||
|
bf = new RandomAccessFile(blockFile, "rw");
|
||||||
|
bf.seek(1539);
|
||||||
|
bf.write(new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0});
|
||||||
|
} finally {
|
||||||
|
if (bf != null) bf.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(sawException);
|
public void doTest(BlockReaderLocal reader, byte original[])
|
||||||
assertEquals(3, buf.position());
|
throws IOException {
|
||||||
assertEquals(25, buf.limit());
|
byte buf[] = new byte[TEST_LENGTH];
|
||||||
|
try {
|
||||||
|
reader.readFully(buf, 0, 10);
|
||||||
|
assertArrayRegionsEqual(original, 0, buf, 0, 10);
|
||||||
|
reader.readFully(buf, 10, 100);
|
||||||
|
assertArrayRegionsEqual(original, 10, buf, 10, 100);
|
||||||
|
reader.readFully(buf, 110, 700);
|
||||||
|
assertArrayRegionsEqual(original, 110, buf, 110, 700);
|
||||||
|
reader.skip(1); // skip from offset 810 to offset 811
|
||||||
|
reader.readFully(buf, 811, 5);
|
||||||
|
assertArrayRegionsEqual(original, 811, buf, 811, 5);
|
||||||
|
reader.readFully(buf, 816, 900);
|
||||||
|
if (usingChecksums) {
|
||||||
|
// We should detect the corruption when using a checksum file.
|
||||||
|
Assert.fail("did not detect corruption");
|
||||||
|
}
|
||||||
|
} catch (ChecksumException e) {
|
||||||
|
if (!usingChecksums) {
|
||||||
|
Assert.fail("didn't expect to get ChecksumException: not " +
|
||||||
|
"using checksums.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBlockReaderLocalReadCorrupt()
|
||||||
|
throws IOException {
|
||||||
|
runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), true);
|
||||||
|
runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,7 @@ public class TestClientBlockVerification {
|
||||||
util.getBlockReader(testBlock, 0, FILE_SIZE_K * 1024));
|
util.getBlockReader(testBlock, 0, FILE_SIZE_K * 1024));
|
||||||
util.readAndCheckEOS(reader, FILE_SIZE_K * 1024, true);
|
util.readAndCheckEOS(reader, FILE_SIZE_K * 1024, true);
|
||||||
verify(reader).sendReadResult(Status.CHECKSUM_OK);
|
verify(reader).sendReadResult(Status.CHECKSUM_OK);
|
||||||
reader.close(null);
|
reader.close(null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -76,7 +76,7 @@ public class TestClientBlockVerification {
|
||||||
// We asked the blockreader for the whole file, and only read
|
// We asked the blockreader for the whole file, and only read
|
||||||
// half of it, so no CHECKSUM_OK
|
// half of it, so no CHECKSUM_OK
|
||||||
verify(reader, never()).sendReadResult(Status.CHECKSUM_OK);
|
verify(reader, never()).sendReadResult(Status.CHECKSUM_OK);
|
||||||
reader.close(null);
|
reader.close(null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -92,7 +92,7 @@ public class TestClientBlockVerification {
|
||||||
// And read half the file
|
// And read half the file
|
||||||
util.readAndCheckEOS(reader, FILE_SIZE_K * 1024 / 2, true);
|
util.readAndCheckEOS(reader, FILE_SIZE_K * 1024 / 2, true);
|
||||||
verify(reader).sendReadResult(Status.CHECKSUM_OK);
|
verify(reader).sendReadResult(Status.CHECKSUM_OK);
|
||||||
reader.close(null);
|
reader.close(null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -111,7 +111,7 @@ public class TestClientBlockVerification {
|
||||||
util.getBlockReader(testBlock, startOffset, length));
|
util.getBlockReader(testBlock, startOffset, length));
|
||||||
util.readAndCheckEOS(reader, length, true);
|
util.readAndCheckEOS(reader, length, true);
|
||||||
verify(reader).sendReadResult(Status.CHECKSUM_OK);
|
verify(reader).sendReadResult(Status.CHECKSUM_OK);
|
||||||
reader.close(null);
|
reader.close(null, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestFileInputStreamCache {
|
||||||
|
static final Log LOG = LogFactory.getLog(TestFileInputStreamCache.class);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCreateAndDestroy() throws Exception {
|
||||||
|
FileInputStreamCache cache = new FileInputStreamCache(10, 1000);
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class TestFileDescriptorPair {
|
||||||
|
TemporarySocketDirectory dir = new TemporarySocketDirectory();
|
||||||
|
FileInputStream fis[];
|
||||||
|
|
||||||
|
public TestFileDescriptorPair() throws IOException {
|
||||||
|
fis = new FileInputStream[2];
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
String name = dir.getDir() + "/file" + i;
|
||||||
|
FileOutputStream fos = new FileOutputStream(name);
|
||||||
|
fos.write(1);
|
||||||
|
fos.close();
|
||||||
|
fis[i] = new FileInputStream(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public FileInputStream[] getFileInputStreams() {
|
||||||
|
return fis;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
IOUtils.cleanup(LOG, fis);
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean compareWith(FileInputStream other[]) {
|
||||||
|
if ((other == null) || (fis == null)) {
|
||||||
|
return other == fis;
|
||||||
|
}
|
||||||
|
if (fis.length != other.length) return false;
|
||||||
|
for (int i = 0; i < fis.length; i++) {
|
||||||
|
if (fis[i] != other[i]) return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddAndRetrieve() throws Exception {
|
||||||
|
FileInputStreamCache cache = new FileInputStreamCache(1, 1000000);
|
||||||
|
DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
|
||||||
|
"xyzzy", 8080, 9090, 7070);
|
||||||
|
ExtendedBlock block = new ExtendedBlock("poolid", 123);
|
||||||
|
TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
||||||
|
cache.put(dnId, block, pair.getFileInputStreams());
|
||||||
|
FileInputStream fis[] = cache.get(dnId, block);
|
||||||
|
Assert.assertTrue(pair.compareWith(fis));
|
||||||
|
pair.close();
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExpiry() throws Exception {
|
||||||
|
FileInputStreamCache cache = new FileInputStreamCache(1, 10);
|
||||||
|
DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
|
||||||
|
"xyzzy", 8080, 9090, 7070);
|
||||||
|
ExtendedBlock block = new ExtendedBlock("poolid", 123);
|
||||||
|
TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
||||||
|
cache.put(dnId, block, pair.getFileInputStreams());
|
||||||
|
Thread.sleep(cache.getExpiryTimeMs() * 100);
|
||||||
|
FileInputStream fis[] = cache.get(dnId, block);
|
||||||
|
Assert.assertNull(fis);
|
||||||
|
pair.close();
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEviction() throws Exception {
|
||||||
|
FileInputStreamCache cache = new FileInputStreamCache(1, 10000000);
|
||||||
|
DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost",
|
||||||
|
"xyzzy", 8080, 9090, 7070);
|
||||||
|
ExtendedBlock block = new ExtendedBlock("poolid", 123);
|
||||||
|
TestFileDescriptorPair pair = new TestFileDescriptorPair();
|
||||||
|
cache.put(dnId, block, pair.getFileInputStreams());
|
||||||
|
DatanodeID dnId2 = new DatanodeID("127.0.0.1", "localhost",
|
||||||
|
"xyzzy", 8081, 9091, 7071);
|
||||||
|
TestFileDescriptorPair pair2 = new TestFileDescriptorPair();
|
||||||
|
cache.put(dnId2, block, pair2.getFileInputStreams());
|
||||||
|
FileInputStream fis[] = cache.get(dnId, block);
|
||||||
|
Assert.assertNull(fis);
|
||||||
|
FileInputStream fis2[] = cache.get(dnId2, block);
|
||||||
|
Assert.assertTrue(pair2.compareWith(fis2));
|
||||||
|
pair.close();
|
||||||
|
cache.close();
|
||||||
|
}
|
||||||
|
}
|
|
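The tests above construct FileInputStreamCache with bare numbers; read together with getExpiryTimeMs(), the two constructor arguments appear to be a maximum entry count and an expiry time in milliseconds. A hedged restatement of the lifecycle follows, with dnId, block, and fds standing in for the values the tests build; the argument interpretation is inferred from the tests, not from a documented signature.

    // Interpretation inferred from the tests above.
    FileInputStreamCache cache = new FileInputStreamCache(
        16,       // assumed: maximum number of cached (datanode, block) entries
        30000);   // assumed: entry expiry in milliseconds
    cache.put(dnId, block, fds);                         // fds = FileInputStream[] {data, meta}
    FileInputStream[] cached = cache.get(dnId, block);   // null once expired or evicted
    cache.close();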
@ -17,14 +17,10 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs;
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestParallelRead extends TestParallelReadUtil {
|
public class TestParallelRead extends TestParallelReadUtil {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
static public void setupCluster() throws Exception {
|
static public void setupCluster() throws Exception {
|
||||||
setupCluster(DEFAULT_REPLICATION_FACTOR, new HdfsConfiguration());
|
setupCluster(DEFAULT_REPLICATION_FACTOR, new HdfsConfiguration());
|
||||||
|
@ -34,26 +30,4 @@ public class TestParallelRead extends TestParallelReadUtil {
|
||||||
static public void teardownCluster() throws Exception {
|
static public void teardownCluster() throws Exception {
|
||||||
TestParallelReadUtil.teardownCluster();
|
TestParallelReadUtil.teardownCluster();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Do parallel read several times with different number of files and threads.
|
|
||||||
*
|
|
||||||
* Note that while this is the only "test" in a junit sense, we're actually
|
|
||||||
* dispatching a lot more. Failures in the other methods (and other threads)
|
|
||||||
* need to be manually collected, which is inconvenient.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testParallelReadCopying() throws IOException {
|
|
||||||
runTestWorkload(new CopyingReadWorkerHelper());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testParallelReadByteBuffer() throws IOException {
|
|
||||||
runTestWorkload(new DirectReadWorkerHelper());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testParallelReadMixed() throws IOException {
|
|
||||||
runTestWorkload(new MixedWorkloadHelper());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,12 +32,18 @@ import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.junit.Assume;
|
||||||
|
import org.junit.Ignore;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Driver class for testing the use of DFSInputStream by multiple concurrent
|
* Driver class for testing the use of DFSInputStream by multiple concurrent
|
||||||
* readers, using the different read APIs. See subclasses for the actual test
|
* readers, using the different read APIs.
|
||||||
* cases.
|
*
|
||||||
|
* This class is marked as @Ignore so that junit doesn't try to execute the
|
||||||
|
* tests in here directly. They are executed from subclasses.
|
||||||
*/
|
*/
|
||||||
|
@Ignore
|
||||||
public class TestParallelReadUtil {
|
public class TestParallelReadUtil {
|
||||||
|
|
||||||
static final Log LOG = LogFactory.getLog(TestParallelReadUtil.class);
|
static final Log LOG = LogFactory.getLog(TestParallelReadUtil.class);
|
||||||
|
@ -386,4 +392,28 @@ public class TestParallelReadUtil {
|
||||||
util.shutdown();
|
util.shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do parallel read several times with different number of files and threads.
|
||||||
|
*
|
||||||
|
* Note that while this is the only "test" in a junit sense, we're actually
|
||||||
|
* dispatching a lot more. Failures in the other methods (and other threads)
|
||||||
|
* need to be manually collected, which is inconvenient.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testParallelReadCopying() throws IOException {
|
||||||
|
Assume.assumeTrue(util.haveRequiredResources());
|
||||||
|
runTestWorkload(new CopyingReadWorkerHelper());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParallelReadByteBuffer() throws IOException {
|
||||||
|
Assume.assumeTrue(util.haveRequiredResources());
|
||||||
|
runTestWorkload(new DirectReadWorkerHelper());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParallelReadMixed() throws IOException {
|
||||||
|
Assume.assumeTrue(util.haveRequiredResources());
|
||||||
|
runTestWorkload(new MixedWorkloadHelper());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
public class TestParallelShortCircuitRead extends TestParallelReadUtil {
|
||||||
|
private static TemporarySocketDirectory sockDir;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
static public void setupCluster() throws Exception {
|
||||||
|
sockDir = new TemporarySocketDirectory();
|
||||||
|
HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
|
conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY,
|
||||||
|
new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath());
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
|
conf.setBoolean(DFSConfigKeys.
|
||||||
|
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
|
||||||
|
DomainSocket.disableBindPathValidation();
|
||||||
|
setupCluster(1, conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
static public void teardownCluster() throws Exception {
|
||||||
|
sockDir.close();
|
||||||
|
TestParallelReadUtil.teardownCluster();
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,52 +17,32 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs;
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.File;
|
||||||
|
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestParallelLocalRead extends TestParallelReadUtil {
|
public class TestParallelShortCircuitReadNoChecksum extends TestParallelReadUtil {
|
||||||
|
private static TemporarySocketDirectory sockDir;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
static public void setupCluster() throws Exception {
|
static public void setupCluster() throws Exception {
|
||||||
|
sockDir = new TemporarySocketDirectory();
|
||||||
HdfsConfiguration conf = new HdfsConfiguration();
|
HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
|
conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY,
|
||||||
|
new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath());
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
conf.setBoolean(DFSConfigKeys.
|
||||||
false);
|
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, true);
|
||||||
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
|
DomainSocket.disableBindPathValidation();
|
||||||
UserGroupInformation.getCurrentUser().getShortUserName());
|
|
||||||
|
|
||||||
setupCluster(1, conf);
|
setupCluster(1, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterClass
|
@AfterClass
|
||||||
static public void teardownCluster() throws Exception {
|
static public void teardownCluster() throws Exception {
|
||||||
|
sockDir.close();
|
||||||
TestParallelReadUtil.teardownCluster();
|
TestParallelReadUtil.teardownCluster();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Do parallel read several times with different number of files and threads.
|
|
||||||
*
|
|
||||||
* Note that while this is the only "test" in a junit sense, we're actually
|
|
||||||
* dispatching a lot more. Failures in the other methods (and other threads)
|
|
||||||
* need to be manually collected, which is inconvenient.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testParallelReadCopying() throws IOException {
|
|
||||||
runTestWorkload(new CopyingReadWorkerHelper());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testParallelReadByteBuffer() throws IOException {
|
|
||||||
runTestWorkload(new DirectReadWorkerHelper());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testParallelReadMixed() throws IOException {
|
|
||||||
runTestWorkload(new MixedWorkloadHelper());
|
|
||||||
}
|
|
||||||
}
|
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
public class TestParallelUnixDomainRead extends TestParallelReadUtil {
|
||||||
|
private static TemporarySocketDirectory sockDir;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
static public void setupCluster() throws Exception {
|
||||||
|
sockDir = new TemporarySocketDirectory();
|
||||||
|
HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
|
conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY,
|
||||||
|
new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath());
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false);
|
||||||
|
DomainSocket.disableBindPathValidation();
|
||||||
|
setupCluster(1, conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
static public void teardownCluster() throws Exception {
|
||||||
|
sockDir.close();
|
||||||
|
TestParallelReadUtil.teardownCluster();
|
||||||
|
}
|
||||||
|
}
|
|
@ -20,9 +20,11 @@ package org.apache.hadoop.hdfs;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.EOFException;
|
import java.io.EOFException;
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.security.PrivilegedExceptionAction;
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
@ -30,21 +32,22 @@ import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
||||||
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
|
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol;
|
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
|
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.net.unix.DomainSocket;
|
||||||
|
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
import org.junit.AfterClass;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -55,8 +58,18 @@ import org.junit.Test;
|
||||||
* system.
|
* system.
|
||||||
*/
|
*/
|
||||||
public class TestShortCircuitLocalRead {
|
public class TestShortCircuitLocalRead {
|
||||||
|
private static TemporarySocketDirectory sockDir;
|
||||||
|
|
||||||
static final String DIR = "/" + TestShortCircuitLocalRead.class.getSimpleName() + "/";
|
@BeforeClass
|
||||||
|
public static void init() {
|
||||||
|
sockDir = new TemporarySocketDirectory();
|
||||||
|
DomainSocket.disableBindPathValidation();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void shutdown() throws IOException {
|
||||||
|
sockDir.close();
|
||||||
|
}
|
||||||
|
|
||||||
static final long seed = 0xDEADBEEFL;
|
static final long seed = 0xDEADBEEFL;
|
||||||
static final int blockSize = 5120;
|
static final int blockSize = 5120;
|
||||||
|
@ -81,7 +94,9 @@ public class TestShortCircuitLocalRead {
|
||||||
for (int idx = 0; idx < len; idx++) {
|
for (int idx = 0; idx < len; idx++) {
|
||||||
if (expected[from + idx] != actual[idx]) {
|
if (expected[from + idx] != actual[idx]) {
|
||||||
Assert.fail(message + " byte " + (from + idx) + " differs. expected "
|
Assert.fail(message + " byte " + (from + idx) + " differs. expected "
|
||||||
+ expected[from + idx] + " actual " + actual[idx]);
|
+ expected[from + idx] + " actual " + actual[idx] +
|
||||||
|
"\nexpected: " + StringUtils.byteToHexString(expected, from, from + len) +
|
||||||
|
"\nactual: " + StringUtils.byteToHexString(actual, 0, len));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,8 +185,9 @@ public class TestShortCircuitLocalRead {
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
|
||||||
ignoreChecksum);
|
ignoreChecksum);
|
||||||
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
|
conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY,
|
||||||
UserGroupInformation.getCurrentUser().getShortUserName());
|
new File(sockDir.getDir(),
|
||||||
|
"TestShortCircuitLocalRead.__PORT__.sock").getAbsolutePath());
|
||||||
if (simulatedStorage) {
|
if (simulatedStorage) {
|
||||||
SimulatedFSDataset.setFactory(conf);
|
SimulatedFSDataset.setFactory(conf);
|
||||||
}
|
}
|
||||||
|
@ -229,23 +245,17 @@ public class TestShortCircuitLocalRead {
|
||||||
doTestShortCircuitRead(false, 10*blockSize+100, 777);
|
doTestShortCircuitRead(false, 10*blockSize+100, 777);
|
||||||
doTestShortCircuitRead(true, 10*blockSize+100, 777);
|
doTestShortCircuitRead(true, 10*blockSize+100, 777);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetBlockLocalPathInfo() throws IOException, InterruptedException {
|
public void testDeprecatedGetBlockLocalPathInfoRpc()
|
||||||
|
throws IOException, InterruptedException {
|
||||||
final Configuration conf = new Configuration();
|
final Configuration conf = new Configuration();
|
||||||
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
|
|
||||||
"alloweduser1,alloweduser2");
|
|
||||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
|
||||||
.format(true).build();
|
.format(true).build();
|
||||||
cluster.waitActive();
|
cluster.waitActive();
|
||||||
final DataNode dn = cluster.getDataNodes().get(0);
|
|
||||||
FileSystem fs = cluster.getFileSystem();
|
FileSystem fs = cluster.getFileSystem();
|
||||||
try {
|
try {
|
||||||
DFSTestUtil.createFile(fs, new Path("/tmp/x"), 16, (short) 1, 23);
|
DFSTestUtil.createFile(fs, new Path("/tmp/x"), 16, (short) 1, 23);
|
||||||
UserGroupInformation aUgi1 =
|
|
||||||
UserGroupInformation.createRemoteUser("alloweduser1");
|
|
||||||
UserGroupInformation aUgi2 =
|
|
||||||
UserGroupInformation.createRemoteUser("alloweduser2");
|
|
||||||
LocatedBlocks lb = cluster.getNameNode().getRpcServer()
|
LocatedBlocks lb = cluster.getNameNode().getRpcServer()
|
||||||
.getBlockLocations("/tmp/x", 0, 16);
|
.getBlockLocations("/tmp/x", 0, 16);
|
||||||
// Create a new block object, because the block inside LocatedBlock at
|
// Create a new block object, because the block inside LocatedBlock at
|
||||||
|
@ -253,51 +263,11 @@ public class TestShortCircuitLocalRead {
|
||||||
ExtendedBlock blk = new ExtendedBlock(lb.get(0).getBlock());
|
ExtendedBlock blk = new ExtendedBlock(lb.get(0).getBlock());
|
||||||
Token<BlockTokenIdentifier> token = lb.get(0).getBlockToken();
|
Token<BlockTokenIdentifier> token = lb.get(0).getBlockToken();
|
||||||
final DatanodeInfo dnInfo = lb.get(0).getLocations()[0];
|
final DatanodeInfo dnInfo = lb.get(0).getLocations()[0];
|
||||||
ClientDatanodeProtocol proxy = aUgi1
|
ClientDatanodeProtocol proxy =
|
||||||
.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
|
DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, 60000, false);
|
||||||
@Override
|
|
||||||
public ClientDatanodeProtocol run() throws Exception {
|
|
||||||
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf,
|
|
||||||
60000, false);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// This should succeed
|
|
||||||
BlockLocalPathInfo blpi = proxy.getBlockLocalPathInfo(blk, token);
|
|
||||||
Assert.assertEquals(
|
|
||||||
DataNodeTestUtils.getFSDataset(dn).getBlockLocalPathInfo(blk).getBlockPath(),
|
|
||||||
blpi.getBlockPath());
|
|
||||||
|
|
||||||
// Try with the other allowed user
|
|
||||||
proxy = aUgi2
|
|
||||||
.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
|
|
||||||
@Override
|
|
||||||
public ClientDatanodeProtocol run() throws Exception {
|
|
||||||
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf,
|
|
||||||
60000, false);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// This should succeed as well
|
|
||||||
blpi = proxy.getBlockLocalPathInfo(blk, token);
|
|
||||||
Assert.assertEquals(
|
|
||||||
DataNodeTestUtils.getFSDataset(dn).getBlockLocalPathInfo(blk).getBlockPath(),
|
|
||||||
blpi.getBlockPath());
|
|
||||||
|
|
||||||
// Now try with a disallowed user
|
|
||||||
UserGroupInformation bUgi = UserGroupInformation
|
|
||||||
.createRemoteUser("notalloweduser");
|
|
||||||
proxy = bUgi
|
|
||||||
.doAs(new PrivilegedExceptionAction<ClientDatanodeProtocol>() {
|
|
||||||
@Override
|
|
||||||
public ClientDatanodeProtocol run() throws Exception {
|
|
||||||
return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf,
|
|
||||||
60000, false);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
try {
|
try {
|
||||||
proxy.getBlockLocalPathInfo(blk, token);
|
proxy.getBlockLocalPathInfo(blk, token);
|
||||||
Assert.fail("The call should have failed as " + bUgi.getShortUserName()
|
Assert.fail("The call should have failed as this user "
|
||||||
+ " is not allowed to call getBlockLocalPathInfo");
|
+ " is not allowed to call getBlockLocalPathInfo");
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
Assert.assertTrue(ex.getMessage().contains(
|
Assert.assertTrue(ex.getMessage().contains(
|
||||||
|
@ -315,8 +285,6 @@ public class TestShortCircuitLocalRead {
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
|
||||||
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
|
|
||||||
UserGroupInformation.getCurrentUser().getShortUserName());
|
|
||||||
if (simulatedStorage) {
|
if (simulatedStorage) {
|
||||||
SimulatedFSDataset.setFactory(conf);
|
SimulatedFSDataset.setFactory(conf);
|
||||||
}
|
}
|
||||||
|
@ -354,6 +322,86 @@ public class TestShortCircuitLocalRead {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHandleTruncatedBlockFile() throws IOException {
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
HdfsConfiguration conf = new HdfsConfiguration();
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
|
||||||
|
conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C");
|
||||||
|
final Path TEST_PATH = new Path("/a");
|
||||||
|
final Path TEST_PATH2 = new Path("/b");
|
||||||
|
final long RANDOM_SEED = 4567L;
|
||||||
|
final long RANDOM_SEED2 = 4568L;
|
||||||
|
FSDataInputStream fsIn = null;
|
||||||
|
final int TEST_LENGTH = 3456;
|
||||||
|
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
FileSystem fs = cluster.getFileSystem();
|
||||||
|
DFSTestUtil.createFile(fs, TEST_PATH,
|
||||||
|
TEST_LENGTH, (short)1, RANDOM_SEED);
|
||||||
|
DFSTestUtil.createFile(fs, TEST_PATH2,
|
||||||
|
TEST_LENGTH, (short)1, RANDOM_SEED2);
|
||||||
|
fsIn = cluster.getFileSystem().open(TEST_PATH2);
|
||||||
|
byte original[] = new byte[TEST_LENGTH];
|
||||||
|
IOUtils.readFully(fsIn, original, 0, TEST_LENGTH);
|
||||||
|
fsIn.close();
|
||||||
|
fsIn = null;
|
||||||
|
try {
|
||||||
|
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Assert.fail("unexpected InterruptedException during " +
|
||||||
|
"waitReplication: " + e);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
Assert.fail("unexpected TimeoutException during " +
|
||||||
|
"waitReplication: " + e);
|
||||||
|
}
|
||||||
|
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
|
||||||
|
File dataFile = MiniDFSCluster.getBlockFile(0, block);
|
||||||
|
cluster.shutdown();
|
||||||
|
cluster = null;
|
||||||
|
RandomAccessFile raf = null;
|
||||||
|
try {
|
||||||
|
raf = new RandomAccessFile(dataFile, "rw");
|
||||||
|
raf.setLength(0);
|
||||||
|
} finally {
|
||||||
|
if (raf != null) raf.close();
|
||||||
|
}
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(false).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
fs = cluster.getFileSystem();
|
||||||
|
fsIn = fs.open(TEST_PATH);
|
||||||
|
try {
|
||||||
|
byte buf[] = new byte[100];
|
||||||
|
fsIn.seek(2000);
|
||||||
|
fsIn.readFully(buf, 0, buf.length);
|
||||||
|
Assert.fail("shouldn't be able to read from corrupt 0-length " +
|
||||||
|
"block file.");
|
||||||
|
} catch (IOException e) {
|
||||||
|
DFSClient.LOG.error("caught exception ", e);
|
||||||
|
}
|
||||||
|
fsIn.close();
|
||||||
|
fsIn = null;
|
||||||
|
|
||||||
|
// We should still be able to read the other file.
|
||||||
|
// This is important because it indicates that we detected that the
|
||||||
|
// previous block was corrupt, rather than blaming the problem on
|
||||||
|
// communication.
|
||||||
|
fsIn = fs.open(TEST_PATH2);
|
||||||
|
byte buf[] = new byte[original.length];
|
||||||
|
fsIn.readFully(buf, 0, buf.length);
|
||||||
|
TestBlockReaderLocal.assertArrayRegionsEqual(original, 0, buf, 0,
|
||||||
|
original.length);
|
||||||
|
fsIn.close();
|
||||||
|
fsIn = null;
|
||||||
|
} finally {
|
||||||
|
if (fsIn != null) fsIn.close();
|
||||||
|
if (cluster != null) cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test to run benchmarks between shortcircuit read vs regular read with
|
* Test to run benchmarks between shortcircuit read vs regular read with
|
||||||
|
|
|
@ -148,7 +148,7 @@ public class TestBlockTokenWithDFS {
|
||||||
blockReader = BlockReaderFactory.newBlockReader(
|
blockReader = BlockReaderFactory.newBlockReader(
|
||||||
conf, file, block, lblock.getBlockToken(), 0, -1,
|
conf, file, block, lblock.getBlockToken(), 0, -1,
|
||||||
true, "TestBlockTokenWithDFS", TcpPeerServer.peerFromSocket(s),
|
true, "TestBlockTokenWithDFS", TcpPeerServer.peerFromSocket(s),
|
||||||
nodes[0]);
|
nodes[0], null, false);
|
||||||
|
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
if (ex instanceof InvalidBlockTokenException) {
|
if (ex instanceof InvalidBlockTokenException) {
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
package org.apache.hadoop.hdfs.server.datanode;
|
package org.apache.hadoop.hdfs.server.datanode;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
@ -962,6 +963,12 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
|
||||||
public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) {
|
public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block)
|
||||||
|
throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks)
|
public HdfsBlocksMetadata getHdfsBlocksMetadata(List<ExtendedBlock> blocks)
|
||||||
|
|
|
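SimulatedFSDataset simply rejects the new call, since it keeps no real block files. As a rough sketch of the contract a concrete FsDatasetSpi implementation would satisfy (FsDatasetImpl itself is not shown in this part of the diff, and getDataFile()/getMetaFile() below are hypothetical helpers): return two freshly opened streams, which the client-side code in this patch apparently treats as block data followed by checksum metadata, matching the dataIn/checkIn arguments of the BlockReaderLocal constructor seen earlier.

    // Hypothetical sketch of a concrete implementation; helper names are invented.
    @Override
    public FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block)
        throws IOException {
      File dataFile = getDataFile(block);   // hypothetical helper
      File metaFile = getMetaFile(block);   // hypothetical helper
      return new FileInputStream[] {
          new FileInputStream(dataFile),
          new FileInputStream(metaFile)
      };
    }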
@ -32,6 +32,7 @@ import java.util.Map;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.BlockReader;
|
||||||
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
import org.apache.hadoop.hdfs.BlockReaderFactory;
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
|
@ -281,11 +282,11 @@ public class TestDataNodeVolumeFailure {
|
||||||
String file = BlockReaderFactory.getFileName(targetAddr,
|
String file = BlockReaderFactory.getFileName(targetAddr,
|
||||||
"test-blockpoolid",
|
"test-blockpoolid",
|
||||||
block.getBlockId());
|
block.getBlockId());
|
||||||
BlockReaderFactory.newBlockReader(conf, file, block,
|
BlockReader blockReader =
|
||||||
|
BlockReaderFactory.newBlockReader(conf, file, block,
|
||||||
lblock.getBlockToken(), 0, -1, true, "TestDataNodeVolumeFailure",
|
lblock.getBlockToken(), 0, -1, true, "TestDataNodeVolumeFailure",
|
||||||
TcpPeerServer.peerFromSocket(s), datanode);
|
TcpPeerServer.peerFromSocket(s), datanode, null, false);
|
||||||
|
blockReader.close(null, null);
|
||||||
// nothing - if it fails - it will throw and exception
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|