Merge r1401869 through r1402273 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1402278 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 2012-10-25 19:09:34 +00:00
commit 40d2b6f308
18 changed files with 395 additions and 180 deletions

View File

@@ -150,6 +150,8 @@ Trunk (Unreleased)
 HDFS-4052. BlockManager#invalidateWork should print log outside the lock.
 (Jing Zhao via suresh)
+HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh)
 OPTIMIZATIONS
 BUG FIXES
@@ -233,6 +235,9 @@ Trunk (Unreleased)
 HDFS-2434. TestNameNodeMetrics.testCorruptBlock fails intermittently.
 (Jing Zhao via suresh)
+HDFS-4067. TestUnderReplicatedBlocks intermittently fails due to
+ReplicaAlreadyExistsException. (Jing Zhao via suresh)
 BREAKDOWN OF HDFS-3077 SUBTASKS
 HDFS-3077. Quorum-based protocol for reading and writing edit logs.
@@ -413,6 +418,9 @@ Release 2.0.3-alpha - Unreleased
 HDFS-4099. Clean up replication code and add more javadoc. (szetszwo)
+HDFS-4107. Add utility methods for casting INode to INodeFile and
+INodeFileUnderConstruction. (szetszwo)
 OPTIMIZATIONS
 BUG FIXES
@@ -488,6 +496,10 @@ Release 2.0.3-alpha - Unreleased
 HDFS-4022. Replication not happening for appended block.
 (Vinay via umamahesh)
+HDFS-3948. Do not use hflush in TestWebHDFS.testNamenodeRestart() since the
+out stream returned by WebHdfsFileSystem does not support it. (Jing Zhao
+via szetszwo)
 Release 2.0.2-alpha - 2012-09-07
 INCOMPATIBLE CHANGES

View File

@@ -171,8 +171,7 @@ class JNStorage extends Storage {
 void format(NamespaceInfo nsInfo) throws IOException {
 setStorageInfo(nsInfo);
-LOG.info("Formatting journal storage directory " +
-sd + " with nsid: " + getNamespaceID());
+LOG.info("Formatting journal " + sd + " with nsid: " + getNamespaceID());
 // Unlock the directory before formatting, because we will
 // re-analyze it after format(). The analyzeStorage() call
 // below is reponsible for re-locking it. This is a no-op

View File

@@ -1012,7 +1012,7 @@ public class FSDirectory implements Closeable {
 int i = 0;
 int totalBlocks = 0;
 for(String src : srcs) {
-INodeFile srcInode = getFileINode(src);
+INodeFile srcInode = (INodeFile)getINode(src);
 allSrcInodes[i++] = srcInode;
 totalBlocks += srcInode.blocks.length;
 }
@@ -1300,25 +1300,13 @@ public class FSDirectory implements Closeable {
 }
 }
-/**
- * Get {@link INode} associated with the file.
- */
-INodeFile getFileINode(String src) throws UnresolvedLinkException {
-INode inode = getINode(src);
-if (inode == null || inode.isDirectory())
-return null;
-assert !inode.isLink();
-return (INodeFile) inode;
-}
 /**
  * Get {@link INode} associated with the file / directory.
  */
 public INode getINode(String src) throws UnresolvedLinkException {
 readLock();
 try {
-INode iNode = rootDir.getNode(src, true);
-return iNode;
+return rootDir.getNode(src, true);
 } finally {
 readUnlock();
 }

View File

@@ -477,8 +477,8 @@ public class FSEditLogLoader {
 Lease lease = fsNamesys.leaseManager.getLease(
 reassignLeaseOp.leaseHolder);
 INodeFileUnderConstruction pendingFile =
-(INodeFileUnderConstruction) fsDir.getFileINode(
-reassignLeaseOp.path);
+INodeFileUnderConstruction.valueOf(
+fsDir.getINode(reassignLeaseOp.path), reassignLeaseOp.path);
 fsNamesys.reassignLeaseInternal(lease,
 reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile);
 break;

View File

@@ -365,14 +365,7 @@ class FSImageFormat {
 // verify that file exists in namespace
 String path = cons.getLocalName();
-INode old = fsDir.getFileINode(path);
-if (old == null) {
-throw new IOException("Found lease for non-existent file " + path);
-}
-if (old.isDirectory()) {
-throw new IOException("Found lease for directory " + path);
-}
-INodeFile oldnode = (INodeFile) old;
+INodeFile oldnode = INodeFile.valueOf(fsDir.getINode(path), path);
 fsDir.replaceNode(path, oldnode, cons);
 namesystem.leaseManager.addLease(cons.getClientName(), path);
 }

View File

@@ -1270,11 +1270,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 }
 long now = now();
-INodeFile inode = dir.getFileINode(src);
-if (inode == null) {
-throw new FileNotFoundException("File does not exist: " + src);
-}
-assert !inode.isLink();
+final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
 if (doAccessTime && isAccessTimeSupported()) {
 if (now <= inode.getAccessTime() + getAccessTimePrecision()) {
 // if we have to set access time but we only have the readlock, then
@@ -1390,28 +1386,27 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 // we put the following prerequisite for the operation
 // replication and blocks sizes should be the same for ALL the blocks
 // check the target
-INode inode = dir.getFileINode(target);
-if(inode == null) {
-throw new IllegalArgumentException("concat: trg file doesn't exist");
-}
-if(inode.isUnderConstruction()) {
-throw new IllegalArgumentException("concat: trg file is uner construction");
-}
-INodeFile trgInode = (INodeFile) inode;
-// per design trg shouldn't be empty and all the blocks same size
+final INodeFile trgInode = INodeFile.valueOf(dir.getINode(target), target);
+if(trgInode.isUnderConstruction()) {
+throw new HadoopIllegalArgumentException("concat: target file "
++ target + " is under construction");
+}
+// per design target shouldn't be empty and all the blocks same size
 if(trgInode.blocks.length == 0) {
-throw new IllegalArgumentException("concat: "+ target + " file is empty");
+throw new HadoopIllegalArgumentException("concat: target file "
++ target + " is empty");
 }
 long blockSize = trgInode.getPreferredBlockSize();
 // check the end block to be full
 if(blockSize != trgInode.blocks[trgInode.blocks.length-1].getNumBytes()) {
-throw new IllegalArgumentException(target + " blocks size should be the same");
+throw new HadoopIllegalArgumentException("The last block in " + target
++ " is not full; last block size = "
++ trgInode.blocks[trgInode.blocks.length-1].getNumBytes()
++ " but file block size = " + blockSize);
 }
 si.add(trgInode);
@@ -1424,21 +1419,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 if(i==srcs.length-1)
 endSrc=true;
-INodeFile srcInode = dir.getFileINode(src);
+final INodeFile srcInode = INodeFile.valueOf(dir.getINode(src), src);
 if(src.isEmpty()
-|| srcInode == null
 || srcInode.isUnderConstruction()
 || srcInode.blocks.length == 0) {
-throw new IllegalArgumentException("concat: file " + src +
-" is invalid or empty or underConstruction");
+throw new HadoopIllegalArgumentException("concat: source file " + src
++ " is invalid or empty or underConstruction");
 }
 // check replication and blocks size
 if(repl != srcInode.getFileReplication()) {
-throw new IllegalArgumentException(src + " and " + target + " " +
-"should have same replication: "
-+ repl + " vs. " + srcInode.getFileReplication());
+throw new HadoopIllegalArgumentException("concat: the soruce file "
++ src + " and the target file " + target
++ " should have the same replication: source replication is "
++ srcInode.getBlockReplication()
++ " but target replication is " + repl);
 }
 //boolean endBlock=false;
@@ -1448,8 +1443,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 if(endSrc)
 idx = srcInode.blocks.length-2; // end block of endSrc is OK not to be full
 if(idx >= 0 && srcInode.blocks[idx].getNumBytes() != blockSize) {
-throw new IllegalArgumentException("concat: blocks sizes of " +
-src + " and " + target + " should all be the same");
+throw new HadoopIllegalArgumentException("concat: the soruce file "
++ src + " and the target file " + target
++ " should have the same blocks sizes: target block size is "
++ blockSize + " but the size of source block " + idx + " is "
++ srcInode.blocks[idx].getNumBytes());
 }
 si.add(srcInode);
@@ -1458,7 +1456,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 // make sure no two files are the same
 if(si.size() < srcs.length+1) { // trg + srcs
 // it means at least two files are the same
-throw new IllegalArgumentException("at least two files are the same");
+throw new HadoopIllegalArgumentException(
+"concat: at least two of the source files are the same");
 }
 if(NameNode.stateChangeLog.isDebugEnabled()) {
@@ -1797,13 +1796,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 }
 try {
-INodeFile myFile = dir.getFileINode(src);
-try {
-blockManager.verifyReplication(src, replication, clientMachine);
-} catch(IOException e) {
-throw new IOException("failed to create "+e.getMessage());
-}
+blockManager.verifyReplication(src, replication, clientMachine);
 boolean create = flag.contains(CreateFlag.CREATE);
+final INode myFile = dir.getINode(src);
 if (myFile == null) {
 if (!create) {
 throw new FileNotFoundException("failed to overwrite or append to non-existent file "
@@ -1829,8 +1824,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
 if (append && myFile != null) {
+final INodeFile f = INodeFile.valueOf(myFile, src);
 return prepareFileForWrite(
-src, myFile, holder, clientMachine, clientNode, true);
+src, f, holder, clientMachine, clientNode, true);
 } else {
 // Now we can add the name to the filesystem. This file has no
 // blocks associated with it.
@@ -1925,11 +1921,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 throw new IOException("Invalid file name: " + src);
 }
-INode inode = dir.getFileINode(src);
-if (inode == null) {
-throw new FileNotFoundException("File not found " + src);
-}
+final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src);
 if (!inode.isUnderConstruction()) {
 return true;
 }
@@ -2330,35 +2322,32 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 private INodeFileUnderConstruction checkLease(String src, String holder)
 throws LeaseExpiredException, UnresolvedLinkException {
 assert hasReadOrWriteLock();
-INodeFile file = dir.getFileINode(src);
-checkLease(src, holder, file);
-return (INodeFileUnderConstruction)file;
+return checkLease(src, holder, dir.getINode(src));
 }
-private void checkLease(String src, String holder, INode file)
-throws LeaseExpiredException {
+private INodeFileUnderConstruction checkLease(String src, String holder,
+INode file) throws LeaseExpiredException {
 assert hasReadOrWriteLock();
-if (file == null || file.isDirectory()) {
+if (file == null || !(file instanceof INodeFile)) {
 Lease lease = leaseManager.getLease(holder);
-throw new LeaseExpiredException("No lease on " + src +
-" File does not exist. " +
-(lease != null ? lease.toString() :
-"Holder " + holder +
-" does not have any open files."));
+throw new LeaseExpiredException(
+"No lease on " + src + ": File does not exist. "
++ (lease != null ? lease.toString()
+: "Holder " + holder + " does not have any open files."));
 }
 if (!file.isUnderConstruction()) {
 Lease lease = leaseManager.getLease(holder);
-throw new LeaseExpiredException("No lease on " + src +
-" File is not open for writing. " +
-(lease != null ? lease.toString() :
-"Holder " + holder +
-" does not have any open files."));
+throw new LeaseExpiredException(
+"No lease on " + src + ": File is not open for writing. "
++ (lease != null ? lease.toString()
+: "Holder " + holder + " does not have any open files."));
 }
 INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction)file;
 if (holder != null && !pendingFile.getClientName().equals(holder)) {
 throw new LeaseExpiredException("Lease mismatch on " + src + " owned by "
 + pendingFile.getClientName() + " but is accessed by " + holder);
 }
+return pendingFile;
 }
 /**
@@ -2400,15 +2389,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 try {
 pendingFile = checkLease(src, holder);
 } catch (LeaseExpiredException lee) {
-INodeFile file = dir.getFileINode(src);
-if (file != null && !file.isUnderConstruction()) {
+final INode inode = dir.getINode(src);
+if (inode != null && inode instanceof INodeFile && !inode.isUnderConstruction()) {
 // This could be a retry RPC - i.e the client tried to close
 // the file, but missed the RPC response. Thus, it is trying
 // again to close the file. If the file still exists and
 // the client's view of the last block matches the actual
 // last block, then we'll treat it as a successful close.
 // See HDFS-3031.
-Block realLastBlock = file.getLastBlock();
+final Block realLastBlock = ((INodeFile)inode).getLastBlock();
 if (Block.matchingIdAndGenStamp(last, realLastBlock)) {
 NameNode.stateChangeLog.info("DIR* NameSystem.completeFile: " +
 "received request from " + holder + " to complete file " + src +
@@ -2994,23 +2983,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 LOG.info("Recovering lease=" + lease + ", src=" + src);
 assert !isInSafeMode();
 assert hasWriteLock();
-INodeFile iFile = dir.getFileINode(src);
-if (iFile == null) {
-final String message = "DIR* NameSystem.internalReleaseLease: "
-+ "attempt to release a create lock on "
-+ src + " file does not exist.";
-NameNode.stateChangeLog.warn(message);
-throw new IOException(message);
-}
-if (!iFile.isUnderConstruction()) {
-final String message = "DIR* NameSystem.internalReleaseLease: "
-+ "attempt to release a create lock on "
-+ src + " but file is already closed.";
-NameNode.stateChangeLog.warn(message);
-throw new IOException(message);
-}
-INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction) iFile;
+final INodeFileUnderConstruction pendingFile
+= INodeFileUnderConstruction.valueOf(dir.getINode(src), src);
 int nrBlocks = pendingFile.numBlocks();
 BlockInfo[] blocks = pendingFile.getBlocks();
@@ -4318,17 +4293,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 try {
 for (Lease lease : leaseManager.getSortedLeases()) {
 for (String path : lease.getPaths()) {
-INode node;
+final INodeFileUnderConstruction cons;
 try {
-node = dir.getFileINode(path);
+cons = INodeFileUnderConstruction.valueOf(dir.getINode(path), path);
 } catch (UnresolvedLinkException e) {
 throw new AssertionError("Lease files should reside on this FS");
-} catch (IOException e) {
-throw new RuntimeException(e);
 }
-assert node != null : "Found a lease for nonexisting file.";
-assert node.isUnderConstruction() :
-"Found a lease for file " + path + " that is not under construction." +
-" lease=" + lease;
-INodeFileUnderConstruction cons = (INodeFileUnderConstruction) node;
 BlockInfo[] blocks = cons.getBlocks();
 if(blocks == null)
 continue;
@@ -4911,21 +4883,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 for (Lease lease : leaseManager.getSortedLeases()) {
 for(String path : lease.getPaths()) {
 // verify that path exists in namespace
-INode node;
+final INodeFileUnderConstruction cons;
 try {
-node = dir.getFileINode(path);
+cons = INodeFileUnderConstruction.valueOf(dir.getINode(path), path);
 } catch (UnresolvedLinkException e) {
 throw new AssertionError("Lease files should reside on this FS");
 }
-if (node == null) {
-throw new IOException("saveLeases found path " + path +
-" but no matching entry in namespace.");
-}
-if (!node.isUnderConstruction()) {
-throw new IOException("saveLeases found path " + path +
-" but is not under construction.");
-}
-INodeFileUnderConstruction cons = (INodeFileUnderConstruction) node;
 FSImageSerialization.writeINodeUnderConstruction(out, cons, path);
 }
 }

View File

@@ -17,6 +17,7 @@
 */
 package org.apache.hadoop.hdfs.server.namenode;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.List;
@@ -32,6 +33,17 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 /** I-node for closed file. */
 @InterfaceAudience.Private
 public class INodeFile extends INode implements BlockCollection {
+/** Cast INode to INodeFile. */
+public static INodeFile valueOf(INode inode, String path) throws IOException {
+if (inode == null) {
+throw new FileNotFoundException("File does not exist: " + path);
+}
+if (!(inode instanceof INodeFile)) {
+throw new FileNotFoundException("Path is not a file: " + path);
+}
+return (INodeFile)inode;
+}
 static final FsPermission UMASK = FsPermission.createImmutable((short)0111);
 //Number of bits for Block size

View File

@@ -25,8 +25,8 @@ import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
-import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import org.apache.hadoop.hdfs.server.blockmanagement.MutableBlockCollection;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import com.google.common.base.Joiner;
@@ -35,6 +35,16 @@ import com.google.common.base.Joiner;
 */
 @InterfaceAudience.Private
 class INodeFileUnderConstruction extends INodeFile implements MutableBlockCollection {
+/** Cast INode to INodeFileUnderConstruction. */
+public static INodeFileUnderConstruction valueOf(INode inode, String path
+) throws IOException {
+final INodeFile file = INodeFile.valueOf(inode, path);
+if (!file.isUnderConstruction()) {
+throw new IOException("File is not under construction: " + path);
+}
+return (INodeFileUnderConstruction)file;
+}
 private String clientName; // lease holder
 private final String clientMachine;
 private final DatanodeDescriptor clientNode; // if client is a cluster node too.

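For reference, the call-site pattern these two helpers replace is the one visible throughout the FSNamesystem changes above; a minimal before/after sketch, reusing the local names dir and src from that file:

    // before: explicit null / directory checks repeated at every call site
    INodeFile file = dir.getFileINode(src);
    if (file == null) {
      throw new FileNotFoundException("File does not exist: " + src);
    }

    // after: the cast helpers perform the checks and throw for a missing path,
    // a non-file inode, or (for the under-construction variant) a closed file
    final INodeFile file = INodeFile.valueOf(dir.getINode(src), src);
    final INodeFileUnderConstruction pendingFile =
        INodeFileUnderConstruction.valueOf(dir.getINode(src), src);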
View File

@@ -253,7 +253,7 @@ public class LeaseManager {
 private String findPath(INodeFileUnderConstruction pendingFile) {
 try {
 for (String src : paths) {
-if (fsnamesystem.dir.getFileINode(src) == pendingFile) {
+if (fsnamesystem.dir.getINode(src) == pendingFile) {
 return src;
 }
 }

View File

@@ -876,7 +876,14 @@ public class TestDFSClientRetries {
 new Random().nextBytes(bytes);
 out4.write(bytes);
 out4.write(bytes);
-out4.hflush();
+if (isWebHDFS) {
+// WebHDFS does not support hflush. To avoid DataNode communicating with
+// NN while we're shutting down NN, we call out4.close() to finish
+// writing the data
+out4.close();
+} else {
+out4.hflush();
+}
 //shutdown namenode
 assertTrue(HdfsUtils.isHealthy(uri));
@@ -889,10 +896,12 @@
 public void run() {
 try {
 //write some more data and then close the file
-out4.write(bytes);
-out4.write(bytes);
-out4.write(bytes);
-out4.close();
+if (!isWebHDFS) {
+out4.write(bytes);
+out4.write(bytes);
+out4.write(bytes);
+out4.close();
+}
 } catch (Exception e) {
 exceptions.add(e);
 }
@@ -975,7 +984,11 @@
 Assert.assertEquals(String.format("count=%d", count),
 bytes[count % bytes.length], (byte)r);
 }
-Assert.assertEquals(5 * bytes.length, count);
+if (!isWebHDFS) {
+Assert.assertEquals(5 * bytes.length, count);
+} else {
+Assert.assertEquals(2 * bytes.length, count);
+}
 in.close();
 }

View File

@@ -119,8 +119,6 @@ public class TestDistributedFileSystem {
 DFSTestUtil.createFile(fileSys, p, 1L, (short)1, 0L);
 DFSTestUtil.readFile(fileSys, p);
-DFSClient client = ((DistributedFileSystem)fileSys).dfs;
 fileSys.close();
 } finally {
@@ -476,7 +474,7 @@ public class TestDistributedFileSystem {
 fail("Expecting FileNotFoundException");
 } catch (FileNotFoundException e) {
 assertTrue("Not throwing the intended exception message", e.getMessage()
-.contains("File does not exist: /test/TestExistingDir"));
+.contains("Path is not a file: /test/TestExistingDir"));
 }
 //hftp
@@ -712,7 +710,6 @@ public class TestDistributedFileSystem {
 @Test
 public void testCreateWithCustomChecksum() throws Exception {
 Configuration conf = getTestConfiguration();
-final long grace = 1000L;
 MiniDFSCluster cluster = null;
 Path testBasePath = new Path("/test/csum");
 // create args

View File

@@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.junit.Test;
 public class TestUnderReplicatedBlocks {
@@ -49,6 +50,12 @@ public class TestUnderReplicatedBlocks {
 ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
 DatanodeDescriptor dn = bm.blocksMap.nodeIterator(b.getLocalBlock()).next();
 bm.addToInvalidates(b.getLocalBlock(), dn);
+// Compute the invalidate work in NN, and trigger the heartbeat from DN
+BlockManagerTestUtil.computeAllPendingWork(bm);
+DataNodeTestUtils.triggerHeartbeat(cluster.getDataNode(dn.getIpcPort()));
+// Wait to make sure the DataNode receives the deletion request
+Thread.sleep(1000);
+// Remove the record from blocksMap
 bm.blocksMap.removeNode(b.getLocalBlock(), dn);
 // increment this file's replication factor

View File

@@ -83,8 +83,7 @@ public class TestBlockUnderConstruction {
 private void verifyFileBlocks(String file,
 boolean isFileOpen) throws IOException {
 FSNamesystem ns = cluster.getNamesystem();
-INodeFile inode = ns.dir.getFileINode(file);
-assertTrue("File does not exist: " + inode.toString(), inode != null);
+final INodeFile inode = INodeFile.valueOf(ns.dir.getINode(file), file);
 assertTrue("File " + inode.toString() +
 " isUnderConstruction = " + inode.isUnderConstruction() +
 " expected to be " + isFileOpen,

View File

@@ -18,13 +18,17 @@
 package org.apache.hadoop.hdfs.server.namenode;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import java.io.FileNotFoundException;
+import java.io.IOException;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.junit.Test;
 public class TestINodeFile {
@@ -199,4 +203,88 @@ public class TestINodeFile {
 return iNodes;
 }
+/**
+ * Test for the static {@link INodeFile#valueOf(INode, String)}
+ * and {@link INodeFileUnderConstruction#valueOf(INode, String)} methods.
+ * @throws IOException
+ */
+@Test
+public void testValueOf () throws IOException {
+final String path = "/testValueOf";
+final PermissionStatus perm = new PermissionStatus(
+userName, null, FsPermission.getDefault());
+final short replication = 3;
+{//cast from null
+final INode from = null;
+//cast to INodeFile, should fail
+try {
+INodeFile.valueOf(from, path);
+fail();
+} catch(FileNotFoundException fnfe) {
+assertTrue(fnfe.getMessage().contains("File does not exist"));
+}
+//cast to INodeFileUnderConstruction, should fail
+try {
+INodeFileUnderConstruction.valueOf(from, path);
+fail();
+} catch(FileNotFoundException fnfe) {
+assertTrue(fnfe.getMessage().contains("File does not exist"));
+}
+}
+{//cast from INodeFile
+final INode from = new INodeFile(
+perm, null, replication, 0L, 0L, preferredBlockSize);
+//cast to INodeFile, should success
+final INodeFile f = INodeFile.valueOf(from, path);
+assertTrue(f == from);
+//cast to INodeFileUnderConstruction, should fail
+try {
+INodeFileUnderConstruction.valueOf(from, path);
+fail();
+} catch(IOException ioe) {
+assertTrue(ioe.getMessage().contains("File is not under construction"));
+}
+}
+{//cast from INodeFileUnderConstruction
+final INode from = new INodeFileUnderConstruction(
+perm, replication, 0L, 0L, "client", "machine", null);
+//cast to INodeFile, should success
+final INodeFile f = INodeFile.valueOf(from, path);
+assertTrue(f == from);
+//cast to INodeFileUnderConstruction, should success
+final INodeFileUnderConstruction u = INodeFileUnderConstruction.valueOf(
+from, path);
+assertTrue(u == from);
+}
+{//cast from INodeDirectory
+final INode from = new INodeDirectory(perm, 0L);
+//cast to INodeFile, should fail
+try {
+INodeFile.valueOf(from, path);
+fail();
+} catch(FileNotFoundException fnfe) {
+assertTrue(fnfe.getMessage().contains("Path is not a file"));
+}
+//cast to INodeFileUnderConstruction, should fail
+try {
+INodeFileUnderConstruction.valueOf(from, path);
+fail();
+} catch(FileNotFoundException fnfe) {
+assertTrue(fnfe.getMessage().contains("Path is not a file"));
+}
+}
+}
 }

View File

@@ -610,6 +610,9 @@ Release 0.23.5 - UNRELEASED
 MAPREDUCE-4741. WARN and ERROR messages logged during normal AM shutdown.
 (Vinod Kumar Vavilapalli via jlowe)
+MAPREDUCE-4730. Fix Reducer's EventFetcher to scale the map-completion
+requests slowly to avoid HADOOP-8942. (Jason Lowe via vinodkv)
 Release 0.23.4 - UNRELEASED
 INCOMPATIBLE CHANGES

View File

@@ -27,10 +27,8 @@ import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
-@SuppressWarnings("deprecation")
 class EventFetcher<K,V> extends Thread {
 private static final long SLEEP_TIME = 1000;
-private static final int MAX_EVENTS_TO_FETCH = 10000;
 private static final int MAX_RETRIES = 10;
 private static final int RETRY_PERIOD = 5000;
 private static final Log LOG = LogFactory.getLog(EventFetcher.class);
@@ -38,7 +36,8 @@ class EventFetcher<K,V> extends Thread {
 private final TaskAttemptID reduce;
 private final TaskUmbilicalProtocol umbilical;
 private final ShuffleScheduler<K,V> scheduler;
-private int fromEventId = 0;
+private int fromEventIdx = 0;
+private int maxEventsToFetch;
 private ExceptionReporter exceptionReporter = null;
 private int maxMapRuntime = 0;
@@ -48,13 +47,15 @@ class EventFetcher<K,V> extends Thread {
 public EventFetcher(TaskAttemptID reduce,
 TaskUmbilicalProtocol umbilical,
 ShuffleScheduler<K,V> scheduler,
-ExceptionReporter reporter) {
+ExceptionReporter reporter,
+int maxEventsToFetch) {
 setName("EventFetcher for fetching Map Completion Events");
 setDaemon(true);
 this.reduce = reduce;
 this.umbilical = umbilical;
 this.scheduler = scheduler;
 exceptionReporter = reporter;
+this.maxEventsToFetch = maxEventsToFetch;
 }
 @Override
@@ -112,46 +113,47 @@ class EventFetcher<K,V> extends Thread {
 * from a given event ID.
 * @throws IOException
 */
-private int getMapCompletionEvents() throws IOException {
+protected int getMapCompletionEvents() throws IOException {
 int numNewMaps = 0;
+TaskCompletionEvent events[] = null;
-MapTaskCompletionEventsUpdate update =
-umbilical.getMapCompletionEvents((org.apache.hadoop.mapred.JobID)
-reduce.getJobID(),
-fromEventId,
-MAX_EVENTS_TO_FETCH,
-(org.apache.hadoop.mapred.TaskAttemptID)
-reduce);
-TaskCompletionEvent events[] = update.getMapTaskCompletionEvents();
-LOG.debug("Got " + events.length + " map completion events from " +
-fromEventId);
-// Check if the reset is required.
-// Since there is no ordering of the task completion events at the
-// reducer, the only option to sync with the new jobtracker is to reset
-// the events index
-if (update.shouldReset()) {
-fromEventId = 0;
-scheduler.resetKnownMaps();
-}
-// Update the last seen event ID
-fromEventId += events.length;
-// Process the TaskCompletionEvents:
-// 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
-// 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteOutputs to stop
-// fetching from those maps.
-// 3. Remove TIPFAILED maps from neededOutputs since we don't need their
-// outputs at all.
-for (TaskCompletionEvent event : events) {
-switch (event.getTaskStatus()) {
+do {
+MapTaskCompletionEventsUpdate update =
+umbilical.getMapCompletionEvents(
+(org.apache.hadoop.mapred.JobID)reduce.getJobID(),
+fromEventIdx,
+maxEventsToFetch,
+(org.apache.hadoop.mapred.TaskAttemptID)reduce);
+events = update.getMapTaskCompletionEvents();
+LOG.debug("Got " + events.length + " map completion events from " +
+fromEventIdx);
+// Check if the reset is required.
+// Since there is no ordering of the task completion events at the
+// reducer, the only option to sync with the new jobtracker is to reset
+// the events index
+if (update.shouldReset()) {
+fromEventIdx = 0;
+scheduler.resetKnownMaps();
+}
+// Update the last seen event ID
+fromEventIdx += events.length;
+// Process the TaskCompletionEvents:
+// 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
+// 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteOutputs to stop
+// fetching from those maps.
+// 3. Remove TIPFAILED maps from neededOutputs since we don't need their
+// outputs at all.
+for (TaskCompletionEvent event : events) {
+switch (event.getTaskStatus()) {
 case SUCCEEDED:
 URI u = getBaseURI(event.getTaskTrackerHttp());
 scheduler.addKnownMapOutput(u.getHost() + ":" + u.getPort(),
 u.toString(),
 event.getTaskAttemptId());
 numNewMaps ++;
 int duration = event.getTaskRunTime();
 if (duration > maxMapRuntime) {
@@ -164,15 +166,17 @@ class EventFetcher<K,V> extends Thread {
 case OBSOLETE:
 scheduler.obsoleteMapOutput(event.getTaskAttemptId());
 LOG.info("Ignoring obsolete output of " + event.getTaskStatus() +
 " map-task: '" + event.getTaskAttemptId() + "'");
 break;
 case TIPFAILED:
 scheduler.tipFailed(event.getTaskAttemptId().getTaskID());
 LOG.info("Ignoring output of failed map TIP: '" +
 event.getTaskAttemptId() + "'");
 break;
 }
 }
+} while (events.length == maxEventsToFetch);
 return numNewMaps;
 }

View File

@@ -40,9 +40,12 @@ import org.apache.hadoop.util.Progress;
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
-@SuppressWarnings({"deprecation", "unchecked", "rawtypes"})
+@SuppressWarnings({"unchecked", "rawtypes"})
 public class Shuffle<K, V> implements ExceptionReporter {
 private static final int PROGRESS_FREQUENCY = 2000;
+private static final int MAX_EVENTS_TO_FETCH = 10000;
+private static final int MIN_EVENTS_TO_FETCH = 100;
+private static final int MAX_RPC_OUTSTANDING_EVENTS = 3000000;
 private final TaskAttemptID reduceId;
 private final JobConf jobConf;
@@ -99,9 +102,17 @@ public class Shuffle<K, V> implements ExceptionReporter {
 }
 public RawKeyValueIterator run() throws IOException, InterruptedException {
+// Scale the maximum events we fetch per RPC call to mitigate OOM issues
+// on the ApplicationMaster when a thundering herd of reducers fetch events
+// TODO: This should not be necessary after HADOOP-8942
+int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,
+MAX_RPC_OUTSTANDING_EVENTS / jobConf.getNumReduceTasks());
+int maxEventsToFetch = Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);
 // Start the map-completion events fetcher thread
 final EventFetcher<K,V> eventFetcher =
-new EventFetcher<K,V>(reduceId, umbilical, scheduler, this);
+new EventFetcher<K,V>(reduceId, umbilical, scheduler, this,
+maxEventsToFetch);
 eventFetcher.start();
 // Start the map-output fetcher threads

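To illustrate the scaling added above: the constants come straight from the new Shuffle.java fields, while the reducer counts are just assumed example job sizes.

    // assumed example: a job with 10,000 reducers
    int numReduces = 10000;
    int eventsPerReducer = Math.max(MIN_EVENTS_TO_FETCH,         // max(100,
        MAX_RPC_OUTSTANDING_EVENTS / numReduces);                //   3000000 / 10000) = 300
    int maxEventsToFetch =
        Math.min(MAX_EVENTS_TO_FETCH, eventsPerReducer);         // min(10000, 300) = 300
    // a small job, e.g. 50 reducers, is instead capped at MAX_EVENTS_TO_FETCH = 10000,
    // and very large jobs never drop below MIN_EVENTS_TO_FETCH = 100 events per RPC

The EventFetcher then keeps calling getMapCompletionEvents in a do-while loop until a fetch returns fewer than maxEventsToFetch events, so a smaller per-RPC batch size bounds each request without losing events.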
View File

@@ -0,0 +1,116 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.task.reduce;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyInt;
import static org.mockito.Mockito.anyString;
import static org.mockito.Mockito.eq;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapreduce.TaskType;
import org.junit.Test;
import org.mockito.InOrder;
public class TestEventFetcher {
@Test
public void testConsecutiveFetch() throws IOException {
final int MAX_EVENTS_TO_FETCH = 100;
TaskAttemptID tid = new TaskAttemptID("12345", 1, TaskType.REDUCE, 1, 1);
TaskUmbilicalProtocol umbilical = mock(TaskUmbilicalProtocol.class);
when(umbilical.getMapCompletionEvents(any(JobID.class),
anyInt(), anyInt(), any(TaskAttemptID.class)))
.thenReturn(getMockedCompletionEventsUpdate(0, 0));
when(umbilical.getMapCompletionEvents(any(JobID.class),
eq(0), eq(MAX_EVENTS_TO_FETCH), eq(tid)))
.thenReturn(getMockedCompletionEventsUpdate(0, MAX_EVENTS_TO_FETCH));
when(umbilical.getMapCompletionEvents(any(JobID.class),
eq(MAX_EVENTS_TO_FETCH), eq(MAX_EVENTS_TO_FETCH), eq(tid)))
.thenReturn(getMockedCompletionEventsUpdate(MAX_EVENTS_TO_FETCH,
MAX_EVENTS_TO_FETCH));
when(umbilical.getMapCompletionEvents(any(JobID.class),
eq(MAX_EVENTS_TO_FETCH*2), eq(MAX_EVENTS_TO_FETCH), eq(tid)))
.thenReturn(getMockedCompletionEventsUpdate(MAX_EVENTS_TO_FETCH*2, 3));
@SuppressWarnings("unchecked")
ShuffleScheduler<String,String> scheduler = mock(ShuffleScheduler.class);
ExceptionReporter reporter = mock(ExceptionReporter.class);
EventFetcherForTest<String,String> ef =
new EventFetcherForTest<String,String>(tid, umbilical, scheduler,
reporter, MAX_EVENTS_TO_FETCH);
ef.getMapCompletionEvents();
verify(reporter, never()).reportException(any(Throwable.class));
InOrder inOrder = inOrder(umbilical);
inOrder.verify(umbilical).getMapCompletionEvents(any(JobID.class),
eq(0), eq(MAX_EVENTS_TO_FETCH), eq(tid));
inOrder.verify(umbilical).getMapCompletionEvents(any(JobID.class),
eq(MAX_EVENTS_TO_FETCH), eq(MAX_EVENTS_TO_FETCH), eq(tid));
inOrder.verify(umbilical).getMapCompletionEvents(any(JobID.class),
eq(MAX_EVENTS_TO_FETCH*2), eq(MAX_EVENTS_TO_FETCH), eq(tid));
verify(scheduler, times(MAX_EVENTS_TO_FETCH*2 + 3)).addKnownMapOutput(
anyString(), anyString(), any(TaskAttemptID.class));
}
private MapTaskCompletionEventsUpdate getMockedCompletionEventsUpdate(
int startIdx, int numEvents) {
ArrayList<TaskCompletionEvent> tceList =
new ArrayList<TaskCompletionEvent>(numEvents);
for (int i = 0; i < numEvents; ++i) {
int eventIdx = startIdx + i;
TaskCompletionEvent tce = new TaskCompletionEvent(eventIdx,
new TaskAttemptID("12345", 1, TaskType.MAP, eventIdx, 0),
eventIdx, true, TaskCompletionEvent.Status.SUCCEEDED,
"http://somehost:8888");
tceList.add(tce);
}
TaskCompletionEvent[] events = {};
return new MapTaskCompletionEventsUpdate(tceList.toArray(events), false);
}
private static class EventFetcherForTest<K,V> extends EventFetcher<K,V> {
public EventFetcherForTest(TaskAttemptID reduce,
TaskUmbilicalProtocol umbilical, ShuffleScheduler<K,V> scheduler,
ExceptionReporter reporter, int maxEventsToFetch) {
super(reduce, umbilical, scheduler, reporter, maxEventsToFetch);
}
@Override
public int getMapCompletionEvents() throws IOException {
return super.getMapCompletionEvents();
}
}
}