From 578dae9ef39eef046b0a0ff9cd830c753a98afad Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Wed, 15 Jan 2014 01:06:53 +0000 Subject: [PATCH 01/32] HDFS-5704. Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK. Contributed by Jing Zhao. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558255 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../hdfs/server/namenode/FSDirectory.java | 20 + .../hdfs/server/namenode/FSEditLog.java | 21 +- .../hdfs/server/namenode/FSEditLogLoader.java | 52 ++- .../hdfs/server/namenode/FSEditLogOp.java | 104 +++++ .../server/namenode/FSEditLogOpCodes.java | 1 + .../hdfs/server/namenode/FSNamesystem.java | 2 +- .../hadoop/hdfs/TestFileAppendRestart.java | 35 +- .../hdfs/server/namenode/TestAddBlock.java | 157 +++++++ .../TestOfflineEditsViewer.java | 1 - .../src/test/resources/editsStored | Bin 4712 -> 4282 bytes .../src/test/resources/editsStored.xml | 416 +++++++----------- 12 files changed, 542 insertions(+), 269 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index fcc5f067b26..1734d58ae46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -927,6 +927,8 @@ Release 2.3.0 - UNRELEASED HDFS-5677. Need error checking for HA cluster configuration. (Vincent Sheffer via cos) + HDFS-5704. Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK. (jing9) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 011bcf26ba4..9a907cc9483 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -417,6 +417,26 @@ public class FSDirectory implements Closeable { } } + /** + * Persist the new block (the last block of the given file). + */ + void persistNewBlock(String path, INodeFile file) { + Preconditions.checkArgument(file.isUnderConstruction()); + waitForReady(); + + writeLock(); + try { + fsImage.getEditLog().logAddBlock(path, file); + } finally { + writeUnlock(); + } + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* FSDirectory.persistNewBlock: " + + path + " with new block " + file.getLastBlock().toString() + + ", current total block count is " + file.getBlocks().length); + } + } + /** * Close file. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 098d4574d8a..d98dad196a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -36,17 +36,18 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddOp; -import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp; -import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp; @@ -59,11 +60,12 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.LogSegmentOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp; -import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp; @@ -709,6 +711,17 @@ public class FSEditLog implements LogsPurgeable { logEdit(op); } + public void logAddBlock(String path, INodeFile file) { + Preconditions.checkArgument(file.isUnderConstruction()); + BlockInfo[] blocks = file.getBlocks(); + Preconditions.checkState(blocks != null && blocks.length > 0); + BlockInfo pBlock = blocks.length > 1 ? 
blocks[blocks.length - 2] : null; + BlockInfo lastBlock = blocks[blocks.length - 1]; + AddBlockOp op = AddBlockOp.getInstance(cache.get()).setPath(path) + .setPenultimateBlock(pBlock).setLastBlock(lastBlock); + logEdit(op); + } + public void logUpdateBlocks(String path, INodeFile file, boolean toLogRpcIds) { Preconditions.checkArgument(file.isUnderConstruction()); UpdateBlocksOp op = UpdateBlocksOp.getInstance(cache.get()) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 634b509cb53..ce2cf2f7be7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp; @@ -411,7 +412,18 @@ public class FSEditLogLoader { } break; } - + case OP_ADD_BLOCK: { + AddBlockOp addBlockOp = (AddBlockOp) op; + String path = addBlockOp.getPath(); + if (FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug(op.opCode + ": " + path + + " new block id : " + addBlockOp.getLastBlock().getBlockId()); + } + INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path); + // add the new block to the INodeFile + addNewBlock(fsDir, addBlockOp, oldFile); + break; + } case OP_SET_REPLICATION: { SetReplicationOp setReplicationOp = (SetReplicationOp)op; short replication = fsNamesys.getBlockManager().adjustReplication( @@ -713,6 +725,44 @@ public class FSEditLogLoader { return sb.toString(); } + /** + * Add a new block into the given INodeFile + */ + private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file) + throws IOException { + BlockInfo[] oldBlocks = file.getBlocks(); + Block pBlock = op.getPenultimateBlock(); + Block newBlock= op.getLastBlock(); + + if (pBlock != null) { // the penultimate block is not null + Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0); + // compare pBlock with the last block of oldBlocks + Block oldLastBlock = oldBlocks[oldBlocks.length - 1]; + if (oldLastBlock.getBlockId() != pBlock.getBlockId() + || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) { + throw new IOException( + "Mismatched block IDs or generation stamps for the old last block of file " + + op.getPath() + ", the old last block is " + oldLastBlock + + ", and the block read from editlog is " + pBlock); + } + + oldLastBlock.setNumBytes(pBlock.getNumBytes()); + if (oldLastBlock instanceof BlockInfoUnderConstruction) { + fsNamesys.getBlockManager().forceCompleteBlock(file, + (BlockInfoUnderConstruction) oldLastBlock); + fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock); + } + } else { // the penultimate block is null + Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0); + } + // add the new block + BlockInfo newBI = new 
BlockInfoUnderConstruction( + newBlock, file.getBlockReplication()); + fsNamesys.getBlockManager().addBlockCollection(newBI, file); + file.addBlock(newBI); + fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); + } + /** * Update in-memory data structures with new block information. * @throws IOException diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 5828c3e29a9..a9676cd89dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD; +import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_BLOCK; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_CACHE_DIRECTIVE; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_CACHE_POOL; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ALLOCATE_BLOCK_ID; @@ -166,6 +167,7 @@ public abstract class FSEditLogOp { inst.put(OP_ADD_CACHE_POOL, new AddCachePoolOp()); inst.put(OP_MODIFY_CACHE_POOL, new ModifyCachePoolOp()); inst.put(OP_REMOVE_CACHE_POOL, new RemoveCachePoolOp()); + inst.put(OP_ADD_BLOCK, new AddBlockOp()); } public FSEditLogOp get(FSEditLogOpCodes opcode) { @@ -600,6 +602,108 @@ public abstract class FSEditLogOp { } } + static class AddBlockOp extends FSEditLogOp { + private String path; + private Block penultimateBlock; + private Block lastBlock; + + private AddBlockOp() { + super(OP_ADD_BLOCK); + } + + static AddBlockOp getInstance(OpInstanceCache cache) { + return (AddBlockOp) cache.get(OP_ADD_BLOCK); + } + + AddBlockOp setPath(String path) { + this.path = path; + return this; + } + + public String getPath() { + return path; + } + + AddBlockOp setPenultimateBlock(Block pBlock) { + this.penultimateBlock = pBlock; + return this; + } + + Block getPenultimateBlock() { + return penultimateBlock; + } + + AddBlockOp setLastBlock(Block lastBlock) { + this.lastBlock = lastBlock; + return this; + } + + Block getLastBlock() { + return lastBlock; + } + + @Override + public void writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeString(path, out); + int size = penultimateBlock != null ? 2 : 1; + Block[] blocks = new Block[size]; + if (penultimateBlock != null) { + blocks[0] = penultimateBlock; + } + blocks[size - 1] = lastBlock; + FSImageSerialization.writeCompactBlockArray(blocks, out); + // clientId and callId + writeRpcIds(rpcClientId, rpcCallId, out); + } + + @Override + void readFields(DataInputStream in, int logVersion) throws IOException { + path = FSImageSerialization.readString(in); + Block[] blocks = FSImageSerialization.readCompactBlockArray(in, + logVersion); + Preconditions.checkState(blocks.length == 2 || blocks.length == 1); + penultimateBlock = blocks.length == 1 ? null : blocks[0]; + lastBlock = blocks[blocks.length - 1]; + readRpcIds(in, logVersion); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("AddBlockOp [path=") + .append(path) + .append(", penultimateBlock=") + .append(penultimateBlock == null ? 
"NULL" : penultimateBlock) + .append(", lastBlock=") + .append(lastBlock); + appendRpcIdsToString(sb, rpcClientId, rpcCallId); + sb.append("]"); + return sb.toString(); + } + + @Override + protected void toXml(ContentHandler contentHandler) throws SAXException { + XMLUtils.addSaxString(contentHandler, "PATH", path); + if (penultimateBlock != null) { + FSEditLogOp.blockToXml(contentHandler, penultimateBlock); + } + FSEditLogOp.blockToXml(contentHandler, lastBlock); + appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); + } + + @Override + void fromXml(Stanza st) throws InvalidXmlException { + this.path = st.getValue("PATH"); + List blocks = st.getChildren("BLOCK"); + int size = blocks.size(); + Preconditions.checkState(size == 1 || size == 2); + this.penultimateBlock = size == 2 ? + FSEditLogOp.blockFromXml(blocks.get(0)) : null; + this.lastBlock = FSEditLogOp.blockFromXml(blocks.get(size - 1)); + readRpcIdsFromXml(st); + } + } + /** * {@literal @AtMostOnce} for {@link ClientProtocol#updatePipeline}, but * {@literal @Idempotent} for some other ops. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java index 42cb6d7d4ed..21cb284cc72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java @@ -66,6 +66,7 @@ public enum FSEditLogOpCodes { OP_MODIFY_CACHE_POOL ((byte) 36), OP_REMOVE_CACHE_POOL ((byte) 37), OP_MODIFY_CACHE_DIRECTIVE ((byte) 38), + OP_ADD_BLOCK ((byte) 39), // Note that fromByte(..) depends on OP_INVALID being at the last position. 
OP_INVALID ((byte) -1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 8a9cbd1b5e2..1c9096e66a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -2629,7 +2629,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, newBlock = createNewBlock(); saveAllocatedBlock(src, inodesInPath, newBlock, targets); - dir.persistBlocks(src, pendingFile, false); + dir.persistNewBlock(src, pendingFile); offset = pendingFile.computeFileSize(); } finally { writeUnlock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java index 570b19f8d2e..f557fd5aec4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java @@ -97,29 +97,30 @@ public class TestFileAppendRestart { counts = FSImageTestUtil.countEditLogOpTypes(editLog); // OP_ADD to create file - // OP_UPDATE_BLOCKS for first block + // OP_ADD_BLOCK for first block // OP_CLOSE to close file // OP_ADD to reopen file - // OP_UPDATE_BLOCKS for second block + // OP_ADD_BLOCK for second block // OP_CLOSE to close file assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held); - assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_UPDATE_BLOCKS).held); + assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD_BLOCK).held); assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held); Path p2 = new Path("/not-block-boundaries"); writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE); counts = FSImageTestUtil.countEditLogOpTypes(editLog); // OP_ADD to create file - // OP_UPDATE_BLOCKS for first block + // OP_ADD_BLOCK for first block // OP_CLOSE to close file // OP_ADD to re-establish the lease // OP_UPDATE_BLOCKS from the updatePipeline call (increments genstamp of last block) - // OP_UPDATE_BLOCKS at the start of the second block + // OP_ADD_BLOCK at the start of the second block // OP_CLOSE to close file - // Total: 2 OP_ADDs, 3 OP_UPDATE_BLOCKS, and 2 OP_CLOSEs in addition - // to the ones above + // Total: 2 OP_ADDs, 1 OP_UPDATE_BLOCKS, 2 OP_ADD_BLOCKs, and 2 OP_CLOSEs + // in addition to the ones above assertEquals(2+2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held); - assertEquals(2+3, (int)counts.get(FSEditLogOpCodes.OP_UPDATE_BLOCKS).held); + assertEquals(1, (int)counts.get(FSEditLogOpCodes.OP_UPDATE_BLOCKS).held); + assertEquals(2+2, (int)counts.get(FSEditLogOpCodes.OP_ADD_BLOCK).held); assertEquals(2+2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held); cluster.restartNameNode(); @@ -176,13 +177,14 @@ public class TestFileAppendRestart { } /** - * Test to append to the file, when one of datanode in the existing pipeline is down. - * @throws Exception + * Test to append to the file, when one of datanode in the existing pipeline + * is down. 
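   * The append is issued after stopping one datanode, which forces a pipeline
   * recovery while the last block is being written; the NameNode is then
   * restarted, so the test also verifies that the edits logged during the
   * recovery replay correctly before the final length check.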
*/ @Test public void testAppendWithPipelineRecovery() throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = null; + FSDataOutputStream out = null; try { cluster = new MiniDFSCluster.Builder(conf).manageDataDfsDirs(true) .manageNameDfsDirs(true).numDataNodes(4) @@ -192,11 +194,20 @@ public class TestFileAppendRestart { DistributedFileSystem fs = cluster.getFileSystem(); Path path = new Path("/test1"); - DFSTestUtil.createFile(fs, path, 1024, (short) 3, 1l); + + out = fs.create(path, true, BLOCK_SIZE, (short) 3, BLOCK_SIZE); + AppendTestUtil.write(out, 0, 1024); + out.close(); cluster.stopDataNode(3); - DFSTestUtil.appendFile(fs, path, "hello"); + out = fs.append(path); + AppendTestUtil.write(out, 1024, 1024); + out.close(); + + cluster.restartNameNode(true); + AppendTestUtil.check(fs, path, 2048); } finally { + IOUtils.closeStream(out); if (null != cluster) { cluster.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java new file mode 100644 index 00000000000..301ee25ad6b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlock.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.EnumSet; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSOutputStream; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test AddBlockOp is written and read correctly + */ +public class TestAddBlock { + private static final short REPLICATION = 3; + private static final int BLOCKSIZE = 1024; + + private MiniDFSCluster cluster; + private Configuration conf; + + @Before + public void setup() throws IOException { + conf = new Configuration(); + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION) + .build(); + cluster.waitActive(); + } + + @After + public void tearDown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + /** + * Test adding new blocks. Restart the NameNode in the test to make sure the + * AddBlockOp in the editlog is applied correctly. + */ + @Test + public void testAddBlock() throws Exception { + DistributedFileSystem fs = cluster.getFileSystem(); + + final Path file1 = new Path("/file1"); + final Path file2 = new Path("/file2"); + final Path file3 = new Path("/file3"); + final Path file4 = new Path("/file4"); + + DFSTestUtil.createFile(fs, file1, BLOCKSIZE - 1, REPLICATION, 0L); + DFSTestUtil.createFile(fs, file2, BLOCKSIZE, REPLICATION, 0L); + DFSTestUtil.createFile(fs, file3, BLOCKSIZE * 2 - 1, REPLICATION, 0L); + DFSTestUtil.createFile(fs, file4, BLOCKSIZE * 2, REPLICATION, 0L); + + // restart NameNode + cluster.restartNameNode(true); + FSDirectory fsdir = cluster.getNamesystem().getFSDirectory(); + + // check file1 + INodeFile file1Node = fsdir.getINode4Write(file1.toString()).asFile(); + BlockInfo[] file1Blocks = file1Node.getBlocks(); + assertEquals(1, file1Blocks.length); + assertEquals(BLOCKSIZE - 1, file1Blocks[0].getNumBytes()); + assertEquals(BlockUCState.COMPLETE, file1Blocks[0].getBlockUCState()); + + // check file2 + INodeFile file2Node = fsdir.getINode4Write(file2.toString()).asFile(); + BlockInfo[] file2Blocks = file2Node.getBlocks(); + assertEquals(1, file2Blocks.length); + assertEquals(BLOCKSIZE, file2Blocks[0].getNumBytes()); + assertEquals(BlockUCState.COMPLETE, file2Blocks[0].getBlockUCState()); + + // check file3 + INodeFile file3Node = fsdir.getINode4Write(file3.toString()).asFile(); + BlockInfo[] file3Blocks = file3Node.getBlocks(); + assertEquals(2, file3Blocks.length); + assertEquals(BLOCKSIZE, file3Blocks[0].getNumBytes()); + assertEquals(BlockUCState.COMPLETE, file3Blocks[0].getBlockUCState()); + assertEquals(BLOCKSIZE - 1, file3Blocks[1].getNumBytes()); + assertEquals(BlockUCState.COMPLETE, file3Blocks[1].getBlockUCState()); + + // check file4 + INodeFile file4Node = fsdir.getINode4Write(file4.toString()).asFile(); + BlockInfo[] file4Blocks = file4Node.getBlocks(); + assertEquals(2, file4Blocks.length); + assertEquals(BLOCKSIZE, file4Blocks[0].getNumBytes()); + 
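    // file4 ends exactly on a block boundary; the AddBlockOp for its second
    // block recorded the first block's final length, so after replay both
    // blocks are expected to be COMPLETE at full BLOCKSIZE.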
assertEquals(BlockUCState.COMPLETE, file4Blocks[0].getBlockUCState()); + assertEquals(BLOCKSIZE, file4Blocks[1].getNumBytes()); + assertEquals(BlockUCState.COMPLETE, file4Blocks[1].getBlockUCState()); + } + + /** + * Test adding new blocks but without closing the corresponding the file + */ + @Test + public void testAddBlockUC() throws Exception { + DistributedFileSystem fs = cluster.getFileSystem(); + final Path file1 = new Path("/file1"); + DFSTestUtil.createFile(fs, file1, BLOCKSIZE - 1, REPLICATION, 0L); + + FSDataOutputStream out = null; + try { + // append files without closing the streams + out = fs.append(file1); + String appendContent = "appending-content"; + out.writeBytes(appendContent); + ((DFSOutputStream) out.getWrappedStream()).hsync( + EnumSet.of(SyncFlag.UPDATE_LENGTH)); + + // restart NN + cluster.restartNameNode(true); + FSDirectory fsdir = cluster.getNamesystem().getFSDirectory(); + + INodeFile fileNode = fsdir.getINode4Write(file1.toString()).asFile(); + BlockInfo[] fileBlocks = fileNode.getBlocks(); + assertEquals(2, fileBlocks.length); + assertEquals(BLOCKSIZE, fileBlocks[0].getNumBytes()); + assertEquals(BlockUCState.COMPLETE, fileBlocks[0].getBlockUCState()); + assertEquals(appendContent.length() - 1, fileBlocks[1].getNumBytes()); + assertEquals(BlockUCState.UNDER_CONSTRUCTION, + fileBlocks[1].getBlockUCState()); + } finally { + if (out != null) { + out.close(); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java index 46f31026823..00ec2b790da 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hdfs.tools.offlineEditsViewer; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index c68762ae3f5b628ff57a93a9de952415dcf3ea34..8cba085408de27fa35c43a2b30767c2ce90f9844 100644 GIT binary patch literal 4282 zcmcIndrVVj6hF7OwJnGsKEVf98Hz%Mq9UN1mm!Ke${dkxZrapx!SZNHTQzLP%^7jo z27fU3$CAm6qT`QcKH`FLzNSl@F-n$gIwQd>8kLvNMH3V3zGo}9mxe#|H0inh&Ub#l zbG~!F?|uyg0v!{9iqJ6rw^ifV51sIZP|$ov-)Z_|Ps7yemRwu>bA1fAiaKiTGJTeN z>_Pw5n%kYH*VHjr(9;K52O~2oTs4Z_=~Wzl1q1-0z}1*~ETr*F9ht7La=9y`d>iW& zZ>86>u})}(3u;`7+i$m(*oyPtey4EB>ViVMCCi#^&7NzuX4|b`wq`AulZif7 zh%35KAvEU{C$)chKXz(!+Fk@~Jl^zSvzage7?_430=qc8z*2AdOehy3BUDh)G~-&& zCZ(LAb!Y}=Zdp6Kji!#G1P#9{ntoCLC8l+YjNL@FEJ9pHxy!3K{T^?7{*xDG!W7je(!q^MN0JQo>PWVZ0b0Tx)GCjH^4+{Su>H9 z2ga+A!*|!Lc;t?%rgdf{Sf4)WWW+FmKF9_b!wxf#+v)Jz{SI%X;^$}DEHn+DZCGP@ zrG=j~C289piSPiB(_2RDlb5aBk;DcQs200>S`SDw*(oq5cjMg9o5HTj(arM@E38;|c z2ARcerKQQ7jA^lV^Py7>oJ@*HrXyrM7!;$0$dr!{e8qRnG;Agkv9Mb-3QLY(8Lt@; zT5Lpw){myA8V^RI8t)A|lv_{ky)%~10+>!8FASM0fy_v6cb!XO{nK;FWrf8pXcCLmOxFN~NfS=MASv{m@5yV5xKBT!}iv46J4J zt{%9ytCJ%$FtI;6ufComr>c=j`p8vJ%8#%qP^kRY%93sg4InQi7 zhzfbnxus(l=Sdri$3S@cv4@O+LOhL^$Mx|xOUGs+LlyCBjUw_6V`nkR41wSUqe&4& z2#yDXP>tUUZ-xQs<^nnkz(OBnqbfyfu*1G#!6A)>5{84mln$Hs-S1uJCBg$R2WuH! 
zjoM=0z02#9L5<@5b$!Q+d3`d~!SfC?q7J)HJM6sYaJxjE!HTsEU|MjyzY5OwvcLMpAi%O@Ts{zgRUhS;E6e=HV|Hz`W`GBfxp)<3Uu&hr44+e&syb zL-Cjh&jaisBcKpZzTS43cUZ8RYLs9Rf7B=Uq1(Q;Z2ZKisbIH>whOp&rzHMK5-KJ8*V14Fz(kmQ4^t&q-f>ZH>!aJp?s@J<8yZ zhJ_!{B$U^Ga0y=-h~!Zvn;{`%gTq^HsZkt0#e(kN=-ThCxA+xb=w{H)n2^RoS_r~e zU(Uas{Ok2qyCuQ{un=n*AS$yvy_)ymB5D*Lwf)c*ycxV6ArHQpqR?h~L^sp9r!5KW zh5>I3$2B_Wx=MikJe0*Ol*hL%t-kT}2a0!C|N0`VdrT4f+i15rY89KOT)ES@y_0X0 ze8vG0V&;2h9y)nRqr6zQ&{|NWXr4iMIiK}b-XJeYH04b&)OkEL_}odNl?Q{*@2IE{ z`afZp*CeJjd#|WLn0b)BT%jNY#;@B>O|RuWS07)UMk9nJOydkHJ1bm=zs=Hk~9dW#n4;0*nr|vexZ+WzElJTc)##OM?ztQ{O6G)O$OHOU~n%WJ9cZaDpJe)=kF&6@C^cW+{&qK=FnHk5pG z@AARFN9_zt-?NB9A%NV-IvAR`qM}N%mAVwWTd~y%+0&CPAOHx~%<~uQ9pFflWGiM6Uiaky=TMTG?!3m0c)F0*FY%$DTYGm~Z}r&v;K z7BE_-rA$pie-;Qocr|+SOW{T9zq;6_|7lHIGvd~N+5CC8Q569Qq!#?Um^4@y7k;rf z3av|RL3~aV*^6pty;4SLXK@`h-l*xlM|J~w4IJEjE-v^)(I3qsb;3ZbEJAo+w(3fBPsIzE(~nV=nojpU4&W+F&@^5 z*8tGA-Z%H^ED~i>pExwE+y8CcJw{KTXt8^C>Yg)-3v^=PDca)Fk6e@LBd!p zcf0Pe1f;2TN(K;{mn@mU#HbZ}8d?vuFfoZMrdCvh*NEXbsj{L^HpkSxm_2Gw;bFD0 zM~xTd&S8icABGP`*{ewKB=TqhXeY@+HT~VRpVofw{2oqa-2f!)IvH$u&$eLQFMq~Blfh?i6;fC<>j00D;jTF+WWP#XeJNVP2~e31cy_EiM8F`anaiK2PE^p%bcOeaCB!*W2QroRvWmeIO2{nvS!G46QhpJX0@ncT>FjeFjg!pwfNeKJVm_ z7ds<}lQ5fD*#IG=iq@IgB>WX1_W`(}j=-XBdwcKTA#&&M{fEIm7f0#22^rd!JmD}lnKvhRL zM8x&SxLjGD$`!llNe1(%pZLiQgMh>my9?#I&u2+J_)fjgWy+4WqzURNg3$5fauH9)o6ArwceM^Q)fsXVv8Gjuvl(A{%`QSn6mOvNtFV~#8w6fj=3#4S+U9@ zgW|0_+ke^3idD9srLf8yj4JOfQhE0#-~O0Ea|Q!7d7)gSB}L4Lr3 zL7qE)_C6#R{X4@}Xs=cZon=b#%hi7|#XPEb=g6yVc}y`M9tQYXkY?t%)ve=LoqmS; m5B}h2FL4dIoW#ir-#!y26Z4gqEk|dhe@Z`dpdjv8P}+Y&rl05l diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index 28a6dcf343a..1704020047e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -13,8 +13,8 @@ 2 1 - 1389121087930 - d48b4b3e6a43707b + 1389736494300 + d1a0861e6b9e394e @@ -24,8 +24,8 @@ 3 2 - 1389121087937 - 62b6fae6bff918a9 + 1389736494302 + 8239b8f0ed7e6ce6 @@ -35,19 +35,19 @@ 4 0 16386 - /file_create_u\0001;F431 + /file_create 1 - 1388429889312 - 1388429889312 + 1389045295288 + 1389045295288 512 - DFSClient_NONMAPREDUCE_-1396063717_1 + DFSClient_NONMAPREDUCE_-216163116_1 127.0.0.1 - andrew + jing supergroup 420 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 7 @@ -57,15 +57,15 @@ 5 0 0 - /file_create_u\0001;F431 + /file_create 1 - 1388429889328 - 1388429889312 + 1389045295307 + 1389045295288 512 - andrew + jing supergroup 420 @@ -76,10 +76,10 @@ 6 0 - /file_create_u\0001;F431 + /file_create /file_moved - 1388429889336 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 1389045295311 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 9 @@ -89,8 +89,8 @@ 7 0 /file_moved - 1388429889346 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 1389045295318 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 10 @@ -101,9 +101,9 @@ 0 16387 /directory_mkdir - 1388429889357 + 1389045295326 - andrew + jing supergroup 493 @@ -136,7 +136,7 @@ 12 /directory_mkdir snapshot1 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 15 @@ -147,7 +147,7 @@ /directory_mkdir snapshot1 snapshot2 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 16 @@ -157,7 +157,7 @@ 14 /directory_mkdir snapshot2 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 17 @@ -167,19 +167,19 @@ 15 0 16388 - /file_create_u\0001;F431 + /file_create 1 - 1388429889412 - 1388429889412 + 
1389045295354 + 1389045295354 512 - DFSClient_NONMAPREDUCE_-1396063717_1 + DFSClient_NONMAPREDUCE_-216163116_1 127.0.0.1 - andrew + jing supergroup 420 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 18 @@ -189,15 +189,15 @@ 16 0 0 - /file_create_u\0001;F431 + /file_create 1 - 1388429889420 - 1388429889412 + 1389045295357 + 1389045295354 512 - andrew + jing supergroup 420 @@ -207,7 +207,7 @@ OP_SET_REPLICATION 17 - /file_create_u\0001;F431 + /file_create 1 @@ -215,7 +215,7 @@ OP_SET_PERMISSIONS 18 - /file_create_u\0001;F431 + /file_create 511 @@ -223,7 +223,7 @@ OP_SET_OWNER 19 - /file_create_u\0001;F431 + /file_create newOwner @@ -232,7 +232,7 @@ 20 0 - /file_create_u\0001;F431 + /file_create 1285195527000 1285195527000 @@ -251,11 +251,11 @@ 22 0 - /file_create_u\0001;F431 + /file_create /file_moved - 1388429889495 + 1389045295378 NONE - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 25 @@ -267,17 +267,17 @@ 16389 /file_concat_target 1 - 1388429889511 - 1388429889511 + 1389045295383 + 1389045295383 512 - DFSClient_NONMAPREDUCE_-1396063717_1 + DFSClient_NONMAPREDUCE_-216163116_1 127.0.0.1 - andrew + jing supergroup 420 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 27 @@ -296,7 +296,7 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 26 /file_concat_target @@ -324,7 +324,7 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 29 /file_concat_target @@ -357,15 +357,10 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 32 /file_concat_target - - 1073741825 - 512 - 1001 - 1073741826 512 @@ -388,8 +383,8 @@ 0 /file_concat_target 1 - 1388429889812 - 1388429889511 + 1389045295484 + 1389045295383 512 @@ -409,7 +404,7 @@ 1003 - andrew + jing supergroup 420 @@ -423,17 +418,17 @@ 16390 /file_concat_0 1 - 1388429889825 - 1388429889825 + 1389045295486 + 1389045295486 512 - DFSClient_NONMAPREDUCE_-1396063717_1 + DFSClient_NONMAPREDUCE_-216163116_1 127.0.0.1 - andrew + jing supergroup 420 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 40 @@ -452,7 +447,7 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 37 /file_concat_0 @@ -480,7 +475,7 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 40 /file_concat_0 @@ -513,15 +508,10 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 43 /file_concat_0 - - 1073741828 - 512 - 1004 - 1073741829 512 @@ -544,8 +534,8 @@ 0 /file_concat_0 1 - 1388429889909 - 1388429889825 + 1389045295507 + 1389045295486 512 @@ -565,7 +555,7 @@ 1006 - andrew + jing supergroup 420 @@ -579,17 +569,17 @@ 16391 /file_concat_1 1 - 1388429889920 - 1388429889920 + 1389045295509 + 1389045295509 512 - DFSClient_NONMAPREDUCE_-1396063717_1 + DFSClient_NONMAPREDUCE_-216163116_1 127.0.0.1 - andrew + jing supergroup 420 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 52 @@ -608,7 +598,7 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 48 /file_concat_1 @@ -636,7 +626,7 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 51 /file_concat_1 @@ -669,15 +659,10 @@ - OP_UPDATE_BLOCKS + OP_ADD_BLOCK 54 /file_concat_1 - - 1073741831 - 512 - 1007 - 1073741832 512 @@ -700,8 +685,8 @@ 0 /file_concat_1 1 - 1388429890016 - 1388429889920 + 1389045295532 + 1389045295509 512 @@ -721,7 +706,7 @@ 1009 - andrew + jing supergroup 420 @@ -733,12 +718,12 @@ 56 0 /file_concat_target - 1388429890031 + 1389045295535 /file_concat_0 /file_concat_1 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 63 @@ -750,175 +735,57 @@ 16392 /file_symlink /file_concat_target - 1388429890046 - 1388429890046 + 1389045295540 + 1389045295540 - andrew + jing supergroup 511 - 
bfe81b9e-5c10-4f90-a5e1-b707da7bb781 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 64 - - OP_GET_DELEGATION_TOKEN - - 58 - - HDFS_DELEGATION_TOKEN - 1 - andrew - JobTracker - - 1388429890059 - 1389034690059 - 2 - - 1388516290059 - - - - OP_RENEW_DELEGATION_TOKEN - - 59 - - HDFS_DELEGATION_TOKEN - 1 - andrew - JobTracker - - 1388429890059 - 1389034690059 - 2 - - 1388516290109 - - - - OP_CANCEL_DELEGATION_TOKEN - - 60 - - HDFS_DELEGATION_TOKEN - 1 - andrew - JobTracker - - 1388429890059 - 1389034690059 - 2 - - - - - OP_ADD_CACHE_POOL - - 61 - poolparty - andrew - andrew - 493 - 9223372036854775807 - 2305843009213693951 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 - 68 - - - - OP_MODIFY_CACHE_POOL - - 62 - poolparty - carlton - party - 448 - 1989 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 - 69 - - - - OP_ADD_CACHE_DIRECTIVE - - 63 - 1 - /bar - 1 - poolparty - 2305844397643584141 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 - 70 - - - - OP_MODIFY_CACHE_DIRECTIVE - - 64 - 1 - /bar2 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 - 71 - - - - OP_REMOVE_CACHE_DIRECTIVE - - 65 - 1 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 - 72 - - - - OP_REMOVE_CACHE_POOL - - 66 - poolparty - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 - 73 - - OP_ADD - 67 + 58 0 16393 /hard-lease-recovery-test 1 - 1388429890261 - 1388429890261 + 1389045295543 + 1389045295543 512 - DFSClient_NONMAPREDUCE_-1396063717_1 + DFSClient_NONMAPREDUCE_-216163116_1 127.0.0.1 - andrew + jing supergroup 420 - bfe81b9e-5c10-4f90-a5e1-b707da7bb781 - 74 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 + 65 OP_ALLOCATE_BLOCK_ID - 68 + 59 1073741834 OP_SET_GENSTAMP_V2 - 69 + 60 1010 - OP_UPDATE_BLOCKS + OP_ADD_BLOCK - 70 + 61 /hard-lease-recovery-test 1073741834 @@ -932,7 +799,7 @@ OP_UPDATE_BLOCKS - 71 + 62 /hard-lease-recovery-test 1073741834 @@ -946,31 +813,15 @@ OP_SET_GENSTAMP_V2 - 72 + 63 1011 OP_REASSIGN_LEASE - 73 - DFSClient_NONMAPREDUCE_-1396063717_1 - /hard-lease-recovery-test - HDFS_NameNode - - - - OP_SET_GENSTAMP_V2 - - 74 - 1012 - - - - OP_REASSIGN_LEASE - - 75 - HDFS_NameNode + 64 + DFSClient_NONMAPREDUCE_-216163116_1 /hard-lease-recovery-test HDFS_NameNode @@ -978,32 +829,97 @@ OP_CLOSE - 76 + 65 0 0 /hard-lease-recovery-test 1 - 1388429895216 - 1388429890261 + 1389045298180 + 1389045295543 512 1073741834 11 - 1012 + 1011 - andrew + jing supergroup 420 + + OP_ADD_CACHE_POOL + + 66 + pool1 + jing + staff + 493 + 9223372036854775807 + 2305843009213693951 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 + 72 + + + + OP_MODIFY_CACHE_POOL + + 67 + pool1 + 99 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 + 73 + + + + OP_ADD_CACHE_DIRECTIVE + + 68 + 1 + /path + 1 + pool1 + 2305844398258992525 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 + 74 + + + + OP_MODIFY_CACHE_DIRECTIVE + + 69 + 1 + 2 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 + 75 + + + + OP_REMOVE_CACHE_DIRECTIVE + + 70 + 1 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 + 76 + + + + OP_REMOVE_CACHE_POOL + + 71 + pool1 + 48c96601-9238-4d1f-b78b-ef0f1e922ba2 + 77 + + OP_END_LOG_SEGMENT - 77 + 72 From a913cdb398ff9ea89ff98bcda804f7ea9457971f Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Wed, 15 Jan 2014 05:45:24 +0000 Subject: [PATCH 02/32] HADOOP-10234. "hadoop.cmd jar" does not propagate exit code. Contributed by Chris Nauroth. 
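The script previously returned with "goto :eof" after the "call %JAVA% ..." invocation, which discards the launched JVM's exit status; the change below uses "exit /b %ERRORLEVEL%" so a non-zero System.exit(..) from the launched class reaches the caller of hadoop.cmd. A minimal sketch of how a caller could confirm the propagated status, assuming hadoop.cmd is on the PATH and exit-with-2.jar is a jar whose main method calls System.exit(2); both names are illustrative and not part of this patch:

import java.io.IOException;

/** Runs "hadoop.cmd jar ..." and checks that the child's exit status is forwarded. */
public class ExitCodeCheck {
  public static void main(String[] args) throws IOException, InterruptedException {
    Process p = new ProcessBuilder("hadoop.cmd", "jar", "exit-with-2.jar")
        .inheritIO()   // let the child's stdout/stderr flow to this console
        .start();
    int status = p.waitFor();
    // With the old "goto :eof" this printed 0; with "exit /b %ERRORLEVEL%" it
    // should print the value passed to System.exit(..) inside the jar.
    System.out.println("hadoop.cmd exited with " + status);
    System.exit(status);
  }
}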
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558296 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d50a666adab..2dac015c1fd 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -605,6 +605,8 @@ Release 2.3.0 - UNRELEASED HADOOP-10178. Configuration deprecation always emit "deprecated" warnings when a new key is used. (Shanyu Zhao via cnauroth) + HADOOP-10234. "hadoop.cmd jar" does not propagate exit code. (cnauroth) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd b/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd index 63b2945c6ec..54b81e364bf 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd @@ -143,7 +143,7 @@ call :updatepath %HADOOP_BIN_PATH% call %JAVA% %JAVA_HEAP_MAX% %HADOOP_OPTS% -classpath %CLASSPATH% %CLASS% %hadoop-command-arguments% - goto :eof + exit /b %ERRORLEVEL% :fs set CLASS=org.apache.hadoop.fs.FsShell From 5ed1772f3891b942e3d2de6ec6cb111b32ff4e58 Mon Sep 17 00:00:00 2001 From: Junping Du Date: Wed, 15 Jan 2014 10:19:28 +0000 Subject: [PATCH 03/32] YARN-1603. Remove two *.orig files which were unexpectedly committed. (Zhijie Shen via junping_du) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558337 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1dabb970354..5db3a9f97a6 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -335,6 +335,9 @@ Release 2.4.0 - UNRELEASED YARN-1598. HA-related rmadmin commands don't work on a secure cluster (kasha) + YARN-1603. Remove two *.orig files which were unexpectedly committed. + (Zhijie Shen via junping_du) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES From 2400f79db0e5d6bb148b6aa6038c42976708ce09 Mon Sep 17 00:00:00 2001 From: Junping Du Date: Wed, 15 Jan 2014 11:02:23 +0000 Subject: [PATCH 04/32] svn merge -c -1558337 . for reverting YARN-1603, which only partly committed. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558339 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5db3a9f97a6..1dabb970354 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -335,9 +335,6 @@ Release 2.4.0 - UNRELEASED YARN-1598. HA-related rmadmin commands don't work on a secure cluster (kasha) - YARN-1603. Remove two *.orig files which were unexpectedly committed. - (Zhijie Shen via junping_du) - Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES From 4d413350d7d92c83457c2c1b08b5e23175e41725 Mon Sep 17 00:00:00 2001 From: Junping Du Date: Wed, 15 Jan 2014 11:05:15 +0000 Subject: [PATCH 05/32] YARN-1603. Remove two *.orig files which were unexpectedly committed. 
(Zhijie Shen via junping_du) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558340 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/fair/FairScheduler.java.orig | 1361 ----------------- .../fifo/TestFifoScheduler.java.orig | 615 -------- 3 files changed, 3 insertions(+), 1976 deletions(-) delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java.orig delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java.orig diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1dabb970354..5db3a9f97a6 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -335,6 +335,9 @@ Release 2.4.0 - UNRELEASED YARN-1598. HA-related rmadmin commands don't work on a secure cluster (kasha) + YARN-1603. Remove two *.orig files which were unexpectedly committed. + (Zhijie Shen via junping_du) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java.orig b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java.orig deleted file mode 100644 index 9fc43299681..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java.orig +++ /dev/null @@ -1,1361 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; -import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; -import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerStatus; -import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.Priority; -import org.apache.hadoop.yarn.api.records.QueueACL; -import org.apache.hadoop.yarn.api.records.QueueInfo; -import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.ResourceRequest; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; -import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; -import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; -import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; -import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; -import org.apache.hadoop.yarn.util.Clock; -import org.apache.hadoop.yarn.util.SystemClock; -import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; -import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; -import org.apache.hadoop.yarn.util.resource.ResourceCalculator; -import org.apache.hadoop.yarn.util.resource.Resources; - -import com.google.common.annotations.VisibleForTesting; - -/** - * A scheduler that schedules resources between a set of queues. The scheduler - * keeps track of the resources used by each queue, and attempts to maintain - * fairness by scheduling tasks at queues whose allocations are farthest below - * an ideal fair distribution. - * - * The fair scheduler supports hierarchical queues. All queues descend from a - * queue named "root". Available resources are distributed among the children - * of the root queue in the typical fair scheduling fashion. Then, the children - * distribute the resources assigned to them to their children in the same - * fashion. Applications may only be scheduled on leaf queues. Queues can be - * specified as children of other queues by placing them as sub-elements of their - * parents in the fair scheduler configuration file. - * - * A queue's name starts with the names of its parents, with periods as - * separators. So a queue named "queue1" under the root named, would be - * referred to as "root.queue1", and a queue named "queue2" under a queue - * named "parent1" would be referred to as "root.parent1.queue2". 
- */ -@LimitedPrivate("yarn") -@Unstable -@SuppressWarnings("unchecked") -public class FairScheduler implements ResourceScheduler { - private boolean initialized; - private FairSchedulerConfiguration conf; - private RMContext rmContext; - private Resource minimumAllocation; - private Resource maximumAllocation; - private Resource incrAllocation; - private QueueManager queueMgr; - private Clock clock; - private boolean usePortForNodeName; - - private static final Log LOG = LogFactory.getLog(FairScheduler.class); - - private static final ResourceCalculator RESOURCE_CALCULATOR = - new DefaultResourceCalculator(); - - // Value that container assignment methods return when a container is - // reserved - public static final Resource CONTAINER_RESERVED = Resources.createResource(-1); - - // How often fair shares are re-calculated (ms) - protected long UPDATE_INTERVAL = 500; - - private final static List EMPTY_CONTAINER_LIST = - new ArrayList(); - - private static final Allocation EMPTY_ALLOCATION = - new Allocation(EMPTY_CONTAINER_LIST, Resources.createResource(0)); - - // Aggregate metrics - FSQueueMetrics rootMetrics; - - // Time when we last updated preemption vars - protected long lastPreemptionUpdateTime; - // Time we last ran preemptTasksIfNecessary - private long lastPreemptCheckTime; - - // This stores per-application scheduling information, - @VisibleForTesting - protected Map applications = - new ConcurrentHashMap(); - - // Nodes in the cluster, indexed by NodeId - private Map nodes = - new ConcurrentHashMap(); - - // Aggregate capacity of the cluster - private Resource clusterCapacity = - RecordFactoryProvider.getRecordFactory(null).newRecordInstance(Resource.class); - - // How often tasks are preempted - protected long preemptionInterval; - - // ms to wait before force killing stuff (must be longer than a couple - // of heartbeats to give task-kill commands a chance to act). - protected long waitTimeBeforeKill; - - // Containers whose AMs have been warned that they will be preempted soon. 
- private List warnedContainers = new ArrayList(); - - protected boolean preemptionEnabled; - protected boolean sizeBasedWeight; // Give larger weights to larger jobs - protected WeightAdjuster weightAdjuster; // Can be null for no weight adjuster - protected boolean continuousSchedulingEnabled; // Continuous Scheduling enabled or not - protected int continuousSchedulingSleepMs; // Sleep time for each pass in continuous scheduling - private Comparator nodeAvailableResourceComparator = - new NodeAvailableResourceComparator(); // Node available resource comparator - protected double nodeLocalityThreshold; // Cluster threshold for node locality - protected double rackLocalityThreshold; // Cluster threshold for rack locality - protected long nodeLocalityDelayMs; // Delay for node locality - protected long rackLocalityDelayMs; // Delay for rack locality - private FairSchedulerEventLog eventLog; // Machine-readable event log - protected boolean assignMultiple; // Allocate multiple containers per - // heartbeat - protected int maxAssign; // Max containers to assign per heartbeat - - @VisibleForTesting - final MaxRunningAppsEnforcer maxRunningEnforcer; - - private AllocationFileLoaderService allocsLoader; - @VisibleForTesting - AllocationConfiguration allocConf; - - public FairScheduler() { - clock = new SystemClock(); - allocsLoader = new AllocationFileLoaderService(); - queueMgr = new QueueManager(this); - maxRunningEnforcer = new MaxRunningAppsEnforcer(this); - } - - private void validateConf(Configuration conf) { - // validate scheduler memory allocation setting - int minMem = conf.getInt( - YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); - int maxMem = conf.getInt( - YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB); - - if (minMem < 0 || minMem > maxMem) { - throw new YarnRuntimeException("Invalid resource scheduler memory" - + " allocation configuration" - + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB - + "=" + minMem - + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB - + "=" + maxMem + ", min should equal greater than 0" - + ", max should be no smaller than min."); - } - - // validate scheduler vcores allocation setting - int minVcores = conf.getInt( - YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); - int maxVcores = conf.getInt( - YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); - - if (minVcores < 0 || minVcores > maxVcores) { - throw new YarnRuntimeException("Invalid resource scheduler vcores" - + " allocation configuration" - + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES - + "=" + minVcores - + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES - + "=" + maxVcores + ", min should equal greater than 0" - + ", max should be no smaller than min."); - } - } - - public FairSchedulerConfiguration getConf() { - return conf; - } - - public QueueManager getQueueManager() { - return queueMgr; - } - - @Override - public RMContainer getRMContainer(ContainerId containerId) { - FSSchedulerApp attempt = getCurrentAttemptForContainer(containerId); - return (attempt == null) ? 
null : attempt.getRMContainer(containerId); - } - - private FSSchedulerApp getCurrentAttemptForContainer( - ContainerId containerId) { - SchedulerApplication app = - applications.get(containerId.getApplicationAttemptId() - .getApplicationId()); - if (app != null) { - return (FSSchedulerApp) app.getCurrentAppAttempt(); - } - return null; - } - - /** - * A runnable which calls {@link FairScheduler#update()} every - * UPDATE_INTERVAL milliseconds. - */ - private class UpdateThread implements Runnable { - public void run() { - while (true) { - try { - Thread.sleep(UPDATE_INTERVAL); - update(); - preemptTasksIfNecessary(); - } catch (Exception e) { - LOG.error("Exception in fair scheduler UpdateThread", e); - } - } - } - } - - /** - * Recompute the internal variables used by the scheduler - per-job weights, - * fair shares, deficits, minimum slot allocations, and amount of used and - * required resources per job. - */ - protected synchronized void update() { - updatePreemptionVariables(); // Determine if any queues merit preemption - - FSQueue rootQueue = queueMgr.getRootQueue(); - - // Recursively update demands for all queues - rootQueue.updateDemand(); - - rootQueue.setFairShare(clusterCapacity); - // Recursively compute fair shares for all queues - // and update metrics - rootQueue.recomputeShares(); - } - - /** - * Update the preemption fields for all QueueScheduables, i.e. the times since - * each queue last was at its guaranteed share and at > 1/2 of its fair share - * for each type of task. - */ - private void updatePreemptionVariables() { - long now = clock.getTime(); - lastPreemptionUpdateTime = now; - for (FSLeafQueue sched : queueMgr.getLeafQueues()) { - if (!isStarvedForMinShare(sched)) { - sched.setLastTimeAtMinShare(now); - } - if (!isStarvedForFairShare(sched)) { - sched.setLastTimeAtHalfFairShare(now); - } - } - } - - /** - * Is a queue below its min share for the given task type? - */ - boolean isStarvedForMinShare(FSLeafQueue sched) { - Resource desiredShare = Resources.min(RESOURCE_CALCULATOR, clusterCapacity, - sched.getMinShare(), sched.getDemand()); - return Resources.lessThan(RESOURCE_CALCULATOR, clusterCapacity, - sched.getResourceUsage(), desiredShare); - } - - /** - * Is a queue being starved for fair share for the given task type? This is - * defined as being below half its fair share. - */ - boolean isStarvedForFairShare(FSLeafQueue sched) { - Resource desiredFairShare = Resources.min(RESOURCE_CALCULATOR, clusterCapacity, - Resources.multiply(sched.getFairShare(), .5), sched.getDemand()); - return Resources.lessThan(RESOURCE_CALCULATOR, clusterCapacity, - sched.getResourceUsage(), desiredFairShare); - } - - /** - * Check for queues that need tasks preempted, either because they have been - * below their guaranteed share for minSharePreemptionTimeout or they have - * been below half their fair share for the fairSharePreemptionTimeout. If - * such queues exist, compute how many tasks of each type need to be preempted - * and then select the right ones using preemptTasks. 
- */ - protected synchronized void preemptTasksIfNecessary() { - if (!preemptionEnabled) { - return; - } - - long curTime = clock.getTime(); - if (curTime - lastPreemptCheckTime < preemptionInterval) { - return; - } - lastPreemptCheckTime = curTime; - - Resource resToPreempt = Resources.none(); - - for (FSLeafQueue sched : queueMgr.getLeafQueues()) { - resToPreempt = Resources.add(resToPreempt, resToPreempt(sched, curTime)); - } - if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, resToPreempt, - Resources.none())) { - preemptResources(queueMgr.getLeafQueues(), resToPreempt); - } - } - - /** - * Preempt a quantity of resources from a list of QueueSchedulables. The - * policy for this is to pick apps from queues that are over their fair share, - * but make sure that no queue is placed below its fair share in the process. - * We further prioritize preemption by choosing containers with lowest - * priority to preempt. - */ - protected void preemptResources(Collection scheds, - Resource toPreempt) { - if (scheds.isEmpty() || Resources.equals(toPreempt, Resources.none())) { - return; - } - - Map apps = - new HashMap(); - Map queues = - new HashMap(); - - // Collect running containers from over-scheduled queues - List runningContainers = new ArrayList(); - for (FSLeafQueue sched : scheds) { - if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, - sched.getResourceUsage(), sched.getFairShare())) { - for (AppSchedulable as : sched.getRunnableAppSchedulables()) { - for (RMContainer c : as.getApp().getLiveContainers()) { - runningContainers.add(c); - apps.put(c, as.getApp()); - queues.put(c, sched); - } - } - } - } - - // Sort containers into reverse order of priority - Collections.sort(runningContainers, new Comparator() { - public int compare(RMContainer c1, RMContainer c2) { - int ret = c1.getContainer().getPriority().compareTo( - c2.getContainer().getPriority()); - if (ret == 0) { - return c2.getContainerId().compareTo(c1.getContainerId()); - } - return ret; - } - }); - - // Scan down the list of containers we've already warned and kill them - // if we need to. Remove any containers from the list that we don't need - // or that are no longer running. 
- Iterator warnedIter = warnedContainers.iterator(); - Set preemptedThisRound = new HashSet(); - while (warnedIter.hasNext()) { - RMContainer container = warnedIter.next(); - if (container.getState() == RMContainerState.RUNNING && - Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, - toPreempt, Resources.none())) { - warnOrKillContainer(container, apps.get(container), queues.get(container)); - preemptedThisRound.add(container); - Resources.subtractFrom(toPreempt, container.getContainer().getResource()); - } else { - warnedIter.remove(); - } - } - - // Scan down the rest of the containers until we've preempted enough, making - // sure we don't preempt too many from any queue - Iterator runningIter = runningContainers.iterator(); - while (runningIter.hasNext() && - Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, - toPreempt, Resources.none())) { - RMContainer container = runningIter.next(); - FSLeafQueue sched = queues.get(container); - if (!preemptedThisRound.contains(container) && - Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, - sched.getResourceUsage(), sched.getFairShare())) { - warnOrKillContainer(container, apps.get(container), sched); - - warnedContainers.add(container); - Resources.subtractFrom(toPreempt, container.getContainer().getResource()); - } - } - } - - private void warnOrKillContainer(RMContainer container, FSSchedulerApp app, - FSLeafQueue queue) { - LOG.info("Preempting container (prio=" + container.getContainer().getPriority() + - "res=" + container.getContainer().getResource() + - ") from queue " + queue.getName()); - - Long time = app.getContainerPreemptionTime(container); - - if (time != null) { - // if we asked for preemption more than maxWaitTimeBeforeKill ms ago, - // proceed with kill - if (time + waitTimeBeforeKill < clock.getTime()) { - ContainerStatus status = - SchedulerUtils.createPreemptedContainerStatus( - container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); - - // TODO: Not sure if this ever actually adds this to the list of cleanup - // containers on the RMNode (see SchedulerNode.releaseContainer()). - completedContainer(container, status, RMContainerEventType.KILL); - LOG.info("Killing container" + container + - " (after waiting for premption for " + - (clock.getTime() - time) + "ms)"); - } - } else { - // track the request in the FSSchedulerApp itself - app.addPreemption(container, clock.getTime()); - } - } - - /** - * Return the resource amount that this queue is allowed to preempt, if any. - * If the queue has been below its min share for at least its preemption - * timeout, it should preempt the difference between its current share and - * this min share. If it has been below half its fair share for at least the - * fairSharePreemptionTimeout, it should preempt enough tasks to get up to its - * full fair share. If both conditions hold, we preempt the max of the two - * amounts (this shouldn't happen unless someone sets the timeouts to be - * identical for some reason). 
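A worked example of the rule described above, simplified to memory-only arithmetic; all numbers are hypothetical and in MB, and both preemption timeouts are assumed to have expired:

public class ResToPreemptSketch {
  public static void main(String[] args) {
    // Hypothetical queue state.
    long usage = 2048, minShare = 4096, fairShare = 8192, demand = 16384;

    // Deficit relative to the min share target (capped by demand).
    long dueToMinShare = Math.max(0, Math.min(minShare, demand) - usage);    // 2048

    // Deficit relative to the full fair share target (capped by demand).
    long dueToFairShare = Math.max(0, Math.min(fairShare, demand) - usage);  // 6144

    // Preempt the larger of the two deficits.
    long toPreempt = Math.max(dueToMinShare, dueToFairShare);
    System.out.println("toPreempt = " + toPreempt + " MB");                  // 6144
  }
}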
- */ - protected Resource resToPreempt(FSLeafQueue sched, long curTime) { - String queue = sched.getName(); - long minShareTimeout = allocConf.getMinSharePreemptionTimeout(queue); - long fairShareTimeout = allocConf.getFairSharePreemptionTimeout(); - Resource resDueToMinShare = Resources.none(); - Resource resDueToFairShare = Resources.none(); - if (curTime - sched.getLastTimeAtMinShare() > minShareTimeout) { - Resource target = Resources.min(RESOURCE_CALCULATOR, clusterCapacity, - sched.getMinShare(), sched.getDemand()); - resDueToMinShare = Resources.max(RESOURCE_CALCULATOR, clusterCapacity, - Resources.none(), Resources.subtract(target, sched.getResourceUsage())); - } - if (curTime - sched.getLastTimeAtHalfFairShare() > fairShareTimeout) { - Resource target = Resources.min(RESOURCE_CALCULATOR, clusterCapacity, - sched.getFairShare(), sched.getDemand()); - resDueToFairShare = Resources.max(RESOURCE_CALCULATOR, clusterCapacity, - Resources.none(), Resources.subtract(target, sched.getResourceUsage())); - } - Resource resToPreempt = Resources.max(RESOURCE_CALCULATOR, clusterCapacity, - resDueToMinShare, resDueToFairShare); - if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, - resToPreempt, Resources.none())) { - String message = "Should preempt " + resToPreempt + " res for queue " - + sched.getName() + ": resDueToMinShare = " + resDueToMinShare - + ", resDueToFairShare = " + resDueToFairShare; - LOG.info(message); - } - return resToPreempt; - } - - public RMContainerTokenSecretManager getContainerTokenSecretManager() { - return rmContext.getContainerTokenSecretManager(); - } - - // synchronized for sizeBasedWeight - public synchronized ResourceWeights getAppWeight(AppSchedulable app) { - double weight = 1.0; - if (sizeBasedWeight) { - // Set weight based on current memory demand - weight = Math.log1p(app.getDemand().getMemory()) / Math.log(2); - } - weight *= app.getPriority().getPriority(); - if (weightAdjuster != null) { - // Run weight through the user-supplied weightAdjuster - weight = weightAdjuster.adjustWeight(app, weight); - } - return new ResourceWeights((float)weight); - } - - @Override - public Resource getMinimumResourceCapability() { - return minimumAllocation; - } - - public Resource getIncrementResourceCapability() { - return incrAllocation; - } - - @Override - public Resource getMaximumResourceCapability() { - return maximumAllocation; - } - - public double getNodeLocalityThreshold() { - return nodeLocalityThreshold; - } - - public double getRackLocalityThreshold() { - return rackLocalityThreshold; - } - - public long getNodeLocalityDelayMs() { - return nodeLocalityDelayMs; - } - - public long getRackLocalityDelayMs() { - return rackLocalityDelayMs; - } - - public boolean isContinuousSchedulingEnabled() { - return continuousSchedulingEnabled; - } - - public synchronized int getContinuousSchedulingSleepMs() { - return continuousSchedulingSleepMs; - } - - public Resource getClusterCapacity() { - return clusterCapacity; - } - - public synchronized Clock getClock() { - return clock; - } - - protected synchronized void setClock(Clock clock) { - this.clock = clock; - } - - public FairSchedulerEventLog getEventLog() { - return eventLog; - } - - /** - * Add a new application to the scheduler, with a given id, queue name, and - * user. This will accept a new app even if the user or queue is above - * configured limits, but the app will not be marked as runnable. 
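The size-based weight computed in getAppWeight above reduces to log2(1 + memory demand), scaled by the application's priority (and optionally by a WeightAdjuster). A standalone sketch of that arithmetic, with illustrative names and memory given in MB:

public class AppWeightSketch {
  // Roughly what getAppWeight does when sizeBasedWeight is enabled and no
  // WeightAdjuster is configured: log2(1 + memory demand) * priority.
  static double appWeight(long memoryDemandMb, int priority) {
    double weight = Math.log1p(memoryDemandMb) / Math.log(2);
    return weight * priority;
  }

  public static void main(String[] args) {
    System.out.println(appWeight(1024, 1));   // ~10.0
    System.out.println(appWeight(8192, 1));   // ~13.0
  }
}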
- */ - protected synchronized void addApplication(ApplicationId applicationId, - String queueName, String user) { - if (queueName == null || queueName.isEmpty()) { - String message = "Reject application " + applicationId + - " submitted by user " + user + " with an empty queue name."; - LOG.info(message); - rmContext.getDispatcher().getEventHandler() - .handle(new RMAppRejectedEvent(applicationId, message)); - return; - } - - RMApp rmApp = rmContext.getRMApps().get(applicationId); - FSLeafQueue queue = assignToQueue(rmApp, queueName, user); - if (queue == null) { - rmContext.getDispatcher().getEventHandler().handle( - new RMAppRejectedEvent(applicationId, - "Application rejected by queue placement policy")); - return; - } - - // Enforce ACLs - UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(user); - - if (!queue.hasAccess(QueueACL.SUBMIT_APPLICATIONS, userUgi) - && !queue.hasAccess(QueueACL.ADMINISTER_QUEUE, userUgi)) { - String msg = "User " + userUgi.getUserName() + - " cannot submit applications to queue " + queue.getName(); - LOG.info(msg); - rmContext.getDispatcher().getEventHandler() - .handle(new RMAppRejectedEvent(applicationId, msg)); - return; - } - - SchedulerApplication application = - new SchedulerApplication(queue, user); - applications.put(applicationId, application); - queue.getMetrics().submitApp(user); - - LOG.info("Accepted application " + applicationId + " from user: " + user - + ", in queue: " + queueName + ", currently num of applications: " - + applications.size()); - rmContext.getDispatcher().getEventHandler() - .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); - } - - /** - * Add a new application attempt to the scheduler. - */ - protected synchronized void addApplicationAttempt( - ApplicationAttemptId applicationAttemptId, - boolean transferStateFromPreviousAttempt) { - SchedulerApplication application = - applications.get(applicationAttemptId.getApplicationId()); - String user = application.getUser(); - FSLeafQueue queue = (FSLeafQueue) application.getQueue(); - - FSSchedulerApp attempt = - new FSSchedulerApp(applicationAttemptId, user, - queue, new ActiveUsersManager(getRootQueueMetrics()), - rmContext); - if (transferStateFromPreviousAttempt) { - attempt.transferStateFromPreviousAttempt(application - .getCurrentAppAttempt()); - } - application.setCurrentAppAttempt(attempt); - - boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, user); - queue.addApp(attempt, runnable); - if (runnable) { - maxRunningEnforcer.trackRunnableApp(attempt); - } else { - maxRunningEnforcer.trackNonRunnableApp(attempt); - } - - queue.getMetrics().submitAppAttempt(user); - - LOG.info("Added Application Attempt " + applicationAttemptId - + " to scheduler from user: " + user); - rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptEvent(applicationAttemptId, - RMAppAttemptEventType.ATTEMPT_ADDED)); - } - - @VisibleForTesting - FSLeafQueue assignToQueue(RMApp rmApp, String queueName, String user) { - FSLeafQueue queue = null; - try { - QueuePlacementPolicy placementPolicy = allocConf.getPlacementPolicy(); - queueName = placementPolicy.assignAppToQueue(queueName, user); - if (queueName == null) { - return null; - } - queue = queueMgr.getLeafQueue(queueName, true); - } catch (IOException ex) { - LOG.error("Error assigning app to queue, rejecting", ex); - } - - if (rmApp != null) { - rmApp.setQueue(queue.getName()); - } else { - LOG.warn("Couldn't find RM app to set queue name on"); - } - - return queue; - } - - private 
synchronized void removeApplication(ApplicationId applicationId, - RMAppState finalState) { - SchedulerApplication application = applications.get(applicationId); - if (application == null){ - LOG.warn("Couldn't find application " + applicationId); - return; - } - application.stop(finalState); - applications.remove(applicationId); - } - - private synchronized void removeApplicationAttempt( - ApplicationAttemptId applicationAttemptId, - RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) { - LOG.info("Application " + applicationAttemptId + " is done." + - " finalState=" + rmAppAttemptFinalState); - SchedulerApplication application = - applications.get(applicationAttemptId.getApplicationId()); - FSSchedulerApp attempt = getSchedulerApp(applicationAttemptId); - - if (attempt == null || application == null) { - LOG.info("Unknown application " + applicationAttemptId + " has completed!"); - return; - } - - // Release all the running containers - for (RMContainer rmContainer : attempt.getLiveContainers()) { - if (keepContainers - && rmContainer.getState().equals(RMContainerState.RUNNING)) { - // do not kill the running container in the case of work-preserving AM - // restart. - LOG.info("Skip killing " + rmContainer.getContainerId()); - continue; - } - completedContainer(rmContainer, - SchedulerUtils.createAbnormalContainerStatus( - rmContainer.getContainerId(), - SchedulerUtils.COMPLETED_APPLICATION), - RMContainerEventType.KILL); - } - - // Release all reserved containers - for (RMContainer rmContainer : attempt.getReservedContainers()) { - completedContainer(rmContainer, - SchedulerUtils.createAbnormalContainerStatus( - rmContainer.getContainerId(), - "Application Complete"), - RMContainerEventType.KILL); - } - // Clean up pending requests, metrics etc. - attempt.stop(rmAppAttemptFinalState); - - // Inform the queue - FSLeafQueue queue = queueMgr.getLeafQueue(attempt.getQueue() - .getQueueName(), false); - boolean wasRunnable = queue.removeApp(attempt); - - if (wasRunnable) { - maxRunningEnforcer.updateRunnabilityOnAppRemoval(attempt); - } else { - maxRunningEnforcer.untrackNonRunnableApp(attempt); - } - } - - /** - * Clean up a completed container. 
- */ - private synchronized void completedContainer(RMContainer rmContainer, - ContainerStatus containerStatus, RMContainerEventType event) { - if (rmContainer == null) { - LOG.info("Null container completed..."); - return; - } - - Container container = rmContainer.getContainer(); - - // Get the application for the finished container - FSSchedulerApp application = - getCurrentAttemptForContainer(container.getId()); - ApplicationId appId = - container.getId().getApplicationAttemptId().getApplicationId(); - if (application == null) { - LOG.info("Container " + container + " of" + - " unknown application attempt " + appId + - " completed with event " + event); - return; - } - - // Get the node on which the container was allocated - FSSchedulerNode node = nodes.get(container.getNodeId()); - - if (rmContainer.getState() == RMContainerState.RESERVED) { - application.unreserve(node, rmContainer.getReservedPriority()); - node.unreserveResource(application); - } else { - application.containerCompleted(rmContainer, containerStatus, event); - node.releaseContainer(container); - updateRootQueueMetrics(); - } - - LOG.info("Application attempt " + application.getApplicationAttemptId() - + " released container " + container.getId() + " on node: " + node - + " with event: " + event); - } - - private synchronized void addNode(RMNode node) { - nodes.put(node.getNodeID(), new FSSchedulerNode(node, usePortForNodeName)); - Resources.addTo(clusterCapacity, node.getTotalCapability()); - updateRootQueueMetrics(); - - LOG.info("Added node " + node.getNodeAddress() + - " cluster capacity: " + clusterCapacity); - } - - private synchronized void removeNode(RMNode rmNode) { - FSSchedulerNode node = nodes.get(rmNode.getNodeID()); - // This can occur when an UNHEALTHY node reconnects - if (node == null) { - return; - } - Resources.subtractFrom(clusterCapacity, rmNode.getTotalCapability()); - updateRootQueueMetrics(); - - // Remove running containers - List runningContainers = node.getRunningContainers(); - for (RMContainer container : runningContainers) { - completedContainer(container, - SchedulerUtils.createAbnormalContainerStatus( - container.getContainerId(), - SchedulerUtils.LOST_CONTAINER), - RMContainerEventType.KILL); - } - - // Remove reservations, if any - RMContainer reservedContainer = node.getReservedContainer(); - if (reservedContainer != null) { - completedContainer(reservedContainer, - SchedulerUtils.createAbnormalContainerStatus( - reservedContainer.getContainerId(), - SchedulerUtils.LOST_CONTAINER), - RMContainerEventType.KILL); - } - - nodes.remove(rmNode.getNodeID()); - LOG.info("Removed node " + rmNode.getNodeAddress() + - " cluster capacity: " + clusterCapacity); - } - - @Override - public Allocation allocate(ApplicationAttemptId appAttemptId, - List ask, List release, List blacklistAdditions, List blacklistRemovals) { - - // Make sure this application exists - FSSchedulerApp application = getSchedulerApp(appAttemptId); - if (application == null) { - LOG.info("Calling allocate on removed " + - "or non existant application " + appAttemptId); - return EMPTY_ALLOCATION; - } - - // Sanity check - SchedulerUtils.normalizeRequests(ask, new DominantResourceCalculator(), - clusterCapacity, minimumAllocation, maximumAllocation, incrAllocation); - - // Release containers - for (ContainerId releasedContainerId : release) { - RMContainer rmContainer = getRMContainer(releasedContainerId); - if (rmContainer == null) { - RMAuditLogger.logFailure(application.getUser(), - AuditConstants.RELEASE_CONTAINER, - 
"Unauthorized access or invalid container", "FairScheduler", - "Trying to release container not owned by app or with invalid id", - application.getApplicationId(), releasedContainerId); - } - completedContainer(rmContainer, - SchedulerUtils.createAbnormalContainerStatus( - releasedContainerId, - SchedulerUtils.RELEASED_CONTAINER), - RMContainerEventType.RELEASED); - } - - synchronized (application) { - if (!ask.isEmpty()) { - if (LOG.isDebugEnabled()) { - LOG.debug("allocate: pre-update" + - " applicationAttemptId=" + appAttemptId + - " application=" + application.getApplicationId()); - } - application.showRequests(); - - // Update application requests - application.updateResourceRequests(ask); - - LOG.debug("allocate: post-update"); - application.showRequests(); - } - - if (LOG.isDebugEnabled()) { - LOG.debug("allocate:" + - " applicationAttemptId=" + appAttemptId + - " #ask=" + ask.size()); - - LOG.debug("Preempting " + application.getPreemptionContainers().size() - + " container(s)"); - } - - Set preemptionContainerIds = new HashSet(); - for (RMContainer container : application.getPreemptionContainers()) { - preemptionContainerIds.add(container.getContainerId()); - } - - application.updateBlacklist(blacklistAdditions, blacklistRemovals); - - return new Allocation(application.pullNewlyAllocatedContainers(), - application.getHeadroom(), preemptionContainerIds); - } - } - - /** - * Process a container which has launched on a node, as reported by the node. - */ - private void containerLaunchedOnNode(ContainerId containerId, FSSchedulerNode node) { - // Get the application for the finished container - FSSchedulerApp application = getCurrentAttemptForContainer(containerId); - if (application == null) { - LOG.info("Unknown application " - + containerId.getApplicationAttemptId().getApplicationId() - + " launched container " + containerId + " on node: " + node); - return; - } - - application.containerLaunchedOnNode(containerId, node.getNodeID()); - } - - /** - * Process a heartbeat update from a node. 
- */ - private synchronized void nodeUpdate(RMNode nm) { - if (LOG.isDebugEnabled()) { - LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + clusterCapacity); - } - eventLog.log("HEARTBEAT", nm.getHostName()); - FSSchedulerNode node = nodes.get(nm.getNodeID()); - - // Update resource if any change - SchedulerUtils.updateResourceIfChanged(node, nm, clusterCapacity, LOG); - - List containerInfoList = nm.pullContainerUpdates(); - List newlyLaunchedContainers = new ArrayList(); - List completedContainers = new ArrayList(); - for(UpdatedContainerInfo containerInfo : containerInfoList) { - newlyLaunchedContainers.addAll(containerInfo.getNewlyLaunchedContainers()); - completedContainers.addAll(containerInfo.getCompletedContainers()); - } - // Processing the newly launched containers - for (ContainerStatus launchedContainer : newlyLaunchedContainers) { - containerLaunchedOnNode(launchedContainer.getContainerId(), node); - } - - // Process completed containers - for (ContainerStatus completedContainer : completedContainers) { - ContainerId containerId = completedContainer.getContainerId(); - LOG.debug("Container FINISHED: " + containerId); - completedContainer(getRMContainer(containerId), - completedContainer, RMContainerEventType.FINISHED); - } - - if (continuousSchedulingEnabled) { - if (!completedContainers.isEmpty()) { - attemptScheduling(node); - } - } else { - attemptScheduling(node); - } - } - - private void continuousScheduling() { - while (true) { - List nodeIdList = new ArrayList(nodes.keySet()); - Collections.sort(nodeIdList, nodeAvailableResourceComparator); - - // iterate all nodes - for (NodeId nodeId : nodeIdList) { - if (nodes.containsKey(nodeId)) { - FSSchedulerNode node = nodes.get(nodeId); - try { - if (Resources.fitsIn(minimumAllocation, - node.getAvailableResource())) { - attemptScheduling(node); - } - } catch (Throwable ex) { - LOG.warn("Error while attempting scheduling for node " + node + - ": " + ex.toString(), ex); - } - } - } - try { - Thread.sleep(getContinuousSchedulingSleepMs()); - } catch (InterruptedException e) { - LOG.warn("Error while doing sleep in continuous scheduling: " + - e.toString(), e); - } - } - } - - /** Sort nodes by available resource */ - private class NodeAvailableResourceComparator implements Comparator { - - @Override - public int compare(NodeId n1, NodeId n2) { - return RESOURCE_CALCULATOR.compare(clusterCapacity, - nodes.get(n2).getAvailableResource(), - nodes.get(n1).getAvailableResource()); - } - } - - private synchronized void attemptScheduling(FSSchedulerNode node) { - // Assign new containers... - // 1. Check for reserved applications - // 2. 
Schedule if there are no reservations - - AppSchedulable reservedAppSchedulable = node.getReservedAppSchedulable(); - if (reservedAppSchedulable != null) { - Priority reservedPriority = node.getReservedContainer().getReservedPriority(); - if (!reservedAppSchedulable.hasContainerForNode(reservedPriority, node)) { - // Don't hold the reservation if app can no longer use it - LOG.info("Releasing reservation that cannot be satisfied for application " - + reservedAppSchedulable.getApp().getApplicationAttemptId() - + " on node " + node); - reservedAppSchedulable.unreserve(reservedPriority, node); - reservedAppSchedulable = null; - } else { - // Reservation exists; try to fulfill the reservation - LOG.info("Trying to fulfill reservation for application " - + reservedAppSchedulable.getApp().getApplicationAttemptId() - + " on node: " + node); - - node.getReservedAppSchedulable().assignReservedContainer(node); - } - } - if (reservedAppSchedulable == null) { - // No reservation, schedule at queue which is farthest below fair share - int assignedContainers = 0; - while (node.getReservedContainer() == null) { - boolean assignedContainer = false; - if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, - queueMgr.getRootQueue().assignContainer(node), - Resources.none())) { - assignedContainers++; - assignedContainer = true; - } - if (!assignedContainer) { break; } - if (!assignMultiple) { break; } - if ((assignedContainers >= maxAssign) && (maxAssign > 0)) { break; } - } - } - updateRootQueueMetrics(); - } - - @Override - public SchedulerNodeReport getNodeReport(NodeId nodeId) { - FSSchedulerNode node = nodes.get(nodeId); - return node == null ? null : new SchedulerNodeReport(node); - } - - public FSSchedulerApp getSchedulerApp(ApplicationAttemptId appAttemptId) { - SchedulerApplication app = - applications.get(appAttemptId.getApplicationId()); - if (app != null) { - return (FSSchedulerApp) app.getCurrentAppAttempt(); - } - return null; - } - - @Override - public SchedulerAppReport getSchedulerAppInfo( - ApplicationAttemptId appAttemptId) { - FSSchedulerApp attempt = getSchedulerApp(appAttemptId); - if (attempt == null) { - LOG.error("Request for appInfo of unknown attempt" + appAttemptId); - return null; - } - return new SchedulerAppReport(attempt); - } - - @Override - public ApplicationResourceUsageReport getAppResourceUsageReport( - ApplicationAttemptId appAttemptId) { - FSSchedulerApp attempt = getSchedulerApp(appAttemptId); - if (attempt == null) { - LOG.error("Request for appInfo of unknown attempt" + appAttemptId); - return null; - } - return attempt.getResourceUsageReport(); - } - - /** - * Subqueue metrics might be a little out of date because fair shares are - * recalculated at the update interval, but the root queue metrics needs to - * be updated synchronously with allocations and completions so that cluster - * metrics will be consistent. 
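The container-assignment loop in attemptScheduling above keeps allocating on a node until nothing more can be placed, assignMultiple is disabled, or the maxAssign cap is reached (it also stops if the node becomes reserved, which this condensed sketch omits). The tryAssign() stand-in plays the role of queueMgr.getRootQueue().assignContainer(node) returning more than Resources.none(); all values are arbitrary:

public class AssignLoopSketch {
  static boolean assignMultiple = true;
  static int maxAssign = 3;
  static int containersLeftOnNode = 10;   // pretend node capacity, in containers

  static boolean tryAssign() {
    if (containersLeftOnNode == 0) return false;
    containersLeftOnNode--;
    return true;
  }

  public static void main(String[] args) {
    int assigned = 0;
    while (true) {
      if (!tryAssign()) break;                            // nothing could be placed
      assigned++;
      if (!assignMultiple) break;                         // one container per heartbeat
      if (maxAssign > 0 && assigned >= maxAssign) break;  // per-heartbeat cap
    }
    System.out.println("assigned " + assigned + " containers");  // 3
  }
}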
- */ - private void updateRootQueueMetrics() { - rootMetrics.setAvailableResourcesToQueue( - Resources.subtract( - clusterCapacity, rootMetrics.getAllocatedResources())); - } - - @Override - public QueueMetrics getRootQueueMetrics() { - return rootMetrics; - } - - @Override - public void handle(SchedulerEvent event) { - switch (event.getType()) { - case NODE_ADDED: - if (!(event instanceof NodeAddedSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event; - addNode(nodeAddedEvent.getAddedRMNode()); - break; - case NODE_REMOVED: - if (!(event instanceof NodeRemovedSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent)event; - removeNode(nodeRemovedEvent.getRemovedRMNode()); - break; - case NODE_UPDATE: - if (!(event instanceof NodeUpdateSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event; - nodeUpdate(nodeUpdatedEvent.getRMNode()); - break; - case APP_ADDED: - if (!(event instanceof AppAddedSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; - addApplication(appAddedEvent.getApplicationId(), - appAddedEvent.getQueue(), appAddedEvent.getUser()); - break; - case APP_REMOVED: - if (!(event instanceof AppRemovedSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event; - removeApplication(appRemovedEvent.getApplicationID(), - appRemovedEvent.getFinalState()); - break; - case APP_ATTEMPT_ADDED: - if (!(event instanceof AppAttemptAddedSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - AppAttemptAddedSchedulerEvent appAttemptAddedEvent = - (AppAttemptAddedSchedulerEvent) event; - addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), - appAttemptAddedEvent.getTransferStateFromPreviousAttempt()); - break; - case APP_ATTEMPT_REMOVED: - if (!(event instanceof AppAttemptRemovedSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent = - (AppAttemptRemovedSchedulerEvent) event; - removeApplicationAttempt( - appAttemptRemovedEvent.getApplicationAttemptID(), - appAttemptRemovedEvent.getFinalAttemptState(), - appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts()); - break; - case CONTAINER_EXPIRED: - if (!(event instanceof ContainerExpiredSchedulerEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - ContainerExpiredSchedulerEvent containerExpiredEvent = - (ContainerExpiredSchedulerEvent)event; - ContainerId containerId = containerExpiredEvent.getContainerId(); - completedContainer(getRMContainer(containerId), - SchedulerUtils.createAbnormalContainerStatus( - containerId, - SchedulerUtils.EXPIRED_CONTAINER), - RMContainerEventType.EXPIRE); - break; - default: - LOG.error("Unknown event arrived at FairScheduler: " + event.toString()); - } - } - - @Override - public void recover(RMState state) throws Exception { - // NOT IMPLEMENTED - } - - @Override - public synchronized void reinitialize(Configuration conf, RMContext rmContext) - throws IOException { - if (!initialized) { - this.conf = new 
FairSchedulerConfiguration(conf); - validateConf(this.conf); - minimumAllocation = this.conf.getMinimumAllocation(); - maximumAllocation = this.conf.getMaximumAllocation(); - incrAllocation = this.conf.getIncrementAllocation(); - continuousSchedulingEnabled = this.conf.isContinuousSchedulingEnabled(); - continuousSchedulingSleepMs = - this.conf.getContinuousSchedulingSleepMs(); - nodeLocalityThreshold = this.conf.getLocalityThresholdNode(); - rackLocalityThreshold = this.conf.getLocalityThresholdRack(); - nodeLocalityDelayMs = this.conf.getLocalityDelayNodeMs(); - rackLocalityDelayMs = this.conf.getLocalityDelayRackMs(); - preemptionEnabled = this.conf.getPreemptionEnabled(); - assignMultiple = this.conf.getAssignMultiple(); - maxAssign = this.conf.getMaxAssign(); - sizeBasedWeight = this.conf.getSizeBasedWeight(); - preemptionInterval = this.conf.getPreemptionInterval(); - waitTimeBeforeKill = this.conf.getWaitTimeBeforeKill(); - usePortForNodeName = this.conf.getUsePortForNodeName(); - - rootMetrics = FSQueueMetrics.forQueue("root", null, true, conf); - this.rmContext = rmContext; - this.eventLog = new FairSchedulerEventLog(); - eventLog.init(this.conf); - - initialized = true; - - allocConf = new AllocationConfiguration(conf); - try { - queueMgr.initialize(conf); - } catch (Exception e) { - throw new IOException("Failed to start FairScheduler", e); - } - - Thread updateThread = new Thread(new UpdateThread()); - updateThread.setName("FairSchedulerUpdateThread"); - updateThread.setDaemon(true); - updateThread.start(); - - if (continuousSchedulingEnabled) { - // start continuous scheduling thread - Thread schedulingThread = new Thread( - new Runnable() { - @Override - public void run() { - continuousScheduling(); - } - } - ); - schedulingThread.setName("ContinuousScheduling"); - schedulingThread.setDaemon(true); - schedulingThread.start(); - } - - allocsLoader.init(conf); - allocsLoader.setReloadListener(new AllocationReloadListener()); - // If we fail to load allocations file on initialize, we want to fail - // immediately. After a successful load, exceptions on future reloads - // will just result in leaving things as they are. 
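The comment above describes a fail-fast-then-keep-last-good pattern: the first allocations load aborts startup on error, while later reloads log the failure and keep the previous configuration. A generic sketch of that pattern (types and names here are made up for illustration, not the actual AllocationFileLoaderService API):

public class ConfigReloadSketch {
  private volatile Object lastGoodConfig;   // placeholder for the allocation config

  // First load: any failure is fatal, so misconfiguration is caught at startup.
  void initialize() throws java.io.IOException {
    try {
      lastGoodConfig = loadConfig();
    } catch (Exception e) {
      throw new java.io.IOException("Failed to initialize scheduler", e);
    }
  }

  // Later reloads: log the error and keep the previous good configuration.
  void reload() {
    try {
      lastGoodConfig = loadConfig();
    } catch (Exception e) {
      System.err.println("Failed to reload allocations file, keeping old config: " + e);
    }
  }

  // Stand-in for reading and parsing the allocations file.
  private Object loadConfig() throws Exception {
    return new Object();
  }
}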
- try { - allocsLoader.reloadAllocations(); - } catch (Exception e) { - throw new IOException("Failed to initialize FairScheduler", e); - } - allocsLoader.start(); - } else { - try { - allocsLoader.reloadAllocations(); - } catch (Exception e) { - LOG.error("Failed to reload allocations file", e); - } - } - } - - @Override - public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues, - boolean recursive) throws IOException { - if (!queueMgr.exists(queueName)) { - throw new IOException("queue " + queueName + " does not exist"); - } - return queueMgr.getQueue(queueName).getQueueInfo(includeChildQueues, - recursive); - } - - @Override - public List getQueueUserAclInfo() { - UserGroupInformation user = null; - try { - user = UserGroupInformation.getCurrentUser(); - } catch (IOException ioe) { - return new ArrayList(); - } - - return queueMgr.getRootQueue().getQueueUserAclInfo(user); - } - - @Override - public int getNumClusterNodes() { - return nodes.size(); - } - - @Override - public synchronized boolean checkAccess(UserGroupInformation callerUGI, - QueueACL acl, String queueName) { - FSQueue queue = getQueueManager().getQueue(queueName); - if (queue == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("ACL not found for queue access-type " + acl - + " for queue " + queueName); - } - return false; - } - return queue.hasAccess(acl, callerUGI); - } - - public AllocationConfiguration getAllocationConfiguration() { - return allocConf; - } - - private class AllocationReloadListener implements - AllocationFileLoaderService.Listener { - - @Override - public void onReload(AllocationConfiguration queueInfo) { - // Commit the reload; also create any queue defined in the alloc file - // if it does not already exist, so it can be displayed on the web UI. - synchronized (FairScheduler.this) { - allocConf = queueInfo; - allocConf.getDefaultSchedulingPolicy().initialize(clusterCapacity); - queueMgr.updateAllocationConfiguration(allocConf); - } - } - } - - @Override - public List getAppsInQueue(String queueName) { - FSQueue queue = queueMgr.getQueue(queueName); - if (queue == null) { - return null; - } - List apps = new ArrayList(); - queue.collectSchedulerApplications(apps); - return apps; - } - -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java.orig b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java.orig deleted file mode 100644 index 2dc0e8805e7..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java.orig +++ /dev/null @@ -1,615 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. 
You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import junit.framework.Assert; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.net.NetworkTopology; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.Priority; -import org.apache.hadoop.yarn.api.records.QueueInfo; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.ResourceOption; -import org.apache.hadoop.yarn.api.records.ResourceRequest; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.event.AsyncDispatcher; -import org.apache.hadoop.yarn.event.InlineDispatcher; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.server.resourcemanager.Application; -import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; -import org.apache.hadoop.yarn.server.resourcemanager.MockRM; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; -import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; -import org.apache.hadoop.yarn.server.resourcemanager.Task; -import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; -import 
org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; -import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; -import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.apache.hadoop.yarn.util.resource.Resources; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class TestFifoScheduler { - private static final Log LOG = LogFactory.getLog(TestFifoScheduler.class); - private final int GB = 1024; - - private ResourceManager resourceManager = null; - - private static final RecordFactory recordFactory = - RecordFactoryProvider.getRecordFactory(null); - - @Before - public void setUp() throws Exception { - resourceManager = new ResourceManager(); - Configuration conf = new Configuration(); - conf.setClass(YarnConfiguration.RM_SCHEDULER, - FifoScheduler.class, ResourceScheduler.class); - resourceManager.init(conf); - } - - @After - public void tearDown() throws Exception { - resourceManager.stop(); - } - - private org.apache.hadoop.yarn.server.resourcemanager.NodeManager - registerNode(String hostName, int containerManagerPort, int nmHttpPort, - String rackName, Resource capability) throws IOException, - YarnException { - return new org.apache.hadoop.yarn.server.resourcemanager.NodeManager( - hostName, containerManagerPort, nmHttpPort, rackName, capability, - resourceManager); - } - - private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) { - ApplicationId appIdImpl = ApplicationId.newInstance(0, appId); - ApplicationAttemptId attId = - ApplicationAttemptId.newInstance(appIdImpl, attemptId); - return attId; - } - - private ResourceRequest createResourceRequest(int memory, String host, - int priority, int numContainers) { - ResourceRequest request = recordFactory - .newRecordInstance(ResourceRequest.class); - request.setCapability(Resources.createResource(memory)); - request.setResourceName(host); - request.setNumContainers(numContainers); - Priority prio = recordFactory.newRecordInstance(Priority.class); - prio.setPriority(priority); - request.setPriority(prio); - return request; - } - - @Test(timeout=5000) - public void testFifoSchedulerCapacityWhenNoNMs() { - FifoScheduler scheduler = new FifoScheduler(); - QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false); - Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity()); - } - - @Test(timeout=5000) - public void testAppAttemptMetrics() throws Exception { - AsyncDispatcher dispatcher = new InlineDispatcher(); - RMContext rmContext = new RMContextImpl(dispatcher, null, - null, null, null, null, null, null, null); - - FifoScheduler schedular = new FifoScheduler(); - schedular.reinitialize(new Configuration(), rmContext); - QueueMetrics metrics = schedular.getRootQueueMetrics(); - int beforeAppsSubmitted = metrics.getAppsSubmitted(); - - ApplicationId appId = BuilderUtils.newApplicationId(200, 1); - ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( - appId, 1); - - SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId, "queue", "user"); - schedular.handle(appEvent); - SchedulerEvent attemptEvent = - new AppAttemptAddedSchedulerEvent(appAttemptId, false); - schedular.handle(attemptEvent); - - appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2); - SchedulerEvent attemptEvent2 = - new AppAttemptAddedSchedulerEvent(appAttemptId, false); - schedular.handle(attemptEvent2); - - int afterAppsSubmitted = metrics.getAppsSubmitted(); - Assert.assertEquals(1, afterAppsSubmitted - 
beforeAppsSubmitted); - } - - @Test(timeout=2000) - public void testNodeLocalAssignment() throws Exception { - AsyncDispatcher dispatcher = new InlineDispatcher(); - Configuration conf = new Configuration(); - RMContainerTokenSecretManager containerTokenSecretManager = - new RMContainerTokenSecretManager(conf); - containerTokenSecretManager.rollMasterKey(); - NMTokenSecretManagerInRM nmTokenSecretManager = - new NMTokenSecretManagerInRM(conf); - nmTokenSecretManager.rollMasterKey(); - RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, - null, containerTokenSecretManager, nmTokenSecretManager, null); - - FifoScheduler scheduler = new FifoScheduler(); - scheduler.reinitialize(new Configuration(), rmContext); - - RMNode node0 = MockNodes.newNodeInfo(1, - Resources.createResource(1024 * 64), 1, "127.0.0.1"); - NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0); - scheduler.handle(nodeEvent1); - - int _appId = 1; - int _appAttemptId = 1; - ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, - _appAttemptId); - AppAddedSchedulerEvent appEvent = - new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1", - "user1"); - scheduler.handle(appEvent); - AppAttemptAddedSchedulerEvent attemptEvent = - new AppAttemptAddedSchedulerEvent(appAttemptId, false); - scheduler.handle(attemptEvent); - - int memory = 64; - int nConts = 3; - int priority = 20; - - List ask = new ArrayList(); - ResourceRequest nodeLocal = createResourceRequest(memory, - node0.getHostName(), priority, nConts); - ResourceRequest rackLocal = createResourceRequest(memory, - node0.getRackName(), priority, nConts); - ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority, - nConts); - ask.add(nodeLocal); - ask.add(rackLocal); - ask.add(any); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); - - NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0); - - // Before the node update event, there are 3 local requests outstanding - Assert.assertEquals(3, nodeLocal.getNumContainers()); - - scheduler.handle(node0Update); - - // After the node update event, check that there are no more local requests - // outstanding - Assert.assertEquals(0, nodeLocal.getNumContainers()); - //Also check that the containers were scheduled - SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId); - Assert.assertEquals(3, info.getLiveContainers().size()); - } - - @Test(timeout=2000) - public void testUpdateResourceOnNode() throws Exception { - AsyncDispatcher dispatcher = new InlineDispatcher(); - Configuration conf = new Configuration(); - RMContainerTokenSecretManager containerTokenSecretManager = - new RMContainerTokenSecretManager(conf); - containerTokenSecretManager.rollMasterKey(); - NMTokenSecretManagerInRM nmTokenSecretManager = - new NMTokenSecretManagerInRM(conf); - nmTokenSecretManager.rollMasterKey(); - RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, - null, containerTokenSecretManager, nmTokenSecretManager, null); - - FifoScheduler scheduler = new FifoScheduler(){ - @SuppressWarnings("unused") - public Map getNodes(){ - return nodes; - } - }; - scheduler.reinitialize(new Configuration(), rmContext); - RMNode node0 = MockNodes.newNodeInfo(1, - Resources.createResource(2048, 4), 1, "127.0.0.1"); - NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0); - scheduler.handle(nodeEvent1); - - Method method = scheduler.getClass().getDeclaredMethod("getNodes"); - 
@SuppressWarnings("unchecked") - Map schedulerNodes = - (Map) method.invoke(scheduler); - assertEquals(schedulerNodes.values().size(), 1); - - // set resource of RMNode to 1024 and verify it works. - node0.setResourceOption(ResourceOption.newInstance( - Resources.createResource(1024, 4), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT)); - assertEquals(node0.getTotalCapability().getMemory(), 1024); - // verify that SchedulerNode's resource hasn't been changed. - assertEquals(schedulerNodes.get(node0.getNodeID()). - getAvailableResource().getMemory(), 2048); - // now, NM heartbeat comes. - NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0); - scheduler.handle(node0Update); - // SchedulerNode's available resource is changed. - assertEquals(schedulerNodes.get(node0.getNodeID()). - getAvailableResource().getMemory(), 1024); - QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false); - Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity()); - - int _appId = 1; - int _appAttemptId = 1; - ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, - _appAttemptId); - AppAddedSchedulerEvent appEvent = - new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1", - "user1"); - scheduler.handle(appEvent); - AppAttemptAddedSchedulerEvent attemptEvent = - new AppAttemptAddedSchedulerEvent(appAttemptId, false); - scheduler.handle(attemptEvent); - - int memory = 1024; - int priority = 1; - - List ask = new ArrayList(); - ResourceRequest nodeLocal = createResourceRequest(memory, - node0.getHostName(), priority, 1); - ResourceRequest rackLocal = createResourceRequest(memory, - node0.getRackName(), priority, 1); - ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority, - 1); - ask.add(nodeLocal); - ask.add(rackLocal); - ask.add(any); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); - - // Before the node update event, there are one local request - Assert.assertEquals(1, nodeLocal.getNumContainers()); - - // Now schedule. - scheduler.handle(node0Update); - - // After the node update event, check no local request - Assert.assertEquals(0, nodeLocal.getNumContainers()); - // Also check that one container was scheduled - SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId); - Assert.assertEquals(1, info.getLiveContainers().size()); - // And check the default Queue now is full. 
- queueInfo = scheduler.getQueueInfo(null, false, false); - Assert.assertEquals(1.0f, queueInfo.getCurrentCapacity()); - } - -// @Test - public void testFifoScheduler() throws Exception { - - LOG.info("--- START: testFifoScheduler ---"); - - final int GB = 1024; - - // Register node1 - String host_0 = "host_0"; - org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_0 = - registerNode(host_0, 1234, 2345, NetworkTopology.DEFAULT_RACK, - Resources.createResource(4 * GB, 1)); - nm_0.heartbeat(); - - // Register node2 - String host_1 = "host_1"; - org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_1 = - registerNode(host_1, 1234, 2345, NetworkTopology.DEFAULT_RACK, - Resources.createResource(2 * GB, 1)); - nm_1.heartbeat(); - - // ResourceRequest priorities - Priority priority_0 = - org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(0); - Priority priority_1 = - org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(1); - - // Submit an application - Application application_0 = new Application("user_0", resourceManager); - application_0.submit(); - - application_0.addNodeManager(host_0, 1234, nm_0); - application_0.addNodeManager(host_1, 1234, nm_1); - - Resource capability_0_0 = Resources.createResource(GB); - application_0.addResourceRequestSpec(priority_1, capability_0_0); - - Resource capability_0_1 = Resources.createResource(2 * GB); - application_0.addResourceRequestSpec(priority_0, capability_0_1); - - Task task_0_0 = new Task(application_0, priority_1, - new String[] {host_0, host_1}); - application_0.addTask(task_0_0); - - // Submit another application - Application application_1 = new Application("user_1", resourceManager); - application_1.submit(); - - application_1.addNodeManager(host_0, 1234, nm_0); - application_1.addNodeManager(host_1, 1234, nm_1); - - Resource capability_1_0 = Resources.createResource(3 * GB); - application_1.addResourceRequestSpec(priority_1, capability_1_0); - - Resource capability_1_1 = Resources.createResource(4 * GB); - application_1.addResourceRequestSpec(priority_0, capability_1_1); - - Task task_1_0 = new Task(application_1, priority_1, - new String[] {host_0, host_1}); - application_1.addTask(task_1_0); - - // Send resource requests to the scheduler - LOG.info("Send resource requests to the scheduler"); - application_0.schedule(); - application_1.schedule(); - - // Send a heartbeat to kick the tires on the Scheduler - LOG.info("Send a heartbeat to kick the tires on the Scheduler... 
" + - "nm0 -> task_0_0 and task_1_0 allocated, used=4G " + - "nm1 -> nothing allocated"); - nm_0.heartbeat(); // task_0_0 and task_1_0 allocated, used=4G - nm_1.heartbeat(); // nothing allocated - - // Get allocations from the scheduler - application_0.schedule(); // task_0_0 - checkApplicationResourceUsage(GB, application_0); - - application_1.schedule(); // task_1_0 - checkApplicationResourceUsage(3 * GB, application_1); - - nm_0.heartbeat(); - nm_1.heartbeat(); - - checkNodeResourceUsage(4*GB, nm_0); // task_0_0 (1G) and task_1_0 (3G) - checkNodeResourceUsage(0*GB, nm_1); // no tasks, 2G available - - LOG.info("Adding new tasks..."); - - Task task_1_1 = new Task(application_1, priority_1, - new String[] {ResourceRequest.ANY}); - application_1.addTask(task_1_1); - - Task task_1_2 = new Task(application_1, priority_1, - new String[] {ResourceRequest.ANY}); - application_1.addTask(task_1_2); - - Task task_1_3 = new Task(application_1, priority_0, - new String[] {ResourceRequest.ANY}); - application_1.addTask(task_1_3); - - application_1.schedule(); - - Task task_0_1 = new Task(application_0, priority_1, - new String[] {host_0, host_1}); - application_0.addTask(task_0_1); - - Task task_0_2 = new Task(application_0, priority_1, - new String[] {host_0, host_1}); - application_0.addTask(task_0_2); - - Task task_0_3 = new Task(application_0, priority_0, - new String[] {ResourceRequest.ANY}); - application_0.addTask(task_0_3); - - application_0.schedule(); - - // Send a heartbeat to kick the tires on the Scheduler - LOG.info("Sending hb from " + nm_0.getHostName()); - nm_0.heartbeat(); // nothing new, used=4G - - LOG.info("Sending hb from " + nm_1.getHostName()); - nm_1.heartbeat(); // task_0_3, used=2G - - // Get allocations from the scheduler - LOG.info("Trying to allocate..."); - application_0.schedule(); - checkApplicationResourceUsage(3 * GB, application_0); - application_1.schedule(); - checkApplicationResourceUsage(3 * GB, application_1); - nm_0.heartbeat(); - nm_1.heartbeat(); - checkNodeResourceUsage(4*GB, nm_0); - checkNodeResourceUsage(2*GB, nm_1); - - // Complete tasks - LOG.info("Finishing up task_0_0"); - application_0.finishTask(task_0_0); // Now task_0_1 - application_0.schedule(); - application_1.schedule(); - nm_0.heartbeat(); - nm_1.heartbeat(); - checkApplicationResourceUsage(3 * GB, application_0); - checkApplicationResourceUsage(3 * GB, application_1); - checkNodeResourceUsage(4*GB, nm_0); - checkNodeResourceUsage(2*GB, nm_1); - - LOG.info("Finishing up task_1_0"); - application_1.finishTask(task_1_0); // Now task_0_2 - application_0.schedule(); // final overcommit for app0 caused here - application_1.schedule(); - nm_0.heartbeat(); // final overcommit for app0 occurs here - nm_1.heartbeat(); - checkApplicationResourceUsage(4 * GB, application_0); - checkApplicationResourceUsage(0 * GB, application_1); - //checkNodeResourceUsage(1*GB, nm_0); // final over-commit -> rm.node->1G, test.node=2G - checkNodeResourceUsage(2*GB, nm_1); - - LOG.info("Finishing up task_0_3"); - application_0.finishTask(task_0_3); // No more - application_0.schedule(); - application_1.schedule(); - nm_0.heartbeat(); - nm_1.heartbeat(); - checkApplicationResourceUsage(2 * GB, application_0); - checkApplicationResourceUsage(0 * GB, application_1); - //checkNodeResourceUsage(2*GB, nm_0); // final over-commit, rm.node->1G, test.node->2G - checkNodeResourceUsage(0*GB, nm_1); - - LOG.info("Finishing up task_0_1"); - application_0.finishTask(task_0_1); - application_0.schedule(); - application_1.schedule(); 
- nm_0.heartbeat(); - nm_1.heartbeat(); - checkApplicationResourceUsage(1 * GB, application_0); - checkApplicationResourceUsage(0 * GB, application_1); - - LOG.info("Finishing up task_0_2"); - application_0.finishTask(task_0_2); // now task_1_3 can go! - application_0.schedule(); - application_1.schedule(); - nm_0.heartbeat(); - nm_1.heartbeat(); - checkApplicationResourceUsage(0 * GB, application_0); - checkApplicationResourceUsage(4 * GB, application_1); - - LOG.info("Finishing up task_1_3"); - application_1.finishTask(task_1_3); // now task_1_1 - application_0.schedule(); - application_1.schedule(); - nm_0.heartbeat(); - nm_1.heartbeat(); - checkApplicationResourceUsage(0 * GB, application_0); - checkApplicationResourceUsage(3 * GB, application_1); - - LOG.info("Finishing up task_1_1"); - application_1.finishTask(task_1_1); - application_0.schedule(); - application_1.schedule(); - nm_0.heartbeat(); - nm_1.heartbeat(); - checkApplicationResourceUsage(0 * GB, application_0); - checkApplicationResourceUsage(3 * GB, application_1); - - LOG.info("--- END: testFifoScheduler ---"); - } - - @SuppressWarnings("resource") - @Test - public void testBlackListNodes() throws Exception { - Configuration conf = new Configuration(); - conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, - ResourceScheduler.class); - MockRM rm = new MockRM(conf); - rm.start(); - FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler(); - - String host = "127.0.0.1"; - RMNode node = - MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, host); - fs.handle(new NodeAddedSchedulerEvent(node)); - - ApplicationId appId = BuilderUtils.newApplicationId(100, 1); - ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( - appId, 1); - SchedulerEvent appEvent = - new AppAddedSchedulerEvent(appId, "default", - "user"); - fs.handle(appEvent); - SchedulerEvent attemptEvent = - new AppAttemptAddedSchedulerEvent(appAttemptId, false); - fs.handle(attemptEvent); - - // Verify the blacklist can be updated independent of requesting containers - fs.allocate(appAttemptId, Collections.emptyList(), - Collections.emptyList(), - Collections.singletonList(host), null); - Assert.assertTrue(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host)); - fs.allocate(appAttemptId, Collections.emptyList(), - Collections.emptyList(), null, - Collections.singletonList(host)); - Assert.assertFalse(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host)); - rm.stop(); - } - - @Test - public void testGetAppsInQueue() throws Exception { - Application application_0 = new Application("user_0", resourceManager); - application_0.submit(); - - Application application_1 = new Application("user_0", resourceManager); - application_1.submit(); - - ResourceScheduler scheduler = resourceManager.getResourceScheduler(); - - List appsInDefault = scheduler.getAppsInQueue("default"); - assertTrue(appsInDefault.contains(application_0.getApplicationAttemptId())); - assertTrue(appsInDefault.contains(application_1.getApplicationAttemptId())); - assertEquals(2, appsInDefault.size()); - - Assert.assertNull(scheduler.getAppsInQueue("someotherqueue")); - } - - @Test - public void testAddAndRemoveAppFromFiFoScheduler() throws Exception { - Configuration conf = new Configuration(); - conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, - ResourceScheduler.class); - MockRM rm = new MockRM(conf); - FifoScheduler fs = (FifoScheduler)rm.getResourceScheduler(); - 
TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications, - fs, "queue"); - } - - private void checkApplicationResourceUsage(int expected, - Application application) { - Assert.assertEquals(expected, application.getUsedResources().getMemory()); - } - - private void checkNodeResourceUsage(int expected, - org.apache.hadoop.yarn.server.resourcemanager.NodeManager node) { - Assert.assertEquals(expected, node.getUsed().getMemory()); - node.checkResourceUsage(); - } - - public static void main(String[] arg) throws Exception { - TestFifoScheduler t = new TestFifoScheduler(); - t.setUp(); - t.testFifoScheduler(); - t.tearDown(); - } -} From 0ff3d6397e57a32d6d6cc61660c2303890fbc499 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Wed, 15 Jan 2014 17:00:36 +0000 Subject: [PATCH 06/32] YARN-1601. 3rd party JARs are missing from hadoop-dist output. (tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558465 13f79535-47bb-0310-9956-ffa450edef68 --- .../resources/assemblies/hadoop-yarn-dist.xml | 3 +- hadoop-yarn-project/CHANGES.txt | 2 + hadoop-yarn-project/pom.xml | 37 ++++++++++++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml index 8423e708cbe..7f0906b52cd 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml @@ -207,8 +207,7 @@ /share/hadoop/${hadoop.component}/lib - org.apache.hadoop:hadoop-common - org.apache.hadoop:hadoop-hdfs + org.apache.hadoop:* org.slf4j:slf4j-api org.slf4j:slf4j-log4j12 diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5db3a9f97a6..d4e536651a7 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -338,6 +338,8 @@ Release 2.4.0 - UNRELEASED YARN-1603. Remove two *.orig files which were unexpectedly committed. (Zhijie Shen via junping_du) + YARN-1601. 3rd party JARs are missing from hadoop-dist output. (tucu) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/pom.xml b/hadoop-yarn-project/pom.xml index b54bdd9dcd4..f8c3dacf9cb 100644 --- a/hadoop-yarn-project/pom.xml +++ b/hadoop-yarn-project/pom.xml @@ -41,7 +41,42 @@ hadoop-yarn - + + + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-common + + + org.apache.hadoop + hadoop-yarn-server-nodemanager + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + + From 349f25a13225d6a240b577d988a8b1ac6a722578 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Wed, 15 Jan 2014 18:27:59 +0000 Subject: [PATCH 07/32] HADOOP-10236. Fix typo in o.a.h.ipc.Client#checkResponse. Contributed by Akira Ajisaka. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558498 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../src/main/java/org/apache/hadoop/ipc/Client.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 2dac015c1fd..1f12bc45123 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -517,6 +517,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10223. MiniKdc#main() should close the FileReader it creates. (Ted Yu via tucu) + HADOOP-10236. Fix typo in o.a.h.ipc.Client#checkResponse. (Akira Ajisaka + via suresh) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index e5f23cc7ced..6e578031809 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -286,7 +286,7 @@ public class Client { if (!Arrays.equals(id, RpcConstants.DUMMY_CLIENT_ID)) { if (!Arrays.equals(id, clientId)) { throw new IOException("Client IDs not matched: local ID=" - + StringUtils.byteToHexString(clientId) + ", ID in reponse=" + + StringUtils.byteToHexString(clientId) + ", ID in response=" + StringUtils.byteToHexString(header.getClientId().toByteArray())); } } From 037a89abc5cc5ea6b983b21c568a50bc729aa194 Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Wed, 15 Jan 2014 19:17:50 +0000 Subject: [PATCH 08/32] HDFS-5762. BlockReaderLocal does not return -1 on EOF when doing zero-length reads (cmccabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558526 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../org/apache/hadoop/hdfs/BlockReader.java | 2 + .../apache/hadoop/hdfs/BlockReaderLocal.java | 43 ++++++++++------- .../hadoop/hdfs/TestBlockReaderLocal.java | 47 +++++++++++++++++++ 4 files changed, 77 insertions(+), 18 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 1734d58ae46..920f3284686 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -674,6 +674,9 @@ Release 2.4.0 - UNRELEASED HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe) + HDFS-5762. BlockReaderLocal doesn't return -1 on EOF when doing zero-length + reads (Colin Patrick McCabe) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java index b957f00914f..4e6c0645d6c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java @@ -39,6 +39,8 @@ public interface BlockReader extends ByteBufferReadable { * "Read should not modify user buffer before successful read" * because it first reads the data to user buffer and then checks * the checksum. + * Note: this must return -1 on EOF, even in the case of a 0-byte read. + * See HDFS-5762 for details. 
*/ int read(byte[] buf, int off, int len) throws IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java index f702e9b5c77..c83e20a89ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java @@ -328,10 +328,12 @@ class BlockReaderLocal implements BlockReader { private synchronized int drainDataBuf(ByteBuffer buf) throws IOException { - if (dataBuf == null) return 0; + if (dataBuf == null) return -1; int oldLimit = dataBuf.limit(); int nRead = Math.min(dataBuf.remaining(), buf.remaining()); - if (nRead == 0) return 0; + if (nRead == 0) { + return (dataBuf.remaining() == 0) ? -1 : 0; + } try { dataBuf.limit(dataBuf.position() + nRead); buf.put(dataBuf); @@ -444,13 +446,11 @@ class BlockReaderLocal implements BlockReader { int total = 0; while (buf.hasRemaining()) { int nRead = dataIn.read(buf, dataPos); - if (nRead < 0) { - break; - } + if (nRead <= 0) break; dataPos += nRead; total += nRead; } - return (total == 0) ? -1 : total; + return (total == 0 && (dataPos == dataIn.size())) ? -1 : total; } /** @@ -512,15 +512,15 @@ class BlockReaderLocal implements BlockReader { private synchronized int readWithBounceBuffer(ByteBuffer buf, boolean canSkipChecksum) throws IOException { int total = 0; - boolean eof = false; - while (true) { - int bb = drainDataBuf(buf); // drain bounce buffer if possible + int bb = drainDataBuf(buf); // drain bounce buffer if possible + if (bb >= 0) { total += bb; - int needed = buf.remaining(); - if (eof || (needed == 0)) { - break; - } else if (buf.isDirect() && (needed >= maxReadaheadLength) - && ((dataPos % bytesPerChecksum) == 0)) { + if (buf.remaining() == 0) return total; + } + boolean eof = false; + do { + if (buf.isDirect() && (buf.remaining() >= maxReadaheadLength) + && ((dataPos % bytesPerChecksum) == 0)) { // Fast lane: try to read directly into user-supplied buffer, bypassing // bounce buffer. int oldLimit = buf.limit(); @@ -540,9 +540,13 @@ class BlockReaderLocal implements BlockReader { if (fillDataBuf(canSkipChecksum)) { eof = true; } + bb = drainDataBuf(buf); // drain bounce buffer if possible + if (bb >= 0) { + total += bb; + } } - } - return total == 0 ? -1 : total; + } while ((!eof) && (buf.remaining() > 0)); + return (eof && total == 0) ? -1 : total; } @Override @@ -587,8 +591,10 @@ class BlockReaderLocal implements BlockReader { int nRead = dataIn.read(ByteBuffer.wrap(arr, off, len), dataPos); if (nRead > 0) { dataPos += nRead; + } else if ((nRead == 0) && (dataPos == dataIn.size())) { + return -1; } - return nRead == 0 ? -1 : nRead; + return nRead; } private synchronized int readWithBounceBuffer(byte arr[], int off, int len, @@ -599,9 +605,10 @@ class BlockReaderLocal implements BlockReader { dataBuf.limit(maxReadaheadLength); fillDataBuf(canSkipChecksum); } + if (dataBuf.remaining() == 0) return -1; int toRead = Math.min(dataBuf.remaining(), len); dataBuf.get(arr, off, toRead); - return toRead == 0 ? 
-1 : toRead; + return toRead; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java index aab4df849c6..bfca9d799b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java @@ -604,6 +604,24 @@ public class TestBlockReaderLocal { } } + private static class TestBlockReaderLocalReadZeroBytes + extends BlockReaderLocalTest { + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + byte emptyArr[] = new byte[0]; + Assert.assertEquals(0, reader.read(emptyArr, 0, 0)); + ByteBuffer emptyBuf = ByteBuffer.wrap(emptyArr); + Assert.assertEquals(0, reader.read(emptyBuf)); + reader.skip(1); + Assert.assertEquals(0, reader.read(emptyArr, 0, 0)); + Assert.assertEquals(0, reader.read(emptyBuf)); + reader.skip(BlockReaderLocalTest.TEST_LENGTH - 1); + Assert.assertEquals(-1, reader.read(emptyArr, 0, 0)); + Assert.assertEquals(-1, reader.read(emptyBuf)); + } + } + @Test public void testBlockReaderLocalOnFileWithoutChecksum() throws IOException { @@ -631,6 +649,35 @@ public class TestBlockReaderLocal { runBlockReaderLocalTest(new TestBlockReaderLocalOnFileWithoutChecksum(), false, 0); } + + @Test + public void testBlockReaderLocalReadZeroBytes() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadZeroBytes(), + true, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalReadZeroBytesNoChecksum() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadZeroBytes(), + false, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalReadZeroBytesNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadZeroBytes(), + true, 0); + } + + @Test + public void testBlockReaderLocalReadZeroBytesNoChecksumNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadZeroBytes(), + false, 0); + } + @Test(timeout=60000) public void TestStatisticsForShortCircuitLocalRead() throws Exception { From c3b236ce5d2ca0905d03d63efe74b9f2c8e27436 Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Wed, 15 Jan 2014 19:23:31 +0000 Subject: [PATCH 09/32] HDFS-5766. In DFSInputStream, do not add datanode to deadNodes after InvalidEncryptionKeyException in fetchBlockByteRange (Liang Xie via Colin Patrick McCabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558536 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 4 ++++ .../src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java | 1 + 2 files changed, 5 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 920f3284686..87ef1ef291d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -677,6 +677,10 @@ Release 2.4.0 - UNRELEASED HDFS-5762. BlockReaderLocal doesn't return -1 on EOF when doing zero-length reads (Colin Patrick McCabe) + HDFS-5766. In DFSInputStream, do not add datanode to deadNodes after + InvalidEncryptionKeyException in fetchBlockByteRange (Liang Xie via Colin + Patrick McCabe) + OPTIMIZATIONS HDFS-5239. 
Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 47efcf339a3..d90317d2bf0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -969,6 +969,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, // The encryption key used is invalid. refetchEncryptionKey--; dfsClient.clearDataEncryptionKey(); + continue; } else if (refetchToken > 0 && tokenRefetchNeeded(e, targetAddr)) { refetchToken--; fetchBlockAt(block.getStartOffset()); From d3cb721ce0d2d1f6bbfea9848334411e11d13d47 Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Wed, 15 Jan 2014 21:51:55 +0000 Subject: [PATCH 10/32] HADOOP-10125. no need to process RPC request if the client connection has been dropped. Contributed by Min Ma git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558575 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../src/main/java/org/apache/hadoop/ipc/Server.java | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 1f12bc45123..b0dba7ee595 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -287,6 +287,9 @@ Trunk (Unreleased) HADOOP-10044 Improve the javadoc of rpc code (sanjay Radia) + HADOOP-10125. no need to process RPC request if the client connection + has been dropped (Min Ma via brandonli) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 90532628915..8c578fd107e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -2021,6 +2021,10 @@ public abstract class Server { if (LOG.isDebugEnabled()) { LOG.debug(Thread.currentThread().getName() + ": " + call + " for RpcKind " + call.rpcKind); } + if (!call.connection.channel.isOpen()) { + LOG.info(Thread.currentThread().getName() + ": skipped " + call); + continue; + } String errorClass = null; String error = null; RpcStatusProto returnStatus = RpcStatusProto.SUCCESS; From 51a9755794cbff72146fca24762b1c0cffb744a9 Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Wed, 15 Jan 2014 21:53:47 +0000 Subject: [PATCH 11/32] Fix CHANGE.txt for the contributor's name git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558578 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index b0dba7ee595..48402201e8b 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -288,7 +288,7 @@ Trunk (Unreleased) HADOOP-10044 Improve the javadoc of rpc code (sanjay Radia) HADOOP-10125. 
no need to process RPC request if the client connection - has been dropped (Min Ma via brandonli) + has been dropped (Ming Ma via brandonli) OPTIMIZATIONS From 7274b5ff93afc6e6e96fb6fcd43f2955960f2644 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Wed, 15 Jan 2014 23:09:32 +0000 Subject: [PATCH 12/32] YARN-1351. Invalid string format in Fair Scheduler log warn message (Konstantin Weitz via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558597 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../scheduler/fair/AllocationFileLoaderService.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d4e536651a7..05ba7f27b61 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -340,6 +340,9 @@ Release 2.4.0 - UNRELEASED YARN-1601. 3rd party JARs are missing from hadoop-dist output. (tucu) + YARN-1351. Invalid string format in Fair Scheduler log warn message + (Konstantin Weitz via Sandy Ryza) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java index 2a7164d49b8..69dcf89b628 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java @@ -387,7 +387,7 @@ public class AllocationFileLoaderService extends AbstractService { if (maxQueueResources.containsKey(queueName) && minQueueResources.containsKey(queueName) && !Resources.fitsIn(minQueueResources.get(queueName), maxQueueResources.get(queueName))) { - LOG.warn(String.format("Queue %s has max resources %d less than min resources %d", + LOG.warn(String.format("Queue %s has max resources %s less than min resources %s", queueName, maxQueueResources.get(queueName), minQueueResources.get(queueName))); } } From ca5d73d1ab45541169f76b183f67e7a6150c1a2c Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Wed, 15 Jan 2014 23:11:01 +0000 Subject: [PATCH 13/32] HDFS-5768. Consolidate the serialization code in DelegationTokenSecretManager. Contributed by Haohui Mai git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558598 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../DelegationTokenSecretManager.java | 203 +++++++++--------- .../hdfs/server/namenode/FSImageFormat.java | 4 +- .../hdfs/server/namenode/FSNamesystem.java | 8 +- 4 files changed, 116 insertions(+), 102 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 87ef1ef291d..7172144afcf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -478,6 +478,9 @@ Trunk (Unreleased) HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9) + HDFS-5768. 
Consolidate the serialization code in DelegationTokenSecretManager + (Haohui Mai via brandonli) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index b2446cbb806..e291204cc23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -59,6 +59,7 @@ public class DelegationTokenSecretManager .getLog(DelegationTokenSecretManager.class); private final FSNamesystem namesystem; + private final SerializerCompat serializerCompat = new SerializerCompat(); public DelegationTokenSecretManager(long delegationKeyUpdateInterval, long delegationTokenMaxLifetime, long delegationTokenRenewInterval, @@ -150,24 +151,21 @@ public class DelegationTokenSecretManager throw new IOException("No delegation token found for this identifier"); } } - + /** * Load SecretManager state from fsimage. * * @param in input stream to read fsimage * @throws IOException */ - public synchronized void loadSecretManagerState(DataInput in) + public synchronized void loadSecretManagerStateCompat(DataInput in) throws IOException { if (running) { // a safety check throw new IOException( "Can't load state from image in a running SecretManager."); } - currentId = in.readInt(); - loadAllKeys(in); - delegationTokenSequenceNumber = in.readInt(); - loadCurrentTokens(in); + serializerCompat.load(in); } /** @@ -177,12 +175,9 @@ public class DelegationTokenSecretManager * @param sdPath String storage directory path * @throws IOException */ - public synchronized void saveSecretManagerState(DataOutputStream out, + public synchronized void saveSecretManagerStateCompat(DataOutputStream out, String sdPath) throws IOException { - out.writeInt(currentId); - saveAllKeys(out, sdPath); - out.writeInt(delegationTokenSequenceNumber); - saveCurrentTokens(out, sdPath); + serializerCompat.save(out, sdPath); } /** @@ -282,91 +277,6 @@ public class DelegationTokenSecretManager return allKeys.size(); } - /** - * Private helper methods to save delegation keys and tokens in fsimage - */ - private synchronized void saveCurrentTokens(DataOutputStream out, - String sdPath) throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.DELEGATION_TOKENS, sdPath); - prog.beginStep(Phase.SAVING_CHECKPOINT, step); - prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size()); - Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); - out.writeInt(currentTokens.size()); - Iterator iter = currentTokens.keySet() - .iterator(); - while (iter.hasNext()) { - DelegationTokenIdentifier id = iter.next(); - id.write(out); - DelegationTokenInformation info = currentTokens.get(id); - out.writeLong(info.getRenewDate()); - counter.increment(); - } - prog.endStep(Phase.SAVING_CHECKPOINT, step); - } - - /* - * Save the current state of allKeys - */ - private synchronized void saveAllKeys(DataOutputStream out, String sdPath) - throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.DELEGATION_KEYS, sdPath); - prog.beginStep(Phase.SAVING_CHECKPOINT, step); - 
prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size()); - Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); - out.writeInt(allKeys.size()); - Iterator iter = allKeys.keySet().iterator(); - while (iter.hasNext()) { - Integer key = iter.next(); - allKeys.get(key).write(out); - counter.increment(); - } - prog.endStep(Phase.SAVING_CHECKPOINT, step); - } - - /** - * Private helper methods to load Delegation tokens from fsimage - */ - private synchronized void loadCurrentTokens(DataInput in) - throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.DELEGATION_TOKENS); - prog.beginStep(Phase.LOADING_FSIMAGE, step); - int numberOfTokens = in.readInt(); - prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfTokens); - Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); - for (int i = 0; i < numberOfTokens; i++) { - DelegationTokenIdentifier id = new DelegationTokenIdentifier(); - id.readFields(in); - long expiryTime = in.readLong(); - addPersistedDelegationToken(id, expiryTime); - counter.increment(); - } - prog.endStep(Phase.LOADING_FSIMAGE, step); - } - - /** - * Private helper method to load delegation keys from fsimage. - * @param in - * @throws IOException - */ - private synchronized void loadAllKeys(DataInput in) throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.DELEGATION_KEYS); - prog.beginStep(Phase.LOADING_FSIMAGE, step); - int numberOfKeys = in.readInt(); - prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfKeys); - Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); - for (int i = 0; i < numberOfKeys; i++) { - DelegationKey value = new DelegationKey(); - value.readFields(in); - addKey(value); - counter.increment(); - } - prog.endStep(Phase.LOADING_FSIMAGE, step); - } - /** * Call namesystem to update editlogs for new master key. 
*/ @@ -420,4 +330,105 @@ public class DelegationTokenSecretManager c.addToken(new Text(ugi.getShortUserName()), token); return c; } + + private final class SerializerCompat { + private void load(DataInput in) throws IOException { + currentId = in.readInt(); + loadAllKeys(in); + delegationTokenSequenceNumber = in.readInt(); + loadCurrentTokens(in); + } + + private void save(DataOutputStream out, String sdPath) throws IOException { + out.writeInt(currentId); + saveAllKeys(out, sdPath); + out.writeInt(delegationTokenSequenceNumber); + saveCurrentTokens(out, sdPath); + } + + /** + * Private helper methods to save delegation keys and tokens in fsimage + */ + private synchronized void saveCurrentTokens(DataOutputStream out, + String sdPath) throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + Step step = new Step(StepType.DELEGATION_TOKENS, sdPath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size()); + Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); + out.writeInt(currentTokens.size()); + Iterator iter = currentTokens.keySet() + .iterator(); + while (iter.hasNext()) { + DelegationTokenIdentifier id = iter.next(); + id.write(out); + DelegationTokenInformation info = currentTokens.get(id); + out.writeLong(info.getRenewDate()); + counter.increment(); + } + prog.endStep(Phase.SAVING_CHECKPOINT, step); + } + + /* + * Save the current state of allKeys + */ + private synchronized void saveAllKeys(DataOutputStream out, String sdPath) + throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + Step step = new Step(StepType.DELEGATION_KEYS, sdPath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size()); + Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); + out.writeInt(allKeys.size()); + Iterator iter = allKeys.keySet().iterator(); + while (iter.hasNext()) { + Integer key = iter.next(); + allKeys.get(key).write(out); + counter.increment(); + } + prog.endStep(Phase.SAVING_CHECKPOINT, step); + } + + /** + * Private helper methods to load Delegation tokens from fsimage + */ + private synchronized void loadCurrentTokens(DataInput in) + throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + Step step = new Step(StepType.DELEGATION_TOKENS); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + int numberOfTokens = in.readInt(); + prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfTokens); + Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); + for (int i = 0; i < numberOfTokens; i++) { + DelegationTokenIdentifier id = new DelegationTokenIdentifier(); + id.readFields(in); + long expiryTime = in.readLong(); + addPersistedDelegationToken(id, expiryTime); + counter.increment(); + } + prog.endStep(Phase.LOADING_FSIMAGE, step); + } + + /** + * Private helper method to load delegation keys from fsimage. 
+ * @param in + * @throws IOException + */ + private synchronized void loadAllKeys(DataInput in) throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + Step step = new Step(StepType.DELEGATION_KEYS); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + int numberOfKeys = in.readInt(); + prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfKeys); + Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); + for (int i = 0; i < numberOfKeys; i++) { + DelegationKey value = new DelegationKey(); + value.readFields(in); + addKey(value); + counter.increment(); + } + prog.endStep(Phase.LOADING_FSIMAGE, step); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 5c2ca578ad4..7a3e066e8de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -870,7 +870,7 @@ public class FSImageFormat { //This must not happen if security is turned on. return; } - namesystem.loadSecretManagerState(in); + namesystem.loadSecretManagerStateCompat(in); } private void loadCacheManagerState(DataInput in) throws IOException { @@ -1032,7 +1032,7 @@ public class FSImageFormat { sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap); context.checkCancelled(); - sourceNamesystem.saveSecretManagerState(out, sdPath); + sourceNamesystem.saveSecretManagerStateCompat(out, sdPath); context.checkCancelled(); sourceNamesystem.getCacheManager().saveState(out, sdPath); context.checkCancelled(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 1c9096e66a4..c130fc785b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -6250,16 +6250,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param out save state of the secret manager * @param sdPath String storage directory path */ - void saveSecretManagerState(DataOutputStream out, String sdPath) + void saveSecretManagerStateCompat(DataOutputStream out, String sdPath) throws IOException { - dtSecretManager.saveSecretManagerState(out, sdPath); + dtSecretManager.saveSecretManagerStateCompat(out, sdPath); } /** * @param in load the state of secret manager from input stream */ - void loadSecretManagerState(DataInput in) throws IOException { - dtSecretManager.loadSecretManagerState(in); + void loadSecretManagerStateCompat(DataInput in) throws IOException { + dtSecretManager.loadSecretManagerStateCompat(in); } /** From a506df8e483cdd27629cfcbc2b0e1aecd27e448a Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Wed, 15 Jan 2014 23:15:24 +0000 Subject: [PATCH 14/32] HDFS-5775. Consolidate the code for serialization in CacheManager. 
Contributed by Haohui Mai git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558599 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hdfs/server/namenode/CacheManager.java | 202 ++++++++++-------- .../hdfs/server/namenode/FSImageFormat.java | 4 +- 3 files changed, 112 insertions(+), 97 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7172144afcf..e2aa07861dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -481,6 +481,9 @@ Trunk (Unreleased) HDFS-5768. Consolidate the serialization code in DelegationTokenSecretManager (Haohui Mai via brandonli) + HDFS-5775. Consolidate the code for serialization in CacheManager + (Haohui Mai via brandonli) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index 5cb8fe92332..e6fb5fc218e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -160,6 +160,8 @@ public final class CacheManager { */ private final ReentrantLock crmLock = new ReentrantLock(); + private final SerializerCompat serializerCompat = new SerializerCompat(); + /** * The CacheReplicationMonitor. */ @@ -926,11 +928,9 @@ public final class CacheManager { * @param sdPath path of the storage directory * @throws IOException */ - public void saveState(DataOutputStream out, String sdPath) + public void saveStateCompat(DataOutputStream out, String sdPath) throws IOException { - out.writeLong(nextDirectiveId); - savePools(out, sdPath); - saveDirectives(out, sdPath); + serializerCompat.save(out, sdPath); } /** @@ -939,105 +939,117 @@ public final class CacheManager { * @param in DataInput from which to restore state * @throws IOException */ - public void loadState(DataInput in) throws IOException { - nextDirectiveId = in.readLong(); - // pools need to be loaded first since directives point to their parent pool - loadPools(in); - loadDirectives(in); + public void loadStateCompat(DataInput in) throws IOException { + serializerCompat.load(in); } - /** - * Save cache pools to fsimage - */ - private void savePools(DataOutputStream out, - String sdPath) throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.CACHE_POOLS, sdPath); - prog.beginStep(Phase.SAVING_CHECKPOINT, step); - prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size()); - Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); - out.writeInt(cachePools.size()); - for (CachePool pool: cachePools.values()) { - FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true)); - counter.increment(); + private final class SerializerCompat { + private void save(DataOutputStream out, String sdPath) throws IOException { + out.writeLong(nextDirectiveId); + savePools(out, sdPath); + saveDirectives(out, sdPath); } - prog.endStep(Phase.SAVING_CHECKPOINT, step); - } - /* - * Save cache entries to fsimage - */ - private void saveDirectives(DataOutputStream out, String sdPath) - throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.CACHE_ENTRIES, 
sdPath); - prog.beginStep(Phase.SAVING_CHECKPOINT, step); - prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size()); - Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); - out.writeInt(directivesById.size()); - for (CacheDirective directive : directivesById.values()) { - FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo()); - counter.increment(); + private void load(DataInput in) throws IOException { + nextDirectiveId = in.readLong(); + // pools need to be loaded first since directives point to their parent pool + loadPools(in); + loadDirectives(in); } - prog.endStep(Phase.SAVING_CHECKPOINT, step); - } - /** - * Load cache pools from fsimage - */ - private void loadPools(DataInput in) - throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.CACHE_POOLS); - prog.beginStep(Phase.LOADING_FSIMAGE, step); - int numberOfPools = in.readInt(); - prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfPools); - Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); - for (int i = 0; i < numberOfPools; i++) { - addCachePool(FSImageSerialization.readCachePoolInfo(in)); - counter.increment(); - } - prog.endStep(Phase.LOADING_FSIMAGE, step); - } - - /** - * Load cache directives from the fsimage - */ - private void loadDirectives(DataInput in) throws IOException { - StartupProgress prog = NameNode.getStartupProgress(); - Step step = new Step(StepType.CACHE_ENTRIES); - prog.beginStep(Phase.LOADING_FSIMAGE, step); - int numDirectives = in.readInt(); - prog.setTotal(Phase.LOADING_FSIMAGE, step, numDirectives); - Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); - for (int i = 0; i < numDirectives; i++) { - CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in); - // Get pool reference by looking it up in the map - final String poolName = info.getPool(); - CachePool pool = cachePools.get(poolName); - if (pool == null) { - throw new IOException("Directive refers to pool " + poolName + - ", which does not exist."); + /** + * Save cache pools to fsimage + */ + private void savePools(DataOutputStream out, + String sdPath) throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + Step step = new Step(StepType.CACHE_POOLS, sdPath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size()); + Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); + out.writeInt(cachePools.size()); + for (CachePool pool: cachePools.values()) { + FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true)); + counter.increment(); } - CacheDirective directive = - new CacheDirective(info.getId(), info.getPath().toUri().getPath(), - info.getReplication(), info.getExpiration().getAbsoluteMillis()); - boolean addedDirective = pool.getDirectiveList().add(directive); - assert addedDirective; - if (directivesById.put(directive.getId(), directive) != null) { - throw new IOException("A directive with ID " + directive.getId() + - " already exists"); - } - List directives = - directivesByPath.get(directive.getPath()); - if (directives == null) { - directives = new LinkedList(); - directivesByPath.put(directive.getPath(), directives); - } - directives.add(directive); - counter.increment(); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + } + + /* + * Save cache entries to fsimage + */ + private void saveDirectives(DataOutputStream out, String sdPath) + throws IOException { + StartupProgress prog = 
NameNode.getStartupProgress(); + Step step = new Step(StepType.CACHE_ENTRIES, sdPath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size()); + Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); + out.writeInt(directivesById.size()); + for (CacheDirective directive : directivesById.values()) { + FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo()); + counter.increment(); + } + prog.endStep(Phase.SAVING_CHECKPOINT, step); + } + + /** + * Load cache pools from fsimage + */ + private void loadPools(DataInput in) + throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + Step step = new Step(StepType.CACHE_POOLS); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + int numberOfPools = in.readInt(); + prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfPools); + Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); + for (int i = 0; i < numberOfPools; i++) { + addCachePool(FSImageSerialization.readCachePoolInfo(in)); + counter.increment(); + } + prog.endStep(Phase.LOADING_FSIMAGE, step); + } + + /** + * Load cache directives from the fsimage + */ + private void loadDirectives(DataInput in) throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + Step step = new Step(StepType.CACHE_ENTRIES); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + int numDirectives = in.readInt(); + prog.setTotal(Phase.LOADING_FSIMAGE, step, numDirectives); + Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); + for (int i = 0; i < numDirectives; i++) { + CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in); + // Get pool reference by looking it up in the map + final String poolName = info.getPool(); + CachePool pool = cachePools.get(poolName); + if (pool == null) { + throw new IOException("Directive refers to pool " + poolName + + ", which does not exist."); + } + CacheDirective directive = + new CacheDirective(info.getId(), info.getPath().toUri().getPath(), + info.getReplication(), info.getExpiration().getAbsoluteMillis()); + boolean addedDirective = pool.getDirectiveList().add(directive); + assert addedDirective; + if (directivesById.put(directive.getId(), directive) != null) { + throw new IOException("A directive with ID " + directive.getId() + + " already exists"); + } + List directives = + directivesByPath.get(directive.getPath()); + if (directives == null) { + directives = new LinkedList(); + directivesByPath.put(directive.getPath(), directives); + } + directives.add(directive); + counter.increment(); + } + prog.endStep(Phase.LOADING_FSIMAGE, step); } - prog.endStep(Phase.LOADING_FSIMAGE, step); } public void waitForRescanIfNeeded() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 7a3e066e8de..fd86f81fc4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -878,7 +878,7 @@ public class FSImageFormat { if (!LayoutVersion.supports(Feature.CACHING, imgVersion)) { return; } - namesystem.getCacheManager().loadState(in); + namesystem.getCacheManager().loadStateCompat(in); } private int getLayoutVersion() { @@ -1034,7 +1034,7 @@ public class FSImageFormat { context.checkCancelled(); 
sourceNamesystem.saveSecretManagerStateCompat(out, sdPath); context.checkCancelled(); - sourceNamesystem.getCacheManager().saveState(out, sdPath); + sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath); context.checkCancelled(); out.flush(); context.checkCancelled(); From e02ded811f0906e1747b69b2d181e43426d2e143 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Thu, 16 Jan 2014 02:28:10 +0000 Subject: [PATCH 15/32] HDFS-5777. Update LayoutVersion for the new editlog op OP_ADD_BLOCK. Contributed by Jing Zhao. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558675 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 6 +- .../hadoop/hdfs/protocol/LayoutVersion.java | 7 +- .../server/namenode/FSEditLogOpCodes.java | 14 +- .../ImageLoaderCurrent.java | 2 +- .../src/test/resources/editsStored | Bin 4282 -> 4268 bytes .../src/test/resources/editsStored.xml | 168 +++++++++--------- 6 files changed, 100 insertions(+), 97 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e2aa07861dc..fc34f931b3f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -484,6 +484,10 @@ Trunk (Unreleased) HDFS-5775. Consolidate the code for serialization in CacheManager (Haohui Mai via brandonli) + HDFS-5704. Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK. (jing9) + + HDFS-5777. Update LayoutVersion for the new editlog op OP_ADD_BLOCK. (jing9) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES @@ -940,8 +944,6 @@ Release 2.3.0 - UNRELEASED HDFS-5677. Need error checking for HA cluster configuration. (Vincent Sheffer via cos) - HDFS-5704. Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK. (jing9) - OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index 094e4793cf2..e3a36fe9814 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -107,11 +107,12 @@ public class LayoutVersion { "block IDs in the edits log and image files"), EDITLOG_SUPPORT_RETRYCACHE(-47, "Record ClientId and CallId in editlog to " + "enable rebuilding retry cache in case of HA failover"), - CACHING(-48, "Support for cache pools and path-based caching"), - ADD_DATANODE_AND_STORAGE_UUIDS(-49, "Replace StorageID with DatanodeUuid." + EDITLOG_ADD_BLOCK(-48, "Add new editlog that only records allocation of " + + "the new block instead of the entire block list"), + CACHING(-49, "Support for cache pools and path-based caching"), + ADD_DATANODE_AND_STORAGE_UUIDS(-50, "Replace StorageID with DatanodeUuid." 
+ " Use distinct StorageUuid per storage directory."); - final int lv; final int ancestorLV; final String description; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java index 21cb284cc72..95702477231 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java @@ -60,13 +60,13 @@ public enum FSEditLogOpCodes { OP_DISALLOW_SNAPSHOT ((byte) 30), OP_SET_GENSTAMP_V2 ((byte) 31), OP_ALLOCATE_BLOCK_ID ((byte) 32), - OP_ADD_CACHE_DIRECTIVE ((byte) 33), - OP_REMOVE_CACHE_DIRECTIVE ((byte) 34), - OP_ADD_CACHE_POOL ((byte) 35), - OP_MODIFY_CACHE_POOL ((byte) 36), - OP_REMOVE_CACHE_POOL ((byte) 37), - OP_MODIFY_CACHE_DIRECTIVE ((byte) 38), - OP_ADD_BLOCK ((byte) 39), + OP_ADD_BLOCK ((byte) 33), + OP_ADD_CACHE_DIRECTIVE ((byte) 34), + OP_REMOVE_CACHE_DIRECTIVE ((byte) 35), + OP_ADD_CACHE_POOL ((byte) 36), + OP_MODIFY_CACHE_POOL ((byte) 37), + OP_REMOVE_CACHE_POOL ((byte) 38), + OP_MODIFY_CACHE_DIRECTIVE ((byte) 39), // Note that fromByte(..) depends on OP_INVALID being at the last position. OP_INVALID ((byte) -1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java index c851cb59a1a..8415c6bc30d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java @@ -126,7 +126,7 @@ class ImageLoaderCurrent implements ImageLoader { new SimpleDateFormat("yyyy-MM-dd HH:mm"); private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, - -40, -41, -42, -43, -44, -45, -46, -47, -48, -49 }; + -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50 }; private int imageVersion = 0; private final Map subtreeMap = new HashMap(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index 8cba085408de27fa35c43a2b30767c2ce90f9844..c545cfd41492c79aeb2f803f0bfdf39fdf2192a6 100644 GIT binary patch delta 1440 zcmdm`xJJ?U|NsBzBp4uou}$nqpeU5X#Ms5?d@x%jl;cjX!P}Fy-)gJ85)5Fn%uEp3 z2oBEdL)^Kq-!dOp-jy@aSCwamMes@n21aKPJ+V%nQE}otaeYHGLjwy7Ba?VT22MjG zb3FqfG-MFiI;pSWaj^&cvW}lF%@=x}0@xsleO8?Sn_Va%a}ZsT^%H20Fj9@Aj3VHaXp02fo}-T(jq delta 1456 zcmZ3ZxJ%LZ|NsBzB^V%pu}$nqpeU5X#Ms5?Jm~<-9gd3&+T^n5S^5e6W8sC#GBZJB z?{PF)?)dPwF6Y^u{ZR!IeN}lfdo^}2FfclU=!tdmjLH+|i5uz~8JZcI85H~1DLxS$W5(Y z)-(An8`$0&n7xx#+0$6|E_yC9xu0DfzGt!q2gnJ*9NH{y$r8qs zo50+)9F{DG>HaS#zXEebIW1Xw&xyX79K;E>cQVZ0$*r7eEPoXqn@@fZk(mmWnM#(6 zOy-=pcYJaO7sy4MxwKi1=WpCT`4O1Q%WcVWI!j%7vKKcIBK##j~g(FYp%x{vG&O`{~nsWna9!ssALON^A?OG{vWGmhotDULX%Z^ zjX-g*9jbXdSw42WU70m^@(Es$pFZ=Nu^j$BPja#@Ulh1V;6W}DCjaJ@7G~h}aB&Nc z_e;!8^~+C5ecs!AXYzGEkULCRVD6YK%5TQ{>vj5l<;em3CX!h4rbip2`g)bgbNOYl z$$GxJ{`tD%-^=uy=u@>&=pDp^AH0Q5MnE^ymPjO)hIy~y1_MI pQ|c3J^ZJ=8NQn^;yECvB - -49 + -50 OP_START_LOG_SEGMENT @@ -13,8 +13,8 @@ 2 1 - 1389736494300 - d1a0861e6b9e394e + 1390519460949 + dc8d30edc97df67d @@ -24,8 +24,8 @@ 3 
2 - 1389736494302 - 8239b8f0ed7e6ce6 + 1390519460952 + 096bc20b6debed03 @@ -37,18 +37,18 @@ 16386 /file_create 1 - 1389045295288 - 1389045295288 + 1389828264873 + 1389828264873 512 - DFSClient_NONMAPREDUCE_-216163116_1 + DFSClient_NONMAPREDUCE_16108824_1 127.0.0.1 jing supergroup 420 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 7 + b5928e80-e373-4807-a688-f94483d08ce5 + 9 @@ -59,8 +59,8 @@ 0 /file_create 1 - 1389045295307 - 1389045295288 + 1389828265699 + 1389828264873 512 @@ -78,9 +78,9 @@ 0 /file_create /file_moved - 1389045295311 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 9 + 1389828265705 + b5928e80-e373-4807-a688-f94483d08ce5 + 11 @@ -89,9 +89,9 @@ 7 0 /file_moved - 1389045295318 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 10 + 1389828265712 + b5928e80-e373-4807-a688-f94483d08ce5 + 12 @@ -101,7 +101,7 @@ 0 16387 /directory_mkdir - 1389045295326 + 1389828265722 jing supergroup @@ -136,8 +136,8 @@ 12 /directory_mkdir snapshot1 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 15 + b5928e80-e373-4807-a688-f94483d08ce5 + 17 @@ -147,8 +147,8 @@ /directory_mkdir snapshot1 snapshot2 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 16 + b5928e80-e373-4807-a688-f94483d08ce5 + 18 @@ -157,8 +157,8 @@ 14 /directory_mkdir snapshot2 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 17 + b5928e80-e373-4807-a688-f94483d08ce5 + 19 @@ -169,18 +169,18 @@ 16388 /file_create 1 - 1389045295354 - 1389045295354 + 1389828265757 + 1389828265757 512 - DFSClient_NONMAPREDUCE_-216163116_1 + DFSClient_NONMAPREDUCE_16108824_1 127.0.0.1 jing supergroup 420 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 18 + b5928e80-e373-4807-a688-f94483d08ce5 + 20 @@ -191,8 +191,8 @@ 0 /file_create 1 - 1389045295357 - 1389045295354 + 1389828265759 + 1389828265757 512 @@ -253,10 +253,10 @@ 0 /file_create /file_moved - 1389045295378 + 1389828265782 NONE - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 25 + b5928e80-e373-4807-a688-f94483d08ce5 + 27 @@ -267,18 +267,18 @@ 16389 /file_concat_target 1 - 1389045295383 - 1389045295383 + 1389828265787 + 1389828265787 512 - DFSClient_NONMAPREDUCE_-216163116_1 + DFSClient_NONMAPREDUCE_16108824_1 127.0.0.1 jing supergroup 420 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 27 + b5928e80-e373-4807-a688-f94483d08ce5 + 29 @@ -383,8 +383,8 @@ 0 /file_concat_target 1 - 1389045295484 - 1389045295383 + 1389828266540 + 1389828265787 512 @@ -418,18 +418,18 @@ 16390 /file_concat_0 1 - 1389045295486 - 1389045295486 + 1389828266544 + 1389828266544 512 - DFSClient_NONMAPREDUCE_-216163116_1 + DFSClient_NONMAPREDUCE_16108824_1 127.0.0.1 jing supergroup 420 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 40 + b5928e80-e373-4807-a688-f94483d08ce5 + 41 @@ -534,8 +534,8 @@ 0 /file_concat_0 1 - 1389045295507 - 1389045295486 + 1389828266569 + 1389828266544 512 @@ -569,18 +569,18 @@ 16391 /file_concat_1 1 - 1389045295509 - 1389045295509 + 1389828266572 + 1389828266572 512 - DFSClient_NONMAPREDUCE_-216163116_1 + DFSClient_NONMAPREDUCE_16108824_1 127.0.0.1 jing supergroup 420 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 52 + b5928e80-e373-4807-a688-f94483d08ce5 + 53 @@ -685,8 +685,8 @@ 0 /file_concat_1 1 - 1389045295532 - 1389045295509 + 1389828266599 + 1389828266572 512 @@ -718,13 +718,13 @@ 56 0 /file_concat_target - 1389045295535 + 1389828266603 /file_concat_0 /file_concat_1 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 63 + b5928e80-e373-4807-a688-f94483d08ce5 + 64 @@ -735,15 +735,15 @@ 16392 /file_symlink /file_concat_target - 1389045295540 - 1389045295540 + 1389828266633 + 1389828266633 jing supergroup 511 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 64 + 
b5928e80-e373-4807-a688-f94483d08ce5 + 66 @@ -754,18 +754,18 @@ 16393 /hard-lease-recovery-test 1 - 1389045295543 - 1389045295543 + 1389828266637 + 1389828266637 512 - DFSClient_NONMAPREDUCE_-216163116_1 + DFSClient_NONMAPREDUCE_16108824_1 127.0.0.1 jing supergroup 420 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 65 + b5928e80-e373-4807-a688-f94483d08ce5 + 67 @@ -821,7 +821,7 @@ OP_REASSIGN_LEASE 64 - DFSClient_NONMAPREDUCE_-216163116_1 + DFSClient_NONMAPREDUCE_16108824_1 /hard-lease-recovery-test HDFS_NameNode @@ -834,8 +834,8 @@ 0 /hard-lease-recovery-test 1 - 1389045298180 - 1389045295543 + 1389828269751 + 1389828266637 512 @@ -861,8 +861,8 @@ 493 9223372036854775807 2305843009213693951 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 72 + b5928e80-e373-4807-a688-f94483d08ce5 + 74 @@ -871,8 +871,8 @@ 67 pool1 99 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 73 + b5928e80-e373-4807-a688-f94483d08ce5 + 75 @@ -883,9 +883,9 @@ /path 1 pool1 - 2305844398258992525 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 74 + 2305844399041964876 + b5928e80-e373-4807-a688-f94483d08ce5 + 76 @@ -894,8 +894,8 @@ 69 1 2 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 75 + b5928e80-e373-4807-a688-f94483d08ce5 + 77 @@ -903,8 +903,8 @@ 70 1 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 76 + b5928e80-e373-4807-a688-f94483d08ce5 + 78 @@ -912,8 +912,8 @@ 71 pool1 - 48c96601-9238-4d1f-b78b-ef0f1e922ba2 - 77 + b5928e80-e373-4807-a688-f94483d08ce5 + 79 From 3a6f8b878501cc6961a8388813f33bbeb5ebae34 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Thu, 16 Jan 2014 17:10:20 +0000 Subject: [PATCH 16/32] MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running. (tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558852 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../v2/jobhistory/JHAdminConfig.java | 7 + .../hadoop-mapreduce-client-hs/pom.xml | 10 ++ .../mapreduce/v2/hs/HistoryFileManager.java | 141 ++++++++++++++---- .../v2/hs/TestHistoryFileManager.java | 139 +++++++++++++++++ 5 files changed, 269 insertions(+), 31 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 35a538e3132..14dc7013fe2 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -272,6 +272,9 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps cannot be fulfilled. (lohit via kasha) + MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running. 
+ (tucu) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java index ee7dae93cac..2cc233688b8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java @@ -77,6 +77,13 @@ public class JHAdminConfig { public static final String MR_HISTORY_DONE_DIR = MR_HISTORY_PREFIX + "done-dir"; + /** + * Maximum time the History server will wait for the FileSystem for History + * files to become available. Default value is -1, forever. + */ + public static final String MR_HISTORY_MAX_START_WAIT_TIME = + MR_HISTORY_PREFIX + "maximum-start-wait-time-millis"; + public static final long DEFAULT_MR_HISTORY_MAX_START_WAIT_TIME = -1; /** * Path where history files should be stored after a job finished and before * they are pulled into the job history server. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml index 31587782f43..3ac191fb86b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml @@ -33,6 +33,10 @@ + + org.apache.hadoop + hadoop-hdfs + org.apache.hadoop hadoop-mapreduce-client-common @@ -53,6 +57,12 @@ test-jar test + + org.apache.hadoop + hadoop-hdfs + test-jar + test + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java index d0861002408..f53f18896d9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java @@ -20,6 +20,7 @@ package org.apache.hadoop.mapreduce.v2.hs; import java.io.FileNotFoundException; import java.io.IOException; +import java.net.ConnectException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -69,6 +70,8 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; /** * This class provides a way to interact with history files in a thread safe @@ -464,7 +467,8 @@ public class HistoryFileManager extends AbstractService { private JobACLsManager aclsMgr; - private Configuration conf; + @VisibleForTesting + Configuration conf; private String serialNumberFormat; @@ -491,36 +495,10 @@ public class HistoryFileManager extends AbstractService { + 
(JobHistoryUtils.SERIAL_NUMBER_DIRECTORY_DIGITS + serialNumberLowDigits) + "d"); - String doneDirPrefix = null; - doneDirPrefix = JobHistoryUtils - .getConfiguredHistoryServerDoneDirPrefix(conf); - try { - doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified( - new Path(doneDirPrefix)); - doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf); - doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK); - mkdir(doneDirFc, doneDirPrefixPath, new FsPermission( - JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION)); - } catch (IOException e) { - throw new YarnRuntimeException("Error creating done directory: [" - + doneDirPrefixPath + "]", e); - } - - String intermediateDoneDirPrefix = null; - intermediateDoneDirPrefix = JobHistoryUtils - .getConfiguredHistoryIntermediateDoneDirPrefix(conf); - try { - intermediateDoneDirPath = FileContext.getFileContext(conf).makeQualified( - new Path(intermediateDoneDirPrefix)); - intermediateDoneDirFc = FileContext.getFileContext( - intermediateDoneDirPath.toUri(), conf); - mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission( - JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort())); - } catch (IOException e) { - LOG.info("error creating done directory on dfs " + e); - throw new YarnRuntimeException("Error creating intermediate done directory: [" - + intermediateDoneDirPath + "]", e); - } + long maxFSWaitTime = conf.getLong( + JHAdminConfig.MR_HISTORY_MAX_START_WAIT_TIME, + JHAdminConfig.DEFAULT_MR_HISTORY_MAX_START_WAIT_TIME); + createHistoryDirs(new SystemClock(), 10 * 1000, maxFSWaitTime); this.aclsMgr = new JobACLsManager(conf); @@ -544,6 +522,107 @@ public class HistoryFileManager extends AbstractService { super.serviceInit(conf); } + @VisibleForTesting + void createHistoryDirs(Clock clock, long intervalCheckMillis, + long timeOutMillis) throws IOException { + long start = clock.getTime(); + boolean done = false; + int counter = 0; + while (!done && + ((timeOutMillis == -1) || (clock.getTime() - start < timeOutMillis))) { + done = tryCreatingHistoryDirs(counter++ % 3 == 0); // log every 3 attempts, 30sec + try { + Thread.sleep(intervalCheckMillis); + } catch (InterruptedException ex) { + throw new YarnRuntimeException(ex); + } + } + if (!done) { + throw new YarnRuntimeException("Timed out '" + timeOutMillis+ + "ms' waiting for FileSystem to become available"); + } + } + + /** + * DistributedFileSystem returns a RemoteException with a message stating + * SafeModeException in it. So this is only way to check it is because of + * being in safe mode. + */ + private boolean isBecauseSafeMode(Throwable ex) { + return ex.toString().contains("SafeModeException"); + } + + /** + * Returns TRUE if the history dirs were created, FALSE if they could not + * be created because the FileSystem is not reachable or in safe mode and + * throws and exception otherwise. + */ + @VisibleForTesting + boolean tryCreatingHistoryDirs(boolean logWait) throws IOException { + boolean succeeded = true; + String doneDirPrefix = JobHistoryUtils. 
+ getConfiguredHistoryServerDoneDirPrefix(conf); + try { + doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified( + new Path(doneDirPrefix)); + doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf); + doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK); + mkdir(doneDirFc, doneDirPrefixPath, new FsPermission( + JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION)); + } catch (ConnectException ex) { + if (logWait) { + LOG.info("Waiting for FileSystem at " + + doneDirPrefixPath.toUri().getAuthority() + "to be available"); + } + succeeded = false; + } catch (IOException e) { + if (isBecauseSafeMode(e)) { + succeeded = false; + if (logWait) { + LOG.info("Waiting for FileSystem at " + + doneDirPrefixPath.toUri().getAuthority() + + "to be out of safe mode"); + } + } else { + throw new YarnRuntimeException("Error creating done directory: [" + + doneDirPrefixPath + "]", e); + } + } + if (succeeded) { + String intermediateDoneDirPrefix = JobHistoryUtils. + getConfiguredHistoryIntermediateDoneDirPrefix(conf); + try { + intermediateDoneDirPath = FileContext.getFileContext(conf).makeQualified( + new Path(intermediateDoneDirPrefix)); + intermediateDoneDirFc = FileContext.getFileContext( + intermediateDoneDirPath.toUri(), conf); + mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission( + JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort())); + } catch (ConnectException ex) { + succeeded = false; + if (logWait) { + LOG.info("Waiting for FileSystem at " + + intermediateDoneDirPath.toUri().getAuthority() + + "to be available"); + } + } catch (IOException e) { + if (isBecauseSafeMode(e)) { + succeeded = false; + if (logWait) { + LOG.info("Waiting for FileSystem at " + + intermediateDoneDirPath.toUri().getAuthority() + + "to be out of safe mode"); + } + } else { + throw new YarnRuntimeException( + "Error creating intermediate done directory: [" + + intermediateDoneDirPath + "]", e); + } + } + } + return succeeded; + } + @Override public void serviceStop() throws Exception { ShutdownThreadsHelper.shutdownExecutorService(moveToDoneExecutor); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java new file mode 100644 index 00000000000..de617202da5 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2.hs; + + +import junit.framework.Assert; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.mapreduce.v2.app.ControlledClock; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.UUID; + +public class TestHistoryFileManager { + private static MiniDFSCluster dfsCluster = null; + + @BeforeClass + public static void setUpClass() throws Exception { + Configuration conf = new HdfsConfiguration(); + dfsCluster = new MiniDFSCluster.Builder(conf).build(); + } + + @AfterClass + public static void cleanUpClass() throws Exception { + dfsCluster.shutdown(); + } + + private void testTryCreateHistoryDirs(Configuration conf, boolean expected) + throws Exception { + conf.set(JHAdminConfig.MR_HISTORY_DONE_DIR, "/" + UUID.randomUUID()); + conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, "/" + UUID.randomUUID()); + HistoryFileManager hfm = new HistoryFileManager(); + hfm.conf = conf; + Assert.assertEquals(expected, hfm.tryCreatingHistoryDirs(false)); + } + + @Test + public void testCreateDirsWithoutFileSystem() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost:1"); + testTryCreateHistoryDirs(conf, false); + } + + @Test + public void testCreateDirsWithFileSystem() throws Exception { + dfsCluster.getFileSystem().setSafeMode( + HdfsConstants.SafeModeAction.SAFEMODE_LEAVE); + Assert.assertFalse(dfsCluster.getFileSystem().isInSafeMode()); + testTryCreateHistoryDirs(dfsCluster.getConfiguration(0), true); + } + + @Test + public void testCreateDirsWithFileSystemInSafeMode() throws Exception { + dfsCluster.getFileSystem().setSafeMode( + HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + Assert.assertTrue(dfsCluster.getFileSystem().isInSafeMode()); + testTryCreateHistoryDirs(dfsCluster.getConfiguration(0), false); + } + + private void testCreateHistoryDirs(Configuration conf, Clock clock) + throws Exception { + conf.set(JHAdminConfig.MR_HISTORY_DONE_DIR, "/" + UUID.randomUUID()); + conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, "/" + UUID.randomUUID()); + HistoryFileManager hfm = new HistoryFileManager(); + hfm.conf = conf; + hfm.createHistoryDirs(clock, 500, 2000); + } + + @Test + public void testCreateDirsWithFileSystemBecomingAvailBeforeTimeout() + throws Exception { + dfsCluster.getFileSystem().setSafeMode( + HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + Assert.assertTrue(dfsCluster.getFileSystem().isInSafeMode()); + new Thread() { + @Override + public void run() { + try { + Thread.sleep(500); + dfsCluster.getFileSystem().setSafeMode( + HdfsConstants.SafeModeAction.SAFEMODE_LEAVE); + Assert.assertTrue(dfsCluster.getFileSystem().isInSafeMode()); + } catch (Exception ex) { + Assert.fail(ex.toString()); + } + } + }.start(); + testCreateHistoryDirs(dfsCluster.getConfiguration(0), new SystemClock()); + } + + @Test(expected = YarnRuntimeException.class) + public void testCreateDirsWithFileSystemNotBecomingAvailBeforeTimeout() + throws Exception { + 
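+    // The cluster stays in safe mode for the entire test; only the
+    // ControlledClock is advanced (to 3000) past the 2000ms timeout handed to
+    // createHistoryDirs, so the call is expected to give up and throw the
+    // YarnRuntimeException declared in the @Test annotation above.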
dfsCluster.getFileSystem().setSafeMode( + HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + Assert.assertTrue(dfsCluster.getFileSystem().isInSafeMode()); + final ControlledClock clock = new ControlledClock(new SystemClock()); + clock.setTime(1); + new Thread() { + @Override + public void run() { + try { + Thread.sleep(500); + clock.setTime(3000); + } catch (Exception ex) { + Assert.fail(ex.toString()); + } + } + }.start(); + testCreateHistoryDirs(dfsCluster.getConfiguration(0), clock); + } + +} From e05ff82bd95dd802233e2424e8eafbd7ec6944c5 Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Thu, 16 Jan 2014 17:48:50 +0000 Subject: [PATCH 17/32] HADOOP-10177. Create CLI tools for managing keys. (Larry McCay via omalley) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558867 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 9 +- .../hadoop-common/src/main/bin/hadoop | 2 + .../crypto/key/JavaKeyStoreProvider.java | 2 +- .../apache/hadoop/crypto/key/KeyProvider.java | 11 + .../apache/hadoop/crypto/key/KeyShell.java | 474 ++++++++++++++++++ .../hadoop/crypto/key/UserProvider.java | 5 + .../hadoop/crypto/key/TestKeyShell.java | 176 +++++++ 7 files changed, 675 insertions(+), 4 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 48402201e8b..1b098a1dc34 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -98,7 +98,8 @@ Trunk (Unreleased) HADOOP-8844. Add a plaintext fs -text test-case. (Akira AJISAKA via harsh) - HADOOP-9432 Add support for markdown .md files in site documentation (stevel) + HADOOP-9432 Add support for markdown .md files in site documentation + (stevel) HADOOP-9186. test-patch.sh should report build failure to JIRA. (Binglin Chang via Colin Patrick McCabe) @@ -110,6 +111,8 @@ Trunk (Unreleased) HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley) + HADOOP-10177. Create CLI tools for managing keys. (Larry McCay via omalley) + BUG FIXES HADOOP-9451. Fault single-layer config if node group topology is enabled. @@ -117,8 +120,8 @@ Trunk (Unreleased) HADOOP-8419. Fixed GzipCode NPE reset for IBM JDK. (Yu Li via eyang) - HADOOP-8177. MBeans shouldn't try to register when it fails to create MBeanName. - (Devaraj K via umamahesh) + HADOOP-8177. MBeans shouldn't try to register when it fails to create + MBeanName. (Devaraj K via umamahesh) HADOOP-8018. 
Hudson auto test for HDFS has started throwing javadoc (Jon Eagles via bobby) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index ddc6b4af648..3fa414b76f5 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -104,6 +104,8 @@ case $COMMAND in CLASS=org.apache.hadoop.util.VersionInfo elif [ "$COMMAND" = "jar" ] ; then CLASS=org.apache.hadoop.util.RunJar + elif [ "$COMMAND" = "key" ] ; then + CLASS=org.apache.hadoop.crypto.key.KeyShell elif [ "$COMMAND" = "checknative" ] ; then CLASS=org.apache.hadoop.util.NativeLibraryChecker elif [ "$COMMAND" = "distcp" ] ; then diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java index 93a47deaa73..f85f955fccc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java @@ -77,7 +77,7 @@ public class JavaKeyStoreProvider extends KeyProvider { private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException { this.uri = uri; path = unnestUri(uri); - fs = FileSystem.get(conf); + fs = path.getFileSystem(conf); // Get the password from the user's environment String pw = System.getenv(KEYSTORE_PASSWORD_NAME); if (pw == null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java index 6f9f016f62c..3bbb5568c2d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java @@ -244,6 +244,17 @@ public abstract class KeyProvider { return new Options(conf); } + /** + * Indicates whether this provider represents a store + * that is intended for transient use - such as the UserProvider + * is. These providers are generally used to provide access to + * keying material rather than for long term storage. + * @return true if transient, false otherwise + */ + public boolean isTransient() { + return false; + } + /** * Get the key material for a specific version of the key. This method is used * when decrypting data. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java new file mode 100644 index 00000000000..db30f8fc891 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java @@ -0,0 +1,474 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.crypto.key; + +import java.io.IOException; +import java.io.PrintStream; +import java.security.InvalidParameterException; +import java.security.NoSuchAlgorithmException; +import java.util.List; + +import javax.crypto.KeyGenerator; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.crypto.key.KeyProvider.Metadata; +import org.apache.hadoop.crypto.key.KeyProvider.Options; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +/** + * This program is the CLI utility for the KeyProvider facilities in Hadoop. + */ +public class KeyShell extends Configured implements Tool { + final static private String USAGE_PREFIX = "Usage: hadoop key " + + "[generic options]\n"; + final static private String COMMANDS = + " [--help]\n" + + " [" + CreateCommand.USAGE + "]\n" + + " [" + RollCommand.USAGE + "]\n" + + " [" + DeleteCommand.USAGE + "]\n" + + " [" + ListCommand.USAGE + "]\n"; + + private boolean interactive = false; + private Command command = null; + + /** allows stdout to be captured if necessary */ + public PrintStream out = System.out; + /** allows stderr to be captured if necessary */ + public PrintStream err = System.err; + + private boolean userSuppliedProvider = false; + + @Override + public int run(String[] args) throws Exception { + int exitCode = 0; + try { + exitCode = init(args); + if (exitCode != 0) { + return exitCode; + } + if (command.validate()) { + command.execute(); + } else { + exitCode = -1; + } + } catch (Exception e) { + e.printStackTrace(err); + return -1; + } + return exitCode; + } + + /** + * Parse the command line arguments and initialize the data + *
+   * <pre>
+   * % hadoop key create keyName [--size size] [--cipher algorithm]
+   *    [--provider providerPath]
+   * % hadoop key roll keyName [--provider providerPath]
+   * % hadoop key list [-provider providerPath]
+   * % hadoop key delete keyName [--provider providerPath] [-i]
+   * </pre>
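+   * For example, an invocation such as
+   * "hadoop key create mykey --size 256 --provider jceks://file/tmp/keystore.jceks"
+   * would create a key named "mykey" (the key name and provider path here are
+   * only illustrative).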
+ * @param args + * @return + * @throws IOException + */ + private int init(String[] args) throws IOException { + for (int i = 0; i < args.length; i++) { // parse command line + if (args[i].equals("create")) { + String keyName = args[++i]; + command = new CreateCommand(keyName); + if (keyName.equals("--help")) { + printKeyShellUsage(); + return -1; + } + } else if (args[i].equals("delete")) { + String keyName = args[++i]; + command = new DeleteCommand(keyName); + if (keyName.equals("--help")) { + printKeyShellUsage(); + return -1; + } + } else if (args[i].equals("roll")) { + String keyName = args[++i]; + command = new RollCommand(keyName); + if (keyName.equals("--help")) { + printKeyShellUsage(); + return -1; + } + } else if (args[i].equals("list")) { + command = new ListCommand(); + } else if (args[i].equals("--size")) { + getConf().set(KeyProvider.DEFAULT_BITLENGTH_NAME, args[++i]); + } else if (args[i].equals("--cipher")) { + getConf().set(KeyProvider.DEFAULT_CIPHER_NAME, args[++i]); + } else if (args[i].equals("--provider")) { + userSuppliedProvider = true; + getConf().set(KeyProviderFactory.KEY_PROVIDER_PATH, args[++i]); + } else if (args[i].equals("-i") || (args[i].equals("--interactive"))) { + interactive = true; + } else if (args[i].equals("--help")) { + printKeyShellUsage(); + return -1; + } else { + printKeyShellUsage(); + ToolRunner.printGenericCommandUsage(System.err); + return -1; + } + } + return 0; + } + + private void printKeyShellUsage() { + out.println(USAGE_PREFIX + COMMANDS); + if (command != null) { + out.println(command.getUsage()); + } + else { + out.println("=========================================================" + + "======"); + out.println(CreateCommand.USAGE + ":\n\n" + CreateCommand.DESC); + out.println("=========================================================" + + "======"); + out.println(RollCommand.USAGE + ":\n\n" + RollCommand.DESC); + out.println("=========================================================" + + "======"); + out.println(DeleteCommand.USAGE + ":\n\n" + DeleteCommand.DESC); + out.println("=========================================================" + + "======"); + out.println(ListCommand.USAGE + ":\n\n" + ListCommand.DESC); + } + } + + private abstract class Command { + protected KeyProvider provider = null; + + public boolean validate() { + return true; + } + + protected KeyProvider getKeyProvider() { + KeyProvider provider = null; + List providers; + try { + providers = KeyProviderFactory.getProviders(getConf()); + if (userSuppliedProvider) { + provider = providers.get(0); + } + else { + for (KeyProvider p : providers) { + if (!p.isTransient()) { + provider = p; + break; + } + } + } + } catch (IOException e) { + e.printStackTrace(err); + } + return provider; + } + + protected byte[] generateKey(int size, String algorithm) + throws NoSuchAlgorithmException { + out.println("Generating key using size: " + size + " and algorithm: " + + algorithm); + KeyGenerator keyGenerator = KeyGenerator.getInstance(algorithm); + keyGenerator.init(size); + byte[] key = keyGenerator.generateKey().getEncoded(); + return key; + } + + protected void printProviderWritten() { + out.println(provider.getClass().getName() + " has been updated."); + } + + protected void warnIfTransientProvider() { + if (provider.isTransient()) { + out.println("WARNING: you are modifying a transient provider."); + } + } + + public abstract void execute() throws Exception; + + public abstract String getUsage(); + } + + private class ListCommand extends Command { + public static final 
String USAGE = "list [--provider] [--help]"; + public static final String DESC = + "The list subcommand displays the keynames contained within \n" + + "a particular provider - as configured in core-site.xml or " + + "indicated\nthrough the --provider argument."; + + public boolean validate() { + boolean rc = true; + provider = getKeyProvider(); + if (provider == null) { + out.println("There are no non-transient KeyProviders configured.\n" + + "Consider using the --provider option to indicate the provider\n" + + "to use. If you want to list a transient provider then you\n" + + "you MUST use the --provider argument."); + rc = false; + } + return rc; + } + + public void execute() throws IOException { + List keys; + try { + keys = provider.getKeys(); + out.println("Listing keys for KeyProvider: " + provider.toString()); + for (String keyName : keys) { + out.println(keyName); + } + } catch (IOException e) { + out.println("Cannot list keys for KeyProvider: " + provider.toString() + + ": " + e.getMessage()); + throw e; + } + } + + @Override + public String getUsage() { + return USAGE + ":\n\n" + DESC; + } + } + + private class RollCommand extends Command { + public static final String USAGE = "roll [--provider] [--help]"; + public static final String DESC = + "The roll subcommand creates a new version of the key specified\n" + + "through the argument within the provider indicated using\n" + + "the --provider argument"; + + String keyName = null; + + public RollCommand(String keyName) { + this.keyName = keyName; + } + + public boolean validate() { + boolean rc = true; + provider = getKeyProvider(); + if (provider == null) { + out.println("There are no valid KeyProviders configured.\n" + + "Key will not be rolled.\n" + + "Consider using the --provider option to indicate the provider" + + " to use."); + rc = false; + } + if (keyName == null) { + out.println("There is no keyName specified. Please provide the" + + "mandatory . 
See the usage description with --help."); + rc = false; + } + return rc; + } + + public void execute() throws NoSuchAlgorithmException, IOException { + try { + Metadata md = provider.getMetadata(keyName); + warnIfTransientProvider(); + out.println("Rolling key version from KeyProvider: " + + provider.toString() + " for key name: " + keyName); + try { + byte[] material = null; + material = generateKey(md.getBitLength(), md.getAlgorithm()); + provider.rollNewVersion(keyName, material); + out.println(keyName + " has been successfully rolled."); + provider.flush(); + printProviderWritten(); + } catch (NoSuchAlgorithmException e) { + out.println("Cannot roll key: " + keyName + " within KeyProvider: " + + provider.toString()); + throw e; + } + } catch (IOException e1) { + out.println("Cannot roll key: " + keyName + " within KeyProvider: " + + provider.toString()); + throw e1; + } + } + + @Override + public String getUsage() { + return USAGE + ":\n\n" + DESC; + } + } + + private class DeleteCommand extends Command { + public static final String USAGE = "delete [--provider] [--help]"; + public static final String DESC = + "The delete subcommand deletes all of the versions of the key\n" + + "specified as the argument from within the provider\n" + + "indicated through the --provider argument"; + + String keyName = null; + boolean cont = true; + + public DeleteCommand(String keyName) { + this.keyName = keyName; + } + + @Override + public boolean validate() { + provider = getKeyProvider(); + if (provider == null) { + out.println("There are no valid KeyProviders configured.\n" + + "Nothing will be deleted.\n" + + "Consider using the --provider option to indicate the provider" + + " to use."); + return false; + } + if (keyName == null) { + out.println("There is no keyName specified. Please provide the" + + "mandatory . See the usage description with --help."); + return false; + } + if (interactive) { + try { + cont = ToolRunner + .confirmPrompt("You are about to DELETE all versions of " + + "the key: " + keyName + " from KeyProvider " + + provider.toString() + ". Continue?:"); + if (!cont) { + out.println("Nothing has been be deleted."); + } + return cont; + } catch (IOException e) { + out.println(keyName + " will not be deleted."); + e.printStackTrace(err); + } + } + return true; + } + + public void execute() throws IOException { + warnIfTransientProvider(); + out.println("Deleting key: " + keyName + " from KeyProvider: " + + provider.toString()); + if (cont) { + try { + provider.deleteKey(keyName); + out.println(keyName + " has been successfully deleted."); + provider.flush(); + printProviderWritten(); + } catch (IOException e) { + out.println(keyName + "has NOT been deleted."); + throw e; + } + } + } + + @Override + public String getUsage() { + return USAGE + ":\n\n" + DESC; + } + } + + private class CreateCommand extends Command { + public static final String USAGE = "create [--cipher] " + + "[--size] [--provider] [--help]"; + public static final String DESC = + "The create subcommand creates a new key for the name specified\n" + + "as the argument within the provider indicated through\n" + + "the --provider argument. You may also indicate the specific\n" + + "cipher through the --cipher argument. The default for cipher is\n" + + "currently \"AES/CTR/NoPadding\". 
The default keysize is \"256\".\n" + + "You may also indicate the requested key length through the --size\n" + + "argument."; + + String keyName = null; + + public CreateCommand(String keyName) { + this.keyName = keyName; + } + + public boolean validate() { + boolean rc = true; + provider = getKeyProvider(); + if (provider == null) { + out.println("There are no valid KeyProviders configured.\nKey" + + " will not be created.\n" + + "Consider using the --provider option to indicate the provider" + + " to use."); + rc = false; + } + if (keyName == null) { + out.println("There is no keyName specified. Please provide the" + + "mandatory . See the usage description with --help."); + rc = false; + } + return rc; + } + + public void execute() throws IOException, NoSuchAlgorithmException { + warnIfTransientProvider(); + try { + Options options = KeyProvider.options(getConf()); + String alg = getAlgorithm(options.getCipher()); + byte[] material = generateKey(options.getBitLength(), alg); + provider.createKey(keyName, material, options); + out.println(keyName + " has been successfully created."); + provider.flush(); + printProviderWritten(); + } catch (InvalidParameterException e) { + out.println(keyName + " has NOT been created. " + e.getMessage()); + throw e; + } catch (IOException e) { + out.println(keyName + " has NOT been created. " + e.getMessage()); + throw e; + } catch (NoSuchAlgorithmException e) { + out.println(keyName + " has NOT been created. " + e.getMessage()); + throw e; + } + } + + /** + * Get the algorithm from the cipher. + * @return the algorithm name + */ + public String getAlgorithm(String cipher) { + int slash = cipher.indexOf('/'); + if (slash == - 1) { + return cipher; + } else { + return cipher.substring(0, slash); + } + } + + @Override + public String getUsage() { + return USAGE + ":\n\n" + DESC; + } + } + + /** + * Main program. + * + * @param args + * Command line arguments + * @throws Exception + */ + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new KeyShell(), args); + System.exit(res); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java index 424e7ca8503..89ecc42ba0c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java @@ -49,6 +49,11 @@ public class UserProvider extends KeyProvider { credentials = user.getCredentials(); } + @Override + public boolean isTransient() { + return true; + } + @Override public KeyVersion getKeyVersion(String versionName) { byte[] bytes = credentials.getSecretKey(new Text(versionName)); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java new file mode 100644 index 00000000000..2d2f7ee431b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java @@ -0,0 +1,176 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.crypto.key; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Before; +import org.junit.Test; + +public class TestKeyShell { + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + private static final File tmpDir = + new File(System.getProperty("test.build.data", "/tmp"), "key"); + + @Before + public void setup() throws Exception { + System.setOut(new PrintStream(outContent)); + System.setErr(new PrintStream(errContent)); + } + + @Test + public void testKeySuccessfulKeyLifecycle() throws Exception { + outContent.flush(); + String[] args1 = {"create", "key1", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + int rc = 0; + KeyShell ks = new KeyShell(); + ks.setConf(new Configuration()); + rc = ks.run(args1); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("key1 has been successfully " + + "created.")); + + outContent.flush(); + String[] args2 = {"list", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + rc = ks.run(args2); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("key1")); + + outContent.flush(); + String[] args3 = {"roll", "key1", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + rc = ks.run(args3); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("key1 has been successfully " + + "rolled.")); + + outContent.flush(); + String[] args4 = {"delete", "key1", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + rc = ks.run(args4); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("key1 has been successfully " + + "deleted.")); + + outContent.flush(); + String[] args5 = {"list", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + rc = ks.run(args5); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("key1")); + } + + @Test + public void testInvalidKeySize() throws Exception { + String[] args1 = {"create", "key1", "--size", "56", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + + int rc = 0; + KeyShell ks = new KeyShell(); + ks.setConf(new Configuration()); + rc = ks.run(args1); + assertEquals(-1, rc); + assertTrue(outContent.toString().contains("key1 has NOT been created.")); + } + + @Test + public void testInvalidCipher() throws Exception { + String[] args1 = {"create", "key1", "--cipher", "LJM", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + + int rc = 0; + KeyShell ks = new KeyShell(); + ks.setConf(new Configuration()); + rc = ks.run(args1); + assertEquals(-1, rc); + assertTrue(outContent.toString().contains("key1 has NOT been created.")); + } + + @Test + public void testInvalidProvider() throws Exception { + String[] args1 = {"create", "key1", 
"--cipher", "AES", "--provider", + "sdff://file/tmp/keystore.jceks"}; + + int rc = 0; + KeyShell ks = new KeyShell(); + ks.setConf(new Configuration()); + rc = ks.run(args1); + assertEquals(-1, rc); + assertTrue(outContent.toString().contains("There are no valid " + + "KeyProviders configured.")); + } + + @Test + public void testTransientProviderWarning() throws Exception { + String[] args1 = {"create", "key1", "--cipher", "AES", "--provider", + "user:///"}; + + int rc = 0; + KeyShell ks = new KeyShell(); + ks.setConf(new Configuration()); + rc = ks.run(args1); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("WARNING: you are modifying a " + + "transient provider.")); + } + + @Test + public void testTransientProviderOnlyConfig() throws Exception { + String[] args1 = {"create", "key1"}; + + int rc = 0; + KeyShell ks = new KeyShell(); + Configuration config = new Configuration(); + config.set(KeyProviderFactory.KEY_PROVIDER_PATH, "user:///"); + ks.setConf(config); + rc = ks.run(args1); + assertEquals(-1, rc); + assertTrue(outContent.toString().contains("There are no valid " + + "KeyProviders configured.")); + } + + @Test + public void testFullCipher() throws Exception { + String[] args1 = {"create", "key1", "--cipher", "AES/CBC/pkcs5Padding", + "--provider", "jceks://file" + tmpDir + "/keystore.jceks"}; + + int rc = 0; + KeyShell ks = new KeyShell(); + ks.setConf(new Configuration()); + rc = ks.run(args1); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("key1 has been successfully " + + "created.")); + + outContent.flush(); + String[] args2 = {"delete", "key1", "--provider", + "jceks://file" + tmpDir + "/keystore.jceks"}; + rc = ks.run(args2); + assertEquals(0, rc); + assertTrue(outContent.toString().contains("key1 has been successfully " + + "deleted.")); + } +} From 752441434c99003595c67f22b3a526881e70156b Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Thu, 16 Jan 2014 18:42:25 +0000 Subject: [PATCH 18/32] YARN-1608. LinuxContainerExecutor has a few DEBUG messages at INFO level (kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558874 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/nodemanager/LinuxContainerExecutor.java | 8 +++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 05ba7f27b61..a91bc91a738 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -343,6 +343,9 @@ Release 2.4.0 - UNRELEASED YARN-1351. Invalid string format in Fair Scheduler log warn message (Konstantin Weitz via Sandy Ryza) + YARN-1608. 
LinuxContainerExecutor has a few DEBUG messages at INFO level + (kasha) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index ebed090c2ff..cbdcb13d406 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -217,8 +217,6 @@ public class LinuxContainerExecutor extends ContainerExecutor { } String[] commandArray = command.toArray(new String[command.size()]); ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray); - // TODO: DEBUG - LOG.info("initApplication: " + Arrays.toString(commandArray)); if (LOG.isDebugEnabled()) { LOG.debug("initApplication: " + Arrays.toString(commandArray)); } @@ -274,8 +272,9 @@ public class LinuxContainerExecutor extends ContainerExecutor { String[] commandArray = command.toArray(new String[command.size()]); shExec = new ShellCommandExecutor(commandArray, null, // NM's cwd container.getLaunchContext().getEnvironment()); // sanitized env - // DEBUG - LOG.info("launchContainer: " + Arrays.toString(commandArray)); + if (LOG.isDebugEnabled()) { + LOG.debug("launchContainer: " + Arrays.toString(commandArray)); + } shExec.execute(); if (LOG.isDebugEnabled()) { logOutput(shExec.getOutput()); @@ -374,7 +373,6 @@ public class LinuxContainerExecutor extends ContainerExecutor { } String[] commandArray = command.toArray(new String[command.size()]); ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray); - LOG.info(" -- DEBUG -- deleteAsUser: " + Arrays.toString(commandArray)); if (LOG.isDebugEnabled()) { LOG.debug("deleteAsUser: " + Arrays.toString(commandArray)); } From 3928de29fca3dc1d3bd910cdd54c4e484f2d2fab Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Thu, 16 Jan 2014 18:54:49 +0000 Subject: [PATCH 19/32] YARN-1606. Fix the default value of yarn.resourcemanager.zk-timeout-ms in yarn-default.xml (kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558882 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../hadoop-yarn-common/src/main/resources/yarn-default.xml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index a91bc91a738..757900eae96 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -346,6 +346,9 @@ Release 2.4.0 - UNRELEASED YARN-1608. LinuxContainerExecutor has a few DEBUG messages at INFO level (kasha) + YARN-1606. 
Fix the default value of yarn.resourcemanager.zk-timeout-ms + in yarn-default.xml (kasha) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index ead11eb7123..2c339a9d46c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -335,7 +335,7 @@ Expirations happens when the cluster does not hear from the client within the specified session timeout period (i.e. no heartbeat). yarn.resourcemanager.zk-timeout-ms - 60000 + 10000 From 5421725907267d88609442c220aed3f32ccf6ad1 Mon Sep 17 00:00:00 2001 From: Daryn Sharp Date: Thu, 16 Jan 2014 18:54:52 +0000 Subject: [PATCH 20/32] HADOOP-10146. Workaround JDK7 Process fd close bug (daryn) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558883 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 ++ .../java/org/apache/hadoop/util/Shell.java | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 1b098a1dc34..2dee49b1bb5 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -526,6 +526,8 @@ Release 2.4.0 - UNRELEASED HADOOP-10236. Fix typo in o.a.h.ipc.Client#checkResponse. (Akira Ajisaka via suresh) + HADOOP-10146. Workaround JDK7 Process fd close bug (daryn) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java index 8013f22b97a..59c64c63583 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java @@ -21,6 +21,7 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; +import java.io.InputStream; import java.util.Arrays; import java.util.Map; import java.util.Timer; @@ -511,7 +512,17 @@ abstract public class Shell { } // close the input stream try { - inReader.close(); + // JDK 7 tries to automatically drain the input streams for us + // when the process exits, but since close is not synchronized, + // it creates a race if we close the stream first and the same + // fd is recycled. the stream draining thread will attempt to + // drain that fd!! 
it may block, OOM, or cause bizarre behavior + // see: https://bugs.openjdk.java.net/browse/JDK-8024521 + // issue is fixed in build 7u60 + InputStream stdout = process.getInputStream(); + synchronized (stdout) { + inReader.close(); + } } catch (IOException ioe) { LOG.warn("Error while closing the input stream", ioe); } @@ -524,7 +535,10 @@ abstract public class Shell { LOG.warn("Interrupted while joining errThread"); } try { - errReader.close(); + InputStream stderr = process.getErrorStream(); + synchronized (stderr) { + errReader.close(); + } } catch (IOException ioe) { LOG.warn("Error while closing the error stream", ioe); } From a6ea460a9150e84128ebef97ab6ea8881215de03 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 16 Jan 2014 22:56:09 +0000 Subject: [PATCH 21/32] MAPREDUCE-5672. Provide optional RollingFileAppender for container log4j (syslog). Contributed by Gera Shegalov git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558948 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../hadoop/mapred/MapReduceChildJVM.java | 6 +- .../hadoop/mapreduce/v2/util/MRApps.java | 13 +- .../apache/hadoop/mapreduce/MRJobConfig.java | 10 +- .../src/main/resources/mapred-default.xml | 25 ++++ .../org/apache/hadoop/mapred/YARNRunner.java | 4 +- .../hadoop/mapreduce/v2/TestMRJobs.java | 124 +++++++++++++++++- .../hadoop/yarn/conf/YarnConfiguration.java | 3 + .../yarn/ContainerRollingLogAppender.java | 65 +++++++++ .../main/resources/container-log4j.properties | 7 + 10 files changed, 253 insertions(+), 7 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 14dc7013fe2..b0e8cfcf3cc 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -275,6 +275,9 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running. (tucu) + MAPREDUCE-5672. 
Provide optional RollingFileAppender for container log4j + (syslog) (Gera Shegalov via jlowe) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java index 7cba10e1202..2302490aaa8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java @@ -149,8 +149,10 @@ public class MapReduceChildJVM { private static void setupLog4jProperties(Task task, Vector vargs, long logSize) { - String logLevel = getChildLogLevel(task.conf, task.isMapTask()); - MRApps.addLog4jSystemProperties(logLevel, logSize, vargs); + String logLevel = getChildLogLevel(task.conf, task.isMapTask()); + int numBackups = task.conf.getInt(MRJobConfig.TASK_LOG_BACKUPS, + MRJobConfig.DEFAULT_TASK_LOG_BACKUPS); + MRApps.addLog4jSystemProperties(logLevel, logSize, numBackups, vargs); } public static List getVMCommand( diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index 650bdd499eb..c998a8d4d0e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -61,6 +61,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.ConverterUtils; +import org.apache.log4j.RollingFileAppender; /** * Helper class for MR applications @@ -476,16 +477,24 @@ public class MRApps extends Apps { * Add the JVM system properties necessary to configure {@link ContainerLogAppender}. 
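+   * When both logSize and numBackups are greater than zero, the rolling
+   * appender (CRLA) is selected via hadoop.root.logger; otherwise the
+   * non-rolling ContainerLogAppender (CLA) is used.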
* @param logLevel the desired log level (eg INFO/WARN/DEBUG) * @param logSize See {@link ContainerLogAppender#setTotalLogFileSize(long)} + * @param numBackups See {@link RollingFileAppender#setMaxBackupIndex(int)} * @param vargs the argument list to append to */ public static void addLog4jSystemProperties( - String logLevel, long logSize, List vargs) { + String logLevel, long logSize, int numBackups, List vargs) { vargs.add("-Dlog4j.configuration=container-log4j.properties"); vargs.add("-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_DIR + "=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); vargs.add( "-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_SIZE + "=" + logSize); - vargs.add("-Dhadoop.root.logger=" + logLevel + ",CLA"); + if (logSize > 0L && numBackups > 0) { + // log should be rolled + vargs.add("-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_BACKUPS + "=" + + numBackups); + vargs.add("-Dhadoop.root.logger=" + logLevel + ",CRLA"); + } else { + vargs.add("-Dhadoop.root.logger=" + logLevel + ",CLA"); + } } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 1be7ba3e3b9..f1a3e453fb3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -412,6 +412,10 @@ public interface MRJobConfig { MR_AM_PREFIX+"log.level"; public static final String DEFAULT_MR_AM_LOG_LEVEL = "INFO"; + public static final String MR_AM_LOG_BACKUPS = + MR_AM_PREFIX + "container.log.backups"; + public static final int DEFAULT_MR_AM_LOG_BACKUPS = 0; // don't roll + /**The number of splits when reporting progress in MR*/ public static final String MR_AM_NUM_PROGRESS_SPLITS = MR_AM_PREFIX+"num-progress-splits"; @@ -692,7 +696,11 @@ public interface MRJobConfig { + "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*"; public static final String WORKFLOW_ID = "mapreduce.workflow.id"; - + + public static final String TASK_LOG_BACKUPS = + MR_PREFIX + "task.container.log.backups"; + public static final int DEFAULT_TASK_LOG_BACKUPS = 0; // don't roll + public static final String WORKFLOW_NAME = "mapreduce.workflow.name"; public static final String WORKFLOW_NODE_NAME = diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index fcaa275cc1a..4f1b20cfeaa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -510,6 +510,31 @@ + + yarn.app.mapreduce.task.container.log.backups + 0 + Number of backup files for task logs when using + ContainerRollingLogAppender (CRLA). See + org.apache.log4j.RollingFileAppender.maxBackupIndex. By default, + ContainerLogAppender (CLA) is used, and container logs are not rolled. CRLA + is enabled for tasks when both mapreduce.task.userlog.limit.kb and + yarn.app.mapreduce.task.container.log.backups are greater than zero. 
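+     <!-- Illustrative example (values are not defaults): setting
+          mapreduce.task.userlog.limit.kb to 10240 and this property to 3 in
+          mapred-site.xml would enable CRLA with up to three rolled backups. -->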
+ + + + + yarn.app.mapreduce.am.container.log.backups + 0 + Number of backup files for the ApplicationMaster logs when using + ContainerRollingLogAppender (CRLA). See + org.apache.log4j.RollingFileAppender.maxBackupIndex. By default, + ContainerLogAppender (CLA) is used, and container logs are not rolled. CRLA + is enabled for the ApplicationMaster when both + mapreduce.task.userlog.limit.kb and + yarn.app.mapreduce.am.container.log.backups are greater than zero. + + + mapreduce.job.maxtaskfailures.per.tracker 3 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index 3d2952ba2ed..fc23c6541f9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -392,7 +392,9 @@ public class YARNRunner implements ClientProtocol { long logSize = TaskLog.getTaskLogLength(new JobConf(conf)); String logLevel = jobConf.get( MRJobConfig.MR_AM_LOG_LEVEL, MRJobConfig.DEFAULT_MR_AM_LOG_LEVEL); - MRApps.addLog4jSystemProperties(logLevel, logSize, vargs); + int numBackups = jobConf.getInt(MRJobConfig.MR_AM_LOG_BACKUPS, + MRJobConfig.DEFAULT_MR_AM_LOG_BACKUPS); + MRApps.addLog4jSystemProperties(logLevel, logSize, numBackups, vargs); // Check for Java Lib Path usage in MAP and REDUCE configs warnForJavaLibPath(conf.get(MRJobConfig.MAP_JAVA_OPTS,""), "map", diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index 1ca1fd400b4..5601c7f84c6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -23,10 +23,12 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.InputStreamReader; import java.io.IOException; import java.io.StringReader; import java.net.URI; import java.security.PrivilegedExceptionAction; +import java.util.EnumSet; import java.util.HashMap; import java.util.Map; import java.util.jar.JarOutputStream; @@ -53,6 +55,8 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.TaskLog; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobCounter; @@ -65,17 +69,22 @@ import org.apache.hadoop.mapreduce.TaskCompletionEvent; import org.apache.hadoop.mapreduce.TaskID; import org.apache.hadoop.mapreduce.TaskReport; import org.apache.hadoop.mapreduce.TaskType; +import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import 
org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.JarFinder; import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.log4j.Level; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -84,6 +93,9 @@ import org.junit.Test; public class TestMRJobs { private static final Log LOG = LogFactory.getLog(TestMRJobs.class); + private static final EnumSet TERMINAL_RM_APP_STATES = + EnumSet.of(RMAppState.FINISHED, RMAppState.FAILED, RMAppState.KILLED); + private static final int NUM_NODE_MGRS = 3; protected static MiniMRYarnCluster mrCluster; protected static MiniDFSCluster dfsCluster; @@ -122,7 +134,8 @@ public class TestMRJobs { } if (mrCluster == null) { - mrCluster = new MiniMRYarnCluster(TestMRJobs.class.getName(), 3); + mrCluster = new MiniMRYarnCluster(TestMRJobs.class.getName(), + NUM_NODE_MGRS); Configuration conf = new Configuration(); conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir"); @@ -416,6 +429,115 @@ public class TestMRJobs { // TODO later: add explicit "isUber()" checks of some sort } + @Test(timeout = 120000) + public void testContainerRollingLog() throws IOException, + InterruptedException, ClassNotFoundException { + if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { + LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + + " not found. Not running test."); + return; + } + + final SleepJob sleepJob = new SleepJob(); + final JobConf sleepConf = new JobConf(mrCluster.getConfig()); + sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, Level.ALL.toString()); + sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, Level.ALL.toString()); + sleepConf.setLong(MRJobConfig.TASK_USERLOG_LIMIT, 1); + sleepConf.setInt(MRJobConfig.TASK_LOG_BACKUPS, 3); + sleepConf.setInt(MRJobConfig.MR_AM_LOG_BACKUPS, 7); + sleepJob.setConf(sleepConf); + + final Job job = sleepJob.createJob(1, 0, 1L, 100, 0L, 0); + job.setJarByClass(SleepJob.class); + job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. 
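+    // After the job completes, the code below polls the RM (for up to 60s)
+    // until the application reaches a terminal state, then scans every
+    // NodeManager log dir and counts the rolled syslog* files written for the
+    // AM and the single map task.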
+ job.waitForCompletion(true); + final JobId jobId = TypeConverter.toYarn(job.getJobID()); + final ApplicationId appID = jobId.getAppId(); + int pollElapsed = 0; + while (true) { + Thread.sleep(1000); + pollElapsed += 1000; + if (TERMINAL_RM_APP_STATES.contains( + mrCluster.getResourceManager().getRMContext().getRMApps().get(appID) + .getState())) { + break; + } + if (pollElapsed >= 60000) { + LOG.warn("application did not reach terminal state within 60 seconds"); + break; + } + } + Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager() + .getRMContext().getRMApps().get(appID).getState()); + + // Job finished, verify logs + // + + final String appIdStr = appID.toString(); + final String appIdSuffix = appIdStr.substring("application_".length(), + appIdStr.length()); + final String containerGlob = "container_" + appIdSuffix + "_*_*"; + final String syslogGlob = appIdStr + + Path.SEPARATOR + containerGlob + + Path.SEPARATOR + TaskLog.LogName.SYSLOG; + int numAppMasters = 0; + int numMapTasks = 0; + + for (int i = 0; i < NUM_NODE_MGRS; i++) { + final Configuration nmConf = mrCluster.getNodeManager(i).getConfig(); + for (String logDir : + nmConf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)) { + final Path absSyslogGlob = + new Path(logDir + Path.SEPARATOR + syslogGlob); + LOG.info("Checking for glob: " + absSyslogGlob); + final FileStatus[] syslogs = localFs.globStatus(absSyslogGlob); + for (FileStatus slog : syslogs) { + // check all syslogs for the container + // + final FileStatus[] sysSiblings = localFs.globStatus(new Path( + slog.getPath().getParent(), TaskLog.LogName.SYSLOG + "*")); + boolean foundAppMaster = false; + floop: + for (FileStatus f : sysSiblings) { + final BufferedReader reader = new BufferedReader( + new InputStreamReader(localFs.open(f.getPath()))); + String line; + try { + while ((line = reader.readLine()) != null) { + if (line.contains(MRJobConfig.APPLICATION_MASTER_CLASS)) { + foundAppMaster = true; + break floop; + } + } + } finally { + reader.close(); + } + } + + if (foundAppMaster) { + numAppMasters++; + } else { + numMapTasks++; + } + + Assert.assertSame("Number of sylog* files", + foundAppMaster + ? 
sleepConf.getInt(MRJobConfig.MR_AM_LOG_BACKUPS, 0) + 1 + : sleepConf.getInt(MRJobConfig.TASK_LOG_BACKUPS, 0) + 1, + sysSiblings.length); + } + } + } + // Make sure we checked non-empty set + // + Assert.assertEquals("No AppMaster log found!", 1, numAppMasters); + if (sleepConf.getBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false)) { + Assert.assertEquals("MapTask log with uber found!", 0, numMapTasks); + } else { + Assert.assertEquals("No MapTask log found!", 1, numMapTasks); + } + } + public static class DistributedCacheChecker extends Mapper { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 4adba7983ef..dc195858cb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -928,6 +928,9 @@ public class YarnConfiguration extends Configuration { public static final String YARN_APP_CONTAINER_LOG_SIZE = YARN_PREFIX + "app.container.log.filesize"; + public static final String YARN_APP_CONTAINER_LOG_BACKUPS = + YARN_PREFIX + "app.container.log.backups"; + //////////////////////////////// // Other Configs //////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java new file mode 100644 index 00000000000..bdf1b09a420 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ContainerRollingLogAppender.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.log4j.RollingFileAppender; + +import java.io.File; +import java.io.Flushable; + +/** + * A simple log4j-appender for container's logs. + * + */ +@Public +@Unstable +public class ContainerRollingLogAppender extends RollingFileAppender + implements Flushable { + private String containerLogDir; + + @Override + public void activateOptions() { + synchronized (this) { + setFile(new File(this.containerLogDir, "syslog").toString()); + setAppend(true); + super.activateOptions(); + } + } + + @Override + public void flush() { + if (qw != null) { + qw.flush(); + } + } + + /** + * Getter/Setter methods for log4j. 
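+ * log4j instantiates this appender from container-log4j.properties and injects the
+ * configured containerLogDir value (e.g. ${yarn.app.container.log.dir}) through the
+ * setter below before activateOptions() runs, which is when the syslog file path is
+ * resolved.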
+ */ + + public String getContainerLogDir() { + return this.containerLogDir; + } + + public void setContainerLogDir(String containerLogDir) { + this.containerLogDir = containerLogDir; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties index 26fd1aa00e9..06dec52e1ba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties @@ -35,6 +35,13 @@ log4j.appender.CLA.totalLogFileSize=${yarn.app.container.log.filesize} log4j.appender.CLA.layout=org.apache.log4j.PatternLayout log4j.appender.CLA.layout.ConversionPattern=%d{ISO8601} %p [%t] %c: %m%n +log4j.appender.CRLA=org.apache.hadoop.yarn.ContainerRollingLogAppender +log4j.appender.CRLA.containerLogDir=${yarn.app.container.log.dir} +log4j.appender.CRLA.maximumFileSize=${yarn.app.container.log.filesize} +log4j.appender.CRLA.maxBackupIndex=${yarn.app.container.log.backups} +log4j.appender.CRLA.layout=org.apache.log4j.PatternLayout +log4j.appender.CRLA.layout.ConversionPattern=%d{ISO8601} %p [%t] %c: %m%n + # # Event Counter Appender # Sends counts of logging messages at different severity levels to Hadoop Metrics. From 7981908929a06eb300f858231607e4963cddf95a Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Thu, 16 Jan 2014 23:03:23 +0000 Subject: [PATCH 22/32] backported HADOOP-10125 to branch2, update CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558953 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 2dee49b1bb5..0ec656b8b8f 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -290,9 +290,6 @@ Trunk (Unreleased) HADOOP-10044 Improve the javadoc of rpc code (sanjay Radia) - HADOOP-10125. no need to process RPC request if the client connection - has been dropped (Ming Ma via brandonli) - OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) @@ -528,6 +525,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10146. Workaround JDK7 Process fd close bug (daryn) + HADOOP-10125. no need to process RPC request if the client connection + has been dropped (Ming Ma via brandonli) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES From ccee5b3e441e57d86ea9bf9e4481290b00c8fb74 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 16 Jan 2014 23:16:11 +0000 Subject: [PATCH 23/32] Moving MR-5672 from bugs to improvements in CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558959 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index b0e8cfcf3cc..42cbc0e3ae3 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -198,6 +198,9 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-3310. Custom grouping comparator cannot be set for Combiners (tucu) + MAPREDUCE-5672. 
Provide optional RollingFileAppender for container log4j + (syslog) (Gera Shegalov via jlowe) + OPTIMIZATIONS MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza) @@ -275,9 +278,6 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running. (tucu) - MAPREDUCE-5672. Provide optional RollingFileAppender for container log4j - (syslog) (Gera Shegalov via jlowe) - Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES From a7ec44d41b0ad1fa133408a10caaae32a27ea569 Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Fri, 17 Jan 2014 01:32:05 +0000 Subject: [PATCH 24/32] HDFS-5784. Reserve space in edit log header and fsimage header for feature flag section (cmccabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1558974 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hadoop/hdfs/protocol/LayoutFlags.java | 64 ++++++++++++++++++ .../hadoop/hdfs/protocol/LayoutVersion.java | 3 +- .../namenode/EditLogFileInputStream.java | 11 +++ .../namenode/EditLogFileOutputStream.java | 2 + .../hdfs/server/namenode/FSImageFormat.java | 5 ++ .../ImageLoaderCurrent.java | 6 +- .../hdfs/qjournal/server/TestJournalNode.java | 1 + .../server/namenode/TestFSEditLogLoader.java | 5 +- .../src/test/resources/editsStored | Bin 4268 -> 4272 bytes .../src/test/resources/editsStored.xml | 2 +- 11 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index fc34f931b3f..acc623a94c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -522,6 +522,9 @@ Release 2.4.0 - UNRELEASED as a collection of storages (see breakdown of tasks below for features and contributors). + HDFS-5784. reserve space in edit log header and fsimage header for feature + flag section (cmccabe) + IMPROVEMENTS HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java new file mode 100644 index 00000000000..fc1717f26a5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutFlags.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.protocol; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; + +/** + * LayoutFlags represent features which the FSImage and edit logs can either + * support or not, independently of layout version. + * + * Note: all flags starting with 'test' are reserved for unit test purposes. + */ +@InterfaceAudience.Private +public class LayoutFlags { + /** + * Load a LayoutFlags object from a stream. + * + * @param in The stream to read from. + * @throws IOException + */ + public static LayoutFlags read(DataInputStream in) + throws IOException { + int length = in.readInt(); + if (length < 0) { + throw new IOException("The length of the feature flag section " + + "was negative at " + length + " bytes."); + } else if (length > 0) { + throw new IOException("Found feature flags which we can't handle. " + + "Please upgrade your software."); + } + return new LayoutFlags(); + } + + private LayoutFlags() { + } + + public static void write(DataOutputStream out) throws IOException { + out.writeInt(0); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index e3a36fe9814..b80ce6f9cdc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -111,7 +111,8 @@ public class LayoutVersion { + "the new block instead of the entire block list"), CACHING(-49, "Support for cache pools and path-based caching"), ADD_DATANODE_AND_STORAGE_UUIDS(-50, "Replace StorageID with DatanodeUuid." 
- + " Use distinct StorageUuid per storage directory."); + + " Use distinct StorageUuid per storage directory."), + ADD_LAYOUT_FLAGS(-51, "Add support for layout flags."); final int lv; final int ancestorLV; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index 494a9dacddf..0cbbf71e2fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -34,6 +34,9 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.LayoutFlags; +import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage.HttpGetFailedException; import org.apache.hadoop.hdfs.web.URLConnectionFactory; @@ -146,6 +149,14 @@ public class EditLogFileInputStream extends EditLogInputStream { } catch (EOFException eofe) { throw new LogHeaderCorruptException("No header found in log"); } + if (LayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, logVersion)) { + try { + LayoutFlags.read(dataIn); + } catch (EOFException eofe) { + throw new LogHeaderCorruptException("EOF while reading layout " + + "flags from log"); + } + } reader = new FSEditLogOp.Reader(dataIn, tracker, logVersion); reader.setMaxOpSize(maxOpSize); state = State.OPEN; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java index 92e375cc236..894fc135bd4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java @@ -32,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.LayoutFlags; import org.apache.hadoop.io.IOUtils; import com.google.common.annotations.VisibleForTesting; @@ -132,6 +133,7 @@ public class EditLogFileOutputStream extends EditLogOutputStream { @VisibleForTesting public static void writeHeader(DataOutputStream out) throws IOException { out.writeInt(HdfsConstants.LAYOUT_VERSION); + LayoutFlags.write(out); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index fd86f81fc4c..385917e8e0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -48,6 +48,7 @@ import org.apache.hadoop.fs.permission.PermissionStatus; 
import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; +import org.apache.hadoop.hdfs.protocol.LayoutFlags; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; @@ -261,6 +262,9 @@ public class FSImageFormat { } boolean supportSnapshot = LayoutVersion.supports(Feature.SNAPSHOT, imgVersion); + if (LayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imgVersion)) { + LayoutFlags.read(in); + } // read namespaceID: first appeared in version -2 in.readInt(); @@ -990,6 +994,7 @@ public class FSImageFormat { DataOutputStream out = new DataOutputStream(fos); try { out.writeInt(HdfsConstants.LAYOUT_VERSION); + LayoutFlags.write(out); // We use the non-locked version of getNamespaceInfo here since // the coordinating thread of saveNamespace already has read-locked // the namespace for us. If we attempt to take another readlock diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java index 8415c6bc30d..c529fb5cdc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java @@ -28,6 +28,7 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; +import org.apache.hadoop.hdfs.protocol.LayoutFlags; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; @@ -126,7 +127,7 @@ class ImageLoaderCurrent implements ImageLoader { new SimpleDateFormat("yyyy-MM-dd HH:mm"); private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, - -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50 }; + -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51 }; private int imageVersion = 0; private final Map subtreeMap = new HashMap(); @@ -157,6 +158,9 @@ class ImageLoaderCurrent implements ImageLoader { imageVersion = in.readInt(); if( !canLoadVersion(imageVersion)) throw new IOException("Cannot process fslayout version " + imageVersion); + if (LayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) { + LayoutFlags.read(in); + } v.visit(ImageElement.IMAGE_VERSION, imageVersion); v.visit(ImageElement.NAMESPACE_ID, in.readInt()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java index e594f6e306f..6faefc45be1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java @@ -191,6 +191,7 @@ public class TestJournalNode { "/getJournal?segmentTxId=1&jid=" + journalId)); byte[] 
expected = Bytes.concat( Ints.toByteArray(HdfsConstants.LAYOUT_VERSION), + (new byte[] { 0, 0, 0, 0 }), // layout flags section EDITS_DATA); assertArrayEquals(expected, retrievedViaHttp); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index 77293119085..833ef95e64b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -377,8 +377,9 @@ public class TestFSEditLogLoader { File testDir = new File(TEST_DIR, "testValidateEmptyEditLog"); SortedMap offsetToTxId = Maps.newTreeMap(); File logFile = prepareUnfinalizedTestEditLog(testDir, 0, offsetToTxId); - // Truncate the file so that there is nothing except the header - truncateFile(logFile, 4); + // Truncate the file so that there is nothing except the header and + // layout flags section. + truncateFile(logFile, 8); EditLogValidation validation = EditLogFileInputStream.validateEditLog(logFile); assertTrue(!validation.hasCorruptHeader()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index c545cfd41492c79aeb2f803f0bfdf39fdf2192a6..c6174327d112c413f165f5bc912915952859e83f 100644 GIT binary patch delta 17 XcmZ3ZxIvNQ|NsAI8Gv9T%L)MiMpp*j delta 13 Vcmdm>xJHrX|NsBzHgc>G001zE2HyYx diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index baebd184d40..3a60b6dc5c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -1,6 +1,6 @@ - -50 + -51 OP_START_LOG_SEGMENT From 67cbde300811289692b4746ceb6f180f2003667b Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Fri, 17 Jan 2014 08:56:22 +0000 Subject: [PATCH 25/32] YARN-1496. 
Protocol additions to allow moving apps between queues (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559050 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop/mapred/TestClientRedirect.java | 9 + hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/api/ApplicationClientProtocol.java | 15 ++ .../MoveApplicationAcrossQueuesRequest.java | 69 ++++++++ .../MoveApplicationAcrossQueuesResponse.java | 47 ++++++ .../proto/applicationclient_protocol.proto | 1 + .../src/main/proto/yarn_service_protos.proto | 8 + ...ApplicationClientProtocolPBClientImpl.java | 21 +++ ...pplicationClientProtocolPBServiceImpl.java | 21 +++ ...eApplicationAcrossQueuesRequestPBImpl.java | 158 ++++++++++++++++++ ...ApplicationAcrossQueuesResponsePBImpl.java | 68 ++++++++ .../resourcemanager/ClientRMService.java | 8 + 12 files changed, 428 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesRequest.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesResponse.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesRequestPBImpl.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesResponsePBImpl.java diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index d85a23d6179..5eb3df17f2c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -90,6 +90,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; @@ -99,6 +101,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -354,6 +357,12 @@ public class TestClientRedirect { CancelDelegationTokenRequest request) throws 
IOException { return null; } + + @Override + public MoveApplicationAcrossQueuesResponse moveApplicationAcrossQueues( + MoveApplicationAcrossQueuesRequest request) throws YarnException, IOException { + return null; + } } class HistoryService extends AMService implements HSClientProtocol { diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 757900eae96..305a8217cf7 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -6,6 +6,9 @@ Trunk - Unreleased NEW FEATURES + YARN-1496. Protocol additions to allow moving apps between queues (Sandy + Ryza) + IMPROVEMENTS OPTIMIZATIONS diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java index 864980bd1d4..0abaafb0966 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java @@ -44,6 +44,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; @@ -339,4 +341,17 @@ public interface ApplicationClientProtocol { public CancelDelegationTokenResponse cancelDelegationToken( CancelDelegationTokenRequest request) throws YarnException, IOException; + + /** + * Move an application to a new queue. + * + * @param request the application ID and the target queue + * @return an empty response + * @throws YarnException + * @throws IOException + */ + @Public + @Unstable + public MoveApplicationAcrossQueuesResponse moveApplicationAcrossQueues( + MoveApplicationAcrossQueuesRequest request) throws YarnException, IOException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesRequest.java new file mode 100644 index 00000000000..528996f95ed --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesRequest.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.util.Records; + +/** + *

The request sent by the client to the ResourceManager + * to move a submitted application to a different queue. + * + * The request includes the {@link ApplicationId} of the application to be + * moved and the queue to place it in.
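+ * + * A request would typically be built with the factory method declared below, e.g. + * MoveApplicationAcrossQueuesRequest.newInstance(appId, "default"), where appId and + * the queue name "default" are purely illustrative.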
+ * + * @see ApplicationClientProtocol#moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest) + */ +@Public +@Unstable +public abstract class MoveApplicationAcrossQueuesRequest { + public static MoveApplicationAcrossQueuesRequest newInstance(ApplicationId appId, String queue) { + MoveApplicationAcrossQueuesRequest request = + Records.newRecord(MoveApplicationAcrossQueuesRequest.class); + request.setApplicationId(appId); + request.setTargetQueue(queue); + return request; + } + + /** + * Get the ApplicationId of the application to be moved. + * @return ApplicationId of the application to be moved + */ + public abstract ApplicationId getApplicationId(); + + /** + * Set the ApplicationId of the application to be moved. + * @param appId ApplicationId of the application to be moved + */ + public abstract void setApplicationId(ApplicationId appId); + + /** + * Get the queue to place the application in. + * @return the name of the queue to place the application in + */ + public abstract String getTargetQueue(); + + /** + * Get the queue to place the application in. + * @param queue the name of the queue to place the application in + */ + public abstract void setTargetQueue(String queue); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesResponse.java new file mode 100644 index 00000000000..109e7c4f71a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/MoveApplicationAcrossQueuesResponse.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.ApplicationClientProtocol; +import org.apache.hadoop.yarn.util.Records; + +/** + *

+ * The response sent by the ResourceManager to the client moving + * a submitted application to a different queue. + * + * A response without exception means that the move has completed successfully.
+ * + * @see ApplicationClientProtocol#moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequest) + */ +@Public +@Unstable +public class MoveApplicationAcrossQueuesResponse { + @Private + @Unstable + public MoveApplicationAcrossQueuesResponse newInstance() { + MoveApplicationAcrossQueuesResponse response = + Records.newRecord(MoveApplicationAcrossQueuesResponse.class); + return response; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto index af18c879521..eda2641710d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto @@ -44,5 +44,6 @@ service ApplicationClientProtocolService { rpc getDelegationToken(hadoop.common.GetDelegationTokenRequestProto) returns (hadoop.common.GetDelegationTokenResponseProto); rpc renewDelegationToken(hadoop.common.RenewDelegationTokenRequestProto) returns (hadoop.common.RenewDelegationTokenResponseProto); rpc cancelDelegationToken(hadoop.common.CancelDelegationTokenRequestProto) returns (hadoop.common.CancelDelegationTokenResponseProto); + rpc moveApplicationAcrossQueues(MoveApplicationAcrossQueuesRequestProto) returns (MoveApplicationAcrossQueuesResponseProto); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index dc97eecdc7a..68d914e961a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -127,6 +127,14 @@ message GetClusterMetricsResponseProto { optional YarnClusterMetricsProto cluster_metrics = 1; } +message MoveApplicationAcrossQueuesRequestProto { + required ApplicationIdProto application_id = 1; + required string target_queue = 2; +} + +message MoveApplicationAcrossQueuesResponseProto { +} + message GetApplicationsRequestProto { repeated string application_types = 1; repeated YarnApplicationStateProto application_states = 2; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java index 88352eac453..d5243dc6ffb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java @@ -51,6 +51,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; 
import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; @@ -75,6 +77,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoRe import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.MoveApplicationAcrossQueuesRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.MoveApplicationAcrossQueuesResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RenewDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RenewDelegationTokenResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; @@ -89,6 +93,7 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationRequestPr import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.MoveApplicationAcrossQueuesRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import com.google.protobuf.ServiceException; @@ -291,4 +296,20 @@ public class ApplicationClientProtocolPBClientImpl implements ApplicationClientP return null; } } + + @Override + public MoveApplicationAcrossQueuesResponse moveApplicationAcrossQueues( + MoveApplicationAcrossQueuesRequest request) throws YarnException, + IOException { + MoveApplicationAcrossQueuesRequestProto requestProto = + ((MoveApplicationAcrossQueuesRequestPBImpl) request).getProto(); + try { + return new MoveApplicationAcrossQueuesResponsePBImpl( + proxy.moveApplicationAcrossQueues(null, requestProto)); + + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java index b38819dfab0..61068e8b134 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java @@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; @@ 
-61,6 +62,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoRe import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.MoveApplicationAcrossQueuesRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.MoveApplicationAcrossQueuesResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RenewDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RenewDelegationTokenResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; @@ -82,6 +85,8 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoReques import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.MoveApplicationAcrossQueuesRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.MoveApplicationAcrossQueuesResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationResponseProto; @@ -278,4 +283,20 @@ public class ApplicationClientProtocolPBServiceImpl implements ApplicationClient throw new ServiceException(e); } } + + @Override + public MoveApplicationAcrossQueuesResponseProto moveApplicationAcrossQueues( + RpcController controller, MoveApplicationAcrossQueuesRequestProto proto) + throws ServiceException { + MoveApplicationAcrossQueuesRequestPBImpl request = + new MoveApplicationAcrossQueuesRequestPBImpl(proto); + try { + MoveApplicationAcrossQueuesResponse response = real.moveApplicationAcrossQueues(request); + return ((MoveApplicationAcrossQueuesResponsePBImpl)response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesRequestPBImpl.java new file mode 100644 index 00000000000..1aaefb3345e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesRequestPBImpl.java @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.MoveApplicationAcrossQueuesRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.MoveApplicationAcrossQueuesRequestProtoOrBuilder; + +import com.google.protobuf.TextFormat; + +@Private +@Unstable +public class MoveApplicationAcrossQueuesRequestPBImpl extends MoveApplicationAcrossQueuesRequest { + MoveApplicationAcrossQueuesRequestProto proto = MoveApplicationAcrossQueuesRequestProto.getDefaultInstance(); + MoveApplicationAcrossQueuesRequestProto.Builder builder = null; + boolean viaProto = false; + + private ApplicationId applicationId; + private String targetQueue; + + public MoveApplicationAcrossQueuesRequestPBImpl() { + builder = MoveApplicationAcrossQueuesRequestProto.newBuilder(); + } + + public MoveApplicationAcrossQueuesRequestPBImpl(MoveApplicationAcrossQueuesRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + public MoveApplicationAcrossQueuesRequestProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public ApplicationId getApplicationId() { + if (this.applicationId != null) { + return this.applicationId; + } + + MoveApplicationAcrossQueuesRequestProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasApplicationId()) { + return null; + } + + this.applicationId = convertFromProtoFormat(p.getApplicationId()); + return this.applicationId; + } + + @Override + public void setApplicationId(ApplicationId appId) { + maybeInitBuilder(); + if (applicationId == null) { + builder.clearApplicationId(); + } + applicationId = appId; + } + + @Override + public String getTargetQueue() { + if (this.targetQueue != null) { + return this.targetQueue; + } + + MoveApplicationAcrossQueuesRequestProtoOrBuilder p = viaProto ? 
proto : builder; + if (!p.hasApplicationId()) { + return null; + } + + this.targetQueue = p.getTargetQueue(); + return this.targetQueue; + } + + @Override + public void setTargetQueue(String queue) { + maybeInitBuilder(); + if (applicationId == null) { + builder.clearTargetQueue(); + } + targetQueue = queue; + } + + private void mergeLocalToBuilder() { + if (applicationId != null) { + builder.setApplicationId(convertToProtoFormat(this.applicationId)); + } + if (targetQueue != null) { + builder.setTargetQueue(this.targetQueue); + } + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = MoveApplicationAcrossQueuesRequestProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } + + private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) { + return new ApplicationIdPBImpl(p); + } + + private ApplicationIdProto convertToProtoFormat(ApplicationId t) { + return ((ApplicationIdPBImpl)t).getProto(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesResponsePBImpl.java new file mode 100644 index 00000000000..ab1d2ae8012 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/MoveApplicationAcrossQueuesResponsePBImpl.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.MoveApplicationAcrossQueuesResponseProto; + +import com.google.protobuf.TextFormat; + +@Private +@Unstable +public class MoveApplicationAcrossQueuesResponsePBImpl extends MoveApplicationAcrossQueuesResponse { + MoveApplicationAcrossQueuesResponseProto proto = MoveApplicationAcrossQueuesResponseProto.getDefaultInstance(); + MoveApplicationAcrossQueuesResponseProto.Builder builder = null; + boolean viaProto = false; + + public MoveApplicationAcrossQueuesResponsePBImpl() { + builder = MoveApplicationAcrossQueuesResponseProto.newBuilder(); + } + + public MoveApplicationAcrossQueuesResponsePBImpl(MoveApplicationAcrossQueuesResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public MoveApplicationAcrossQueuesResponseProto getProto() { + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) + return false; + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index cd2226fc3fa..1df67f82a86 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -64,6 +64,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; @@ -683,6 +685,12 @@ public class ClientRMService extends AbstractService implements throw RPCUtil.getRemoteException(e); } } + + @Override + public MoveApplicationAcrossQueuesResponse moveApplicationAcrossQueues( + MoveApplicationAcrossQueuesRequest request) throws YarnException { + throw new UnsupportedOperationException("Move not yet supported"); + } private String getRenewerForToken(Token token) throws IOException { From 
f667371746de02c52434af6e48f3e2a4f83917ac Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Fri, 17 Jan 2014 17:43:00 +0000 Subject: [PATCH 26/32] MAPREDUCE-5650. Job fails when hprof mapreduce.task.profile.map/reduce.params is specified (Gera Shegalov via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559201 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../hadoop/mapred/MapReduceChildJVM.java | 18 +- .../src/main/resources/mapred-default.xml | 25 ++ .../mapreduce/v2/TestMRJobsWithProfiler.java | 244 ++++++++++++++++++ 4 files changed, 277 insertions(+), 13 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithProfiler.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 42cbc0e3ae3..338a160b16b 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -334,6 +334,9 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-5674. Missing start and finish time in mapred.JobStatus. (Chuan Liu via cnauroth) + MAPREDUCE-5650. Job fails when hprof mapreduce.task.profile.map/reduce.params + is specified (Gera Shegalov via Sandy Ryza) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java index 2302490aaa8..20de915356f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java @@ -212,19 +212,11 @@ public class MapReduceChildJVM { if (conf.getProfileEnabled()) { if (conf.getProfileTaskRange(task.isMapTask() ).isIncluded(task.getPartition())) { - vargs.add( - String.format( - conf.getProfileParams(), - getTaskLogFile(TaskLog.LogName.PROFILE) - ) - ); - if (task.isMapTask()) { - vargs.add(conf.get(MRJobConfig.TASK_MAP_PROFILE_PARAMS, "")); - } - else { - vargs.add(conf.get(MRJobConfig.TASK_REDUCE_PROFILE_PARAMS, "")); - } - + final String profileParams = conf.get(task.isMapTask() + ? MRJobConfig.TASK_MAP_PROFILE_PARAMS + : MRJobConfig.TASK_REDUCE_PROFILE_PARAMS, conf.getProfileParams()); + vargs.add(String.format(profileParams, + getTaskLogFile(TaskLog.LogName.PROFILE))); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 4f1b20cfeaa..dea2adf1350 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -602,6 +602,31 @@
+ +<property> + <name>mapreduce.task.profile.params</name> + <value></value> + <description>JVM profiler parameters used to profile map and reduce task + attempts. This string may contain a single format specifier %s that will + be replaced by the path to profile.out in the task attempt log directory. + To specify different profiling options for map tasks and reduce tasks, + more specific parameters mapreduce.task.profile.map.params and + mapreduce.task.profile.reduce.params should be used. + </description> +</property> + +<property> + <name>mapreduce.task.profile.map.params</name> + <value>${mapreduce.task.profile.params}</value> + <description>Map-task-specific JVM profiler parameters. See + mapreduce.task.profile.params + </description> +</property> + +<property> + <name>mapreduce.task.profile.reduce.params</name> + <value>${mapreduce.task.profile.params}</value> + <description>Reduce-task-specific JVM profiler parameters. See + mapreduce.task.profile.params + </description> +</property> + <property> <name>mapreduce.task.skip.start.attempts</name> <value>2</value> diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithProfiler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithProfiler.java new file mode 100644 index 00000000000..de17528d7fc --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobsWithProfiler.java @@ -0,0 +1,244 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License.
+*/ + +package org.apache.hadoop.mapreduce.v2; + +import java.io.*; +import java.util.*; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import junit.framework.Assert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.SleepJob; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.mapreduce.*; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestMRJobsWithProfiler { + + private static final Log LOG = + LogFactory.getLog(TestMRJobsWithProfiler.class); + + private static final EnumSet TERMINAL_RM_APP_STATES = + EnumSet.of(RMAppState.FINISHED, RMAppState.FAILED, RMAppState.KILLED); + + private static MiniMRYarnCluster mrCluster; + + private static final Configuration CONF = new Configuration(); + private static final FileSystem localFs; + static { + try { + localFs = FileSystem.getLocal(CONF); + } catch (IOException io) { + throw new RuntimeException("problem getting local fs", io); + } + } + + private static final Path TEST_ROOT_DIR = + new Path("target", TestMRJobs.class.getName() + "-tmpDir"). + makeQualified(localFs.getUri(), localFs.getWorkingDirectory()); + + private static final Path APP_JAR = new Path(TEST_ROOT_DIR, "MRAppJar.jar"); + + @Before + public void setup() throws InterruptedException, IOException { + + if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { + LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + + " not found. Not running test."); + return; + } + + if (mrCluster == null) { + mrCluster = new MiniMRYarnCluster(getClass().getName()); + mrCluster.init(CONF); + mrCluster.start(); + } + + // Copy MRAppJar and make it private. TODO: FIXME. This is a hack to + // workaround the absent public discache. + localFs.copyFromLocalFile(new Path(MiniMRYarnCluster.APPJAR), APP_JAR); + localFs.setPermission(APP_JAR, new FsPermission("700")); + } + + @After + public void tearDown() { + if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { + LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + + " not found. Not running test."); + return; + } + + if (mrCluster != null) { + mrCluster.stop(); + } + } + + + @Test (timeout = 120000) + public void testProfiler() throws IOException, InterruptedException, + ClassNotFoundException { + if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { + LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + + " not found. 
Not running test."); + return; + } + + final SleepJob sleepJob = new SleepJob(); + final JobConf sleepConf = new JobConf(mrCluster.getConfig()); + + sleepConf.setProfileEnabled(true); + // profile map split 1 + sleepConf.setProfileTaskRange(true, "1"); + // profile reduce of map output partitions 1 + sleepConf.setProfileTaskRange(false, "1"); + + // use hprof for map to profile.out + sleepConf.set(MRJobConfig.TASK_MAP_PROFILE_PARAMS, + "-agentlib:hprof=cpu=times,heap=sites,force=n,thread=y,verbose=n," + + "file=%s"); + + // use Xprof for reduce to stdout + sleepConf.set(MRJobConfig.TASK_REDUCE_PROFILE_PARAMS, "-Xprof"); + sleepJob.setConf(sleepConf); + + // 2-map-2-reduce SleepJob + final Job job = sleepJob.createJob(2, 2, 500, 1, 500, 1); + job.setJarByClass(SleepJob.class); + job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. + job.waitForCompletion(true); + final JobId jobId = TypeConverter.toYarn(job.getJobID()); + final ApplicationId appID = jobId.getAppId(); + int pollElapsed = 0; + while (true) { + Thread.sleep(1000); + pollElapsed += 1000; + + if (TERMINAL_RM_APP_STATES.contains( + mrCluster.getResourceManager().getRMContext().getRMApps().get(appID) + .getState())) { + break; + } + + if (pollElapsed >= 60000) { + LOG.warn("application did not reach terminal state within 60 seconds"); + break; + } + } + Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager() + .getRMContext().getRMApps().get(appID).getState()); + + // Job finished, verify logs + // + final Configuration nmConf = mrCluster.getNodeManager(0).getConfig(); + + final String appIdStr = appID.toString(); + final String appIdSuffix = appIdStr.substring( + "application_".length(), appIdStr.length()); + final String containerGlob = "container_" + appIdSuffix + "_*_*"; + + final Map taLogDirs = new HashMap(); + final Pattern taskPattern = Pattern.compile( + ".*Task:(attempt_" + + appIdSuffix + "_[rm]_" + "[0-9]+_[0-9]+).*"); + for (String logDir : + nmConf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)) + { + // filter out MRAppMaster and create attemptId->logDir map + // + for (FileStatus fileStatus : + localFs.globStatus(new Path(logDir + + Path.SEPARATOR + appIdStr + + Path.SEPARATOR + containerGlob + + Path.SEPARATOR + TaskLog.LogName.SYSLOG))) + { + final BufferedReader br = new BufferedReader( + new InputStreamReader(localFs.open(fileStatus.getPath()))); + String line; + while ((line = br.readLine()) != null) { + final Matcher m = taskPattern.matcher(line); + if (m.matches()) { + // found Task done message + taLogDirs.put(TaskAttemptID.forName(m.group(1)), + fileStatus.getPath().getParent()); + break; + } + } + br.close(); + } + } + + Assert.assertEquals(4, taLogDirs.size()); // all 4 attempts found + + for (Map.Entry dirEntry : taLogDirs.entrySet()) { + final TaskAttemptID tid = dirEntry.getKey(); + final Path profilePath = new Path(dirEntry.getValue(), + TaskLog.LogName.PROFILE.toString()); + final Path stdoutPath = new Path(dirEntry.getValue(), + TaskLog.LogName.STDOUT.toString()); + if (tid.getTaskType() == TaskType.MAP) { + if (tid.getTaskID().getId() == 1) { + // verify profile.out + final BufferedReader br = new BufferedReader(new InputStreamReader( + localFs.open(profilePath))); + final String line = br.readLine(); + Assert.assertTrue("No hprof content found!", + line !=null && line.startsWith("JAVA PROFILE")); + br.close(); + Assert.assertEquals(0L, localFs.getFileStatus(stdoutPath).getLen()); + } else { + Assert.assertFalse("hprof file should not exist", + 
localFs.exists(profilePath)); + } + } else { + Assert.assertFalse("hprof file should not exist", + localFs.exists(profilePath)); + if (tid.getTaskID().getId() == 1) { + final BufferedReader br = new BufferedReader(new InputStreamReader( + localFs.open(stdoutPath))); + boolean flatProfFound = false; + String line; + while ((line = br.readLine()) != null) { + if (line.startsWith("Flat profile")) { + flatProfFound = true; + break; + } + } + br.close(); + Assert.assertTrue("Xprof flat profile not found!", flatProfFound); + } else { + Assert.assertEquals(0L, localFs.getFileStatus(stdoutPath).getLen()); + } + } + } + } +} From fb01200153db9121fd4367dc38d60e78d2ffcfae Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Fri, 17 Jan 2014 18:18:36 +0000 Subject: [PATCH 27/32] HDFS-5794. Fix the inconsistency of layout version number of ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. Contributed by Jing Zhao. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559209 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index acc623a94c2..a31c9fa136f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -772,6 +772,9 @@ Release 2.4.0 - UNRELEASED HDFS-5579. Under construction files make DataNode decommission take very long hours. (zhaoyunjiong via jing9) + HDFS-5794. Fix the inconsistency of layout version number of + ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. (jing9) + BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS HDFS-4985. Add storage type to the protocol and expose it in block report diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index b80ce6f9cdc..923ed70ac8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -109,10 +109,10 @@ public class LayoutVersion { + "enable rebuilding retry cache in case of HA failover"), EDITLOG_ADD_BLOCK(-48, "Add new editlog that only records allocation of " + "the new block instead of the entire block list"), - CACHING(-49, "Support for cache pools and path-based caching"), - ADD_DATANODE_AND_STORAGE_UUIDS(-50, "Replace StorageID with DatanodeUuid." + ADD_DATANODE_AND_STORAGE_UUIDS(-49, "Replace StorageID with DatanodeUuid." 
+ " Use distinct StorageUuid per storage directory."), - ADD_LAYOUT_FLAGS(-51, "Add support for layout flags."); + ADD_LAYOUT_FLAGS(-50, "Add support for layout flags."), + CACHING(-51, "Support for cache pools and path-based caching"); final int lv; final int ancestorLV; From ebb9a704bd7c7c7ddb8a44c74bb1107425ee0e83 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Fri, 17 Jan 2014 18:29:38 +0000 Subject: [PATCH 28/32] Move HDFS-5704 and HDFS-5777 to the correct section in CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559210 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index a31c9fa136f..1345d90eeb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -484,9 +484,8 @@ Trunk (Unreleased) HDFS-5775. Consolidate the code for serialization in CacheManager (Haohui Mai via brandonli) - HDFS-5704. Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK. (jing9) - - HDFS-5777. Update LayoutVersion for the new editlog op OP_ADD_BLOCK. (jing9) + HDFS-5794. Fix the inconsistency of layout version number of + ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. (jing9) Release 2.4.0 - UNRELEASED @@ -694,6 +693,8 @@ Release 2.4.0 - UNRELEASED InvalidEncryptionKeyException in fetchBlockByteRange (Liang Xie via Colin Patrick McCabe) + HDFS-5704. Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK. (jing9) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) @@ -772,8 +773,7 @@ Release 2.4.0 - UNRELEASED HDFS-5579. Under construction files make DataNode decommission take very long hours. (zhaoyunjiong via jing9) - HDFS-5794. Fix the inconsistency of layout version number of - ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. (jing9) + HDFS-5777. Update LayoutVersion for the new editlog op OP_ADD_BLOCK. (jing9) BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS From cdc54c87599867a8e4c6dea97049b94f6ecbafc6 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Fri, 17 Jan 2014 19:30:27 +0000 Subject: [PATCH 29/32] HADOOP-10235. Hadoop tarball has 2 versions of stax-api JARs. (tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559230 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ hadoop-project/pom.xml | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 0ec656b8b8f..0ea6bb6938e 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -528,6 +528,8 @@ Release 2.4.0 - UNRELEASED HADOOP-10125. no need to process RPC request if the client connection has been dropped (Ming Ma via brandonli) + HADOOP-10235. Hadoop tarball has 2 versions of stax-api JARs. 
(tucu) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index bb718cdf209..84858c386db 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -422,7 +422,7 @@ ${jersey.version} - javax.xml.stream + stax stax-api @@ -770,6 +770,12 @@ org.codehaus.jettison jettison 1.1 + + + stax + stax-api + + com.sun.jersey From 0fd646b967443f44c237c95f93e03cb0a6a57f8d Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Fri, 17 Jan 2014 21:38:37 +0000 Subject: [PATCH 30/32] MAPREDUCE-5717. Task pings are interpreted as task progress. Contributed by Jason Lowe git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559256 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 2 + .../mapred/TaskAttemptListenerImpl.java | 1 - .../mapred/TestTaskAttemptListenerImpl.java | 46 +++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 338a160b16b..d9a02feca67 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -146,6 +146,8 @@ Trunk (Unreleased) MAPREDUCE-5191. TestQueue#testQueue fails with timeout on Windows. (Ivan Mitic via hitesh) + MAPREDUCE-5717. Task pings are interpreted as task progress (jlowe) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java index 9f4b9c7ec63..03b6e52db8e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java @@ -361,7 +361,6 @@ public class TaskAttemptListenerImpl extends CompositeService if (taskStatus == null) { //We are using statusUpdate only as a simple ping LOG.info("Ping from " + taskAttemptID.toString()); - taskHeartbeatHandler.progressing(yarnAttemptID); return feedback; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java index 6563cda9aec..256f0b7bb7a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java @@ -381,4 +381,50 @@ public class TestTaskAttemptListenerImpl { } + @SuppressWarnings("rawtypes") + @Test + public void testStatusUpdateProgress() + throws IOException, InterruptedException { + AppContext appCtx = mock(AppContext.class); + JobTokenSecretManager secret = mock(JobTokenSecretManager.class); + RMHeartbeatHandler rmHeartbeatHandler = + mock(RMHeartbeatHandler.class); + TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class); + Dispatcher dispatcher = mock(Dispatcher.class); + EventHandler ea = mock(EventHandler.class); + 
when(dispatcher.getEventHandler()).thenReturn(ea); + + when(appCtx.getEventHandler()).thenReturn(ea); + CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy(); + policy.init(appCtx); + MockTaskAttemptListenerImpl listener = + new MockTaskAttemptListenerImpl(appCtx, secret, + rmHeartbeatHandler, hbHandler, policy); + Configuration conf = new Configuration(); + listener.init(conf); + listener.start(); + JVMId id = new JVMId("foo",1, true, 1); + WrappedJvmID wid = new WrappedJvmID(id.getJobId(), id.isMap, id.getId()); + + TaskAttemptID attemptID = new TaskAttemptID("1", 1, TaskType.MAP, 1, 1); + TaskAttemptId attemptId = TypeConverter.toYarn(attemptID); + Task task = mock(Task.class); + listener.registerPendingTask(task, wid); + listener.registerLaunchedTask(attemptId, wid); + verify(hbHandler).register(attemptId); + + // make sure a ping doesn't report progress + AMFeedback feedback = listener.statusUpdate(attemptID, null); + assertTrue(feedback.getTaskFound()); + verify(hbHandler, never()).progressing(eq(attemptId)); + + // make sure a status update does report progress + MapTaskStatus mockStatus = new MapTaskStatus(attemptID, 0.0f, 1, + TaskStatus.State.RUNNING, "", "RUNNING", "", TaskStatus.Phase.MAP, + new Counters()); + feedback = listener.statusUpdate(attemptID, mockStatus); + assertTrue(feedback.getTaskFound()); + verify(hbHandler).progressing(eq(attemptId)); + listener.close(); + } } From d7979b8b35156301ed1c1a5bcad73a75c8ea3034 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Fri, 17 Jan 2014 23:27:00 +0000 Subject: [PATCH 31/32] HADOOP-10240. Windows build instructions incorrectly state requirement of protoc 2.4.1 instead of 2.5.0. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559278 13f79535-47bb-0310-9956-ffa450edef68 --- BUILDING.txt | 8 ++++---- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index 2deb2ac2790..54e6ddd6315 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -183,11 +183,11 @@ Building on Windows Requirements: * Windows System -* JDK 1.6 -* Maven 3.0 -* Windows SDK or Visual Studio 2010 Professional -* ProtocolBuffer 2.4.1+ (for MapReduce and HDFS) +* JDK 1.6+ +* Maven 3.0 or later * Findbugs 1.3.9 (if running findbugs) +* ProtocolBuffer 2.5.0 +* Windows SDK or Visual Studio 2010 Professional * Unix command-line tools from GnuWin32 or Cygwin: sh, mkdir, rm, cp, tar, gzip * Internet connection for first build (to fetch all Maven and Hadoop dependencies) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 0ea6bb6938e..ab78efe5362 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -620,6 +620,9 @@ Release 2.3.0 - UNRELEASED HADOOP-10234. "hadoop.cmd jar" does not propagate exit code. (cnauroth) + HADOOP-10240. Windows build instructions incorrectly state requirement of + protoc 2.4.1 instead of 2.5.0. 
(cnauroth) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES From 4b0ae92c19803d094ee89420fc05a485427b502d Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Fri, 17 Jan 2014 23:42:03 +0000 Subject: [PATCH 32/32] Update hadoop-common/CHANGES.txt for HADOOP-10112 in branch2.3 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1559286 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index ab78efe5362..12d9a19b8f6 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -623,6 +623,8 @@ Release 2.3.0 - UNRELEASED HADOOP-10240. Windows build instructions incorrectly state requirement of protoc 2.4.1 instead of 2.5.0. (cnauroth) + HADOOP-10112. har file listing doesn't work with wild card. (brandonli) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES
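A note on the MoveApplicationAcrossQueuesResponsePBImpl class added earlier in this series: the wrapper builds its protobuf lazily (a builder until getProto() is called, then viaProto), and equals()/hashCode() delegate to the built proto, so two wrappers compare equal exactly when their wire form is identical. The snippet below is a minimal round-trip sketch that uses only the constructors and methods visible in that patch; since the response carries no fields, this is essentially its whole surface. The class name MoveResponseRoundTripSketch is invented for illustration.

import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.MoveApplicationAcrossQueuesResponsePBImpl;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.MoveApplicationAcrossQueuesResponseProto;

public class MoveResponseRoundTripSketch {
  public static void main(String[] args) {
    // Build an empty response and force it into proto form, as the server-side RPC layer would.
    MoveApplicationAcrossQueuesResponsePBImpl original =
        new MoveApplicationAcrossQueuesResponsePBImpl();
    MoveApplicationAcrossQueuesResponseProto wire = original.getProto();

    // Rebuild a wrapper from the received proto, as the client-side stub would.
    MoveApplicationAcrossQueuesResponsePBImpl decoded =
        new MoveApplicationAcrossQueuesResponsePBImpl(wire);

    // equals()/hashCode() compare the underlying protos, so the round trip is lossless.
    System.out.println(original.equals(decoded));                   // true
    System.out.println(original.hashCode() == decoded.hashCode());  // true
    System.out.println("decoded = " + decoded);                     // TextFormat.shortDebugString of the proto
  }
}

Note that invoking the move against the ClientRMService shown in the same patch still fails with UnsupportedOperationException("Move not yet supported"); the records make the protocol wire-complete before scheduler-side support lands.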
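The MAPREDUCE-5650 hunk in MapReduceChildJVM replaces two appended values (the generic profile params plus a possibly empty task-type-specific one) with a single lookup: the map- or reduce-specific key wins, otherwise mapreduce.task.profile.params is used, and the lone %s is expanded to the attempt's profile.out path. The following is an illustrative, self-contained sketch of that resolution; it deliberately avoids the real Configuration and MRJobConfig classes, the default hprof string is assumed to mirror the long-standing JobConf.getProfileParams() default, and the class and method names are invented.

import java.util.HashMap;
import java.util.Map;

/** Illustrative sketch of the MAPREDUCE-5650 fallback; not the real MapReduceChildJVM code. */
public class ProfileParamsSketch {

  // Property names as they appear in the patch and in mapred-default.xml.
  static final String PROFILE_PARAMS        = "mapreduce.task.profile.params";
  static final String MAP_PROFILE_PARAMS    = "mapreduce.task.profile.map.params";
  static final String REDUCE_PROFILE_PARAMS = "mapreduce.task.profile.reduce.params";

  // Assumed default, mirroring the historical JobConf.getProfileParams() hprof options.
  static final String DEFAULT_PROFILE_PARAMS =
      "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s";

  /** Resolve the single profiler JVM argument for a task attempt. */
  static String resolveProfileArgs(Map<String, String> conf, boolean isMapTask,
                                   String profileLogFile) {
    // The generic value (or its default) is only the fallback...
    String generic = conf.getOrDefault(PROFILE_PARAMS, DEFAULT_PROFILE_PARAMS);
    // ...the task-type-specific key, when set, takes precedence: the core of the fix.
    String params = conf.getOrDefault(
        isMapTask ? MAP_PROFILE_PARAMS : REDUCE_PROFILE_PARAMS, generic);
    // Expand the single %s placeholder to the profile output path in the log dir.
    return String.format(params, profileLogFile);
  }

  public static void main(String[] args) {
    Map<String, String> conf = new HashMap<>();
    conf.put(REDUCE_PROFILE_PARAMS, "-Xprof");  // reduces: flat profile to stdout, no %s needed

    System.out.println(resolveProfileArgs(conf, true,  "<LOG_DIR>/profile.out"));
    System.out.println(resolveProfileArgs(conf, false, "<LOG_DIR>/profile.out"));
  }
}

With this resolution, a reduce-side "-Xprof" no longer has an extra, separately appended hprof argument tacked onto the child JVM command line, which is the failure mode MAPREDUCE-5650 describes; TestMRJobsWithProfiler exercises exactly that mixed hprof/Xprof configuration.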
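On the HDFS-5794 hunk: HDFS layout versions are negative integers that decrease as features are added, and the on-disk number alone tells a NameNode which features a stored image or edit log may contain. Trunk had assigned -49 to CACHING while branch-2 used -49 for ADD_DATANODE_AND_STORAGE_UUIDS, so the same number named different feature sets on the two lines; the patch reorders the constants so they agree. Below is a toy model of the compatibility check, not the real LayoutVersion class; the class, map, and method names are invented for illustration.

import java.util.LinkedHashMap;
import java.util.Map;

/** Toy model of HDFS layout-version/feature bookkeeping; not the real LayoutVersion class. */
public class LayoutVersionSketch {

  // Feature name -> the (negative) layout version that introduced it, in the post-HDFS-5794 order.
  static final Map<String, Integer> FEATURES = new LinkedHashMap<>();
  static {
    FEATURES.put("EDITLOG_ADD_BLOCK", -48);
    FEATURES.put("ADD_DATANODE_AND_STORAGE_UUIDS", -49);
    FEATURES.put("ADD_LAYOUT_FLAGS", -50);
    FEATURES.put("CACHING", -51);
  }

  /**
   * A storage directory written at layoutVersion may contain every feature whose
   * introducing version is >= layoutVersion (the numbers grow more negative over time).
   */
  static boolean supports(String feature, int layoutVersion) {
    return layoutVersion <= FEATURES.get(feature);
  }

  public static void main(String[] args) {
    // An image written at -50 has storage UUIDs and layout flags, but no caching state yet.
    System.out.println(supports("ADD_DATANODE_AND_STORAGE_UUIDS", -50)); // true
    System.out.println(supports("CACHING", -50));                        // false
    // If trunk and branch-2 disagree on which feature -49 means, this mapping breaks across upgrades.
  }
}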
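The MAPREDUCE-5717 change removes the progressing() call from the null-status "ping" path in TaskAttemptListenerImpl.statusUpdate, so a ping only proves the child JVM is alive; it no longer resets the progress timer, and a genuinely hung task can still be timed out via mapreduce.task.timeout. The new test asserts exactly that: a ping must not trigger progressing(), a real status update must. Below is a self-contained toy model of the distinction; every class and method name here is invented for illustration and stands in for TaskAttemptListenerImpl and TaskHeartbeatHandler.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/** Toy model of "ping is not progress"; not the real MapReduce AM classes. */
public class PingVsProgressSketch {

  static class HeartbeatTracker {
    private final Map<String, Long> lastProgress = new ConcurrentHashMap<>();

    void register(String attemptId) {
      lastProgress.put(attemptId, System.currentTimeMillis());
    }

    // Invoked only for real status updates, mirroring taskHeartbeatHandler.progressing().
    void progressing(String attemptId) {
      lastProgress.put(attemptId, System.currentTimeMillis());
    }

    boolean timedOut(String attemptId, long timeoutMs) {
      Long last = lastProgress.get(attemptId);
      return last == null || System.currentTimeMillis() - last > timeoutMs;
    }
  }

  /** Returns true if the attempt is known; a null status means "just a ping". */
  static boolean statusUpdate(HeartbeatTracker tracker, String attemptId, Object status) {
    if (status == null) {
      // Ping: acknowledge the attempt but do not touch the progress timestamp.
      return true;
    }
    tracker.progressing(attemptId);  // real progress report
    return true;
  }

  public static void main(String[] args) throws InterruptedException {
    HeartbeatTracker tracker = new HeartbeatTracker();
    tracker.register("attempt_0");
    Thread.sleep(50);

    statusUpdate(tracker, "attempt_0", null);            // ping only
    System.out.println("timed out after pings only: "
        + tracker.timedOut("attempt_0", 10));            // true: pings do not count as progress

    statusUpdate(tracker, "attempt_0", new Object());    // real status update
    System.out.println("timed out after real update: "
        + tracker.timedOut("attempt_0", 10));            // false
  }
}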